From 5191f4d82daf22b7ee9446f83527d2795e225974 Mon Sep 17 00:00:00 2001
From: Arturo Borrero
Date: Thu, 29 Jan 2015 19:34:42 +0100
Subject: netfilter: nft_compat: add ebtables support

This patch extends nft_compat to support ebtables extensions.

ebtables verdict codes are translated to the ones used by the nf_tables engine,
so we can properly use ebtables target extensions from nft_compat.

This patch extends previous work by Giuseppe Longo <giuseppelng@gmail.com>.

Signed-off-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_compat.c | 63 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 57 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 265e190f2218..c598f74063a1 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -19,6 +19,7 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_bridge/ebtables.h>
 #include <net/netfilter/nf_tables.h>
 
 static int nft_compat_chain_validate_dependency(const char *tablename,
@@ -40,6 +41,7 @@ static int nft_compat_chain_validate_dependency(const char *tablename,
 union nft_entry {
 	struct ipt_entry e4;
 	struct ip6t_entry e6;
+	struct ebt_entry ebt;
 };
 
 static inline void
@@ -50,9 +52,9 @@ nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info)
 	par->hotdrop	= false;
 }
 
-static void nft_target_eval(const struct nft_expr *expr,
-			    struct nft_data data[NFT_REG_MAX + 1],
-			    const struct nft_pktinfo *pkt)
+static void nft_target_eval_xt(const struct nft_expr *expr,
+			       struct nft_data data[NFT_REG_MAX + 1],
+			       const struct nft_pktinfo *pkt)
 {
 	void *info = nft_expr_priv(expr);
 	struct xt_target *target = expr->ops->data;
@@ -66,7 +68,7 @@ static void nft_target_eval(const struct nft_expr *expr,
 	if (pkt->xt.hotdrop)
 		ret = NF_DROP;
 
-	switch(ret) {
+	switch (ret) {
 	case XT_CONTINUE:
 		data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
 		break;
@@ -74,7 +76,41 @@ static void nft_target_eval(const struct nft_expr *expr,
 		data[NFT_REG_VERDICT].verdict = ret;
 		break;
 	}
-	return;
+}
+
+static void nft_target_eval_bridge(const struct nft_expr *expr,
+				   struct nft_data data[NFT_REG_MAX + 1],
+				   const struct nft_pktinfo *pkt)
+{
+	void *info = nft_expr_priv(expr);
+	struct xt_target *target = expr->ops->data;
+	struct sk_buff *skb = pkt->skb;
+	int ret;
+
+	nft_compat_set_par((struct xt_action_param *)&pkt->xt, target, info);
+
+	ret = target->target(skb, &pkt->xt);
+
+	if (pkt->xt.hotdrop)
+		ret = NF_DROP;
+
+	switch (ret) {
+	case EBT_ACCEPT:
+		data[NFT_REG_VERDICT].verdict = NF_ACCEPT;
+		break;
+	case EBT_DROP:
+		data[NFT_REG_VERDICT].verdict = NF_DROP;
+		break;
+	case EBT_CONTINUE:
+		data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+		break;
+	case EBT_RETURN:
+		data[NFT_REG_VERDICT].verdict = NFT_RETURN;
+		break;
+	default:
+		data[NFT_REG_VERDICT].verdict = ret;
+		break;
+	}
 }
 
 static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
@@ -100,6 +136,10 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
 		entry->e6.ipv6.proto = proto;
 		entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
 		break;
+	case NFPROTO_BRIDGE:
+		entry->ebt.ethproto = proto;
+		entry->ebt.invflags = inv ? EBT_IPROTO : 0;
+		break;
 	}
 	par->entryinfo	= entry;
 	par->target	= target;
@@ -307,6 +347,10 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
 		entry->e6.ipv6.proto = proto;
 		entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
 		break;
+	case NFPROTO_BRIDGE:
+		entry->ebt.ethproto = proto;
+		entry->ebt.invflags = inv ? EBT_IPROTO : 0;
+		break;
 	}
 	par->entryinfo	= entry;
 	par->match	= match;
@@ -490,6 +534,9 @@ nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb,
 	case AF_INET6:
 		fmt = "ip6t_%s";
 		break;
+	case NFPROTO_BRIDGE:
+		fmt = "ebt_%s";
+		break;
 	default:
 		pr_err("nft_compat: unsupported protocol %d\n",
 			nfmsg->nfgen_family);
@@ -663,13 +710,17 @@ nft_target_select_ops(const struct nft_ctx *ctx,
 
 	nft_target->ops.type = &nft_target_type;
 	nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
-	nft_target->ops.eval = nft_target_eval;
 	nft_target->ops.init = nft_target_init;
 	nft_target->ops.destroy = nft_target_destroy;
 	nft_target->ops.dump = nft_target_dump;
 	nft_target->ops.validate = nft_target_validate;
 	nft_target->ops.data = target;
 
+	if (family == NFPROTO_BRIDGE)
+		nft_target->ops.eval = nft_target_eval_bridge;
+	else
+		nft_target->ops.eval = nft_target_eval_xt;
+
 	list_add(&nft_target->head, &nft_target_list);
 
 	return &nft_target->ops;
-- 
cgit v1.2.3


From 4c1017aa80c95a74703139bb95c4ce0d130efe4d Mon Sep 17 00:00:00 2001
From: Patrick McHardy
Date: Fri, 30 Jan 2015 07:46:33 +0000
Subject: netfilter: nft_lookup: add missing attribute validation for
 NFTA_LOOKUP_SET_ID

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_lookup.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 6404a726d17b..9615b8b9fb37 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -39,6 +39,7 @@ static void nft_lookup_eval(const struct nft_expr *expr,
 
 static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
 	[NFTA_LOOKUP_SET]	= { .type = NLA_STRING },
+	[NFTA_LOOKUP_SET_ID]	= { .type = NLA_U32 },
 	[NFTA_LOOKUP_SREG]	= { .type = NLA_U32 },
 	[NFTA_LOOKUP_DREG]	= { .type = NLA_U32 },
 };
-- 
cgit v1.2.3


From bf250a1fa769f2eb8fc7a4e28b3b523e9cb67eef Mon Sep 17 00:00:00 2001
From: Vlad Yasevich
Date: Tue, 10 Feb 2015 11:37:29 -0500
Subject: ipv6: Partial checksum only UDP packets

ip6_append_data is used by other protocols and some of them can't
be partially checksummed.  Only partially checksum UDP protocol.

Fixes: 32dce968dd987a (ipv6: Allow for partial checksums on non-ufo packets)
Reported-by: Sabrina Dubroca <sd@queasysnail.net>
Tested-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d33df4cbd872..7deebf102cba 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1273,7 +1273,7 @@ emsgsize:
 	/* If this is the first and only packet and device
 	 * supports checksum offloading, let's use it.
 	 */
-	if (!skb &&
+	if (!skb && sk->sk_protocol == IPPROTO_UDP &&
 	    length + fragheaderlen < mtu &&
 	    rt->dst.dev->features & NETIF_F_V6_CSUM &&
 	    !exthdrlen)
-- 
cgit v1.2.3


From 80ad0d4a7a75158f2824d541e4802c88aba4f063 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan
Date: Tue, 10 Feb 2015 13:33:37 -0500
Subject: rds: rds_cong_queue_updates needs to defer the congestion update
 transmission

When the RDS transport is TCP, we cannot inline the call to rds_send_xmit
from rds_cong_queue_update because
(a) we are already holding the sock_lock in the recv path, and
    will deadlock when tcp_setsockopt/tcp_sendmsg try to get the sock
    lock
(b) cong_queue_update does an irqsave on the rds_cong_lock, and this
    will trigger warnings (for a good reason) from functions called
    out of sock_lock.

This patch reverts the change introduced by
2fa57129d ("RDS: Bypass workqueue when queueing cong updates").

The patch has been verified for both RDS/TCP as well as RDS/RDMA
to ensure that there are not regressions for either transport:
 - for verification of  RDS/TCP a client-server unit-test was used,
   with the server blocked in gdb and thus unable to drain its rcvbuf,
   eventually triggering a RDS congestion update.
 - for RDS/RDMA, the standard IB regression tests were used

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/cong.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rds/cong.c b/net/rds/cong.c
index e5b65acd650b..e6144b8246fd 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -221,7 +221,21 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
 	list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
 		if (!test_and_set_bit(0, &conn->c_map_queued)) {
 			rds_stats_inc(s_cong_update_queued);
-			rds_send_xmit(conn);
+			/* We cannot inline the call to rds_send_xmit() here
+			 * for two reasons (both pertaining to a TCP transport):
+			 * 1. When we get here from the receive path, we
+			 *    are already holding the sock_lock (held by
+			 *    tcp_v4_rcv()). So inlining calls to
+			 *    tcp_setsockopt and/or tcp_sendmsg will deadlock
+			 *    when it tries to get the sock_lock())
+			 * 2. Interrupts are masked so that we can mark the
+			 *    the port congested from both send and recv paths.
+			 *    (See comment around declaration of rdc_cong_lock).
+			 *    An attempt to get the sock_lock() here will
+			 *    therefore trigger warnings.
+			 * Defer the xmit to rds_send_worker() instead.
+			 */
+			queue_delayed_work(rds_wq, &conn->c_send_w, 0);
 		}
 	}
 
-- 
cgit v1.2.3


From b35725a285768d85b5ba1be7fe5002654a65ce88 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar
Date: Tue, 10 Feb 2015 13:35:16 -0800
Subject: openvswitch: Reset key metadata for packet execution.

Userspace packet execute command pass down flow key for given
packet. But userspace can skip some parameter with zero value.
Therefore kernel needs to initialize key metadata to zero.

Fixes: 0714812134 ("openvswitch: Eliminate memset() from flow_extract.")
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/flow.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index e2c348b8baca..50ec42f170a0 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -717,6 +717,8 @@ int ovs_flow_key_extract_userspace(const struct nlattr *attr,
 {
 	int err;
 
+	memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE);
+
 	/* Extract metadata from netlink attributes. */
 	err = ovs_nla_get_flow_metadata(attr, key, log);
 	if (err)
-- 
cgit v1.2.3


From 13101602c4a9f653d59af9469040797bc5b361ca Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven
Date: Wed, 11 Feb 2015 11:23:38 +0100
Subject: openvswitch: Add missing initialization in
 validate_and_copy_set_tun()

net/openvswitch/flow_netlink.c: In function ‘validate_and_copy_set_tun’:
net/openvswitch/flow_netlink.c:1749: warning: ‘err’ may be used uninitialized in this function

If ipv4_tun_from_nlattr() returns a different positive value than
OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, err will be uninitialized, and
validate_and_copy_set_tun() may return an undefined value instead of a
zero success indicator. Initialize err to zero to fix this.

Fixes: 1dd144cf5b4b47e1 ("openvswitch: Support VXLAN Group Policy extension")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/flow_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 993281e6278d..3829328c5a76 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1746,7 +1746,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	struct sw_flow_key key;
 	struct ovs_tunnel_info *tun_info;
 	struct nlattr *a;
-	int err, start, opts_type;
+	int err = 0, start, opts_type;
 
 	ovs_match_init(&match, &key, NULL);
 	opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
-- 
cgit v1.2.3


From 26c4f7da3e413da697a7beb22ad496390eda7da0 Mon Sep 17 00:00:00 2001
From: Tom Herbert
Date: Tue, 10 Feb 2015 16:30:27 -0800
Subject: net: Fix remcsum in GRO path to not change packet

Remote checksum offload processing is currently the same for both
the GRO and non-GRO path. When the remote checksum offload option
is encountered, the checksum field referred to is modified in
the packet. So in the GRO case, the packet is modified in the
GRO path and then the operation is skipped when the packet goes
through the normal path based on skb->remcsum_offload. There is
a problem in that the packet may be modified in the GRO path, but
then forwarded off host still containing the remote checksum option.
A remote host will again perform RCO but now the checksum verification
will fail since GRO RCO already modified the checksum.

To fix this, we ensure that GRO restores a packet to it's original
state before returning. In this model, when GRO processes a remote
checksum option it still changes the checksum per the algorithm
but on return from lower layer processing the checksum is restored
to its original value.

In this patch we add define gro_remcsum structure which is passed
to skb_gro_remcsum_process to save offset and delta for the checksum
being changed. After lower layer processing, skb_gro_remcsum_cleanup
is called to restore the checksum before returning from GRO.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c       | 19 +++++++++----------
 include/linux/netdevice.h | 25 +++++++++++++++++++++++--
 include/net/checksum.h    |  5 +++++
 net/ipv4/fou.c            | 20 ++++++++++----------
 4 files changed, 47 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 0e57e862c399..30310a63475a 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -555,12 +555,12 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
 static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
 					  unsigned int off,
 					  struct vxlanhdr *vh, size_t hdrlen,
-					  u32 data)
+					  u32 data, struct gro_remcsum *grc)
 {
 	size_t start, offset, plen;
 
 	if (skb->remcsum_offload)
-		return vh;
+		return NULL;
 
 	if (!NAPI_GRO_CB(skb)->csum_valid)
 		return NULL;
@@ -579,7 +579,8 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
 			return NULL;
 	}
 
-	skb_gro_remcsum_process(skb, (void *)vh + hdrlen, start, offset);
+	skb_gro_remcsum_process(skb, (void *)vh + hdrlen,
+				start, offset, grc);
 
 	skb->remcsum_offload = 1;
 
@@ -597,6 +598,9 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
 	struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock,
 					     udp_offloads);
 	u32 flags;
+	struct gro_remcsum grc;
+
+	skb_gro_remcsum_init(&grc);
 
 	off_vx = skb_gro_offset(skb);
 	hlen = off_vx + sizeof(*vh);
@@ -614,7 +618,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
 
 	if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
 		vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
-				       ntohl(vh->vx_vni));
+				       ntohl(vh->vx_vni), &grc);
 
 		if (!vh)
 			goto out;
@@ -637,6 +641,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
 	pp = eth_gro_receive(head, skb);
 
 out:
+	skb_gro_remcsum_cleanup(skb, &grc);
 	NAPI_GRO_CB(skb)->flush |= flush;
 
 	return pp;
@@ -1154,12 +1159,6 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
 {
 	size_t start, offset, plen;
 
-	if (skb->remcsum_offload) {
-		/* Already processed in GRO path */
-		skb->remcsum_offload = 0;
-		return vh;
-	}
-
 	start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
 	offset = start + ((data & VXLAN_RCO_UDP) ?
 			  offsetof(struct udphdr, check) :
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d115256ed5a2..3aa02457fe4f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2321,8 +2321,19 @@ do {									\
 					   compute_pseudo(skb, proto));	\
 } while (0)
 
+struct gro_remcsum {
+	int offset;
+	__wsum delta;
+};
+
+static inline void skb_gro_remcsum_init(struct gro_remcsum *grc)
+{
+	grc->delta = 0;
+}
+
 static inline void skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
-					   int start, int offset)
+					   int start, int offset,
+					   struct gro_remcsum *grc)
 {
 	__wsum delta;
 
@@ -2331,10 +2342,20 @@ static inline void skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
 	delta = remcsum_adjust(ptr, NAPI_GRO_CB(skb)->csum, start, offset);
 
 	/* Adjust skb->csum since we changed the packet */
-	skb->csum = csum_add(skb->csum, delta);
 	NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);
+
+	grc->offset = (ptr + offset) - (void *)skb->head;
+	grc->delta = delta;
 }
 
+static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
+					   struct gro_remcsum *grc)
+{
+	if (!grc->delta)
+		return;
+
+	remcsum_unadjust((__sum16 *)(skb->head + grc->offset), grc->delta);
+}
 
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				  unsigned short type,
diff --git a/include/net/checksum.h b/include/net/checksum.h
index e339a9513e29..0a55ac715077 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -167,4 +167,9 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum,
 	return delta;
 }
 
+static inline void remcsum_unadjust(__sum16 *psum, __wsum delta)
+{
+	*psum = csum_fold(csum_sub(delta, *psum));
+}
+
 #endif
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 92ddea1e6457..7fa8d36e56d4 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -71,12 +71,6 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
 	size_t offset = ntohs(pd[1]);
 	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
 
-	if (skb->remcsum_offload) {
-		/* Already processed in GRO path */
-		skb->remcsum_offload = 0;
-		return guehdr;
-	}
-
 	if (!pskb_may_pull(skb, plen))
 		return NULL;
 	guehdr = (struct guehdr *)&udp_hdr(skb)[1];
@@ -214,7 +208,8 @@ out_unlock:
 
 static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
 				      struct guehdr *guehdr, void *data,
-				      size_t hdrlen, u8 ipproto)
+				      size_t hdrlen, u8 ipproto,
+				      struct gro_remcsum *grc)
 {
 	__be16 *pd = data;
 	size_t start = ntohs(pd[0]);
@@ -222,7 +217,7 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
 	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
 
 	if (skb->remcsum_offload)
-		return guehdr;
+		return NULL;
 
 	if (!NAPI_GRO_CB(skb)->csum_valid)
 		return NULL;
@@ -234,7 +229,8 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
 			return NULL;
 	}
 
-	skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset);
+	skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen,
+				start, offset, grc);
 
 	skb->remcsum_offload = 1;
 
@@ -254,6 +250,9 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
 	void *data;
 	u16 doffset = 0;
 	int flush = 1;
+	struct gro_remcsum grc;
+
+	skb_gro_remcsum_init(&grc);
 
 	off = skb_gro_offset(skb);
 	len = off + sizeof(*guehdr);
@@ -295,7 +294,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
 		if (flags & GUE_PFLAG_REMCSUM) {
 			guehdr = gue_gro_remcsum(skb, off, guehdr,
 						 data + doffset, hdrlen,
-						 guehdr->proto_ctype);
+						 guehdr->proto_ctype, &grc);
 			if (!guehdr)
 				goto out;
 
@@ -345,6 +344,7 @@ out_unlock:
 	rcu_read_unlock();
 out:
 	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_remcsum_cleanup(skb, &grc);
 
 	return pp;
 }
-- 
cgit v1.2.3


From 6db93ea13b7937c0523ea1f977b322f1347a5633 Mon Sep 17 00:00:00 2001
From: Tom Herbert
Date: Tue, 10 Feb 2015 16:30:29 -0800
Subject: udp: Set SKB_GSO_UDP_TUNNEL* in UDP GRO path

Properly set GSO types and skb->encapsulation in the UDP tunnel GRO
complete so that packets are properly represented for GSO. This sets
SKB_GSO_UDP_TUNNEL or SKB_GSO_UDP_TUNNEL_CSUM depending on whether
non-zero checksums were received, and sets SKB_GSO_TUNNEL_REMCSUM if
the remote checksum option was processed.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp_offload.c | 13 ++++++++++++-
 net/ipv6/udp_offload.c |  6 +++++-
 2 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index d10f6f4ead27..4915d8284a86 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -402,6 +402,13 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff)
 	}
 
 	rcu_read_unlock();
+
+	if (skb->remcsum_offload)
+		skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM;
+
+	skb->encapsulation = 1;
+	skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr));
+
 	return err;
 }
 
@@ -410,9 +417,13 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct iphdr *iph = ip_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-	if (uh->check)
+	if (uh->check) {
+		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
 		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
 					  iph->daddr, 0);
+	} else {
+		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
+	}
 
 	return udp_gro_complete(skb, nhoff);
 }
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index a56276996b72..ab889bb16b3c 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -161,9 +161,13 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-	if (uh->check)
+	if (uh->check) {
+		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
 		uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
 					  &ipv6h->daddr, 0);
+	} else {
+		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
+	}
 
 	return udp_gro_complete(skb, nhoff);
 }
-- 
cgit v1.2.3


From 15e2396d4e3ce23188852b74d924107982c63b42 Mon Sep 17 00:00:00 2001
From: Tom Herbert
Date: Tue, 10 Feb 2015 16:30:31 -0800
Subject: net: Infrastructure for CHECKSUM_PARTIAL with remote checsum offload

This patch adds infrastructure so that remote checksum offload can
set CHECKSUM_PARTIAL instead of calling csum_partial and writing
the modfied checksum field.

Add skb_remcsum_adjust_partial function to set an skb for using
CHECKSUM_PARTIAL with remote checksum offload.  Changed
skb_remcsum_process and skb_gro_remcsum_process to take a boolean
argument to indicate if checksum partial can be set or the
checksum needs to be modified using the normal algorithm.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c       |  4 ++--
 include/linux/netdevice.h | 19 ++++++++++++++++++-
 include/linux/skbuff.h    | 15 ++++++++++++++-
 net/core/dev.c            |  1 +
 net/ipv4/fou.c            |  4 ++--
 5 files changed, 37 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 30310a63475a..4f04443cfd33 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -580,7 +580,7 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
 	}
 
 	skb_gro_remcsum_process(skb, (void *)vh + hdrlen,
-				start, offset, grc);
+				start, offset, grc, true);
 
 	skb->remcsum_offload = 1;
 
@@ -1171,7 +1171,7 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
 
 	vh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
 
-	skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset);
+	skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset, true);
 
 	return vh;
 }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 43fd0a4dcd04..5897b4ea5a3f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1923,6 +1923,9 @@ struct napi_gro_cb {
 	/* Number of segments aggregated. */
 	u16	count;
 
+	/* Start offset for remote checksum offload */
+	u16	gro_remcsum_start;
+
 	/* jiffies when first packet was created/queued */
 	unsigned long age;
 
@@ -2244,6 +2247,12 @@ static inline void skb_gro_postpull_rcsum(struct sk_buff *skb,
 
 __sum16 __skb_gro_checksum_complete(struct sk_buff *skb);
 
+static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb)
+{
+	return (NAPI_GRO_CB(skb)->gro_remcsum_start - skb_headroom(skb) ==
+		skb_gro_offset(skb));
+}
+
 static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
 						      bool zero_okay,
 						      __sum16 check)
@@ -2251,6 +2260,7 @@ static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
 	return ((skb->ip_summed != CHECKSUM_PARTIAL ||
 		skb_checksum_start_offset(skb) <
 		 skb_gro_offset(skb)) &&
+		!skb_at_gro_remcsum_start(skb) &&
 		NAPI_GRO_CB(skb)->csum_cnt == 0 &&
 		(!zero_okay || check));
 }
@@ -2337,12 +2347,19 @@ static inline void skb_gro_remcsum_init(struct gro_remcsum *grc)
 
 static inline void skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
 					   int start, int offset,
-					   struct gro_remcsum *grc)
+					   struct gro_remcsum *grc,
+					   bool nopartial)
 {
 	__wsum delta;
 
 	BUG_ON(!NAPI_GRO_CB(skb)->csum_valid);
 
+	if (!nopartial) {
+		NAPI_GRO_CB(skb)->gro_remcsum_start =
+		    ((unsigned char *)ptr + start) - skb->head;
+		return;
+	}
+
 	delta = remcsum_adjust(ptr, NAPI_GRO_CB(skb)->csum, start, offset);
 
 	/* Adjust skb->csum since we changed the packet */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index da6028a50687..30007afe70b3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3104,16 +3104,29 @@ do {									\
 				       compute_pseudo(skb, proto));	\
 } while (0)
 
+static inline void skb_remcsum_adjust_partial(struct sk_buff *skb, void *ptr,
+					      u16 start, u16 offset)
+{
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	skb->csum_start = ((unsigned char *)ptr + start) - skb->head;
+	skb->csum_offset = offset - start;
+}
+
 /* Update skbuf and packet to reflect the remote checksum offload operation.
  * When called, ptr indicates the starting point for skb->csum when
  * ip_summed is CHECKSUM_COMPLETE. If we need create checksum complete
  * here, skb_postpull_rcsum is done so skb->csum start is ptr.
  */
 static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr,
-				       int start, int offset)
+				       int start, int offset, bool nopartial)
 {
 	__wsum delta;
 
+	if (!nopartial) {
+		skb_remcsum_adjust_partial(skb, ptr, start, offset);
+		return;
+	}
+
 	 if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) {
 		__skb_checksum_complete(skb);
 		skb_postpull_rcsum(skb, skb->data, ptr - (void *)skb->data);
diff --git a/net/core/dev.c b/net/core/dev.c
index d030575532a2..48c6ecb18f3c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4024,6 +4024,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
 		NAPI_GRO_CB(skb)->udp_mark = 0;
+		NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
 
 		/* Setup for GRO checksum validation */
 		switch (skb->ip_summed) {
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 7fa8d36e56d4..d320f575cc62 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -75,7 +75,7 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
 		return NULL;
 	guehdr = (struct guehdr *)&udp_hdr(skb)[1];
 
-	skb_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset);
+	skb_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset, true);
 
 	return guehdr;
 }
@@ -230,7 +230,7 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
 	}
 
 	skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen,
-				start, offset, grc);
+				start, offset, grc, true);
 
 	skb->remcsum_offload = 1;
 
-- 
cgit v1.2.3


From fe881ef11cf0220f118816181930494d484c4883 Mon Sep 17 00:00:00 2001
From: Tom Herbert
Date: Tue, 10 Feb 2015 16:30:33 -0800
Subject: gue: Use checksum partial with remote checksum offload

Change remote checksum handling to set checksum partial as default
behavior. Added an iflink parameter to configure not using
checksum partial (calling csum_partial to update checksum).

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/fou.h |  1 +
 net/ipv4/fou.c           | 28 ++++++++++++++++++++++------
 2 files changed, 23 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h
index 8df06894da23..c303588bb767 100644
--- a/include/uapi/linux/fou.h
+++ b/include/uapi/linux/fou.h
@@ -14,6 +14,7 @@ enum {
 	FOU_ATTR_AF,				/* u8 */
 	FOU_ATTR_IPPROTO,			/* u8 */
 	FOU_ATTR_TYPE,				/* u8 */
+	FOU_ATTR_REMCSUM_NOPARTIAL,		/* flag */
 
 	__FOU_ATTR_MAX,
 };
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index d320f575cc62..ff069f6597ac 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -22,14 +22,18 @@ static LIST_HEAD(fou_list);
 struct fou {
 	struct socket *sock;
 	u8 protocol;
+	u8 flags;
 	u16 port;
 	struct udp_offload udp_offloads;
 	struct list_head list;
 };
 
+#define FOU_F_REMCSUM_NOPARTIAL BIT(0)
+
 struct fou_cfg {
 	u16 type;
 	u8 protocol;
+	u8 flags;
 	struct udp_port_cfg udp_config;
 };
 
@@ -64,7 +68,8 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
 }
 
 static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
-				  void *data, size_t hdrlen, u8 ipproto)
+				  void *data, size_t hdrlen, u8 ipproto,
+				  bool nopartial)
 {
 	__be16 *pd = data;
 	size_t start = ntohs(pd[0]);
@@ -75,7 +80,8 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
 		return NULL;
 	guehdr = (struct guehdr *)&udp_hdr(skb)[1];
 
-	skb_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset, true);
+	skb_remcsum_process(skb, (void *)guehdr + hdrlen,
+			    start, offset, nopartial);
 
 	return guehdr;
 }
@@ -136,7 +142,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
 
 		if (flags & GUE_PFLAG_REMCSUM) {
 			guehdr = gue_remcsum(skb, guehdr, data + doffset,
-					     hdrlen, guehdr->proto_ctype);
+					     hdrlen, guehdr->proto_ctype,
+					     !!(fou->flags &
+						FOU_F_REMCSUM_NOPARTIAL));
 			if (!guehdr)
 				goto drop;
 
@@ -209,7 +217,7 @@ out_unlock:
 static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
 				      struct guehdr *guehdr, void *data,
 				      size_t hdrlen, u8 ipproto,
-				      struct gro_remcsum *grc)
+				      struct gro_remcsum *grc, bool nopartial)
 {
 	__be16 *pd = data;
 	size_t start = ntohs(pd[0]);
@@ -230,7 +238,7 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
 	}
 
 	skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen,
-				start, offset, grc, true);
+				start, offset, grc, nopartial);
 
 	skb->remcsum_offload = 1;
 
@@ -250,6 +258,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
 	void *data;
 	u16 doffset = 0;
 	int flush = 1;
+	struct fou *fou = container_of(uoff, struct fou, udp_offloads);
 	struct gro_remcsum grc;
 
 	skb_gro_remcsum_init(&grc);
@@ -294,7 +303,9 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
 		if (flags & GUE_PFLAG_REMCSUM) {
 			guehdr = gue_gro_remcsum(skb, off, guehdr,
 						 data + doffset, hdrlen,
-						 guehdr->proto_ctype, &grc);
+						 guehdr->proto_ctype, &grc,
+						 !!(fou->flags &
+						    FOU_F_REMCSUM_NOPARTIAL));
 			if (!guehdr)
 				goto out;
 
@@ -455,6 +466,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
 
 	sk = sock->sk;
 
+	fou->flags = cfg->flags;
 	fou->port = cfg->udp_config.local_udp_port;
 
 	/* Initial for fou type */
@@ -541,6 +553,7 @@ static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
 	[FOU_ATTR_AF] = { .type = NLA_U8, },
 	[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
 	[FOU_ATTR_TYPE] = { .type = NLA_U8, },
+	[FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, },
 };
 
 static int parse_nl_config(struct genl_info *info,
@@ -571,6 +584,9 @@ static int parse_nl_config(struct genl_info *info,
 	if (info->attrs[FOU_ATTR_TYPE])
 		cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);
 
+	if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL])
+		cfg->flags |= FOU_F_REMCSUM_NOPARTIAL;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 4762fb980465463734f02c67c67f40beb8903f73 Mon Sep 17 00:00:00 2001
From: Jan Stancek
Date: Wed, 11 Feb 2015 14:06:23 +0100
Subject: ipv6: fix possible deadlock in ip6_fl_purge / ip6_fl_gc

Use spin_lock_bh in ip6_fl_purge() to prevent following potentially
deadlock scenario between ip6_fl_purge() and ip6_fl_gc() timer.

  =================================
  [ INFO: inconsistent lock state ]
  3.19.0 #1 Not tainted
  ---------------------------------
  inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
  swapper/5/0 [HC0[0]:SC1[1]:HE1:SE0] takes:
   (ip6_fl_lock){+.?...}, at: [<ffffffff8171155d>] ip6_fl_gc+0x2d/0x180
  {SOFTIRQ-ON-W} state was registered at:
    [<ffffffff810ee9a0>] __lock_acquire+0x4a0/0x10b0
    [<ffffffff810efd54>] lock_acquire+0xc4/0x2b0
    [<ffffffff81751d2d>] _raw_spin_lock+0x3d/0x80
    [<ffffffff81711798>] ip6_flowlabel_net_exit+0x28/0x110
    [<ffffffff815f9759>] ops_exit_list.isra.1+0x39/0x60
    [<ffffffff815fa320>] cleanup_net+0x100/0x1e0
    [<ffffffff810ad80a>] process_one_work+0x20a/0x830
    [<ffffffff810adf4b>] worker_thread+0x11b/0x460
    [<ffffffff810b42f4>] kthread+0x104/0x120
    [<ffffffff81752bfc>] ret_from_fork+0x7c/0xb0
  irq event stamp: 84640
  hardirqs last  enabled at (84640): [<ffffffff81752080>] _raw_spin_unlock_irq+0x30/0x50
  hardirqs last disabled at (84639): [<ffffffff81751eff>] _raw_spin_lock_irq+0x1f/0x80
  softirqs last  enabled at (84628): [<ffffffff81091ad1>] _local_bh_enable+0x21/0x50
  softirqs last disabled at (84629): [<ffffffff81093b7d>] irq_exit+0x12d/0x150

  other info that might help us debug this:
   Possible unsafe locking scenario:

         CPU0
         ----
    lock(ip6_fl_lock);
    <Interrupt>
      lock(ip6_fl_lock);

   *** DEADLOCK ***

Signed-off-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_flowlabel.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 2f780cba6e12..f45d6db50a45 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -172,7 +172,7 @@ static void __net_exit ip6_fl_purge(struct net *net)
 {
 	int i;
 
-	spin_lock(&ip6_fl_lock);
+	spin_lock_bh(&ip6_fl_lock);
 	for (i = 0; i <= FL_HASH_MASK; i++) {
 		struct ip6_flowlabel *fl;
 		struct ip6_flowlabel __rcu **flp;
@@ -190,7 +190,7 @@ static void __net_exit ip6_fl_purge(struct net *net)
 			flp = &fl->next;
 		}
 	}
-	spin_unlock(&ip6_fl_lock);
+	spin_unlock_bh(&ip6_fl_lock);
 }
 
 static struct ip6_flowlabel *fl_intern(struct net *net,
-- 
cgit v1.2.3


From 9672723973f1e70189b8409eb2da189a980481c5 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven
Date: Thu, 12 Feb 2015 15:17:27 +0100
Subject: bridge: netfilter: Move sysctl-specific error code inside #ifdef

If CONFIG_SYSCTL=n:

    net/bridge/br_netfilter.c: In function ‘br_netfilter_init’:
    net/bridge/br_netfilter.c:996: warning: label ‘err1’ defined but not used

Move the label and the code after it inside the existing #ifdef to get
rid of the warning.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 65728e0dc4ff..0ee453fad3de 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -987,15 +987,12 @@ static int __init br_netfilter_init(void)
 	if (brnf_sysctl_header == NULL) {
 		printk(KERN_WARNING
 		       "br_netfilter: can't register to sysctl.\n");
-		ret = -ENOMEM;
-		goto err1;
+		nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+		return -ENOMEM;
 	}
 #endif
 	printk(KERN_NOTICE "Bridge firewalling registered\n");
 	return 0;
-err1:
-	nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
-	return ret;
 }
 
 static void __exit br_netfilter_fini(void)
-- 
cgit v1.2.3


From ba34e6d9d346fe4e05d7e417b9edf5140772d34c Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 13 Feb 2015 04:47:12 -0800
Subject: tcp: make sure skb is not shared before using skb_get()

IPv6 can keep a copy of SYN message using skb_get() in
tcp_v6_conn_request() so that caller wont free the skb when calling
kfree_skb() later.

Therefore TCP fast open has to clone the skb it is queuing in
child->sk_receive_queue, as all skbs consumed from receive_queue are
freed using __kfree_skb() (ie assuming skb->users == 1)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Fixes: 5b7ed0892f2af ("tcp: move fastopen functions to tcp_fastopen.c")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_fastopen.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 53db2c309572..ea82fd492c1b 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -134,6 +134,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
 	struct tcp_sock *tp;
 	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
 	struct sock *child;
+	u32 end_seq;
 
 	req->num_retrans = 0;
 	req->num_timeout = 0;
@@ -185,20 +186,35 @@ static bool tcp_fastopen_create_child(struct sock *sk,
 
 	/* Queue the data carried in the SYN packet. We need to first
 	 * bump skb's refcnt because the caller will attempt to free it.
+	 * Note that IPv6 might also have used skb_get() trick
+	 * in tcp_v6_conn_request() to keep this SYN around (treq->pktopts)
+	 * So we need to eventually get a clone of the packet,
+	 * before inserting it in sk_receive_queue.
 	 *
 	 * XXX (TFO) - we honor a zero-payload TFO request for now,
 	 * (any reason not to?) but no need to queue the skb since
 	 * there is no data. How about SYN+FIN?
 	 */
-	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) {
-		skb = skb_get(skb);
-		skb_dst_drop(skb);
-		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
-		skb_set_owner_r(skb, child);
-		__skb_queue_tail(&child->sk_receive_queue, skb);
-		tp->syn_data_acked = 1;
+	end_seq = TCP_SKB_CB(skb)->end_seq;
+	if (end_seq != TCP_SKB_CB(skb)->seq + 1) {
+		struct sk_buff *skb2;
+
+		if (unlikely(skb_shared(skb)))
+			skb2 = skb_clone(skb, GFP_ATOMIC);
+		else
+			skb2 = skb_get(skb);
+
+		if (likely(skb2)) {
+			skb_dst_drop(skb2);
+			__skb_pull(skb2, tcp_hdrlen(skb));
+			skb_set_owner_r(skb2, child);
+			__skb_queue_tail(&child->sk_receive_queue, skb2);
+			tp->syn_data_acked = 1;
+		} else {
+			end_seq = TCP_SKB_CB(skb)->seq + 1;
+		}
 	}
-	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq;
 	sk->sk_data_ready(sk);
 	bh_unlock_sock(child);
 	sock_put(child);
-- 
cgit v1.2.3


From 26ad0b83587fb6e9a20eef388b0587ada3da5d06 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar
Date: Thu, 12 Feb 2015 09:58:48 -0800
Subject: openvswitch: Fix key serialization.

Fix typo where mask is used rather than key.

Fixes: 74ed7ab9264("openvswitch: Add support for unique flow IDs.")
Reported-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/flow_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 3829328c5a76..216f20b90aa5 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1516,7 +1516,7 @@ int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
 /* Called with ovs_mutex or RCU read lock. */
 int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
 {
-	return ovs_nla_put_key(&flow->mask->key, &flow->key,
+	return ovs_nla_put_key(&flow->key, &flow->key,
 				OVS_FLOW_ATTR_KEY, false, skb);
 }
 
-- 
cgit v1.2.3


From 3b4711757d7903ab6fa88a9e7ab8901b8227da60 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau
Date: Thu, 12 Feb 2015 16:14:08 -0800
Subject: ipv6: fix ipv6_cow_metrics for non DST_HOST case

ipv6_cow_metrics() currently assumes only DST_HOST routes require
dynamic metrics allocation from inetpeer.  The assumption breaks
when ndisc discovered router with RTAX_MTU and RTAX_HOPLIMIT metric.
Refer to ndisc_router_discovery() in ndisc.c and note that dst_metric_set()
is called after the route is created.

This patch creates the metrics array (by calling dst_cow_metrics_generic) in
ipv6_cow_metrics().

Test:
radvd.conf:
interface qemubr0
{
	AdvLinkMTU 1300;
	AdvCurHopLimit 30;

	prefix fd00:face:face:face::/64
	{
		AdvOnLink on;
		AdvAutonomous on;
		AdvRouterAddr off;
	};
};

Before:
[root@qemu1 ~]# ip -6 r show | egrep -v unreachable
fd00:face:face:face::/64 dev eth0  proto kernel  metric 256  expires 27sec
fe80::/64 dev eth0  proto kernel  metric 256
default via fe80::74df:d0ff:fe23:8ef2 dev eth0  proto ra  metric 1024  expires 27sec

After:
[root@qemu1 ~]# ip -6 r show | egrep -v unreachable
fd00:face:face:face::/64 dev eth0  proto kernel  metric 256  expires 27sec mtu 1300
fe80::/64 dev eth0  proto kernel  metric 256  mtu 1300
default via fe80::74df:d0ff:fe23:8ef2 dev eth0  proto ra  metric 1024  expires 27sec mtu 1300 hoplimit 30

Fixes: 8e2ec639173f325 (ipv6: don't use inetpeer to store metrics for routes.)
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 98565ce0ebcd..4688bd4d7f59 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -141,7 +141,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 	u32 *p = NULL;
 
 	if (!(rt->dst.flags & DST_HOST))
-		return NULL;
+		return dst_cow_metrics_generic(dst, old);
 
 	peer = rt6_get_peer_create(rt);
 	if (peer) {
-- 
cgit v1.2.3


From 4a26e453d99a06e3f1548569d7d405ce38878b78 Mon Sep 17 00:00:00 2001
From: Masanari Iida
Date: Sat, 14 Feb 2015 22:26:34 +0900
Subject: net/core: Fix warning while make xmldocs caused by dev.c

This patch fix following warning wile make xmldocs.

  Warning(.//net/core/dev.c:5345): No description found
  for parameter 'bonding_info'
  Warning(.//net/core/dev.c:5345): Excess function parameter
  'netdev_bonding_info' description in 'netdev_bonding_info_change'

This warning starts to appear after following patch was added
into Linus's tree during merger period.

commit 61bd3857ff2c7daf756d49b41e6277bbdaa8f789
net/core: Add event for a change in slave state

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 48c6ecb18f3c..8f9710c62e20 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5336,7 +5336,7 @@ EXPORT_SYMBOL(netdev_upper_dev_unlink);
 /**
  * netdev_bonding_info_change - Dispatch event about slave change
  * @dev: device
- * @netdev_bonding_info: info to dispatch
+ * @bonding_info: info to dispatch
  *
  * Send NETDEV_BONDING_INFO to netdev notifiers with info.
  * The caller must hold the RTNL lock.
-- 
cgit v1.2.3


From ca9f1fd263e14765a4c213e46940876ad78fce28 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger
Date: Sat, 14 Feb 2015 13:47:54 -0500
Subject: net: spelling fixes

Spelling errors caught by codespell.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c  | 2 +-
 net/core/pktgen.c  | 2 +-
 net/ipv4/devinet.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index ec9baea10c16..f6bdc2b1ba01 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -531,7 +531,7 @@ do_pass:
 			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
 			break;
 
-		/* Unkown instruction. */
+		/* Unknown instruction. */
 		default:
 			goto err;
 		}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 9fa25b0ea145..b4899f5b7388 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -97,7 +97,7 @@
  * New xmit() return, do_div and misc clean up by Stephen Hemminger
  * <shemminger@osdl.org> 040923
  *
- * Randy Dunlap fixed u64 printk compiler waring
+ * Randy Dunlap fixed u64 printk compiler warning
  *
  * Remove FCS from BW calculation.  Lennert Buytenhek <buytenh@wantstofly.org>
  * New time handling. Lennert Buytenhek <buytenh@wantstofly.org> 041213
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f0b4a31d7bd6..3a8985c94581 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1186,7 +1186,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
 no_in_dev:
 
 	/* Not loopback addresses on loopback should be preferred
-	   in this case. It is importnat that lo is the first interface
+	   in this case. It is important that lo is the first interface
 	   in dev_base list.
 	 */
 	for_each_netdev_rcu(net, dev) {
-- 
cgit v1.2.3


From 7afb8886a05be68e376655539a064ec672de8a8e Mon Sep 17 00:00:00 2001
From: WANG Cong
Date: Fri, 13 Feb 2015 13:56:53 -0800
Subject: rtnetlink: call ->dellink on failure when ->newlink exists

Ignacy reported that when eth0 is down and add a vlan device
on top of it like:

  ip link add link eth0 name eth0.1 up type vlan id 1

We will get a refcount leak:

  unregister_netdevice: waiting for eth0.1 to become free. Usage count = 2

The problem is when rtnl_configure_link() fails in rtnl_newlink(),
we simply call unregister_device(), but for stacked device like vlan,
we almost do nothing when we unregister the upper device, more work
is done when we unregister the lower device, so call its ->dellink().

Reported-by: Ignacy Gawedzki <ignacy.gawedzki@green-communications.fr>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5be499b6a2d2..ab293a3066b3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2162,7 +2162,14 @@ replay:
 		}
 		err = rtnl_configure_link(dev, ifm);
 		if (err < 0) {
-			unregister_netdevice(dev);
+			if (ops->newlink) {
+				LIST_HEAD(list_kill);
+
+				ops->dellink(dev, &list_kill);
+				unregister_netdevice_many(&list_kill);
+			} else {
+				unregister_netdevice(dev);
+			}
 			goto out;
 		}
 
-- 
cgit v1.2.3


From 19334920eaf7df3f69950b040ede6c7598425a5b Mon Sep 17 00:00:00 2001
From: Guenter Roeck
Date: Mon, 16 Feb 2015 21:23:51 -0800
Subject: net: dsa: Set valid phy interface type

If the phy interface mode is not found in devicetree, or if devicetree
is not configured, of_get_phy_mode returns -ENODEV. The current code
sets the phy interface mode to the return value from of_get_phy_mode
without checking if it is valid.

This invalid phy interface mode is passed as parameter to of_phy_connect
or to phy_connect_direct. This sets the phy interface mode to the invalid
value, which in turn causes problems for any code using phydev->interface.

Fixes: b31f65fb4383 ("net: dsa: slave: Fix autoneg for phys on switch MDIO bus")
Fixes: 0d8bcdd383b8 ("net: dsa: allow for more complex PHY setups")
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/slave.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d104ae15836f..f23deadf42a0 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -521,10 +521,13 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
 	struct device_node *phy_dn, *port_dn;
 	bool phy_is_fixed = false;
 	u32 phy_flags = 0;
-	int ret;
+	int mode, ret;
 
 	port_dn = cd->port_dn[p->port];
-	p->phy_interface = of_get_phy_mode(port_dn);
+	mode = of_get_phy_mode(port_dn);
+	if (mode < 0)
+		mode = PHY_INTERFACE_MODE_NA;
+	p->phy_interface = mode;
 
 	phy_dn = of_parse_phandle(port_dn, "phy-handle", 0);
 	if (of_phy_is_fixed_link(port_dn)) {
@@ -559,6 +562,8 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
 		if (!p->phy)
 			return -ENODEV;
 
+		/* Use already configured phy mode */
+		p->phy_interface = p->phy->interface;
 		phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
 				   p->phy_interface);
 	} else {
-- 
cgit v1.2.3