From 2205369a314e12fcec4781cc73ac9c08fc2b47de Mon Sep 17 00:00:00 2001 From: David S. Miller Date: Tue, 31 Dec 2013 16:23:35 -0500 Subject: vlan: Fix header ops passthru when doing TX VLAN offload. When the vlan code detects that the real device can do TX VLAN offloads in hardware, it tries to arrange for the real device's header_ops to be invoked directly. But it does so illegally, by simply hooking the real device's header_ops up to the VLAN device. This doesn't work because we will end up invoking a set of header_ops routines which expect a device type which matches the real device, but will see a VLAN device instead. Fix this by providing a pass-thru set of header_ops which will arrange to pass the proper real device instead. To facilitate this add a dev_rebuild_header(). There are implementations which provide a ->cache and ->create but not a ->rebuild (f.e. PLIP). So we need a helper function just like dev_hard_header() to avoid crashes. Use this helper in the one existing place where the header_ops->rebuild was being invoked, the neighbour code. With lots of help from Florian Westphal. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d9a550bf3e8e..7514b9c37a39 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1912,6 +1912,15 @@ static inline int dev_parse_header(const struct sk_buff *skb, return dev->header_ops->parse(skb, haddr); } +static inline int dev_rebuild_header(struct sk_buff *skb) +{ + const struct net_device *dev = skb->dev; + + if (!dev->header_ops || !dev->header_ops->rebuild) + return 0; + return dev->header_ops->rebuild(skb); +} + typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len); int register_gifconf(unsigned int family, gifconf_func_t *gifconf); static inline int unregister_gifconf(unsigned int family) -- cgit v1.2.3 From 7e0309631ecf0cd16edba72ff74747fa1b96ead3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 1 Jan 2014 23:04:25 +0100 Subject: net: llc: fix order of evaluation in llc_conn_ac_inc_vr_by_1 Function llc_conn_ac_inc_vr_by_1() evaluates via macro PDU_GET_NEXT_Vr() into ... llc_sk(sk)->vR = ++llc_sk(sk)->vR & 0xffffffffffffff7f ... but the order in which the side effects take place is undefined because there is no intervening sequence point. As llc_sk(sk)->vR is written in llc_sk(sk)->vR (assignment left-hand side) and written in ++llc_sk(sk)->vR & 0xffffffffffffff7f this might possibly yield undefined behavior. The final value of llc_sk(sk)->vR is ambiguous, because, depending on the order of expression evaluation, the increment may occur before, after, or interleaved with the assignment. In C, evaluating such an expression yields undefined behavior. Since we're doing the increment via PDU_GET_NEXT_Vr() macro and the only place it is being used is from llc_conn_ac_inc_vr_by_1(), in order to increment vR by 1 with a follow-up optimized modulo, rewrite the expression into ((vR + 1) & CONST) in order to fix this. Signed-off-by: Daniel Borkmann Cc: Arnaldo Carvalho de Melo Cc: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/llc_pdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 31e2de7d57c5..c0f0a13ed818 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -142,7 +142,7 @@ #define LLC_S_PF_IS_1(pdu) ((pdu->ctrl_2 & LLC_S_PF_BIT_MASK) ? 1 : 0) #define PDU_SUPV_GET_Nr(pdu) ((pdu->ctrl_2 & 0xFE) >> 1) -#define PDU_GET_NEXT_Vr(sn) (++sn & ~LLC_2_SEQ_NBR_MODULO) +#define PDU_GET_NEXT_Vr(sn) (((sn) + 1) & ~LLC_2_SEQ_NBR_MODULO) /* FRMR information field macros */ -- cgit v1.2.3 From 619a60ee04be33238721a15c1f9704a2a515a33e Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 2 Jan 2014 14:39:44 -0500 Subject: sctp: Remove outqueue empty state The SCTP outqueue structure maintains a data chunks that are pending transmission, the list of chunks that are pending a retransmission and a length of data in flight. It also tries to keep the emtpy state so that it can performe shutdown sequence or notify user. The problem is that the empy state is inconsistently tracked. It is possible to completely drain the queue without sending anything when using PR-SCTP. In this case, the empty state will not be correctly state as report by Jamal Hadi Salim . This can cause an association to be perminantly stuck in the SHUTDOWN_PENDING state. Additionally, SCTP is incredibly inefficient when setting the empty state. Even though all the data is availaible in the outqueue structure, we ignore it and walk a list of trasnports. In the end, we can completely remove the extra empty state and figure out if the queue is empty by looking at 3 things: length of pending data, length of in-flight data, and exisiting of retransmit data. All of these are already in the strucutre. Reported-by: Jamal Hadi Salim Signed-off-by: Vlad Yasevich Acked-by: Neil Horman Tested-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 3 --- net/sctp/outqueue.c | 32 +++++++------------------------- 2 files changed, 7 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 67b5d0068273..0a248b323d87 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1046,9 +1046,6 @@ struct sctp_outq { /* Corked? */ char cork; - - /* Is this structure empty? */ - char empty; }; void sctp_outq_init(struct sctp_association *, struct sctp_outq *); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index f51ba985a36e..59268f6e2c36 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -208,8 +208,6 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) INIT_LIST_HEAD(&q->retransmit); INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); - - q->empty = 1; } /* Free the outqueue structure and any related pending chunks. @@ -332,7 +330,6 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); else SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS); - q->empty = 0; break; } } else { @@ -654,7 +651,6 @@ redo: if (chunk->fast_retransmit == SCTP_NEED_FRTX) chunk->fast_retransmit = SCTP_DONT_FRTX; - q->empty = 0; q->asoc->stats.rtxchunks++; break; } @@ -1065,8 +1061,6 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) sctp_transport_reset_timers(transport); - q->empty = 0; - /* Only let one DATA chunk get bundled with a * COOKIE-ECHO chunk. */ @@ -1275,29 +1269,17 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) "advertised peer ack point:0x%x\n", __func__, asoc, ctsn, asoc->adv_peer_ack_point); - /* See if all chunks are acked. - * Make sure the empty queue handler will get run later. - */ - q->empty = (list_empty(&q->out_chunk_list) && - list_empty(&q->retransmit)); - if (!q->empty) - goto finish; - - list_for_each_entry(transport, transport_list, transports) { - q->empty = q->empty && list_empty(&transport->transmitted); - if (!q->empty) - goto finish; - } - - pr_debug("%s: sack queue is empty\n", __func__); -finish: - return q->empty; + return sctp_outq_is_empty(q); } -/* Is the outqueue empty? */ +/* Is the outqueue empty? + * The queue is empty when we have not pending data, no in-flight data + * and nothing pending retransmissions. + */ int sctp_outq_is_empty(const struct sctp_outq *q) { - return q->empty; + return q->out_qlen == 0 && q->outstanding_bytes == 0 && + list_empty(&q->retransmit); } /******************************************************************** -- cgit v1.2.3 From 7a7ffbabf99445704be01bff5d7e360da908cf8e Mon Sep 17 00:00:00 2001 From: Wei-Chun Chao Date: Thu, 26 Dec 2013 13:10:22 -0800 Subject: ipv4: fix tunneled VM traffic over hw VXLAN/GRE GSO NIC VM to VM GSO traffic is broken if it goes through VXLAN or GRE tunnel and the physical NIC on the host supports hardware VXLAN/GRE GSO offload (e.g. bnx2x and next-gen mlx4). Two issues - (VXLAN) VM traffic has SKB_GSO_DODGY and SKB_GSO_UDP_TUNNEL with SKB_GSO_TCP/UDP set depending on the inner protocol. GSO header integrity check fails in udp4_ufo_fragment if inner protocol is TCP. Also gso_segs is calculated incorrectly using skb->len that includes tunnel header. Fix: robust check should only be applied to the inner packet. (VXLAN & GRE) Once GSO header integrity check passes, NULL segs is returned and the original skb is sent to hardware. However the tunnel header is already pulled. Fix: tunnel header needs to be restored so that hardware can perform GSO properly on the original packet. Signed-off-by: Wei-Chun Chao Signed-off-by: David S. Miller --- include/linux/netdevice.h | 13 +++++++++++++ net/ipv4/gre_offload.c | 11 +++++++---- net/ipv4/udp.c | 6 +++++- net/ipv4/udp_offload.c | 37 +++++++++++++++++++------------------ 4 files changed, 44 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7514b9c37a39..5faaadb0c74f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3017,6 +3017,19 @@ static inline void netif_set_gso_max_size(struct net_device *dev, dev->gso_max_size = size; } +static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, + int pulled_hlen, u16 mac_offset, + int mac_len) +{ + skb->protocol = protocol; + skb->encapsulation = 1; + skb_push(skb, pulled_hlen); + skb_reset_transport_header(skb); + skb->mac_header = mac_offset; + skb->network_header = skb->mac_header + mac_len; + skb->mac_len = mac_len; +} + static inline bool netif_is_macvlan(struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index e5d436188464..2cd02f32f99f 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -28,6 +28,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, netdev_features_t enc_features; int ghl = GRE_HEADER_SECTION; struct gre_base_hdr *greh; + u16 mac_offset = skb->mac_header; int mac_len = skb->mac_len; __be16 protocol = skb->protocol; int tnl_hlen; @@ -58,13 +59,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, } else csum = false; + if (unlikely(!pskb_may_pull(skb, ghl))) + goto out; + /* setup inner skb. */ skb->protocol = greh->protocol; skb->encapsulation = 0; - if (unlikely(!pskb_may_pull(skb, ghl))) - goto out; - __skb_pull(skb, ghl); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); @@ -73,8 +74,10 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); segs = skb_mac_gso_segment(skb, enc_features); - if (!segs || IS_ERR(segs)) + if (!segs || IS_ERR(segs)) { + skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); goto out; + } skb = segs; tnl_hlen = skb_tnl_header_len(skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f140048334ce..a7e4729e974b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2478,6 +2478,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + u16 mac_offset = skb->mac_header; int mac_len = skb->mac_len; int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); __be16 protocol = skb->protocol; @@ -2497,8 +2498,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); segs = skb_mac_gso_segment(skb, enc_features); - if (!segs || IS_ERR(segs)) + if (!segs || IS_ERR(segs)) { + skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, + mac_len); goto out; + } outer_hlen = skb_tnl_header_len(skb); skb = segs; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 83206de2bc76..79c62bdcd3c5 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -41,6 +41,14 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; + int offset; + __wsum csum; + + if (skb->encapsulation && + skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) { + segs = skb_udp_tunnel_segment(skb, features); + goto out; + } mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) @@ -63,27 +71,20 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } + /* Do software UFO. Complete and fill in the UDP checksum as + * HW cannot do checksum of UDP packets sent as multiple + * IP fragments. + */ + offset = skb_checksum_start_offset(skb); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + /* Fragment the skb. IP headers of the fragments are updated in * inet_gso_segment() */ - if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) - segs = skb_udp_tunnel_segment(skb, features); - else { - int offset; - __wsum csum; - - /* Do software UFO. Complete and fill in the UDP checksum as - * HW cannot do checksum of UDP packets sent as multiple - * IP fragments. - */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - - segs = skb_segment(skb, features); - } + segs = skb_segment(skb, features); out: return segs; } -- cgit v1.2.3