From 63d443efe8be2c1d02b30d7e4edeb9aa085352b3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 May 2016 23:17:59 +0200 Subject: batman-adv: fix skb deref after free batadv_send_skb_to_orig() calls dev_queue_xmit() so we can't use skb->len. Fixes: 953324776d6d ("batman-adv: network coding - buffer unicast packets before forward") Signed-off-by: Florian Westphal Reviewed-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/routing.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index ae850f2d11cb..e3857ed4057f 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -601,6 +601,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_unicast_packet *unicast_packet; struct ethhdr *ethhdr = eth_hdr(skb); int res, hdr_len, ret = NET_RX_DROP; + unsigned int len; unicast_packet = (struct batadv_unicast_packet *)skb->data; @@ -641,6 +642,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, if (hdr_len > 0) batadv_skb_set_priority(skb, hdr_len); + len = skb->len; res = batadv_send_skb_to_orig(skb, orig_node, recv_if); /* translate transmit result into receive result */ @@ -648,7 +650,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, /* skb was transmitted and consumed */ batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, - skb->len + ETH_HLEN); + len + ETH_HLEN); ret = NET_RX_SUCCESS; } else if (res == NET_XMIT_POLICED) { -- cgit v1.2.3 From a45e932a3c58eac11a7458c6888910e23f615077 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 6 May 2016 11:43:38 +0200 Subject: batman-adv: Avoid nullptr dereference in batadv_v_neigh_is_sob batadv_neigh_ifinfo_get can return NULL when it can no longer find (even if only temporarily) the neigh_ifinfo in the list neigh->ifinfo_list. 
This has to be checked to avoid kernel Oopses when the ifinfo is dereferenced. This is a situation which isn't expected but is already handled by functions like batadv_v_neigh_cmp. The same kind of warning is therefore used before the function returns without dereferencing the pointers. Fixes: 9786906022eb ("batman-adv: B.A.T.M.A.N. V - implement neighbor comparison API calls") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_v.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 3ff8bd1b7bdc..50bfcf87f569 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -276,6 +277,9 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); + if (WARN_ON(!ifinfo1 || !ifinfo2)) + return false; + threshold = ifinfo1->bat_v.throughput / 4; threshold = ifinfo1->bat_v.throughput - threshold; -- cgit v1.2.3 From 71f9d27daa2cbcca7159c27f0c0c381cc2dd1053 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 6 May 2016 11:43:39 +0200 Subject: batman-adv: Fix refcnt leak in batadv_v_neigh_* The functions batadv_neigh_ifinfo_get increase the reference counter of the batadv_neigh_ifinfo. These have to be reduced again when the reference is not used anymore to correctly free the objects. Fixes: 9786906022eb ("batman-adv: B.A.T.M.A.N. 
V - implement neighbor comparison API calls") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_v.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 50bfcf87f569..4f626a6b8ebd 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -256,14 +256,23 @@ static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing2) { struct batadv_neigh_ifinfo *ifinfo1, *ifinfo2; + int ret = 0; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); + if (WARN_ON(!ifinfo1)) + goto err_ifinfo1; + ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); + if (WARN_ON(!ifinfo2)) + goto err_ifinfo2; - if (WARN_ON(!ifinfo1 || !ifinfo2)) - return 0; + ret = ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput; - return ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput; + batadv_neigh_ifinfo_put(ifinfo2); +err_ifinfo2: + batadv_neigh_ifinfo_put(ifinfo1); +err_ifinfo1: + return ret; } static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, @@ -273,17 +282,26 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, { struct batadv_neigh_ifinfo *ifinfo1, *ifinfo2; u32 threshold; + bool ret = false; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); + if (WARN_ON(!ifinfo1)) + goto err_ifinfo1; - if (WARN_ON(!ifinfo1 || !ifinfo2)) - return false; + ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); + if (WARN_ON(!ifinfo2)) + goto err_ifinfo2; threshold = ifinfo1->bat_v.throughput / 4; threshold = ifinfo1->bat_v.throughput - threshold; - return ifinfo2->bat_v.throughput > threshold; + ret = ifinfo2->bat_v.throughput > threshold; + + batadv_neigh_ifinfo_put(ifinfo2); +err_ifinfo2: + batadv_neigh_ifinfo_put(ifinfo1); +err_ifinfo1: + return ret; } static struct 
batadv_algo_ops batadv_batman_v __read_mostly = { -- cgit v1.2.3 From f7dcdf5fdbe8fec7670d8f65a5db595c98e0ecab Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 22 Feb 2016 22:56:33 +0100 Subject: batman-adv: Fix unexpected free of bcast_own on add_if error The function batadv_iv_ogm_orig_add_if allocates new buffers for bcast_own and bcast_own_sum. It is expected that these buffers are unchanged in case either bcast_own or bcast_own_sum couldn't be resized. But the error handling of this function frees the already resized buffer for bcast_own when the allocation of the new bcast_own_sum buffer failed. This will lead to an invalid memory access when some code will try to access bcast_own. Instead the resized new bcast_own buffer has to be kept. This will not lead to problems because the size of the buffer was only increased and therefore no user of the buffer will try to access bytes outside of the new buffer. Fixes: d0015fdd3d2c ("batman-adv: provide orig_node routing API") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 7f98a9d39883..1b5bbafc0fa3 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -157,10 +157,8 @@ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node, orig_node->bat_iv.bcast_own = data_ptr; data_ptr = kmalloc_array(max_if_num, sizeof(u8), GFP_ATOMIC); - if (!data_ptr) { - kfree(orig_node->bat_iv.bcast_own); + if (!data_ptr) goto unlock; - } memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum, (max_if_num - 1) * sizeof(u8)); -- cgit v1.2.3 From 1653f61d656516aae7130db19561258a847d1e94 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 2 May 2016 01:14:40 +0800 Subject: batman-adv: make sure ELP/OGM orig MAC is updated on address change When the MAC address of the 
primary interface is changed, update the originator address in the ELP and OGM skb buffers as well in order to reflect the change. Fixes: d6f94d91f766 ("batman-adv: ELP - adding basic infrastructure") Reported-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_v.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 4f626a6b8ebd..31bc57e2a944 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -73,16 +73,34 @@ static void batadv_v_iface_disable(struct batadv_hard_iface *hard_iface) batadv_v_elp_iface_disable(hard_iface); } -static void batadv_v_iface_update_mac(struct batadv_hard_iface *hard_iface) -{ -} - static void batadv_v_primary_iface_set(struct batadv_hard_iface *hard_iface) { batadv_v_elp_primary_iface_set(hard_iface); batadv_v_ogm_primary_iface_set(hard_iface); } +/** + * batadv_v_iface_update_mac - react to hard-interface MAC address change + * @hard_iface: the modified interface + * + * If the modified interface is the primary one, update the originator + * address in the ELP and OGM messages to reflect the new MAC address. 
+ */ +static void batadv_v_iface_update_mac(struct batadv_hard_iface *hard_iface) +{ + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_hard_iface *primary_if; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (primary_if != hard_iface) + goto out; + + batadv_v_primary_iface_set(hard_iface); +out: + if (primary_if) + batadv_hardif_put(primary_if); +} + static void batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh) { -- cgit v1.2.3 From d285f52cc0f23564fd61976d43fd5b991b4828f6 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 16 Feb 2016 10:47:07 +0100 Subject: batman-adv: Fix integer overflow in batadv_iv_ogm_calc_tq The undefined behavior sanitizer detected a signed integer overflow in a setup with near perfect link quality UBSAN: Undefined behaviour in net/batman-adv/bat_iv_ogm.c:1246:25 signed integer overflow: 8713350 * 255 cannot be represented in type 'int' The problem happens because the calculation of mixed unsigned and signed integers resulted in an integer multiplication. batadv_ogm_packet::tq (u8 255) * tq_own (u8 255) * tq_asym_penalty (int 134; max 255) * tq_iface_penalty (int 255; max 255) The tq_iface_penalty, tq_asym_penalty and inv_asym_penalty can just be changed to unsigned int because they are not expected to become negative. 
Fixes: c039876892e3 ("batman-adv: add WiFi penalty") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 1b5bbafc0fa3..ce2f203048d3 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1180,9 +1180,10 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, u8 total_count; u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; - int tq_asym_penalty, inv_asym_penalty, if_num; + int if_num; + unsigned int tq_asym_penalty, inv_asym_penalty; unsigned int combined_tq; - int tq_iface_penalty; + unsigned int tq_iface_penalty; bool ret = false; /* find corresponding one hop neighbor */ -- cgit v1.2.3 From e123705e58bf171be8c6eb0902ebfb5d6ed255ad Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Thu, 7 Jan 2016 08:11:12 +0100 Subject: batman-adv: Avoid duplicate neigh_node additions Two parallel calls to batadv_neigh_node_new() might race for creating and adding the same neigh_node. Fix this by including the check for any already existing, identical neigh_node within the spin-lock. 
This fixes splats like the following: [ 739.535069] ------------[ cut here ]------------ [ 739.535079] WARNING: CPU: 0 PID: 0 at /usr/src/batman-adv/git/batman-adv/net/batman-adv/bat_iv_ogm.c:1004 batadv_iv_ogm_process_per_outif+0xe3f/0xe60 [batman_adv]() [ 739.535092] too many matching neigh_nodes [ 739.535094] Modules linked in: dm_mod tun ip6table_filter ip6table_mangle ip6table_nat nf_nat_ipv6 ip6_tables xt_nat iptable_nat nf_nat_ipv4 nf_nat xt_TCPMSS xt_mark iptable_mangle xt_tcpudp xt_conntrack iptable_filter ip_tables x_tables ip_gre ip_tunnel gre bridge stp llc thermal_sys kvm_intel kvm crct10dif_pclmul crc32_pclmul sha256_ssse3 sha256_generic hmac drbg ansi_cprng aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd evdev pcspkr ip6_gre ip6_tunnel tunnel6 batman_adv(O) libcrc32c nf_conntrack_ipv6 nf_defrag_ipv6 nf_conntrack_ipv4 nf_defrag_ipv4 nf_conntrack autofs4 ext4 crc16 mbcache jbd2 xen_netfront xen_blkfront crc32c_intel [ 739.535177] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W O 4.2.0-0.bpo.1-amd64 #1 Debian 4.2.6-3~bpo8+2 [ 739.535186] 0000000000000000 ffffffffa013b050 ffffffff81554521 ffff88007d003c18 [ 739.535201] ffffffff8106fa01 0000000000000000 ffff8800047a087a ffff880079c3a000 [ 739.735602] ffff88007b82bf40 ffff88007bc2d1c0 ffffffff8106fa7a ffffffffa013aa8e [ 739.735624] Call Trace: [ 739.735639] [] ? dump_stack+0x40/0x50 [ 739.735677] [] ? warn_slowpath_common+0x81/0xb0 [ 739.735692] [] ? warn_slowpath_fmt+0x4a/0x50 [ 739.735715] [] ? batadv_iv_ogm_process_per_outif+0xe3f/0xe60 [batman_adv] [ 739.735740] [] ? batadv_iv_ogm_receive+0x363/0x380 [batman_adv] [ 739.735762] [] ? batadv_iv_ogm_receive+0x363/0x380 [batman_adv] [ 739.735783] [] ? __raw_callee_save___pv_queued_spin_unlock+0x11/0x20 [ 739.735804] [] ? batadv_batman_skb_recv+0xc9/0x110 [batman_adv] [ 739.735825] [] ? __netif_receive_skb_core+0x841/0x9a0 [ 739.735838] [] ? __raw_callee_save___pv_queued_spin_unlock+0x11/0x20 [ 739.735853] [] ? 
process_backlog+0xa1/0x140 [ 739.735864] [] ? net_rx_action+0x20a/0x320 [ 739.735878] [] ? __do_softirq+0x107/0x270 [ 739.735891] [] ? irq_exit+0x92/0xa0 [ 739.735905] [] ? xen_evtchn_do_upcall+0x31/0x40 [ 739.735924] [] ? xen_do_hypervisor_callback+0x1e/0x40 [ 739.735939] [] ? xen_hypercall_sched_op+0xa/0x20 [ 739.735965] [] ? xen_hypercall_sched_op+0xa/0x20 [ 739.735979] [] ? xen_safe_halt+0xc/0x20 [ 739.735991] [] ? default_idle+0x1c/0xa0 [ 739.736004] [] ? cpu_startup_entry+0x2eb/0x350 [ 739.736019] [] ? start_kernel+0x480/0x48b [ 739.736032] [] ? xen_start_kernel+0x507/0x511 [ 739.736048] ---[ end trace c106bb901244bc8c ]--- Fixes: f987ed6ebd99 ("batman-adv: protect neighbor list with rcu locks") Reported-by: Martin Weinelt Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/originator.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 1ff4ee473966..7f51bc2c06eb 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -619,6 +619,8 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node; struct batadv_hardif_neigh_node *hardif_neigh = NULL; + spin_lock_bh(&orig_node->neigh_list_lock); + neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr); if (neigh_node) goto out; @@ -650,15 +652,15 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, kref_init(&neigh_node->refcount); kref_get(&neigh_node->refcount); - spin_lock_bh(&orig_node->neigh_list_lock); hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); - spin_unlock_bh(&orig_node->neigh_list_lock); batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv, "Creating new neighbor %pM for orig_node %pM on interface %s\n", neigh_addr, orig_node->orig, hard_iface->net_dev->name); out: + spin_unlock_bh(&orig_node->neigh_list_lock); + if (hardif_neigh) 
batadv_hardif_neigh_put(hardif_neigh); return neigh_node; -- cgit v1.2.3 From ebe24cea95ab969f76f2922032f6c390fdc816f2 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Sat, 7 May 2016 19:54:17 +0800 Subject: batman-adv: initialize ELP orig address on secondary interfaces This fix prevents nodes from wrongly creating a 00:00:00:00:00:00 originator which can potentially interfere with the rest of the neighbor statistics. Fixes: d6f94d91f766 ("batman-adv: ELP - adding basic infrastructure") Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_v.c | 10 ++++++++++ net/batman-adv/bat_v_elp.c | 31 ++++++++++++++++++++++--------- net/batman-adv/bat_v_elp.h | 2 ++ 3 files changed, 34 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 31bc57e2a944..0a12e5cdd65d 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -40,6 +40,16 @@ static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface) { + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_hard_iface *primary_if; + + primary_if = batadv_primary_if_get_selected(bat_priv); + + if (primary_if) { + batadv_v_elp_iface_activate(primary_if, hard_iface); + batadv_hardif_put(primary_if); + } + /* B.A.T.M.A.N. 
V does not use any queuing mechanism, therefore it can * set the interface as ACTIVE right away, without any risk of race * condition diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 3844e7efd0b0..df42eb1365a0 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -376,6 +376,27 @@ void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface) hard_iface->bat_v.elp_skb = NULL; } +/** + * batadv_v_elp_iface_activate - update the ELP buffer belonging to the given + * hard-interface + * @primary_iface: the new primary interface + * @hard_iface: interface holding the to-be-updated buffer + */ +void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface, + struct batadv_hard_iface *hard_iface) +{ + struct batadv_elp_packet *elp_packet; + struct sk_buff *skb; + + if (!hard_iface->bat_v.elp_skb) + return; + + skb = hard_iface->bat_v.elp_skb; + elp_packet = (struct batadv_elp_packet *)skb->data; + ether_addr_copy(elp_packet->orig, + primary_iface->net_dev->dev_addr); +} + /** * batadv_v_elp_primary_iface_set - change internal data to reflect the new * primary interface @@ -384,8 +405,6 @@ void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface) void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface) { struct batadv_hard_iface *hard_iface; - struct batadv_elp_packet *elp_packet; - struct sk_buff *skb; /* update orig field of every elp iface belonging to this mesh */ rcu_read_lock(); @@ -393,13 +412,7 @@ void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface) if (primary_iface->soft_iface != hard_iface->soft_iface) continue; - if (!hard_iface->bat_v.elp_skb) - continue; - - skb = hard_iface->bat_v.elp_skb; - elp_packet = (struct batadv_elp_packet *)skb->data; - ether_addr_copy(elp_packet->orig, - primary_iface->net_dev->dev_addr); + batadv_v_elp_iface_activate(primary_iface, hard_iface); } rcu_read_unlock(); } diff --git a/net/batman-adv/bat_v_elp.h 
b/net/batman-adv/bat_v_elp.h index e95f1bca0785..cc130b2d05e5 100644 --- a/net/batman-adv/bat_v_elp.h +++ b/net/batman-adv/bat_v_elp.h @@ -25,6 +25,8 @@ struct work_struct; int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface); void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface); +void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface, + struct batadv_hard_iface *hard_iface); void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface); int batadv_v_elp_packet_recv(struct sk_buff *skb, struct batadv_hard_iface *if_incoming); -- cgit v1.2.3 From f1971a2e0393a86464caa77aa52168b731960dfa Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 17 May 2016 14:05:49 -0700 Subject: kcm: fix a signedness in kcm_splice_read() skb_splice_bits() returns int, kcm_splice_read() returns ssize_t, both are signed. We may need another patch to make them all ssize_t, but that deserves a separate patch. Fixes: 91687355b927 ("kcm: Splice support") Reported-by: David Binderman Cc: Tom Herbert Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 40662d73204f..0b68ba730a06 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1483,7 +1483,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, long timeo; struct kcm_rx_msg *rxm; int err = 0; - size_t copied; + ssize_t copied; struct sk_buff *skb; /* Only support splice for SOCKSEQPACKET */ -- cgit v1.2.3 From 38036629cded6b96a9f9689758a88d067c4d4d44 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 May 2016 17:44:08 -0700 Subject: rds: tcp: block BH in TCP callbacks TCP stack can now run from process context. Use read_lock_bh(&sk->sk_callback_lock) variant to restore previous assumption. 
Fixes: 5413d1babe8f ("net: do not block BH while processing socket backlog") Fixes: d41a69f1d390 ("tcp: make tcp_sendmsg() aware of socket backlog") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/rds/tcp_connect.c | 4 ++-- net/rds/tcp_listen.c | 4 ++-- net/rds/tcp_recv.c | 4 ++-- net/rds/tcp_send.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 49a3fcfed360..fb82e0a0bf89 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -43,7 +43,7 @@ void rds_tcp_state_change(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { state_change = sk->sk_state_change; @@ -69,7 +69,7 @@ void rds_tcp_state_change(struct sock *sk) break; } out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); state_change(sk); } diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index be263cdf268b..3fa367945105 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -166,7 +166,7 @@ void rds_tcp_listen_data_ready(struct sock *sk) rdsdebug("listen data ready sk %p\n", sk); - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); ready = sk->sk_user_data; if (!ready) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -183,7 +183,7 @@ void rds_tcp_listen_data_ready(struct sock *sk) rds_tcp_accept_work(sk); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); ready(sk); } diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index d75d8b56a9e3..c3196f9d070a 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -301,7 +301,7 @@ void rds_tcp_data_ready(struct sock *sk) rdsdebug("data ready sk %p\n", sk); - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { /* check for teardown race 
*/ ready = sk->sk_data_ready; @@ -315,7 +315,7 @@ void rds_tcp_data_ready(struct sock *sk) if (rds_tcp_read_sock(conn, GFP_ATOMIC) == -ENOMEM) queue_delayed_work(rds_wq, &conn->c_recv_w, 0); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); ready(sk); } diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 2894e6095e3b..22d0f2020a79 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -180,7 +180,7 @@ void rds_tcp_write_space(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (!conn) { write_space = sk->sk_write_space; @@ -200,7 +200,7 @@ void rds_tcp_write_space(struct sock *sk) queue_delayed_work(rds_wq, &conn->c_send_w, 0); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); /* * write_space is only called when data leaves tcp's send queue if -- cgit v1.2.3 From b91083a45e4c41b8c952cf02ceb0ce16f0b1b9b1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 May 2016 17:44:09 -0700 Subject: tipc: block BH in TCP callbacks TCP stack can now run from process context. Use read_lock_bh(&sk->sk_callback_lock) variant to restore previous assumption. Fixes: 5413d1babe8f ("net: do not block BH while processing socket backlog") Fixes: d41a69f1d390 ("tcp: make tcp_sendmsg() aware of socket backlog") Signed-off-by: Eric Dumazet Cc: Jon Maloy Cc: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/server.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/tipc/server.c b/net/tipc/server.c index 7a0af2dc0406..272d20a795d5 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -138,28 +138,28 @@ static void sock_data_ready(struct sock *sk) { struct tipc_conn *con; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); con = sock2con(sk); if (con && test_bit(CF_CONNECTED, &con->flags)) { conn_get(con); if (!queue_work(con->server->rcv_wq, &con->rwork)) conn_put(con); } - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static void sock_write_space(struct sock *sk) { struct tipc_conn *con; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); con = sock2con(sk); if (con && test_bit(CF_CONNECTED, &con->flags)) { conn_get(con); if (!queue_work(con->server->send_wq, &con->swork)) conn_put(con); } - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con) -- cgit v1.2.3 From 5c7cdf339af560f980b12eb6b0b5aa5f68ac6658 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:09 -0700 Subject: gso: Remove arbitrary checks for unsupported GSO In several gso_segment functions there are checks of gso_type against a seemingly arbitrary list of SKB_GSO_* flags. This seems like an attempt to identify unsupported GSO types, but since the stack is the one that set these GSO types in the first place this seems unnecessary to do. If a combination isn't valid in the first place the stack should not allow setting it. This is a code simplification, especially for adding new GSO types. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- net/ipv4/af_inet.c | 18 ------------------ net/ipv4/gre_offload.c | 14 -------------- net/ipv4/tcp_offload.c | 19 ------------------- net/ipv4/udp_offload.c | 10 ---------- net/ipv6/ip6_offload.c | 18 ------------------ net/ipv6/udp_offload.c | 13 ------------- net/mpls/mpls_gso.c | 11 +---------- 7 files changed, 1 insertion(+), 102 deletions(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2e6e65fc4d20..7f08d4525981 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1205,24 +1205,6 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int ihl; int id; - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT | - SKB_GSO_TCPV6 | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_TCP_FIXEDID | - SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_PARTIAL | - 0))) - goto out; - skb_reset_network_header(skb); nhoff = skb_network_header(skb) - skb_mac_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index e88190a8699a..ecd1e09dbbf1 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -26,20 +26,6 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, int gre_offset, outer_hlen; bool need_csum, ufo; - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_TCPV6 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCP_FIXEDID | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT | - SKB_GSO_PARTIAL))) - goto out; - if (!skb->encapsulation) goto out; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 02737b607aa7..5c5964962d0c 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -83,25 +83,6 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an 
untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCP_FIXEDID | - SKB_GSO_TCPV6 | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_TUNNEL_REMCSUM | - 0) || - !(type & (SKB_GSO_TCPV4 | - SKB_GSO_TCPV6)))) - goto out; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 6b7459c92bb2..81f253b6ff36 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -209,16 +209,6 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_IPIP | - SKB_GSO_GRE | SKB_GSO_GRE_CSUM) || - !(type & (SKB_GSO_UDP)))) - goto out; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index f5eb184e1093..9ad743b2c624 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -69,24 +69,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, bool encap, udpfrag; int nhoff; - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCP_FIXEDID | - SKB_GSO_TCPV6 | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_PARTIAL | - 0))) - goto out; - skb_reset_network_header(skb); nhoff = skb_network_header(skb) - skb_mac_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 
5429f6bcf047..ac858c480f2f 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -36,19 +36,6 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & ~(SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT) || - !(type & (SKB_GSO_UDP)))) - goto out; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index bbcf60465e5c..2055e57ed1c3 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -26,15 +26,6 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, netdev_features_t mpls_features; __be16 mpls_protocol; - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_TCPV6 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_FIXEDID | - SKB_GSO_TCP_ECN))) - goto out; - /* Setup inner SKB. */ mpls_protocol = skb->protocol; skb->protocol = skb->inner_protocol; @@ -57,7 +48,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, * skb_mac_gso_segment(), an indirect caller of this function. */ __skb_pull(skb, skb->data - skb_mac_header(skb)); -out: + return segs; } -- cgit v1.2.3 From 7e13318daa4a67bff2f800923a993ef3818b3c53 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:10 -0700 Subject: net: define gso types for IPx over IPv4 and IPv6 This patch defines two new GSO definitions SKB_GSO_IPXIP4 and SKB_GSO_IPXIP6 along with corresponding NETIF_F_GSO_IPXIP4 and NETIF_F_GSO_IPXIP6. These are used to describe an IP in IP tunnel and what the outer protocol is. The inner protocol can be deduced from other GSO types (e.g. SKB_GSO_TCPV4 and SKB_GSO_TCPV6). 
The GSO types of SKB_GSO_IPIP and SKB_GSO_SIT are removed (these are both instances of SKB_GSO_IPXIP4). SKB_GSO_IPXIP6 will be used when support for GSO with IP encapsulation over IPv6 is added. Signed-off-by: Tom Herbert Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 5 ++--- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++--- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 +-- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 3 +-- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 3 +-- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 3 +-- drivers/net/ethernet/intel/igb/igb_main.c | 3 +-- drivers/net/ethernet/intel/igbvf/netdev.c | 3 +-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +-- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 +-- include/linux/netdev_features.h | 12 ++++++------ include/linux/netdevice.h | 4 ++-- include/linux/skbuff.h | 4 ++-- net/core/ethtool.c | 4 ++-- net/ipv4/af_inet.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv6/ip6_offload.c | 4 ++-- net/ipv6/sit.c | 4 ++-- net/netfilter/ipvs/ip_vs_xmit.c | 17 +++++++---------- 19 files changed, 37 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index d465bd721146..0a5b770cefaa 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13259,12 +13259,11 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, NETIF_F_RXHASH | NETIF_F_HW_VLAN_CTAG_TX; if (!chip_is_e1x) { dev->hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_IPIP | NETIF_F_GSO_SIT; + NETIF_F_GSO_IPXIP4; dev->hw_enc_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | - NETIF_F_GSO_IPIP | - NETIF_F_GSO_SIT | + NETIF_F_GSO_IPXIP4 | NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL; } diff --git 
a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5a0dca3e6ef6..72a2efff8e49 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6311,7 +6311,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | - NETIF_F_GSO_IPIP | NETIF_F_GSO_SIT | + NETIF_F_GSO_IPXIP4 | NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_PARTIAL | NETIF_F_RXHASH | NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_GRO; @@ -6321,8 +6321,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM | - NETIF_F_GSO_IPIP | NETIF_F_GSO_SIT | - NETIF_F_GSO_PARTIAL; + NETIF_F_GSO_IPXIP4 | NETIF_F_GSO_PARTIAL; dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM; dev->vlan_features = dev->hw_features | NETIF_F_HIGHDMA; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1cd0ebf7520a..242a1ff344e6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -9083,8 +9083,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) NETIF_F_TSO6 | NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | - NETIF_F_GSO_IPIP | - NETIF_F_GSO_SIT | + NETIF_F_GSO_IPXIP4 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_PARTIAL | diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 99a524db5560..0a8122c00ae2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2284,8 +2284,7 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) if 
(skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT | + SKB_GSO_IPXIP4 | SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)) { if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index fd7dae46c5d8..2bbbbd0f9f15 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1559,8 +1559,7 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_SIT | + SKB_GSO_IPXIP4 | SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)) { if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 642bb45ed906..02d0a1ca6960 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -2230,8 +2230,7 @@ int i40evf_process_config(struct i40evf_adapter *adapter) NETIF_F_TSO6 | NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | - NETIF_F_GSO_IPIP | - NETIF_F_GSO_SIT | + NETIF_F_GSO_IPXIP4 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_PARTIAL | diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 21727692bef6..b1a5cdb77088 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2418,8 +2418,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) #define IGB_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ NETIF_F_GSO_GRE_CSUM | \ - NETIF_F_GSO_IPIP | \ - NETIF_F_GSO_SIT | \ + NETIF_F_GSO_IPXIP4 | \ NETIF_F_GSO_UDP_TUNNEL | \ NETIF_F_GSO_UDP_TUNNEL_CSUM) diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 322a2d7828a5..79b907f1a520 
100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2763,8 +2763,7 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) #define IGBVF_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ NETIF_F_GSO_GRE_CSUM | \ - NETIF_F_GSO_IPIP | \ - NETIF_F_GSO_SIT | \ + NETIF_F_GSO_IPXIP4 | \ NETIF_F_GSO_UDP_TUNNEL | \ NETIF_F_GSO_UDP_TUNNEL_CSUM) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 9f3677c7e96f..69452c379cbc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9482,8 +9482,7 @@ skip_sriov: #define IXGBE_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ NETIF_F_GSO_GRE_CSUM | \ - NETIF_F_GSO_IPIP | \ - NETIF_F_GSO_SIT | \ + NETIF_F_GSO_IPXIP4 | \ NETIF_F_GSO_UDP_TUNNEL | \ NETIF_F_GSO_UDP_TUNNEL_CSUM) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 5e348b125090..d86e51116384 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4062,8 +4062,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) #define IXGBEVF_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ NETIF_F_GSO_GRE_CSUM | \ - NETIF_F_GSO_IPIP | \ - NETIF_F_GSO_SIT | \ + NETIF_F_GSO_IPXIP4 | \ NETIF_F_GSO_UDP_TUNNEL | \ NETIF_F_GSO_UDP_TUNNEL_CSUM) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index bc8736266749..aa7b2400f98c 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -44,8 +44,8 @@ enum { NETIF_F_FSO_BIT, /* ... FCoE segmentation */ NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ NETIF_F_GSO_GRE_CSUM_BIT, /* ... GRE with csum with TSO */ - NETIF_F_GSO_IPIP_BIT, /* ... IPIP tunnel with TSO */ - NETIF_F_GSO_SIT_BIT, /* ... SIT tunnel with TSO */ + NETIF_F_GSO_IPXIP4_BIT, /* ... 
IP4 or IP6 over IP4 with TSO */ + NETIF_F_GSO_IPXIP6_BIT, /* ... IP4 or IP6 over IP6 with TSO */ NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */ NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */ NETIF_F_GSO_PARTIAL_BIT, /* ... Only segment inner-most L4 @@ -121,8 +121,8 @@ enum { #define NETIF_F_RXALL __NETIF_F(RXALL) #define NETIF_F_GSO_GRE __NETIF_F(GSO_GRE) #define NETIF_F_GSO_GRE_CSUM __NETIF_F(GSO_GRE_CSUM) -#define NETIF_F_GSO_IPIP __NETIF_F(GSO_IPIP) -#define NETIF_F_GSO_SIT __NETIF_F(GSO_SIT) +#define NETIF_F_GSO_IPXIP4 __NETIF_F(GSO_IPXIP4) +#define NETIF_F_GSO_IPXIP6 __NETIF_F(GSO_IPXIP6) #define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) #define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM) #define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID) @@ -200,8 +200,8 @@ enum { #define NETIF_F_GSO_ENCAP_ALL (NETIF_F_GSO_GRE | \ NETIF_F_GSO_GRE_CSUM | \ - NETIF_F_GSO_IPIP | \ - NETIF_F_GSO_SIT | \ + NETIF_F_GSO_IPXIP4 | \ + NETIF_F_GSO_IPXIP6 | \ NETIF_F_GSO_UDP_TUNNEL | \ NETIF_F_GSO_UDP_TUNNEL_CSUM) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c148edfe4965..f45929ce8157 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4006,8 +4006,8 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_GRE != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_GRE_CSUM != (NETIF_F_GSO_GRE_CSUM >> NETIF_F_GSO_SHIFT)); - BUILD_BUG_ON(SKB_GSO_IPIP != (NETIF_F_GSO_IPIP >> NETIF_F_GSO_SHIFT)); - BUILD_BUG_ON(SKB_GSO_SIT != (NETIF_F_GSO_SIT >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_IPXIP4 != (NETIF_F_GSO_IPXIP4 >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_IPXIP6 != (NETIF_F_GSO_IPXIP6 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> 
NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT)); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c413c588a24f..65968a97517f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -471,9 +471,9 @@ enum { SKB_GSO_GRE_CSUM = 1 << 8, - SKB_GSO_IPIP = 1 << 9, + SKB_GSO_IPXIP4 = 1 << 9, - SKB_GSO_SIT = 1 << 10, + SKB_GSO_IPXIP6 = 1 << 10, SKB_GSO_UDP_TUNNEL = 1 << 11, diff --git a/net/core/ethtool.c b/net/core/ethtool.c index bdb4013581b1..f4034817d255 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -84,8 +84,8 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation", - [NETIF_F_GSO_IPIP_BIT] = "tx-ipip-segmentation", - [NETIF_F_GSO_SIT_BIT] = "tx-sit-segmentation", + [NETIF_F_GSO_IPXIP4_BIT] = "tx-ipxip4-segmentation", + [NETIF_F_GSO_IPXIP6_BIT] = "tx-ipxip6-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7f08d4525981..25040b183a60 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1483,7 +1483,7 @@ out_unlock: static int ipip_gro_complete(struct sk_buff *skb, int nhoff) { skb->encapsulation = 1; - skb_shinfo(skb)->gso_type |= SKB_GSO_IPIP; + skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4; return inet_gro_complete(skb, nhoff); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 92827483ee3d..978370132f29 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -219,7 +219,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb->protocol != htons(ETH_P_IP))) goto tx_error; - if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP)) + if (iptunnel_handle_offloads(skb, 
SKB_GSO_IPXIP4)) goto tx_error; skb_set_inner_ipproto(skb, IPPROTO_IPIP); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 9ad743b2c624..787e55f4796c 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -86,7 +86,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); if (skb->encapsulation && - skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP)) + skb_shinfo(skb)->gso_type & (SKB_GSO_IPXIP4 | SKB_GSO_IPXIP6)) udpfrag = proto == IPPROTO_UDP && encap; else udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; @@ -294,7 +294,7 @@ out_unlock: static int sit_gro_complete(struct sk_buff *skb, int nhoff) { skb->encapsulation = 1; - skb_shinfo(skb)->gso_type |= SKB_GSO_SIT; + skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4; return ipv6_gro_complete(skb, nhoff); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a13d8c114ccb..0a5a255277e5 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -913,7 +913,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } - if (iptunnel_handle_offloads(skb, SKB_GSO_SIT)) { + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) { ip_rt_put(rt); goto tx_error; } @@ -1000,7 +1000,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; - if (iptunnel_handle_offloads(skb, SKB_GSO_IPIP)) + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) goto tx_error; skb_set_inner_ipproto(skb, IPPROTO_IPIP); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 6d19d2eeaa60..01d3d894de46 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -932,17 +932,14 @@ error: static inline int __tun_gso_type_mask(int encaps_af, int orig_af) { - if (encaps_af == AF_INET) { - if (orig_af == AF_INET) - return SKB_GSO_IPIP; - - return SKB_GSO_SIT; + switch (encaps_af) { + 
case AF_INET: + return SKB_GSO_IPXIP4; + case AF_INET6: + return SKB_GSO_IPXIP6; + default: + return 0; } - - /* GSO: we need to provide proper SKB_GSO_ value for IPv6: - * SKB_GSO_SIT/IPV6 - */ - return 0; } /* -- cgit v1.2.3 From 4c64242a90a4932260d9ad32b12c745c466e2987 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:11 -0700 Subject: ipv6: Fix nexthdr for reinjection In ip6_input_finish the nexthdr protocol is retrieved from the next header offset that is returned in the cb of the skb. This method does not work for UDP encapsulation that may not even have a concept of a nexthdr field (e.g. FOU). This patch checks for a final protocol (INET6_PROTO_FINAL) when a protocol handler returns > 0. If the protocol is not final then resubmission is performed on nhoff value. If the protocol is final then the nexthdr is taken to be the return value. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv6/ip6_input.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index f185cbcda114..d35dff23f609 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -236,6 +236,7 @@ resubmit: nhoff = IP6CB(skb)->nhoff; nexthdr = skb_network_header(skb)[nhoff]; +resubmit_final: raw = raw6_local_deliver(skb, nexthdr); ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot) { @@ -263,10 +264,21 @@ resubmit: goto discard; ret = ipprot->handler(skb); - if (ret > 0) - goto resubmit; - else if (ret == 0) + if (ret > 0) { + if (ipprot->flags & INET6_PROTO_FINAL) { + /* Not an extension header, most likely UDP + * encapsulation. Use return value as nexthdr + * protocol not nhoff (which presumably is + * not set by handler). 
+ */ + nexthdr = ret; + goto resubmit_final; + } else { + goto resubmit; + } + } else if (ret == 0) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS); + } } else { if (!raw) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { -- cgit v1.2.3 From 1da44f9c15e6389d45e034d5fd0b937e2928b412 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:12 -0700 Subject: ipv6: Change "final" protocol processing for encapsulation When performing foo-over-UDP, UDP packets are processed by the encapsulation handler which returns another protocol to process. This may result in processing two (or more) protocols in the loop that are marked as INET6_PROTO_FINAL. The actions taken for hitting a final protocol, in particular the skb_postpull_rcsum can only be performed once. This patch set adds a check of whether a final protocol has been seen. The rules are: - If the final protocol has not been seen any protocol is processed (final and non-final). In the case of a final protocol, the final actions are taken (like the skb_postpull_rcsum) - If a final protocol has been seen (e.g. an encapsulating UDP header) then no further non-final protocols are allowed (e.g. extension headers). For more final protocols the final actions are not taken (e.g. skb_postpull_rcsum). Signed-off-by: Tom Herbert Signed-off-by: David S.
Miller --- net/ipv6/ip6_input.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index d35dff23f609..94611e450ec9 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -223,6 +223,7 @@ static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *sk unsigned int nhoff; int nexthdr; bool raw; + bool have_final = false; /* * Parse extension headers @@ -242,9 +243,21 @@ resubmit_final: if (ipprot) { int ret; - if (ipprot->flags & INET6_PROTO_FINAL) { + if (have_final) { + if (!(ipprot->flags & INET6_PROTO_FINAL)) { + /* Once we've seen a final protocol don't + * allow encapsulation on any non-final + * ones. This allows foo in UDP encapsulation + * to work. + */ + goto discard; + } + } else if (ipprot->flags & INET6_PROTO_FINAL) { const struct ipv6hdr *hdr; + /* Only do this once for first final protocol */ + have_final = true; + /* Free reference early: we don't need it any more, and it may hold ip_conntrack module loaded indefinitely. */ -- cgit v1.2.3 From 55c2bc1432241e7be39b11339bd00e85f878ebd6 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:13 -0700 Subject: net: Cleanup encap items in ip_tunnels.h Consolidate all the ip_tunnel_encap definitions in one spot in the header file. Also, move ip_encap_hlen and ip_tunnel_encap from ip_tunnel.c to ip_tunnels.h so they can be called without a dependency on ip_tunnel module. Similarly, move iptun_encaps to ip_tunnel_core.c. Signed-off-by: Tom Herbert Signed-off-by: David S.
Miller --- include/net/ip_tunnels.h | 76 ++++++++++++++++++++++++++++++++++++----------- net/ipv4/ip_tunnel.c | 45 ---------------------------- net/ipv4/ip_tunnel_core.c | 4 +++ 3 files changed, 62 insertions(+), 63 deletions(-) (limited to 'net') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index d916b4315903..dbf444428437 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -171,22 +171,6 @@ struct ip_tunnel_net { struct ip_tunnel __rcu *collect_md_tun; }; -struct ip_tunnel_encap_ops { - size_t (*encap_hlen)(struct ip_tunnel_encap *e); - int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi4 *fl4); -}; - -#define MAX_IPTUN_ENCAP_OPS 8 - -extern const struct ip_tunnel_encap_ops __rcu * - iptun_encaps[MAX_IPTUN_ENCAP_OPS]; - -int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op, - unsigned int num); -int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, - unsigned int num); - static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, __be32 saddr, __be32 daddr, u8 tos, u8 ttl, __be32 label, @@ -251,8 +235,6 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); -int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, - u8 *protocol, struct flowi4 *fl4); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); @@ -271,9 +253,67 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p); void ip_tunnel_setup(struct net_device *dev, int net_id); + +struct ip_tunnel_encap_ops { + size_t (*encap_hlen)(struct ip_tunnel_encap *e); + int 
(*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi4 *fl4); +}; + +#define MAX_IPTUN_ENCAP_OPS 8 + +extern const struct ip_tunnel_encap_ops __rcu * + iptun_encaps[MAX_IPTUN_ENCAP_OPS]; + +int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op, + unsigned int num); +int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, + unsigned int num); + int ip_tunnel_encap_setup(struct ip_tunnel *t, struct ip_tunnel_encap *ipencap); +static inline int ip_encap_hlen(struct ip_tunnel_encap *e) +{ + const struct ip_tunnel_encap_ops *ops; + int hlen = -EINVAL; + + if (e->type == TUNNEL_ENCAP_NONE) + return 0; + + if (e->type >= MAX_IPTUN_ENCAP_OPS) + return -EINVAL; + + rcu_read_lock(); + ops = rcu_dereference(iptun_encaps[e->type]); + if (likely(ops && ops->encap_hlen)) + hlen = ops->encap_hlen(e); + rcu_read_unlock(); + + return hlen; +} + +static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, + u8 *protocol, struct flowi4 *fl4) +{ + const struct ip_tunnel_encap_ops *ops; + int ret = -EINVAL; + + if (t->encap.type == TUNNEL_ENCAP_NONE) + return 0; + + if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) + return -EINVAL; + + rcu_read_lock(); + ops = rcu_dereference(iptun_encaps[t->encap.type]); + if (likely(ops && ops->build_header)) + ret = ops->build_header(skb, &t->encap, protocol, fl4); + rcu_read_unlock(); + + return ret; +} + /* Extract dsfield from inner protocol */ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, const struct sk_buff *skb) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index a69ed94bda1b..d8f5e0a269f5 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -443,29 +443,6 @@ drop: } EXPORT_SYMBOL_GPL(ip_tunnel_rcv); -static int ip_encap_hlen(struct ip_tunnel_encap *e) -{ - const struct ip_tunnel_encap_ops *ops; - int hlen = -EINVAL; - - if (e->type == TUNNEL_ENCAP_NONE) - return 0; - - if (e->type >= MAX_IPTUN_ENCAP_OPS) - return -EINVAL; - - 
rcu_read_lock(); - ops = rcu_dereference(iptun_encaps[e->type]); - if (likely(ops && ops->encap_hlen)) - hlen = ops->encap_hlen(e); - rcu_read_unlock(); - - return hlen; -} - -const struct ip_tunnel_encap_ops __rcu * - iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; - int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops, unsigned int num) { @@ -519,28 +496,6 @@ int ip_tunnel_encap_setup(struct ip_tunnel *t, } EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup); -int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, - u8 *protocol, struct flowi4 *fl4) -{ - const struct ip_tunnel_encap_ops *ops; - int ret = -EINVAL; - - if (t->encap.type == TUNNEL_ENCAP_NONE) - return 0; - - if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) - return -EINVAL; - - rcu_read_lock(); - ops = rcu_dereference(iptun_encaps[t->encap.type]); - if (likely(ops && ops->build_header)) - ret = ops->build_header(skb, &t->encap, protocol, fl4); - rcu_read_unlock(); - - return ret; -} -EXPORT_SYMBOL(ip_tunnel_encap); - static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, struct rtable *rt, __be16 df, const struct iphdr *inner_iph) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 9118b0e640ba..cc66a2043e6d 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -47,6 +47,10 @@ #include #include +const struct ip_tunnel_encap_ops __rcu * + iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; +EXPORT_SYMBOL(iptun_encaps); + void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, __be16 df, bool xnet) -- cgit v1.2.3 From 440924bbc0e11fb429ccc25f6d9597d5a7a02296 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:14 -0700 Subject: fou: Call setup_udp_tunnel_sock Use helper function to set up UDP tunnel related information for a fou socket. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- net/ipv4/fou.c | 50 ++++++++++++++++---------------------------------- 1 file changed, 16 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index eeec7d60e5fd..6cbc72535426 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -448,31 +448,13 @@ static void fou_release(struct fou *fou) kfree_rcu(fou, rcu); } -static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) -{ - udp_sk(sk)->encap_rcv = fou_udp_recv; - udp_sk(sk)->gro_receive = fou_gro_receive; - udp_sk(sk)->gro_complete = fou_gro_complete; - fou_from_sock(sk)->protocol = cfg->protocol; - - return 0; -} - -static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) -{ - udp_sk(sk)->encap_rcv = gue_udp_recv; - udp_sk(sk)->gro_receive = gue_gro_receive; - udp_sk(sk)->gro_complete = gue_gro_complete; - - return 0; -} - static int fou_create(struct net *net, struct fou_cfg *cfg, struct socket **sockp) { struct socket *sock = NULL; struct fou *fou = NULL; struct sock *sk; + struct udp_tunnel_sock_cfg tunnel_cfg; int err; /* Open UDP socket */ @@ -491,33 +473,33 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, fou->flags = cfg->flags; fou->port = cfg->udp_config.local_udp_port; + fou->type = cfg->type; + fou->sock = sock; + + memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); + tunnel_cfg.encap_type = 1; + tunnel_cfg.sk_user_data = fou; + tunnel_cfg.encap_destroy = NULL; /* Initial for fou type */ switch (cfg->type) { case FOU_ENCAP_DIRECT: - err = fou_encap_init(sk, fou, cfg); - if (err) - goto error; + tunnel_cfg.encap_rcv = fou_udp_recv; + tunnel_cfg.gro_receive = fou_gro_receive; + tunnel_cfg.gro_complete = fou_gro_complete; + fou->protocol = cfg->protocol; break; case FOU_ENCAP_GUE: - err = gue_encap_init(sk, fou, cfg); - if (err) - goto error; + tunnel_cfg.encap_rcv = gue_udp_recv; + tunnel_cfg.gro_receive = gue_gro_receive; + tunnel_cfg.gro_complete = gue_gro_complete; break; default: err = -EINVAL; 
goto error; } - fou->type = cfg->type; - - udp_sk(sk)->encap_type = 1; - udp_encap_enable(); - - sk->sk_user_data = fou; - fou->sock = sock; - - inet_inc_convert_csum(sk); + setup_udp_tunnel_sock(net, sock, &tunnel_cfg); sk->sk_allocation = GFP_ATOMIC; -- cgit v1.2.3 From dc969b81ebb37d6ec3d7659763bf017ee03f3ac1 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:15 -0700 Subject: fou: Split out {fou,gue}_build_header Create __fou_build_header and __gue_build_header. These implement the protocol generic parts of building the fou and gue header. fou_build_header and gue_build_header implement the IPv4 specific functions and call the __*_build_header functions. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/fou.h | 8 ++++---- net/ipv4/fou.c | 47 +++++++++++++++++++++++++++++++++++++---------- 2 files changed, 41 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/net/fou.h b/include/net/fou.h index 19b8a0c62a98..7d2fda2a3a9c 100644 --- a/include/net/fou.h +++ b/include/net/fou.h @@ -11,9 +11,9 @@ size_t fou_encap_hlen(struct ip_tunnel_encap *e); static size_t gue_encap_hlen(struct ip_tunnel_encap *e); -int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi4 *fl4); -int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi4 *fl4); +int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, __be16 *sport, int type); +int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, __be16 *sport, int type); #endif diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 6cbc72535426..f4f2ddd8f216 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -780,6 +780,22 @@ static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, *protocol = IPPROTO_UDP; } +int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, __be16 *sport, int type) +{ + 
int err; + + err = iptunnel_handle_offloads(skb, type); + if (err) + return err; + + *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), + skb, 0, 0, false); + + return 0; +} +EXPORT_SYMBOL(__fou_build_header); + int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi4 *fl4) { @@ -788,26 +804,21 @@ int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, __be16 sport; int err; - err = iptunnel_handle_offloads(skb, type); + err = __fou_build_header(skb, e, protocol, &sport, type); if (err) return err; - sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), - skb, 0, 0, false); fou_build_udp(skb, e, fl4, protocol, sport); return 0; } EXPORT_SYMBOL(fou_build_header); -int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, - u8 *protocol, struct flowi4 *fl4) +int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, __be16 *sport, int type) { - int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : - SKB_GSO_UDP_TUNNEL; struct guehdr *guehdr; size_t hdrlen, optlen = 0; - __be16 sport; void *data; bool need_priv = false; int err; @@ -826,8 +837,8 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, return err; /* Get source port (based on flow hash) before skb_push */ - sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), - skb, 0, 0, false); + *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), + skb, 0, 0, false); hdrlen = sizeof(struct guehdr) + optlen; @@ -872,6 +883,22 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, } + return 0; +} +EXPORT_SYMBOL(__gue_build_header); + +int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi4 *fl4) +{ + int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? 
SKB_GSO_UDP_TUNNEL_CSUM : + SKB_GSO_UDP_TUNNEL; + __be16 sport; + int err; + + err = __gue_build_header(skb, e, protocol, &sport, type); + if (err) + return err; + fou_build_udp(skb, e, fl4, protocol, sport); return 0; -- cgit v1.2.3 From 5f914b681253966612e052df364c3b8e4a3d5f63 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:16 -0700 Subject: fou: Support IPv6 in fou This patch adds receive path support for IPv6 with fou. - Add address family to fou structure for open sockets. This supports AF_INET and AF_INET6. Lookups for fou ports are performed on both the port number and family. - In fou and gue receive adjust tot_len in IPv4 header or payload_len based on address family. - Allow AF_INET6 in FOU_ATTR_AF netlink attribute. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/fou.c | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index f4f2ddd8f216..5f9207c039e7 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -21,6 +21,7 @@ struct fou { u8 protocol; u8 flags; __be16 port; + u8 family; u16 type; struct list_head list; struct rcu_head rcu; @@ -47,14 +48,17 @@ static inline struct fou *fou_from_sock(struct sock *sk) return sk->sk_user_data; } -static int fou_recv_pull(struct sk_buff *skb, size_t len) +static int fou_recv_pull(struct sk_buff *skb, struct fou *fou, size_t len) { - struct iphdr *iph = ip_hdr(skb); - /* Remove 'len' bytes from the packet (UDP header and * FOU header if present). 
*/ - iph->tot_len = htons(ntohs(iph->tot_len) - len); + if (fou->family == AF_INET) + ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len); + else + ipv6_hdr(skb)->payload_len = + htons(ntohs(ipv6_hdr(skb)->payload_len) - len); + __skb_pull(skb, len); skb_postpull_rcsum(skb, udp_hdr(skb), len); skb_reset_transport_header(skb); @@ -68,7 +72,7 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) if (!fou) return 1; - if (fou_recv_pull(skb, sizeof(struct udphdr))) + if (fou_recv_pull(skb, fou, sizeof(struct udphdr))) goto drop; return -fou->protocol; @@ -141,7 +145,11 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) hdrlen = sizeof(struct guehdr) + optlen; - ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len); + if (fou->family == AF_INET) + ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len); + else + ipv6_hdr(skb)->payload_len = + htons(ntohs(ipv6_hdr(skb)->payload_len) - len); /* Pull csum through the guehdr now . This can be used if * there is a remote checksum offload. 
@@ -426,7 +434,8 @@ static int fou_add_to_port_list(struct net *net, struct fou *fou) mutex_lock(&fn->fou_lock); list_for_each_entry(fout, &fn->fou_list, list) { - if (fou->port == fout->port) { + if (fou->port == fout->port && + fou->family == fout->family) { mutex_unlock(&fn->fou_lock); return -EALREADY; } @@ -471,8 +480,9 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk = sock->sk; - fou->flags = cfg->flags; fou->port = cfg->udp_config.local_udp_port; + fou->family = cfg->udp_config.family; + fou->flags = cfg->flags; fou->type = cfg->type; fou->sock = sock; @@ -524,12 +534,13 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg) { struct fou_net *fn = net_generic(net, fou_net_id); __be16 port = cfg->udp_config.local_udp_port; + u8 family = cfg->udp_config.family; int err = -EINVAL; struct fou *fou; mutex_lock(&fn->fou_lock); list_for_each_entry(fou, &fn->fou_list, list) { - if (fou->port == port) { + if (fou->port == port && fou->family == family) { fou_release(fou); err = 0; break; @@ -567,8 +578,15 @@ static int parse_nl_config(struct genl_info *info, if (info->attrs[FOU_ATTR_AF]) { u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]); - if (family != AF_INET) - return -EINVAL; + switch (family) { + case AF_INET: + break; + case AF_INET6: + cfg->udp_config.ipv6_v6only = 1; + break; + default: + return -EAFNOSUPPORT; + } cfg->udp_config.family = family; } @@ -659,6 +677,7 @@ static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info) struct fou_cfg cfg; struct fou *fout; __be16 port; + u8 family; int ret; ret = parse_nl_config(info, &cfg); @@ -668,6 +687,10 @@ static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info) if (port == 0) return -EINVAL; + family = cfg.udp_config.family; + if (family != AF_INET && family != AF_INET6) + return -EINVAL; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -675,7 +698,7 @@ static int fou_nl_cmd_get_port(struct sk_buff *skb, struct 
genl_info *info) ret = -ESRCH; mutex_lock(&fn->fou_lock); list_for_each_entry(fout, &fn->fou_list, list) { - if (port == fout->port) { + if (port == fout->port && family == fout->family) { ret = fou_dump_info(fout, info->snd_portid, info->snd_seq, 0, msg, info->genlhdr->cmd); -- cgit v1.2.3 From 058214a4d1dfefed9f01a277fadd3590acb5f990 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:17 -0700 Subject: ip6_tun: Add infrastructure for doing encapsulation Add encap_hlen and ip_tunnel_encap structure to ip6_tnl. Add functions for getting encap hlen, setting up encap on a tunnel, performing encapsulation operation. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 58 +++++++++++++++++++++++++++++ net/ipv4/ip_tunnel_core.c | 5 +++ net/ipv6/ip6_tunnel.c | 94 ++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 144 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index fb9e0153f4f2..d325c81332e3 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -52,10 +52,68 @@ struct ip6_tnl { __u32 o_seqno; /* The last output seqno */ int hlen; /* tun_hlen + encap_hlen */ int tun_hlen; /* Precalculated header length */ + int encap_hlen; /* Encap header length (FOU,GUE) */ + struct ip_tunnel_encap encap; int mlink; +}; +struct ip6_tnl_encap_ops { + size_t (*encap_hlen)(struct ip_tunnel_encap *e); + int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi6 *fl6); }; +extern const struct ip6_tnl_encap_ops __rcu * + ip6tun_encaps[MAX_IPTUN_ENCAP_OPS]; + +int ip6_tnl_encap_add_ops(const struct ip6_tnl_encap_ops *ops, + unsigned int num); +int ip6_tnl_encap_del_ops(const struct ip6_tnl_encap_ops *ops, + unsigned int num); +int ip6_tnl_encap_setup(struct ip6_tnl *t, + struct ip_tunnel_encap *ipencap); + +static inline int ip6_encap_hlen(struct ip_tunnel_encap *e) +{ + const struct 
ip6_tnl_encap_ops *ops; + int hlen = -EINVAL; + + if (e->type == TUNNEL_ENCAP_NONE) + return 0; + + if (e->type >= MAX_IPTUN_ENCAP_OPS) + return -EINVAL; + + rcu_read_lock(); + ops = rcu_dereference(ip6tun_encaps[e->type]); + if (likely(ops && ops->encap_hlen)) + hlen = ops->encap_hlen(e); + rcu_read_unlock(); + + return hlen; +} + +static inline int ip6_tnl_encap(struct sk_buff *skb, struct ip6_tnl *t, + u8 *protocol, struct flowi6 *fl6) +{ + const struct ip6_tnl_encap_ops *ops; + int ret = -EINVAL; + + if (t->encap.type == TUNNEL_ENCAP_NONE) + return 0; + + if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) + return -EINVAL; + + rcu_read_lock(); + ops = rcu_dereference(ip6tun_encaps[t->encap.type]); + if (likely(ops && ops->build_header)) + ret = ops->build_header(skb, &t->encap, protocol, fl6); + rcu_read_unlock(); + + return ret; +} + /* Tunnel encapsulation limit destination sub-option */ struct ipv6_tlv_tnl_enc_lim { diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index cc66a2043e6d..afd6b5968caf 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,10 @@ const struct ip_tunnel_encap_ops __rcu * iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; EXPORT_SYMBOL(iptun_encaps); +const struct ip6_tnl_encap_ops __rcu * + ip6tun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; +EXPORT_SYMBOL(ip6tun_encaps); + void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, __be16 df, bool xnet) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e79330f214bd..64ddbeaca371 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1010,7 +1010,8 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, struct dst_entry *dst = NULL, *ndst = NULL; struct net_device *tdev; int mtu; - unsigned int max_headroom = sizeof(struct ipv6hdr); + unsigned int 
psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; + unsigned int max_headroom = psh_hlen; int err = -1; /* NBMA tunnel */ @@ -1063,7 +1064,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, t->parms.name); goto tx_err_dst_release; } - mtu = dst_mtu(dst) - sizeof(*ipv6h); + mtu = dst_mtu(dst) - psh_hlen; if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; @@ -1124,11 +1125,18 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, skb->encapsulation = 1; } + /* Calculate max headroom for all the headers and adjust + * needed_headroom if necessary. + */ max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr) - + dst->header_len; + + dst->header_len + t->hlen; if (max_headroom > dev->needed_headroom) dev->needed_headroom = max_headroom; + err = ip6_tnl_encap(skb, t, &proto, fl6); + if (err) + return err; + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); @@ -1280,6 +1288,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) struct net_device *dev = t->dev; struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; + int t_hlen; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); @@ -1303,6 +1312,10 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) else dev->flags &= ~IFF_POINTOPOINT; + t->tun_hlen = 0; + t->hlen = t->encap_hlen + t->tun_hlen; + t_hlen = t->hlen + sizeof(struct ipv6hdr); + if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); @@ -1316,9 +1329,9 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (rt->dst.dev) { dev->hard_header_len = rt->dst.dev->hard_header_len + - sizeof(struct ipv6hdr); + t_hlen; - dev->mtu = rt->dst.dev->mtu - sizeof(struct ipv6hdr); + dev->mtu = rt->dst.dev->mtu - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; @@ -1564,6 +1577,59 @@ int 
ip6_tnl_get_iflink(const struct net_device *dev) } EXPORT_SYMBOL(ip6_tnl_get_iflink); +int ip6_tnl_encap_add_ops(const struct ip6_tnl_encap_ops *ops, + unsigned int num) +{ + if (num >= MAX_IPTUN_ENCAP_OPS) + return -ERANGE; + + return !cmpxchg((const struct ip6_tnl_encap_ops **) + &ip6tun_encaps[num], + NULL, ops) ? 0 : -1; +} +EXPORT_SYMBOL(ip6_tnl_encap_add_ops); + +int ip6_tnl_encap_del_ops(const struct ip6_tnl_encap_ops *ops, + unsigned int num) +{ + int ret; + + if (num >= MAX_IPTUN_ENCAP_OPS) + return -ERANGE; + + ret = (cmpxchg((const struct ip6_tnl_encap_ops **) + &ip6tun_encaps[num], + ops, NULL) == ops) ? 0 : -1; + + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL(ip6_tnl_encap_del_ops); + +int ip6_tnl_encap_setup(struct ip6_tnl *t, + struct ip_tunnel_encap *ipencap) +{ + int hlen; + + memset(&t->encap, 0, sizeof(t->encap)); + + hlen = ip6_encap_hlen(ipencap); + if (hlen < 0) + return hlen; + + t->encap.type = ipencap->type; + t->encap.sport = ipencap->sport; + t->encap.dport = ipencap->dport; + t->encap.flags = ipencap->flags; + + t->encap_hlen = hlen; + t->hlen = t->encap_hlen + t->tun_hlen; + + return 0; +} +EXPORT_SYMBOL_GPL(ip6_tnl_encap_setup); + static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_init = ip6_tnl_dev_init, .ndo_uninit = ip6_tnl_dev_uninit, @@ -1585,19 +1651,13 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { static void ip6_tnl_dev_setup(struct net_device *dev) { - struct ip6_tnl *t; - dev->netdev_ops = &ip6_tnl_netdev_ops; dev->destructor = ip6_dev_free; dev->type = ARPHRD_TUNNEL6; - dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); - dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr); - t = netdev_priv(dev); - if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu -= 8; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); + dev->features |= NETIF_F_LLTX; netif_keep_dst(dev); /* This perm addr will be used as interface identifier by IPv6 */ dev->addr_assign_type = 
NET_ADDR_RANDOM; @@ -1615,6 +1675,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); int ret; + int t_hlen; t->dev = dev; t->net = dev_net(dev); @@ -1630,8 +1691,15 @@ ip6_tnl_dev_init_gen(struct net_device *dev) if (ret) goto destroy_dst; - t->hlen = 0; t->tun_hlen = 0; + t->hlen = t->encap_hlen + t->tun_hlen; + t_hlen = t->hlen + sizeof(struct ipv6hdr); + + dev->type = ARPHRD_TUNNEL6; + dev->hard_header_len = LL_MAX_HEADER + t_hlen; + dev->mtu = ETH_DATA_LEN - t_hlen; + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; return 0; -- cgit v1.2.3 From aa3463d65e7b9f5ae322db4a12214c2cb041bc8e Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:18 -0700 Subject: fou: Add encap ops for IPv6 tunnels This patch add a new fou6 module that provides encapsulation operations for IPv6. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/fou.h | 2 +- net/ipv6/Makefile | 1 + net/ipv6/fou6.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 net/ipv6/fou6.c (limited to 'net') diff --git a/include/net/fou.h b/include/net/fou.h index 7d2fda2a3a9c..f5cc6910a27e 100644 --- a/include/net/fou.h +++ b/include/net/fou.h @@ -9,7 +9,7 @@ #include size_t fou_encap_hlen(struct ip_tunnel_encap *e); -static size_t gue_encap_hlen(struct ip_tunnel_encap *e); +size_t gue_encap_hlen(struct ip_tunnel_encap *e); int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, __be16 *sport, int type); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 5e9d6bf4aaca..7ec3129c9ace 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_IPV6_VTI) += ip6_vti.o obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o +obj-$(CONFIG_NET_FOU) += fou6.o obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o 
obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c new file mode 100644 index 000000000000..c972d0b52579 --- /dev/null +++ b/net/ipv6/fou6.c @@ -0,0 +1,140 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void fou6_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, + struct flowi6 *fl6, u8 *protocol, __be16 sport) +{ + struct udphdr *uh; + + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + + uh = udp_hdr(skb); + + uh->dest = e->dport; + uh->source = sport; + uh->len = htons(skb->len); + udp6_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM6), skb, + &fl6->saddr, &fl6->daddr, skb->len); + + *protocol = IPPROTO_UDP; +} + +int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi6 *fl6) +{ + __be16 sport; + int err; + int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM6 ? + SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + + err = __fou_build_header(skb, e, protocol, &sport, type); + if (err) + return err; + + fou6_build_udp(skb, e, fl6, protocol, sport); + + return 0; +} +EXPORT_SYMBOL(fou6_build_header); + +int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, + u8 *protocol, struct flowi6 *fl6) +{ + __be16 sport; + int err; + int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM6 ? 
+ SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + + err = __gue_build_header(skb, e, protocol, &sport, type); + if (err) + return err; + + fou6_build_udp(skb, e, fl6, protocol, sport); + + return 0; +} +EXPORT_SYMBOL(gue6_build_header); + +#ifdef CONFIG_NET_FOU_IP_TUNNELS + +static const struct ip6_tnl_encap_ops fou_ip6tun_ops = { + .encap_hlen = fou_encap_hlen, + .build_header = fou6_build_header, +}; + +static const struct ip6_tnl_encap_ops gue_ip6tun_ops = { + .encap_hlen = gue_encap_hlen, + .build_header = gue6_build_header, +}; + +static int ip6_tnl_encap_add_fou_ops(void) +{ + int ret; + + ret = ip6_tnl_encap_add_ops(&fou_ip6tun_ops, TUNNEL_ENCAP_FOU); + if (ret < 0) { + pr_err("can't add fou6 ops\n"); + return ret; + } + + ret = ip6_tnl_encap_add_ops(&gue_ip6tun_ops, TUNNEL_ENCAP_GUE); + if (ret < 0) { + pr_err("can't add gue6 ops\n"); + ip6_tnl_encap_del_ops(&fou_ip6tun_ops, TUNNEL_ENCAP_FOU); + return ret; + } + + return 0; +} + +static void ip6_tnl_encap_del_fou_ops(void) +{ + ip6_tnl_encap_del_ops(&fou_ip6tun_ops, TUNNEL_ENCAP_FOU); + ip6_tnl_encap_del_ops(&gue_ip6tun_ops, TUNNEL_ENCAP_GUE); +} + +#else + +static int ip6_tnl_encap_add_fou_ops(void) +{ + return 0; +} + +static void ip6_tnl_encap_del_fou_ops(void) +{ +} + +#endif + +static int __init fou6_init(void) +{ + int ret; + + ret = ip6_tnl_encap_add_fou_ops(); + + return ret; +} + +static void __exit fou6_fini(void) +{ + ip6_tnl_encap_del_fou_ops(); +} + +module_init(fou6_init); +module_exit(fou6_fini); +MODULE_AUTHOR("Tom Herbert "); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 1faf3d9f7c06c803397665ada1448f374e8f48e0 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:19 -0700 Subject: ip6_gre: Add support for fou/gue encapsulation Add netlink and setup for encapsulation Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- net/ipv6/ip6_gre.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4541fa54035e..6fb1b89d0178 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -729,7 +729,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) t->tun_hlen = gre_calc_hlen(t->parms.o_flags); - t->hlen = t->tun_hlen; + t->hlen = t->encap_hlen + t->tun_hlen; t_hlen = t->hlen + sizeof(struct ipv6hdr); @@ -1022,9 +1022,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) } tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); - - tunnel->hlen = tunnel->tun_hlen; - + tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); dev->hard_header_len = LL_MAX_HEADER + t_hlen; @@ -1290,15 +1288,57 @@ static void ip6gre_tap_setup(struct net_device *dev) dev->priv_flags &= ~IFF_TX_SKB_SHARING; } +static bool ip6gre_netlink_encap_parms(struct nlattr *data[], + struct ip_tunnel_encap *ipencap) +{ + bool ret = false; + + memset(ipencap, 0, sizeof(*ipencap)); + + if (!data) + return ret; + + if (data[IFLA_GRE_ENCAP_TYPE]) { + ret = true; + ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]); + } + + if (data[IFLA_GRE_ENCAP_FLAGS]) { + ret = true; + ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]); + } + + if (data[IFLA_GRE_ENCAP_SPORT]) { + ret = true; + ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]); + } + + if (data[IFLA_GRE_ENCAP_DPORT]) { + ret = true; + ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]); + } + + return ret; +} + static int ip6gre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct ip6_tnl *nt; struct net *net = dev_net(dev); struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct ip_tunnel_encap ipencap; int err; nt = netdev_priv(dev); + + if 
(ip6gre_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(nt, &ipencap); + + if (err < 0) + return err; + } + ip6gre_netlink_parms(data, &nt->parms); if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) @@ -1345,10 +1385,18 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], struct net *net = nt->net; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); struct __ip6_tnl_parm p; + struct ip_tunnel_encap ipencap; if (dev == ign->fb_tunnel_dev) return -EINVAL; + if (ip6gre_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(nt, &ipencap); + + if (err < 0) + return err; + } + ip6gre_netlink_parms(data, &p); t = ip6gre_tunnel_locate(net, &p, 0); @@ -1400,6 +1448,14 @@ static size_t ip6gre_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_GRE_FLAGS */ nla_total_size(4) + + /* IFLA_GRE_ENCAP_TYPE */ + nla_total_size(2) + + /* IFLA_GRE_ENCAP_FLAGS */ + nla_total_size(2) + + /* IFLA_GRE_ENCAP_SPORT */ + nla_total_size(2) + + /* IFLA_GRE_ENCAP_DPORT */ + nla_total_size(2) + 0; } @@ -1422,6 +1478,17 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags)) goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, + t->encap.type) || + nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT, + t->encap.sport) || + nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT, + t->encap.dport) || + nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS, + t->encap.flags)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -1440,6 +1507,10 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = { [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 }, [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 }, [IFLA_GRE_FLAGS] = { .type = NLA_U32 }, + [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 }, + [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, + [IFLA_GRE_ENCAP_DPORT] = { .type = 
NLA_U16 }, }; static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { -- cgit v1.2.3 From b3a27b519b22d4bf03788f6826190d4c5a130b3c Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:20 -0700 Subject: ip6_tunnel: Add support for fou/gue encapsulation Add netlink and setup for encapsulation Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 64ddbeaca371..74b35e4aacd9 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1797,13 +1797,55 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); } +static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[], + struct ip_tunnel_encap *ipencap) +{ + bool ret = false; + + memset(ipencap, 0, sizeof(*ipencap)); + + if (!data) + return ret; + + if (data[IFLA_IPTUN_ENCAP_TYPE]) { + ret = true; + ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]); + } + + if (data[IFLA_IPTUN_ENCAP_FLAGS]) { + ret = true; + ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]); + } + + if (data[IFLA_IPTUN_ENCAP_SPORT]) { + ret = true; + ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); + } + + if (data[IFLA_IPTUN_ENCAP_DPORT]) { + ret = true; + ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); + } + + return ret; +} + static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net *net = dev_net(dev); struct ip6_tnl *nt, *t; + struct ip_tunnel_encap ipencap; nt = netdev_priv(dev); + + if (ip6_tnl_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(nt, &ipencap); + + if (err < 0) + return err; + } + ip6_tnl_netlink_parms(data, &nt->parms); t = ip6_tnl_locate(net, &nt->parms, 0); @@ -1820,10 +1862,17 @@ static int 
ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], struct __ip6_tnl_parm p; struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct ip_tunnel_encap ipencap; if (dev == ip6n->fb_tnl_dev) return -EINVAL; + if (ip6_tnl_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(t, &ipencap); + + if (err < 0) + return err; + } ip6_tnl_netlink_parms(data, &p); t = ip6_tnl_locate(net, &p, 0); @@ -1864,6 +1913,14 @@ static size_t ip6_tnl_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_IPTUN_PROTO */ nla_total_size(1) + + /* IFLA_IPTUN_ENCAP_TYPE */ + nla_total_size(2) + + /* IFLA_IPTUN_ENCAP_FLAGS */ + nla_total_size(2) + + /* IFLA_IPTUN_ENCAP_SPORT */ + nla_total_size(2) + + /* IFLA_IPTUN_ENCAP_DPORT */ + nla_total_size(2) + 0; } @@ -1881,6 +1938,17 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) || nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto)) goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, + tunnel->encap.type) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, + tunnel->encap.sport) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, + tunnel->encap.dport) || + nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, + tunnel->encap.flags)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -1904,6 +1972,10 @@ static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 }, [IFLA_IPTUN_FLAGS] = { .type = NLA_U32 }, [IFLA_IPTUN_PROTO] = { .type = NLA_U8 }, + [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 }, + [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, + [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, + [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, }; static struct rtnl_link_ops ip6_link_ops __read_mostly = { -- cgit v1.2.3 From 51c052d4f5871554377278762065450b4e64f6d1 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:21 -0700 
Subject: ipv6: Set features for IPv6 tunnels Need to set dev features, use same values that are used in GREv6. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 74b35e4aacd9..cabf492a56dc 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1640,6 +1640,11 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_get_iflink = ip6_tnl_get_iflink, }; +#define IPXIPX_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_GSO_SOFTWARE | \ + NETIF_F_HW_CSUM) /** * ip6_tnl_dev_setup - setup virtual tunnel device @@ -1659,6 +1664,10 @@ static void ip6_tnl_dev_setup(struct net_device *dev) dev->addr_len = sizeof(struct in6_addr); dev->features |= NETIF_F_LLTX; netif_keep_dst(dev); + + dev->features |= IPXIPX_FEATURES; + dev->hw_features |= IPXIPX_FEATURES; + /* This perm addr will be used as interface identifier by IPv6 */ dev->addr_assign_type = NET_ADDR_RANDOM; eth_random_addr(dev->perm_addr); -- cgit v1.2.3 From 815d22e55b0eba3bfb8f0ba532ce9ae364fee556 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:22 -0700 Subject: ip6ip6: Support for GSO/GRO Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- net/ipv6/ip6_offload.c | 24 +++++++++++++++++++++--- net/ipv6/ip6_tunnel.c | 5 +++++ 2 files changed, 26 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 787e55f4796c..332d6a03f182 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -253,9 +253,11 @@ out: return pp; } -static struct sk_buff **sit_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +static struct sk_buff **sit_ip6ip6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) { + /* Common GRO receive for SIT and IP6IP6 */ + if (NAPI_GRO_CB(skb)->encap_mark) { NAPI_GRO_CB(skb)->flush = 1; return NULL; @@ -298,6 +300,13 @@ static int sit_gro_complete(struct sk_buff *skb, int nhoff) return ipv6_gro_complete(skb, nhoff); } +static int ip6ip6_gro_complete(struct sk_buff *skb, int nhoff) +{ + skb->encapsulation = 1; + skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6; + return ipv6_gro_complete(skb, nhoff); +} + static struct packet_offload ipv6_packet_offload __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), .callbacks = { @@ -310,11 +319,19 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, - .gro_receive = sit_gro_receive, + .gro_receive = sit_ip6ip6_gro_receive, .gro_complete = sit_gro_complete, }, }; +static const struct net_offload ip6ip6_offload = { + .callbacks = { + .gso_segment = ipv6_gso_segment, + .gro_receive = sit_ip6ip6_gro_receive, + .gro_complete = ip6ip6_gro_complete, + }, +}; + static int __init ipv6_offload_init(void) { @@ -326,6 +343,7 @@ static int __init ipv6_offload_init(void) dev_add_offload(&ipv6_packet_offload); inet_add_offload(&sit_offload, IPPROTO_IPV6); + inet6_add_offload(&ip6ip6_offload, IPPROTO_IPV6); return 0; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index cabf492a56dc..d26d2269abec 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c 
@@ -1242,6 +1242,11 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) + return -1; + + skb_set_inner_ipproto(skb, IPPROTO_IPV6); + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, IPPROTO_IPV6); if (err != 0) { -- cgit v1.2.3 From b8921ca83eed2496108ee308e9a41c5084089680 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:23 -0700 Subject: ip4ip6: Support for GSO/GRO Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/inet_common.h | 5 +++++ net/ipv4/af_inet.c | 12 +++++++----- net/ipv6/ip6_offload.c | 33 ++++++++++++++++++++++++++++++++- net/ipv6/ip6_tunnel.c | 5 +++++ 4 files changed, 49 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 109e3ee9108c..5d683428fced 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -39,6 +39,11 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); +struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb); +int inet_gro_complete(struct sk_buff *skb, int nhoff); +struct sk_buff *inet_gso_segment(struct sk_buff *skb, + netdev_features_t features); + static inline void inet_ctl_sock_destroy(struct sock *sk) { if (sk) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 25040b183a60..377424ea17a4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1192,8 +1192,8 @@ int inet_sk_rebuild_header(struct sock *sk) } EXPORT_SYMBOL(inet_sk_rebuild_header); -static struct sk_buff *inet_gso_segment(struct sk_buff *skb, - netdev_features_t features) +struct sk_buff *inet_gso_segment(struct sk_buff *skb, + netdev_features_t features) { bool udpfrag = false, fixedid = false, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); @@ 
-1280,9 +1280,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, out: return segs; } +EXPORT_SYMBOL(inet_gso_segment); -static struct sk_buff **inet_gro_receive(struct sk_buff **head, - struct sk_buff *skb) +struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb) { const struct net_offload *ops; struct sk_buff **pp = NULL; @@ -1398,6 +1398,7 @@ out: return pp; } +EXPORT_SYMBOL(inet_gro_receive); static struct sk_buff **ipip_gro_receive(struct sk_buff **head, struct sk_buff *skb) @@ -1449,7 +1450,7 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) return -EINVAL; } -static int inet_gro_complete(struct sk_buff *skb, int nhoff) +int inet_gro_complete(struct sk_buff *skb, int nhoff) { __be16 newlen = htons(skb->len - nhoff); struct iphdr *iph = (struct iphdr *)(skb->data + nhoff); @@ -1479,6 +1480,7 @@ out_unlock: return err; } +EXPORT_SYMBOL(inet_gro_complete); static int ipip_gro_complete(struct sk_buff *skb, int nhoff) { diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 332d6a03f182..22e90e56b5a9 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -16,6 +16,7 @@ #include #include +#include #include "ip6_offload.h" @@ -268,6 +269,21 @@ static struct sk_buff **sit_ip6ip6_gro_receive(struct sk_buff **head, return ipv6_gro_receive(head, skb); } +static struct sk_buff **ip4ip6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + /* Common GRO receive for SIT and IP6IP6 */ + + if (NAPI_GRO_CB(skb)->encap_mark) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + NAPI_GRO_CB(skb)->encap_mark = 1; + + return inet_gro_receive(head, skb); +} + static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; @@ -307,6 +323,13 @@ static int ip6ip6_gro_complete(struct sk_buff *skb, int nhoff) return ipv6_gro_complete(skb, nhoff); } +static int ip4ip6_gro_complete(struct sk_buff *skb, int nhoff) +{ + skb->encapsulation = 1; + 
skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6; + return inet_gro_complete(skb, nhoff); +} + static struct packet_offload ipv6_packet_offload __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), .callbacks = { @@ -324,6 +347,14 @@ static const struct net_offload sit_offload = { }, }; +static const struct net_offload ip4ip6_offload = { + .callbacks = { + .gso_segment = inet_gso_segment, + .gro_receive = ip4ip6_gro_receive, + .gro_complete = ip4ip6_gro_complete, + }, +}; + static const struct net_offload ip6ip6_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, @@ -331,7 +362,6 @@ static const struct net_offload ip6ip6_offload = { .gro_complete = ip6ip6_gro_complete, }, }; - static int __init ipv6_offload_init(void) { @@ -344,6 +374,7 @@ static int __init ipv6_offload_init(void) inet_add_offload(&sit_offload, IPPROTO_IPV6); inet6_add_offload(&ip6ip6_offload, IPPROTO_IPV6); + inet6_add_offload(&ip4ip6_offload, IPPROTO_IPIP); return 0; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index d26d2269abec..823dad1e631b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1188,6 +1188,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) + return -1; + + skb_set_inner_ipproto(skb, IPPROTO_IPIP); + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, IPPROTO_IPIP); if (err != 0) { -- cgit v1.2.3 From 3ee93eaf2bbfbe0083f71a18a265d48adbd5bb27 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:24 -0700 Subject: ipv6: Don't reset inner headers in ip6_tnl_xmit Since iptunnel_handle_offloads() is called in all paths we can probably drop the block in ip6_tnl_xmit that was checking for skb->encapsulation and resetting the inner headers. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- net/ipv6/ip6_tunnel.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 823dad1e631b..7b0481e3738f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1120,11 +1120,6 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); } - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } - /* Calculate max headroom for all the headers and adjust * needed_headroom if necessary. */ -- cgit v1.2.3 From 37e14f4fe2991f6089a9c8a3830e3ab634ec7190 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Wed, 18 May 2016 10:06:23 -0700 Subject: RDS: TCP: rds_tcp_accept_worker() must exit gracefully when terminating rds-tcp There are two instances where we want to terminate RDS-TCP: when exiting the netns or during module unload. In either case, the termination sequence is to stop the listen socket, mark the rtn->rds_tcp_listen_sock as null, and flush any accept workqs. Thus any workqs that get flushed at this point will encounter a null rds_tcp_listen_sock, and must exit gracefully to allow the RDS-TCP termination to complete successfully. Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. 
Miller --- net/rds/tcp_listen.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 3fa367945105..094a8cae0ef1 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -80,6 +80,9 @@ int rds_tcp_accept_one(struct socket *sock) int conn_state; struct sock *nsk; + if (!sock) /* module unload or netns delete in progress */ + return -ENETUNREACH; + ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family, sock->sk->sk_type, sock->sk->sk_protocol, &new_sock); -- cgit v1.2.3 From c948bb5c2cc4d63f9d76fc02baf5a8331e3cd27f Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Wed, 18 May 2016 10:06:24 -0700 Subject: RDS: TCP: Avoid rds connection churn from rogue SYNs When a rogue SYN is received after the connection arbitration algorithm has converged, the incoming SYN should not needlessly quiesce the transmit path, and it should not result in needless TCP connection resets due to re-execution of the connection arbitration logic. Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/tcp_listen.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 094a8cae0ef1..4bf4befe5066 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -132,11 +132,13 @@ int rds_tcp_accept_one(struct socket *sock) * so we must quiesce any send threads before resetting * c_transport_data. 
*/ - wait_event(conn->c_waitq, - !test_bit(RDS_IN_XMIT, &conn->c_flags)); - if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) { + if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr) || + !conn->c_outgoing) { goto rst_nsk; - } else if (rs_tcp->t_sock) { + } else { + atomic_set(&conn->c_state, RDS_CONN_CONNECTING); + wait_event(conn->c_waitq, + !test_bit(RDS_IN_XMIT, &conn->c_flags)); rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp); conn->c_outgoing = 0; } -- cgit v1.2.3 From 6a5536810180bb2c739df890a36cfa29da1914eb Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 18 May 2016 10:44:47 -0700 Subject: ip6_gre: Do not allow segmentation offloads GRE_CSUM is enabled with FOU/GUE This patch addresses the same issue we had for IPv4 where enabling GRE with an inner checksum cannot be supported with FOU/GUE due to the fact that they will jump past the GRE header as it is treated like a tunnel header. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 6fb1b89d0178..af503f518278 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1355,11 +1355,15 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, dev->hw_features |= GRE6_FEATURES; if (!(nt->parms.o_flags & TUNNEL_SEQ)) { - /* TCP segmentation offload is not supported when we - * generate output sequences. + /* TCP offload with GRE SEQ is not supported, nor + * can we support 2 levels of outer headers requiring + * an update. 
*/ - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; + if (!(nt->parms.o_flags & TUNNEL_CSUM) || + (nt->encap.type == TUNNEL_ENCAP_NONE)) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } /* Can use a lockless transmit, unless we generate * output sequences -- cgit v1.2.3 From e5aed006be918af163eb397e45aa5ea6cefd5e01 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 19 May 2016 15:58:33 +0200 Subject: udp: prevent skbs lingering in tunnel socket queues In case we find a socket with encapsulation enabled we should call the encap_recv function even if just a udp header without payload is available. The callbacks are responsible for correctly verifying and dropping the packets. Also, in case the header validation fails for geneve and vxlan we shouldn't put the skb back into the socket queue, no one will pick them up there. Instead we can simply discard them in the respective encap_recv functions. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- drivers/net/geneve.c | 10 +++------- drivers/net/vxlan.c | 4 ++-- net/ipv4/udp.c | 2 +- net/ipv6/udp.c | 2 +- 4 files changed, 7 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index a6dc11ce497f..cadefe4fdaa2 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -335,15 +335,15 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) /* Need Geneve and inner Ethernet header to be present */ if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) - goto error; + goto drop; /* Return packets with reserved bits set */ geneveh = geneve_hdr(skb); if (unlikely(geneveh->ver != GENEVE_VER)) - goto error; + goto drop; if (unlikely(geneveh->proto_type != htons(ETH_P_TEB))) - goto error; + goto drop; gs = rcu_dereference_sk_user_data(sk); if (!gs) @@ -366,10 +366,6 @@ drop: /* Consume bad packet */ kfree_skb(skb); return 0; - -error: - /* Let the UDP layer deal with the skb */ - return 1; } static struct socket *geneve_create_sock(struct net *net, bool ipv6, diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 25ab6bf013c4..8ff30c3bdfce 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1304,7 +1304,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) /* Need UDP and VXLAN header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) - return 1; + goto drop; unparsed = *vxlan_hdr(skb); /* VNI flag always required to be set */ @@ -1313,7 +1313,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) ntohl(vxlan_hdr(skb)->vx_flags), ntohl(vxlan_hdr(skb)->vx_vni)); /* Return non vxlan pkt */ - return 1; + goto drop; } unparsed.vx_flags &= ~VXLAN_HF_VNI; unparsed.vx_vni &= ~VXLAN_VNI_MASK; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2e3ebfe5549e..d56c0559b477 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1565,7 +1565,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* if we're overly short, let UDP handle it */ encap_rcv 
= ACCESS_ONCE(up->encap_rcv); - if (skb->len > sizeof(struct udphdr) && encap_rcv) { + if (encap_rcv) { int ret; /* Verify checksum before giving to encap */ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2ba6a77a8815..2da1896af934 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -617,7 +617,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* if we're overly short, let UDP handle it */ encap_rcv = ACCESS_ONCE(up->encap_rcv); - if (skb->len > sizeof(struct udphdr) && encap_rcv) { + if (encap_rcv) { int ret; /* Verify checksum before giving to encap */ -- cgit v1.2.3