diff options
author | David S. Miller | 2017-02-09 17:24:30 -0500 |
---|---|---|
committer | David S. Miller | 2017-02-09 17:24:30 -0500 |
commit | b66a8043d0baa9818a407ed857eada5cb21cd9bb (patch) | |
tree | a6ed185caf60e4db08ef708993c6a4eaac1129b9 /drivers | |
parent | c0e4dadb3494bd0bf4fdeac341509deac8f4b47c (diff) | |
parent | 9c744d10870c5d6f36264be39bf5de87447f1052 (diff) |
Merge branch 'enic-vxlan-offload'
Govindarajulu Varadarajan says:
====================
enic: add vxlan offload support
This series adds vxlan offload support for enic driver. The first
patch adds vxlan devcmd for configuring vxland offload parameters.
Second patch adds ndo_udp_tunnel_add/del and offload on rx path.
There are to modes in which fw supports vxlan offload.
mode 0: fcoe bit is set for encapsulated packet. fcoe_fc_crc_ok is set
if checksum of csum is ok. This bit is or of ip_csum_ok and
tcp_udp_csum_ok
mode 2: BIT(0) in rss_hash is set if it is encapsulated packet.
BIT(1) is set if outer_ip_csum_ok/
BIT(2) is set if outer_tcp_csum_ok
Some hw supports only mode 0, some support mode 0 and 2. Driver gets
the supported modes bitmap using get_supported_feature_ver devcmd
and selects the highest mode both driver and fw supports.
Third patch adds offload support on tx path by adding
enic_features_check().
v2: Order local variable declarations from longest to shortest line,
on all three patches.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/net/ethernet/cisco/enic/enic.h | 6 | ||||
-rw-r--r-- | drivers/net/ethernet/cisco/enic/enic_main.c | 282 | ||||
-rw-r--r-- | drivers/net/ethernet/cisco/enic/vnic_dev.c | 34 | ||||
-rw-r--r-- | drivers/net/ethernet/cisco/enic/vnic_dev.h | 5 | ||||
-rw-r--r-- | drivers/net/ethernet/cisco/enic/vnic_devcmd.h | 51 | ||||
-rw-r--r-- | drivers/net/ethernet/cisco/enic/vnic_enet.h | 1 |
6 files changed, 364 insertions, 15 deletions
diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 9023c858715d..2b23f46b34d3 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -135,6 +135,11 @@ struct enic_rfs_flw_tbl { struct timer_list rfs_may_expire; }; +struct vxlan_offload { + u16 vxlan_udp_port_number; + u8 patch_level; +}; + /* Per-instance private data structure */ struct enic { struct net_device *netdev; @@ -175,6 +180,7 @@ struct enic { /* receive queue cache line section */ ____cacheline_aligned struct vnic_rq rq[ENIC_RQ_MAX]; unsigned int rq_count; + struct vxlan_offload vxlan; u64 rq_truncated_pkts; u64 rq_bad_fcs; struct napi_struct napi[ENIC_RQ_MAX + ENIC_WQ_MAX]; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index c009f6ddabf7..4b87beeabce1 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -45,6 +45,7 @@ #endif #include <linux/crash_dump.h> #include <net/busy_poll.h> +#include <net/vxlan.h> #include "cq_enet_desc.h" #include "vnic_dev.h" @@ -176,6 +177,134 @@ static void enic_unset_affinity_hint(struct enic *enic) irq_set_affinity_hint(enic->msix_entry[i].vector, NULL); } +static void enic_udp_tunnel_add(struct net_device *netdev, + struct udp_tunnel_info *ti) +{ + struct enic *enic = netdev_priv(netdev); + __be16 port = ti->port; + int err; + + spin_lock_bh(&enic->devcmd_lock); + + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) { + netdev_info(netdev, "udp_tnl: only vxlan tunnel offload supported"); + goto error; + } + + if (ti->sa_family != AF_INET) { + netdev_info(netdev, "vxlan: only IPv4 offload supported"); + goto error; + } + + if (enic->vxlan.vxlan_udp_port_number) { + if (ntohs(port) == enic->vxlan.vxlan_udp_port_number) + netdev_warn(netdev, "vxlan: udp port already offloaded"); + else + netdev_info(netdev, "vxlan: offload supported for only one UDP port"); + + goto error; + } + + err = vnic_dev_overlay_offload_cfg(enic->vdev, + OVERLAY_CFG_VXLAN_PORT_UPDATE, + ntohs(port)); + if (err) + goto error; + + err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN, + enic->vxlan.patch_level); + if (err) + goto error; + + enic->vxlan.vxlan_udp_port_number = ntohs(port); + + netdev_info(netdev, "vxlan fw-vers-%d: offload enabled for udp port: %d, sa_family: %d ", + (int)enic->vxlan.patch_level, ntohs(port), ti->sa_family); + + goto unlock; + +error: + netdev_info(netdev, "failed to offload udp port: %d, sa_family: %d, type: %d", + ntohs(port), ti->sa_family, ti->type); +unlock: + spin_unlock_bh(&enic->devcmd_lock); +} + +static void enic_udp_tunnel_del(struct net_device *netdev, + struct udp_tunnel_info *ti) +{ + struct enic *enic = netdev_priv(netdev); + int err; + + spin_lock_bh(&enic->devcmd_lock); + + if ((ti->sa_family != AF_INET) || + ((ntohs(ti->port) != enic->vxlan.vxlan_udp_port_number)) || + (ti->type != UDP_TUNNEL_TYPE_VXLAN)) { + netdev_info(netdev, "udp_tnl: port:%d, sa_family: %d, type: %d not offloaded", + ntohs(ti->port), ti->sa_family, ti->type); + goto unlock; + } + + err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN, + OVERLAY_OFFLOAD_DISABLE); + if (err) { + netdev_err(netdev, "vxlan: del offload udp port: %d failed", + ntohs(ti->port)); + goto unlock; + } + + enic->vxlan.vxlan_udp_port_number = 0; + + netdev_info(netdev, "vxlan: del offload udp port %d, family %d\n", + ntohs(ti->port), ti->sa_family); + +unlock: + spin_unlock_bh(&enic->devcmd_lock); +} + +static netdev_features_t enic_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb); + struct enic *enic = netdev_priv(dev); + struct udphdr *udph; + u16 port = 0; + u16 proto; + + if (!skb->encapsulation) + return features; + + features = vxlan_features_check(skb, features); + + /* hardware only supports IPv4 vxlan tunnel */ + if (vlan_get_protocol(skb) != htons(ETH_P_IP)) + goto out; + + /* hardware does not support offload of ipv6 inner pkt */ + if (eth->h_proto != ntohs(ETH_P_IP)) + goto out; + + proto = ip_hdr(skb)->protocol; + + if (proto == IPPROTO_UDP) { + udph = udp_hdr(skb); + port = be16_to_cpu(udph->dest); + } + + /* HW supports offload of only one UDP port. Remove CSUM and GSO MASK + * for other UDP port tunnels + */ + if (port != enic->vxlan.vxlan_udp_port_number) + goto out; + + return features; + +out: + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); +} + int enic_is_dynamic(struct enic *enic) { return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN; @@ -504,20 +633,19 @@ static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq, return err; } -static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq, - struct sk_buff *skb, unsigned int mss, - int vlan_tag_insert, unsigned int vlan_tag, - int loopback) +static void enic_preload_tcp_csum_encap(struct sk_buff *skb) { - unsigned int frag_len_left = skb_headlen(skb); - unsigned int len_left = skb->len - frag_len_left; - unsigned int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); - int eop = (len_left == 0); - unsigned int len; - dma_addr_t dma_addr; - unsigned int offset = 0; - skb_frag_t *frag; + if (skb->protocol == cpu_to_be16(ETH_P_IP)) { + inner_ip_hdr(skb)->check = 0; + inner_tcp_hdr(skb)->check = + ~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr, + inner_ip_hdr(skb)->daddr, 0, + IPPROTO_TCP, 0); + } +} +static void enic_preload_tcp_csum(struct sk_buff *skb) +{ /* Preload TCP csum field with IP pseudo hdr calculated * with IP length set to zero. HW will later add in length * to each TCP segment resulting from the TSO. @@ -531,6 +659,30 @@ static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq, tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); } +} + +static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq, + struct sk_buff *skb, unsigned int mss, + int vlan_tag_insert, unsigned int vlan_tag, + int loopback) +{ + unsigned int frag_len_left = skb_headlen(skb); + unsigned int len_left = skb->len - frag_len_left; + int eop = (len_left == 0); + unsigned int offset = 0; + unsigned int hdr_len; + dma_addr_t dma_addr; + unsigned int len; + skb_frag_t *frag; + + if (skb->encapsulation) { + hdr_len = skb_inner_transport_header(skb) - skb->data; + hdr_len += inner_tcp_hdrlen(skb); + enic_preload_tcp_csum_encap(skb); + } else { + hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + enic_preload_tcp_csum(skb); + } /* Queue WQ_ENET_MAX_DESC_LEN length descriptors * for the main skb fragment @@ -579,6 +731,38 @@ static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq, return 0; } +static inline int enic_queue_wq_skb_encap(struct enic *enic, struct vnic_wq *wq, + struct sk_buff *skb, + int vlan_tag_insert, + unsigned int vlan_tag, int loopback) +{ + unsigned int head_len = skb_headlen(skb); + unsigned int len_left = skb->len - head_len; + /* Hardware will overwrite the checksum fields, calculating from + * scratch and ignoring the value placed by software. + * Offload mode = 00 + * mss[2], mss[1], mss[0] bits are set + */ + unsigned int mss_or_csum = 7; + int eop = (len_left == 0); + dma_addr_t dma_addr; + int err = 0; + + dma_addr = pci_map_single(enic->pdev, skb->data, head_len, + PCI_DMA_TODEVICE); + if (unlikely(enic_dma_map_check(enic, dma_addr))) + return -ENOMEM; + + enic_queue_wq_desc_ex(wq, skb, dma_addr, head_len, mss_or_csum, 0, + vlan_tag_insert, vlan_tag, + WQ_ENET_OFFLOAD_MODE_CSUM, eop, 1 /* SOP */, eop, + loopback); + if (!eop) + err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback); + + return err; +} + static inline void enic_queue_wq_skb(struct enic *enic, struct vnic_wq *wq, struct sk_buff *skb) { @@ -601,6 +785,9 @@ static inline void enic_queue_wq_skb(struct enic *enic, err = enic_queue_wq_skb_tso(enic, wq, skb, mss, vlan_tag_insert, vlan_tag, loopback); + else if (skb->encapsulation) + err = enic_queue_wq_skb_encap(enic, wq, skb, vlan_tag_insert, + vlan_tag, loopback); else if (skb->ip_summed == CHECKSUM_PARTIAL) err = enic_queue_wq_skb_csum_l4(enic, wq, skb, vlan_tag_insert, vlan_tag, loopback); @@ -1113,6 +1300,7 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, u8 packet_error; u16 q_number, completed_index, bytes_written, vlan_tci, checksum; u32 rss_hash; + bool outer_csum_ok = true, encap = false; if (skipped) return; @@ -1161,7 +1349,8 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, skb_put(skb, bytes_written); skb->protocol = eth_type_trans(skb, netdev); skb_record_rx_queue(skb, q_number); - if (netdev->features & NETIF_F_RXHASH) { + if ((netdev->features & NETIF_F_RXHASH) && rss_hash && + (type == 3)) { switch (rss_type) { case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4: case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6: @@ -1175,15 +1364,39 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, break; } } + if (enic->vxlan.vxlan_udp_port_number) { + switch (enic->vxlan.patch_level) { + case 0: + if (fcoe) { + encap = true; + outer_csum_ok = fcoe_fc_crc_ok; + } + break; + case 2: + if ((type == 7) && + (rss_hash & BIT(0))) { + encap = true; + outer_csum_ok = (rss_hash & BIT(1)) && + (rss_hash & BIT(2)); + } + break; + } + } /* Hardware does not provide whole packet checksum. It only * provides pseudo checksum. Since hw validates the packet * checksum but not provide us the checksum value. use * CHECSUM_UNNECESSARY. + * + * In case of encap pkt tcp_udp_csum_ok/tcp_udp_csum_ok is + * inner csum_ok. outer_csum_ok is set by hw when outer udp + * csum is correct or is zero. */ - if ((netdev->features & NETIF_F_RXCSUM) && tcp_udp_csum_ok && - ipv4_csum_ok) + if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc && + tcp_udp_csum_ok && ipv4_csum_ok && outer_csum_ok) { skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = encap; + } if (vlan_stripped) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); @@ -2285,6 +2498,9 @@ static const struct net_device_ops enic_netdev_dynamic_ops = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = enic_rx_flow_steer, #endif + .ndo_udp_tunnel_add = enic_udp_tunnel_add, + .ndo_udp_tunnel_del = enic_udp_tunnel_del, + .ndo_features_check = enic_features_check, }; static const struct net_device_ops enic_netdev_ops = { @@ -2308,6 +2524,9 @@ static const struct net_device_ops enic_netdev_ops = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = enic_rx_flow_steer, #endif + .ndo_udp_tunnel_add = enic_udp_tunnel_add, + .ndo_udp_tunnel_del = enic_udp_tunnel_del, + .ndo_features_check = enic_features_check, }; static void enic_dev_deinit(struct enic *enic) @@ -2683,6 +2902,39 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_RXHASH; if (ENIC_SETTING(enic, RXCSUM)) netdev->hw_features |= NETIF_F_RXCSUM; + if (ENIC_SETTING(enic, VXLAN)) { + u64 patch_level; + + netdev->hw_enc_features |= NETIF_F_RXCSUM | + NETIF_F_TSO | + NETIF_F_TSO_ECN | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_HW_CSUM | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_features |= netdev->hw_enc_features; + /* get bit mask from hw about supported offload bit level + * BIT(0) = fw supports patch_level 0 + * fcoe bit = encap + * fcoe_fc_crc_ok = outer csum ok + * BIT(1) = always set by fw + * BIT(2) = fw supports patch_level 2 + * BIT(0) in rss_hash = encap + * BIT(1,2) in rss_hash = outer_ip_csum_ok/ + * outer_tcp_csum_ok + * used in enic_rq_indicate_buf + */ + err = vnic_dev_get_supported_feature_ver(enic->vdev, + VIC_FEATURE_VXLAN, + &patch_level); + if (err) + patch_level = 0; + /* mask bits that are supported by driver + */ + patch_level &= BIT_ULL(0) | BIT_ULL(2); + patch_level = fls(patch_level); + patch_level = patch_level ? patch_level - 1 : 0; + enic->vxlan.patch_level = patch_level; + } netdev->features |= netdev->hw_features; netdev->vlan_features |= netdev->features; diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c index 8f27df3207bc..1841ad45d215 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_dev.c +++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c @@ -1247,3 +1247,37 @@ int vnic_dev_classifier(struct vnic_dev *vdev, u8 cmd, u16 *entry, return ret; } + +int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev, u8 overlay, u8 config) +{ + u64 a0 = overlay; + u64 a1 = config; + int wait = 1000; + + return vnic_dev_cmd(vdev, CMD_OVERLAY_OFFLOAD_CTRL, &a0, &a1, wait); +} + +int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay, + u16 vxlan_udp_port_number) +{ + u64 a1 = vxlan_udp_port_number; + u64 a0 = overlay; + int wait = 1000; + + return vnic_dev_cmd(vdev, CMD_OVERLAY_OFFLOAD_CFG, &a0, &a1, wait); +} + +int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature, + u64 *supported_versions) +{ + u64 a0 = feature; + int wait = 1000; + u64 a1 = 0; + int ret; + + ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, &a1, wait); + if (!ret) + *supported_versions = a0; + + return ret; +} diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.h b/drivers/net/ethernet/cisco/enic/vnic_dev.h index 54156c484424..9d43d6bb9907 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_dev.h +++ b/drivers/net/ethernet/cisco/enic/vnic_dev.h @@ -179,5 +179,10 @@ int vnic_dev_set_mac_addr(struct vnic_dev *vdev, u8 *mac_addr); int vnic_dev_classifier(struct vnic_dev *vdev, u8 cmd, u16 *entry, struct filter *data); int vnic_devcmd_init(struct vnic_dev *vdev); +int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev, u8 overlay, u8 config); +int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay, + u16 vxlan_udp_port_number); +int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature, + u64 *supported_versions); #endif /* _VNIC_DEV_H_ */ diff --git a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h index 2a812880b884..d83880b0d468 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h +++ b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h @@ -406,6 +406,31 @@ enum vnic_devcmd_cmd { * in: (u32) a0=Queue Pair number */ CMD_QP_STATS_CLEAR = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 63), + + /* Use this devcmd for agreeing on the highest common version supported + * by both driver and fw for features who need such a facility. + * in: (u64) a0 = feature (driver requests for the supported versions + * on this feature) + * out: (u64) a0 = bitmap of all supported versions for that feature + */ + CMD_GET_SUPP_FEATURE_VER = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 69), + + /* Control (Enable/Disable) overlay offloads on the given vnic + * in: (u8) a0 = OVERLAY_FEATURE_NVGRE : NVGRE + * a0 = OVERLAY_FEATURE_VXLAN : VxLAN + * in: (u8) a1 = OVERLAY_OFFLOAD_ENABLE : Enable or + * a1 = OVERLAY_OFFLOAD_DISABLE : Disable or + * a1 = OVERLAY_OFFLOAD_ENABLE_V2 : Enable with version 2 + */ + CMD_OVERLAY_OFFLOAD_CTRL = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 72), + + /* Configuration of overlay offloads feature on a given vNIC + * in: (u8) a0 = DEVCMD_OVERLAY_NVGRE : NVGRE + * a0 = DEVCMD_OVERLAY_VXLAN : VxLAN + * in: (u8) a1 = VXLAN_PORT_UPDATE : VxLAN + * in: (u16) a2 = unsigned short int port information + */ + CMD_OVERLAY_OFFLOAD_CFG = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 73), }; /* CMD_ENABLE2 flags */ @@ -657,4 +682,30 @@ struct devcmd2_result { #define DEVCMD2_RING_SIZE 32 #define DEVCMD2_DESC_SIZE 128 +enum overlay_feature_t { + OVERLAY_FEATURE_NVGRE = 1, + OVERLAY_FEATURE_VXLAN, + OVERLAY_FEATURE_MAX, +}; + +enum overlay_ofld_cmd { + OVERLAY_OFFLOAD_ENABLE, + OVERLAY_OFFLOAD_DISABLE, + OVERLAY_OFFLOAD_ENABLE_P2, + OVERLAY_OFFLOAD_MAX, +}; + +#define OVERLAY_CFG_VXLAN_PORT_UPDATE 0 + +/* Use this enum to get the supported versions for each of these features + * If you need to use the devcmd_get_supported_feature_version(), add + * the new feature into this enum and install function handler in devcmd.c + */ +enum vic_feature_t { + VIC_FEATURE_VXLAN, + VIC_FEATURE_RDMA, + VIC_FEATURE_VXLAN_PATCH, + VIC_FEATURE_MAX, +}; + #endif /* _VNIC_DEVCMD_H_ */ diff --git a/drivers/net/ethernet/cisco/enic/vnic_enet.h b/drivers/net/ethernet/cisco/enic/vnic_enet.h index 75aced2de869..7d6fbb5635a4 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_enet.h +++ b/drivers/net/ethernet/cisco/enic/vnic_enet.h @@ -48,6 +48,7 @@ struct vnic_enet_config { #define VENETF_RSSHASH_IPV6_EX 0x200 /* Hash on IPv6 extended fields */ #define VENETF_RSSHASH_TCPIPV6_EX 0x400 /* Hash on TCP + IPv6 ext. fields */ #define VENETF_LOOP 0x800 /* Loopback enabled */ +#define VENETF_VXLAN 0x10000 /* VxLAN offload */ #define VENET_INTR_TYPE_MIN 0 /* Timer specs min interrupt spacing */ #define VENET_INTR_TYPE_IDLE 1 /* Timer specs idle time before irq */ |