aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/device_drivers/ethernet/index.rst2
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst746
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst1302
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst224
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/index.rst26
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/kconfig.rst168
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst239
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/tracepoints.rst229
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.c73
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c3
-rw-r--r--drivers/net/ethernet/microchip/sparx5/Makefile3
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main.c5
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main.h124
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h581
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_police.c53
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_pool.c81
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_psfp.c332
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c3
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_qos.c59
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_sdlb.c335
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c237
-rw-r--r--drivers/net/ethernet/microchip/vcap/vcap_api.c3
-rw-r--r--drivers/net/ethernet/microchip/vcap/vcap_api_client.h3
-rw-r--r--drivers/net/ethernet/wangxun/Kconfig1
-rw-r--r--drivers/net/ethernet/wangxun/libwx/Makefile2
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.c675
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.h5
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_lib.c2004
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_lib.h32
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_type.h314
-rw-r--r--drivers/net/ethernet/wangxun/ngbe/ngbe_main.c249
-rw-r--r--drivers/net/ethernet/wangxun/ngbe/ngbe_type.h18
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_main.c271
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_type.h21
-rw-r--r--drivers/net/virtio_net.c2
-rw-r--r--include/linux/mlx5/mlx5_ifc.h12
-rw-r--r--include/linux/poison.h3
-rw-r--r--include/linux/skbuff.h7
-rw-r--r--include/net/page_pool.h4
-rw-r--r--include/net/raw.h13
-rw-r--r--include/trace/events/bridge.h58
-rw-r--r--include/uapi/linux/if_bridge.h2
-rw-r--r--include/uapi/linux/if_link.h2
-rw-r--r--net/bridge/br_mdb.c17
-rw-r--r--net/bridge/br_multicast.c179
-rw-r--r--net/bridge/br_netlink.c19
-rw-r--r--net/bridge/br_netlink_tunnel.c3
-rw-r--r--net/bridge/br_private.h12
-rw-r--r--net/bridge/br_vlan.c11
-rw-r--r--net/bridge/br_vlan_options.c27
-rw-r--r--net/core/net-traces.c1
-rw-r--r--net/core/page_pool.c6
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/skbuff.c4
-rw-r--r--net/devlink/Makefile2
-rw-r--r--net/devlink/dev.c1343
-rw-r--r--net/devlink/devl_internal.h30
-rw-r--r--net/devlink/leftover.c1358
-rw-r--r--net/dsa/slave.c11
-rw-r--r--net/ipv4/raw.c21
-rw-r--r--net/ipv6/raw.c16
-rw-r--r--net/smc/af_smc.c25
-rw-r--r--net/smc/smc_core.c75
-rw-r--r--net/smc/smc_core.h6
-rw-r--r--net/smc/smc_llc.c34
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile1
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh60
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb_max.sh1336
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib.sh216
86 files changed, 11143 insertions, 2381 deletions
diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst
index 5196905582c5..392969ac88ad 100644
--- a/Documentation/networking/device_drivers/ethernet/index.rst
+++ b/Documentation/networking/device_drivers/ethernet/index.rst
@@ -39,7 +39,7 @@ Contents:
intel/ice
marvell/octeontx2
marvell/octeon_ep
- mellanox/mlx5
+ mellanox/mlx5/index
microsoft/netvsc
neterion/s2io
netronome/nfp
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
deleted file mode 100644
index 6969652f593c..000000000000
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
+++ /dev/null
@@ -1,746 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-
-=================================================
-Mellanox ConnectX(R) mlx5 core VPI Network Driver
-=================================================
-
-Copyright (c) 2019, Mellanox Technologies LTD.
-
-Contents
-========
-
-- `Enabling the driver and kconfig options`_
-- `Devlink info`_
-- `Devlink parameters`_
-- `Bridge offload`_
-- `mlx5 subfunction`_
-- `mlx5 function attributes`_
-- `Devlink health reporters`_
-- `mlx5 tracepoints`_
-
-Enabling the driver and kconfig options
-=======================================
-
-| mlx5 core is modular and most of the major mlx5 core driver features can be selected (compiled in/out)
-| at build time via kernel Kconfig flags.
-| Basic features, ethernet net device rx/tx offloads and XDP, are available with the most basic flags
-| CONFIG_MLX5_CORE=y/m and CONFIG_MLX5_CORE_EN=y.
-| For the list of advanced features, please see below.
-
-**CONFIG_MLX5_CORE=(y/m/n)** (module mlx5_core.ko)
-
-| The driver can be enabled by choosing CONFIG_MLX5_CORE=y/m in kernel config.
-| This will provide mlx5 core driver for mlx5 ulps to interface with (mlx5e, mlx5_ib).
-
-
-**CONFIG_MLX5_CORE_EN=(y/n)**
-
-| Choosing this option will allow basic ethernet netdevice support with all of the standard rx/tx offloads.
-| mlx5e is the mlx5 ulp driver which provides netdevice kernel interface, when chosen, mlx5e will be
-| built-in into mlx5_core.ko.
-
-
-**CONFIG_MLX5_EN_ARFS=(y/n)**
-
-| Enables Hardware-accelerated receive flow steering (arfs) support, and ntuple filtering.
-| https://community.mellanox.com/s/article/howto-configure-arfs-on-connectx-4
-
-
-**CONFIG_MLX5_EN_RXNFC=(y/n)**
-
-| Enables ethtool receive network flow classification, which allows user defined
-| flow rules to direct traffic into arbitrary rx queue via ethtool set/get_rxnfc API.
-
-
-**CONFIG_MLX5_CORE_EN_DCB=(y/n)**:
-
-| Enables `Data Center Bridging (DCB) Support <https://community.mellanox.com/s/article/howto-auto-config-pfc-and-ets-on-connectx-4-via-lldp-dcbx>`_.
-
-
-**CONFIG_MLX5_MPFS=(y/n)**
-
-| Ethernet Multi-Physical Function Switch (MPFS) support in ConnectX NIC.
-| MPFs is required for when `Multi-Host <http://www.mellanox.com/page/multihost>`_ configuration is enabled to allow passing
-| user configured unicast MAC addresses to the requesting PF.
-
-
-**CONFIG_MLX5_ESWITCH=(y/n)**
-
-| Ethernet SRIOV E-Switch support in ConnectX NIC. E-Switch provides internal SRIOV packet steering
-| and switching for the enabled VFs and PF in two available modes:
-| 1) `Legacy SRIOV mode (L2 mac vlan steering based) <https://community.mellanox.com/s/article/howto-configure-sr-iov-for-connectx-4-connectx-5-with-kvm--ethernet-x>`_.
-| 2) `Switchdev mode (eswitch offloads) <https://www.mellanox.com/related-docs/prod_software/ASAP2_Hardware_Offloading_for_vSwitches_User_Manual_v4.4.pdf>`_.
-
-
-**CONFIG_MLX5_CORE_IPOIB=(y/n)**
-
-| IPoIB offloads & acceleration support.
-| Requires CONFIG_MLX5_CORE_EN to provide an accelerated interface for the rdma
-| IPoIB ulp netdevice.
-
-
-**CONFIG_MLX5_FPGA=(y/n)**
-
-| Build support for the Innova family of network cards by Mellanox Technologies.
-| Innova network cards are comprised of a ConnectX chip and an FPGA chip on one board.
-| If you select this option, the mlx5_core driver will include the Innova FPGA core and allow
-| building sandbox-specific client drivers.
-
-
-**CONFIG_MLX5_EN_IPSEC=(y/n)**
-
-| Enables `IPSec XFRM cryptography-offload acceleration <http://www.mellanox.com/related-docs/prod_software/Mellanox_Innova_IPsec_Ethernet_Adapter_Card_User_Manual.pdf>`_.
-
-**CONFIG_MLX5_EN_TLS=(y/n)**
-
-| TLS cryptography-offload acceleration.
-
-
-**CONFIG_MLX5_INFINIBAND=(y/n/m)** (module mlx5_ib.ko)
-
-| Provides low-level InfiniBand/RDMA and `RoCE <https://community.mellanox.com/s/article/recommended-network-configuration-examples-for-roce-deployment>`_ support.
-
-**CONFIG_MLX5_SF=(y/n)**
-
-| Build support for subfunction.
-| Subfunctons are more light weight than PCI SRIOV VFs. Choosing this option
-| will enable support for creating subfunction devices.
-
-**External options** ( Choose if the corresponding mlx5 feature is required )
-
-- CONFIG_PTP_1588_CLOCK: When chosen, mlx5 ptp support will be enabled
-- CONFIG_VXLAN: When chosen, mlx5 vxlan support will be enabled.
-- CONFIG_MLXFW: When chosen, mlx5 firmware flashing support will be enabled (via devlink and ethtool).
-
-Devlink info
-============
-
-The devlink info reports the running and stored firmware versions on device.
-It also prints the device PSID which represents the HCA board type ID.
-
-User command example::
-
- $ devlink dev info pci/0000:00:06.0
- pci/0000:00:06.0:
- driver mlx5_core
- versions:
- fixed:
- fw.psid MT_0000000009
- running:
- fw.version 16.26.0100
- stored:
- fw.version 16.26.0100
-
-Devlink parameters
-==================
-
-flow_steering_mode: Device flow steering mode
----------------------------------------------
-The flow steering mode parameter controls the flow steering mode of the driver.
-Two modes are supported:
-1. 'dmfs' - Device managed flow steering.
-2. 'smfs' - Software/Driver managed flow steering.
-
-In DMFS mode, the HW steering entities are created and managed through the
-Firmware.
-In SMFS mode, the HW steering entities are created and managed though by
-the driver directly into hardware without firmware intervention.
-
-SMFS mode is faster and provides better rule insertion rate compared to default DMFS mode.
-
-User command examples:
-
-- Set SMFS flow steering mode::
-
- $ devlink dev param set pci/0000:06:00.0 name flow_steering_mode value "smfs" cmode runtime
-
-- Read device flow steering mode::
-
- $ devlink dev param show pci/0000:06:00.0 name flow_steering_mode
- pci/0000:06:00.0:
- name flow_steering_mode type driver-specific
- values:
- cmode runtime value smfs
-
-enable_roce: RoCE enablement state
-----------------------------------
-RoCE enablement state controls driver support for RoCE traffic.
-When RoCE is disabled, there is no gid table, only raw ethernet QPs are supported and traffic on the well-known UDP RoCE port is handled as raw ethernet traffic.
-
-To change RoCE enablement state, a user must change the driverinit cmode value and run devlink reload.
-
-User command examples:
-
-- Disable RoCE::
-
- $ devlink dev param set pci/0000:06:00.0 name enable_roce value false cmode driverinit
- $ devlink dev reload pci/0000:06:00.0
-
-- Read RoCE enablement state::
-
- $ devlink dev param show pci/0000:06:00.0 name enable_roce
- pci/0000:06:00.0:
- name enable_roce type generic
- values:
- cmode driverinit value true
-
-esw_port_metadata: Eswitch port metadata state
-----------------------------------------------
-When applicable, disabling eswitch metadata can increase packet rate
-up to 20% depending on the use case and packet sizes.
-
-Eswitch port metadata state controls whether to internally tag packets with
-metadata. Metadata tagging must be enabled for multi-port RoCE, failover
-between representors and stacked devices.
-By default metadata is enabled on the supported devices in E-switch.
-Metadata is applicable only for E-switch in switchdev mode and
-users may disable it when NONE of the below use cases will be in use:
-1. HCA is in Dual/multi-port RoCE mode.
-2. VF/SF representor bonding (Usually used for Live migration)
-3. Stacked devices
-
-When metadata is disabled, the above use cases will fail to initialize if
-users try to enable them.
-
-- Show eswitch port metadata::
-
- $ devlink dev param show pci/0000:06:00.0 name esw_port_metadata
- pci/0000:06:00.0:
- name esw_port_metadata type driver-specific
- values:
- cmode runtime value true
-
-- Disable eswitch port metadata::
-
- $ devlink dev param set pci/0000:06:00.0 name esw_port_metadata value false cmode runtime
-
-- Change eswitch mode to switchdev mode where after choosing the metadata value::
-
- $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev
-
-Bridge offload
-==============
-The mlx5 driver implements support for offloading bridge rules when in switchdev
-mode. Linux bridge FDBs are automatically offloaded when mlx5 switchdev
-representor is attached to bridge.
-
-- Change device to switchdev mode::
-
- $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev
-
-- Attach mlx5 switchdev representor 'enp8s0f0' to bridge netdev 'bridge1'::
-
- $ ip link set enp8s0f0 master bridge1
-
-VLANs
------
-Following bridge VLAN functions are supported by mlx5:
-
-- VLAN filtering (including multiple VLANs per port)::
-
- $ ip link set bridge1 type bridge vlan_filtering 1
- $ bridge vlan add dev enp8s0f0 vid 2-3
-
-- VLAN push on bridge ingress::
-
- $ bridge vlan add dev enp8s0f0 vid 3 pvid
-
-- VLAN pop on bridge egress::
-
- $ bridge vlan add dev enp8s0f0 vid 3 untagged
-
-mlx5 subfunction
-================
-mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst <devlink_port>`) interface.
-
-A subfunction has its own function capabilities and its own resources. This
-means a subfunction has its own dedicated queues (txq, rxq, cq, eq). These
-queues are neither shared nor stolen from the parent PCI function.
-
-When a subfunction is RDMA capable, it has its own QP1, GID table, and RDMA
-resources neither shared nor stolen from the parent PCI function.
-
-A subfunction has a dedicated window in PCI BAR space that is not shared
-with the other subfunctions or the parent PCI function. This ensures that all
-devices (netdev, rdma, vdpa, etc.) of the subfunction accesses only assigned
-PCI BAR space.
-
-A subfunction supports eswitch representation through which it supports tc
-offloads. The user configures eswitch to send/receive packets from/to
-the subfunction port.
-
-Subfunctions share PCI level resources such as PCI MSI-X IRQs with
-other subfunctions and/or with its parent PCI function.
-
-Example mlx5 software, system, and device view::
-
- _______
- | admin |
- | user |----------
- |_______| |
- | |
- ____|____ __|______ _________________
- | | | | | |
- | devlink | | tc tool | | user |
- | tool | |_________| | applications |
- |_________| | |_________________|
- | | | |
- | | | | Userspace
- +---------|-------------|-------------------|----------|--------------------+
- | | +----------+ +----------+ Kernel
- | | | netdev | | rdma dev |
- | | +----------+ +----------+
- (devlink port add/del | ^ ^
- port function set) | | |
- | | +---------------|
- _____|___ | | _______|_______
- | | | | | mlx5 class |
- | devlink | +------------+ | | drivers |
- | kernel | | rep netdev | | |(mlx5_core,ib) |
- |_________| +------------+ | |_______________|
- | | | ^
- (devlink ops) | | (probe/remove)
- _________|________ | | ____|________
- | subfunction | | +---------------+ | subfunction |
- | management driver|----- | subfunction |---| driver |
- | (mlx5_core) | | auxiliary dev | | (mlx5_core) |
- |__________________| +---------------+ |_____________|
- | ^
- (sf add/del, vhca events) |
- | (device add/del)
- _____|____ ____|________
- | | | subfunction |
- | PCI NIC |--- activate/deactivate events--->| host driver |
- |__________| | (mlx5_core) |
- |_____________|
-
-Subfunction is created using devlink port interface.
-
-- Change device to switchdev mode::
-
- $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev
-
-- Add a devlink port of subfunction flavour::
-
- $ devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88
- pci/0000:06:00.0/32768: type eth netdev eth6 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false
- function:
- hw_addr 00:00:00:00:00:00 state inactive opstate detached
-
-- Show a devlink port of the subfunction::
-
- $ devlink port show pci/0000:06:00.0/32768
- pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88
- function:
- hw_addr 00:00:00:00:00:00 state inactive opstate detached
-
-- Delete a devlink port of subfunction after use::
-
- $ devlink port del pci/0000:06:00.0/32768
-
-mlx5 function attributes
-========================
-The mlx5 driver provides a mechanism to setup PCI VF/SF function attributes in
-a unified way for SmartNIC and non-SmartNIC.
-
-This is supported only when the eswitch mode is set to switchdev. Port function
-configuration of the PCI VF/SF is supported through devlink eswitch port.
-
-Port function attributes should be set before PCI VF/SF is enumerated by the
-driver.
-
-MAC address setup
------------------
-mlx5 driver support devlink port function attr mechanism to setup MAC
-address. (refer to Documentation/networking/devlink/devlink-port.rst)
-
-RoCE capability setup
----------------------
-Not all mlx5 PCI devices/SFs require RoCE capability.
-
-When RoCE capability is disabled, it saves 1 Mbytes worth of system memory per
-PCI devices/SF.
-
-mlx5 driver support devlink port function attr mechanism to setup RoCE
-capability. (refer to Documentation/networking/devlink/devlink-port.rst)
-
-migratable capability setup
----------------------------
-User who wants mlx5 PCI VFs to be able to perform live migration need to
-explicitly enable the VF migratable capability.
-
-mlx5 driver support devlink port function attr mechanism to setup migratable
-capability. (refer to Documentation/networking/devlink/devlink-port.rst)
-
-SF state setup
---------------
-To use the SF, the user must activate the SF using the SF function state
-attribute.
-
-- Get the state of the SF identified by its unique devlink port index::
-
- $ devlink port show ens2f0npf0sf88
- pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false
- function:
- hw_addr 00:00:00:00:88:88 state inactive opstate detached
-
-- Activate the function and verify its state is active::
-
- $ devlink port function set ens2f0npf0sf88 state active
-
- $ devlink port show ens2f0npf0sf88
- pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false
- function:
- hw_addr 00:00:00:00:88:88 state active opstate detached
-
-Upon function activation, the PF driver instance gets the event from the device
-that a particular SF was activated. It's the cue to put the device on bus, probe
-it and instantiate the devlink instance and class specific auxiliary devices
-for it.
-
-- Show the auxiliary device and port of the subfunction::
-
- $ devlink dev show
- devlink dev show auxiliary/mlx5_core.sf.4
-
- $ devlink port show auxiliary/mlx5_core.sf.4/1
- auxiliary/mlx5_core.sf.4/1: type eth netdev p0sf88 flavour virtual port 0 splittable false
-
- $ rdma link show mlx5_0/1
- link mlx5_0/1 state ACTIVE physical_state LINK_UP netdev p0sf88
-
- $ rdma dev show
- 8: rocep6s0f1: node_type ca fw 16.29.0550 node_guid 248a:0703:00b3:d113 sys_image_guid 248a:0703:00b3:d112
- 13: mlx5_0: node_type ca fw 16.29.0550 node_guid 0000:00ff:fe00:8888 sys_image_guid 248a:0703:00b3:d112
-
-- Subfunction auxiliary device and class device hierarchy::
-
- mlx5_core.sf.4
- (subfunction auxiliary device)
- /\
- / \
- / \
- / \
- / \
- mlx5_core.eth.4 mlx5_core.rdma.4
- (sf eth aux dev) (sf rdma aux dev)
- | |
- | |
- p0sf88 mlx5_0
- (sf netdev) (sf rdma device)
-
-Additionally, the SF port also gets the event when the driver attaches to the
-auxiliary device of the subfunction. This results in changing the operational
-state of the function. This provides visibility to the user to decide when is it
-safe to delete the SF port for graceful termination of the subfunction.
-
-- Show the SF port operational state::
-
- $ devlink port show ens2f0npf0sf88
- pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false
- function:
- hw_addr 00:00:00:00:88:88 state active opstate attached
-
-Devlink health reporters
-========================
-
-tx reporter
------------
-The tx reporter is responsible for reporting and recovering of the following two error scenarios:
-
-- tx timeout
- Report on kernel tx timeout detection.
- Recover by searching lost interrupts.
-- tx error completion
- Report on error tx completion.
- Recover by flushing the tx queue and reset it.
-
-tx reporter also support on demand diagnose callback, on which it provides
-real time information of its send queues status.
-
-User commands examples:
-
-- Diagnose send queues status::
-
- $ devlink health diagnose pci/0000:82:00.0 reporter tx
-
-NOTE: This command has valid output only when interface is up, otherwise the command has empty output.
-
-- Show number of tx errors indicated, number of recover flows ended successfully,
- is autorecover enabled and graceful period from last recover::
-
- $ devlink health show pci/0000:82:00.0 reporter tx
-
-rx reporter
------------
-The rx reporter is responsible for reporting and recovering of the following two error scenarios:
-
-- rx queues' initialization (population) timeout
- Population of rx queues' descriptors on ring initialization is done
- in napi context via triggering an irq. In case of a failure to get
- the minimum amount of descriptors, a timeout would occur, and
- descriptors could be recovered by polling the EQ (Event Queue).
-- rx completions with errors (reported by HW on interrupt context)
- Report on rx completion error.
- Recover (if needed) by flushing the related queue and reset it.
-
-rx reporter also supports on demand diagnose callback, on which it
-provides real time information of its receive queues' status.
-
-- Diagnose rx queues' status and corresponding completion queue::
-
- $ devlink health diagnose pci/0000:82:00.0 reporter rx
-
-NOTE: This command has valid output only when interface is up. Otherwise, the command has empty output.
-
-- Show number of rx errors indicated, number of recover flows ended successfully,
- is autorecover enabled, and graceful period from last recover::
-
- $ devlink health show pci/0000:82:00.0 reporter rx
-
-fw reporter
------------
-The fw reporter implements `diagnose` and `dump` callbacks.
-It follows symptoms of fw error such as fw syndrome by triggering
-fw core dump and storing it into the dump buffer.
-The fw reporter diagnose command can be triggered any time by the user to check
-current fw status.
-
-User commands examples:
-
-- Check fw heath status::
-
- $ devlink health diagnose pci/0000:82:00.0 reporter fw
-
-- Read FW core dump if already stored or trigger new one::
-
- $ devlink health dump show pci/0000:82:00.0 reporter fw
-
-NOTE: This command can run only on the PF which has fw tracer ownership,
-running it on other PF or any VF will return "Operation not permitted".
-
-fw fatal reporter
------------------
-The fw fatal reporter implements `dump` and `recover` callbacks.
-It follows fatal errors indications by CR-space dump and recover flow.
-The CR-space dump uses vsc interface which is valid even if the FW command
-interface is not functional, which is the case in most FW fatal errors.
-The recover function runs recover flow which reloads the driver and triggers fw
-reset if needed.
-On firmware error, the health buffer is dumped into the dmesg. The log
-level is derived from the error's severity (given in health buffer).
-
-User commands examples:
-
-- Run fw recover flow manually::
-
- $ devlink health recover pci/0000:82:00.0 reporter fw_fatal
-
-- Read FW CR-space dump if already stored or trigger new one::
-
- $ devlink health dump show pci/0000:82:00.1 reporter fw_fatal
-
-NOTE: This command can run only on PF.
-
-mlx5 tracepoints
-================
-
-mlx5 driver provides internal tracepoints for tracking and debugging using
-kernel tracepoints interfaces (refer to Documentation/trace/ftrace.rst).
-
-For the list of support mlx5 events, check `/sys/kernel/debug/tracing/events/mlx5/`.
-
-tc and eswitch offloads tracepoints:
-
-- mlx5e_configure_flower: trace flower filter actions and cookies offloaded to mlx5::
-
- $ echo mlx5:mlx5e_configure_flower >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- tc-6535 [019] ...1 2672.404466: mlx5e_configure_flower: cookie=0000000067874a55 actions= REDIRECT
-
-- mlx5e_delete_flower: trace flower filter actions and cookies deleted from mlx5::
-
- $ echo mlx5:mlx5e_delete_flower >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- tc-6569 [010] .N.1 2686.379075: mlx5e_delete_flower: cookie=0000000067874a55 actions= NULL
-
-- mlx5e_stats_flower: trace flower stats request::
-
- $ echo mlx5:mlx5e_stats_flower >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- tc-6546 [010] ...1 2679.704889: mlx5e_stats_flower: cookie=0000000060eb3d6a bytes=0 packets=0 lastused=4295560217
-
-- mlx5e_tc_update_neigh_used_value: trace tunnel rule neigh update value offloaded to mlx5::
-
- $ echo mlx5:mlx5e_tc_update_neigh_used_value >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- kworker/u48:4-8806 [009] ...1 55117.882428: mlx5e_tc_update_neigh_used_value: netdev: ens1f0 IPv4: 1.1.1.10 IPv6: ::ffff:1.1.1.10 neigh_used=1
-
-- mlx5e_rep_neigh_update: trace neigh update tasks scheduled due to neigh state change events::
-
- $ echo mlx5:mlx5e_rep_neigh_update >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- kworker/u48:7-2221 [009] ...1 1475.387435: mlx5e_rep_neigh_update: netdev: ens1f0 MAC: 24:8a:07:9a:17:9a IPv4: 1.1.1.10 IPv6: ::ffff:1.1.1.10 neigh_connected=1
-
-Bridge offloads tracepoints:
-
-- mlx5_esw_bridge_fdb_entry_init: trace bridge FDB entry offloaded to mlx5::
-
- $ echo mlx5:mlx5_esw_bridge_fdb_entry_init >> set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- kworker/u20:9-2217 [003] ...1 318.582243: mlx5_esw_bridge_fdb_entry_init: net_device=enp8s0f0_0 addr=e4:fd:05:08:00:02 vid=0 flags=0 used=0
-
-- mlx5_esw_bridge_fdb_entry_cleanup: trace bridge FDB entry deleted from mlx5::
-
- $ echo mlx5:mlx5_esw_bridge_fdb_entry_cleanup >> set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- ip-2581 [005] ...1 318.629871: mlx5_esw_bridge_fdb_entry_cleanup: net_device=enp8s0f0_1 addr=e4:fd:05:08:00:03 vid=0 flags=0 used=16
-
-- mlx5_esw_bridge_fdb_entry_refresh: trace bridge FDB entry offload refreshed in
- mlx5::
-
- $ echo mlx5:mlx5_esw_bridge_fdb_entry_refresh >> set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- kworker/u20:8-3849 [003] ...1 466716: mlx5_esw_bridge_fdb_entry_refresh: net_device=enp8s0f0_0 addr=e4:fd:05:08:00:02 vid=3 flags=0 used=0
-
-- mlx5_esw_bridge_vlan_create: trace bridge VLAN object add on mlx5
- representor::
-
- $ echo mlx5:mlx5_esw_bridge_vlan_create >> set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- ip-2560 [007] ...1 318.460258: mlx5_esw_bridge_vlan_create: vid=1 flags=6
-
-- mlx5_esw_bridge_vlan_cleanup: trace bridge VLAN object delete from mlx5
- representor::
-
- $ echo mlx5:mlx5_esw_bridge_vlan_cleanup >> set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- bridge-2582 [007] ...1 318.653496: mlx5_esw_bridge_vlan_cleanup: vid=2 flags=8
-
-- mlx5_esw_bridge_vport_init: trace mlx5 vport assigned with bridge upper
- device::
-
- $ echo mlx5:mlx5_esw_bridge_vport_init >> set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- ip-2560 [007] ...1 318.458915: mlx5_esw_bridge_vport_init: vport_num=1
-
-- mlx5_esw_bridge_vport_cleanup: trace mlx5 vport removed from bridge upper
- device::
-
- $ echo mlx5:mlx5_esw_bridge_vport_cleanup >> set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- ip-5387 [000] ...1 573713: mlx5_esw_bridge_vport_cleanup: vport_num=1
-
-Eswitch QoS tracepoints:
-
-- mlx5_esw_vport_qos_create: trace creation of transmit scheduler arbiter for vport::
-
- $ echo mlx5:mlx5_esw_vport_qos_create >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- <...>-23496 [018] .... 73136.838831: mlx5_esw_vport_qos_create: (0000:82:00.0) vport=2 tsar_ix=4 bw_share=0, max_rate=0 group=000000007b576bb3
-
-- mlx5_esw_vport_qos_config: trace configuration of transmit scheduler arbiter for vport::
-
- $ echo mlx5:mlx5_esw_vport_qos_config >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- <...>-26548 [023] .... 75754.223823: mlx5_esw_vport_qos_config: (0000:82:00.0) vport=1 tsar_ix=3 bw_share=34, max_rate=10000 group=000000007b576bb3
-
-- mlx5_esw_vport_qos_destroy: trace deletion of transmit scheduler arbiter for vport::
-
- $ echo mlx5:mlx5_esw_vport_qos_destroy >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- <...>-27418 [004] .... 76546.680901: mlx5_esw_vport_qos_destroy: (0000:82:00.0) vport=1 tsar_ix=3
-
-- mlx5_esw_group_qos_create: trace creation of transmit scheduler arbiter for rate group::
-
- $ echo mlx5:mlx5_esw_group_qos_create >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- <...>-26578 [008] .... 75776.022112: mlx5_esw_group_qos_create: (0000:82:00.0) group=000000008dac63ea tsar_ix=5
-
-- mlx5_esw_group_qos_config: trace configuration of transmit scheduler arbiter for rate group::
-
- $ echo mlx5:mlx5_esw_group_qos_config >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- <...>-27303 [020] .... 76461.455356: mlx5_esw_group_qos_config: (0000:82:00.0) group=000000008dac63ea tsar_ix=5 bw_share=100 max_rate=20000
-
-- mlx5_esw_group_qos_destroy: trace deletion of transmit scheduler arbiter for group::
-
- $ echo mlx5:mlx5_esw_group_qos_destroy >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- <...>-27418 [006] .... 76547.187258: mlx5_esw_group_qos_destroy: (0000:82:00.0) group=000000007b576bb3 tsar_ix=1
-
-SF tracepoints:
-
-- mlx5_sf_add: trace addition of the SF port::
-
- $ echo mlx5:mlx5_sf_add >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- devlink-9363 [031] ..... 24610.188722: mlx5_sf_add: (0000:06:00.0) port_index=32768 controller=0 hw_id=0x8000 sfnum=88
-
-- mlx5_sf_free: trace freeing of the SF port::
-
- $ echo mlx5:mlx5_sf_free >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- devlink-9830 [038] ..... 26300.404749: mlx5_sf_free: (0000:06:00.0) port_index=32768 controller=0 hw_id=0x8000
-
-- mlx5_sf_hwc_alloc: trace allocating of the hardware SF context::
-
- $ echo mlx5:mlx5_sf_hwc_alloc >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- devlink-9775 [031] ..... 26296.385259: mlx5_sf_hwc_alloc: (0000:06:00.0) controller=0 hw_id=0x8000 sfnum=88
-
-- mlx5_sf_hwc_free: trace freeing of the hardware SF context::
-
- $ echo mlx5:mlx5_sf_hwc_free >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- kworker/u128:3-9093 [046] ..... 24625.365771: mlx5_sf_hwc_free: (0000:06:00.0) hw_id=0x8000
-
-- mlx5_sf_hwc_deferred_free : trace deferred freeing of the hardware SF context::
-
- $ echo mlx5:mlx5_sf_hwc_deferred_free >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- devlink-9519 [046] ..... 24624.400271: mlx5_sf_hwc_deferred_free: (0000:06:00.0) hw_id=0x8000
-
-- mlx5_sf_vhca_event: trace SF vhca event and state::
-
- $ echo mlx5:mlx5_sf_vhca_event >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- kworker/u128:3-9093 [046] ..... 24625.365525: mlx5_sf_vhca_event: (0000:06:00.0) hw_id=0x8000 sfnum=88 vhca_state=1
-
-- mlx5_sf_dev_add : trace SF device add event::
-
- $ echo mlx5:mlx5_sf_dev_add>> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- kworker/u128:3-9093 [000] ..... 24616.524495: mlx5_sf_dev_add: (0000:06:00.0) sfdev=00000000fc5d96fd aux_id=4 hw_id=0x8000 sfnum=88
-
-- mlx5_sf_dev_del : trace SF device delete event::
-
- $ echo mlx5:mlx5_sf_dev_del >> /sys/kernel/debug/tracing/set_event
- $ cat /sys/kernel/debug/tracing/trace
- ...
- kworker/u128:3-9093 [044] ..... 24624.400749: mlx5_sf_dev_del: (0000:06:00.0) sfdev=00000000fc5d96fd aux_id=4 hw_id=0x8000 sfnum=88
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
new file mode 100644
index 000000000000..4cd8e869762b
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
@@ -0,0 +1,1302 @@
+.. SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+.. include:: <isonum.txt>
+
+================
+Ethtool counters
+================
+
+:Copyright: |copy| 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+Contents
+========
+
+- `Overview`_
+- `Groups`_
+- `Types`_
+- `Descriptions`_
+
+Overview
+========
+
+There are several counter groups based on where the counter is being counted. In
+addition, each group of counters may have different counter types.
+
+These counter groups are based on which component in a networking setup,
+illustrated below, that they describe::
+
+ ----------------------------------------
+ | |
+ ---------------------------------------- ---------------------------------------- |
+ | Hypervisor | | VM | |
+ | | | | |
+ | ------------------- --------------- | | ------------------- --------------- | |
+ | | Ethernet driver | | RDMA driver | | | | Ethernet driver | | RDMA driver | | |
+ | ------------------- --------------- | | ------------------- --------------- | |
+ | | | | | | | | |
+ | ------------------- | | ------------------- | |
+ | | | | | |--
+ ---------------------------------------- ----------------------------------------
+ | |
+ ------------- -----------------------------
+ | |
+ ------ ------ ------ ------ ------ ------ ------
+ -----| PF |----------------------| VF |-| VF |-| VF |----- --| PF |--- --| PF |--- --| PF |---
+ | ------ ------ ------ ------ | | ------ | | ------ | | ------ |
+ | | | | | | | |
+ | | | | | | | |
+ | | | | | | | |
+ | eSwitch | | eSwitch | | eSwitch | | eSwitch |
+ ---------------------------------------------------------- ----------- ----------- -----------
+ -------------------------------------------------------------------------------
+ | |
+ | |
+ | Uplink (no counters) |
+ -------------------------------------------------------------------------------
+ ---------------------------------------------------------------
+ | |
+ | |
+ | MPFS (no counters) |
+ ---------------------------------------------------------------
+ |
+ |
+ | Port
+
+Groups
+======
+
+Ring
+ Software counters populated by the driver stack.
+
+Netdev
+ An aggregation of software ring counters.
+
+vPort counters
+ Traffic counters and drops due to steering or no buffers. May indicate issues
+ with NIC. These counters include Ethernet traffic counters (including Raw
+ Ethernet) and RDMA/RoCE traffic counters.
+
+Physical port counters
+ Counters that collect statistics about the PFs and VFs. May indicate issues
+ with NIC, link, or network. This measuring point holds information on
+ standardized counters like IEEE 802.3, RFC2863, RFC 2819, RFC 3635 and
+ additional counters like flow control, FEC and more. Physical port counters
+ are not exposed to virtual machines.
+
+Priority Port Counters
+ A set of the physical port counters, per priority per port.
+
+Types
+=====
+
+Counters are divided into three types.
+
+Traffic Informative Counters
+ Counters which count traffic. These counters can be used for load estimation
+ or for general debug.
+
+Traffic Acceleration Counters
+ Counters which count traffic that was accelerated by Mellanox driver or by
+ hardware. The counters are an additional layer to the informative counter set,
+ and the same traffic is counted in both informative and acceleration counters.
+
+.. [#accel] Traffic acceleration counter.
+
+Error Counters
+ Increment of these counters might indicate a problem. Each of these counters
+ has an explanation and correction action.
+
+Statistic can be fetched via the `ip link` or `ethtool` commands. `ethtool`
+provides more detailed information.::
+
+ ip –s link show <if-name>
+ ethtool -S <if-name>
+
+Descriptions
+============
+
+XSK, PTP, and QoS counters that are similar to counters defined previously will
+not be separately listed. For example, `ptp_tx[i]_packets` will not be
+explicitly documented since `tx[i]_packets` describes the behavior of both
+counters, except `ptp_tx[i]_packets` is only counted when precision time
+protocol is used.
+
+Ring / Netdev Counter
+----------------------------
+The following counters are available per ring or software port.
+
+These counters provide information on the amount of traffic that was accelerated
+by the NIC. The counters are counting the accelerated traffic in addition to the
+standard counters which counts it (i.e. accelerated traffic is counted twice).
+
+The counter names in the table below refers to both ring and port counters. The
+notation for ring counters includes the [i] index without the braces. The
+notation for port counters doesn't include the [i]. A counter name
+`rx[i]_packets` will be printed as `rx0_packets` for ring 0 and `rx_packets` for
+the software port.
+
+.. flat-table:: Ring / Software Port Counter Table
+ :widths: 2 3 1
+
+ * - Counter
+ - Description
+ - Type
+
+ * - `rx[i]_packets`
+ - The number of packets received on ring i.
+ - Informative
+
+ * - `rx[i]_bytes`
+ - The number of bytes received on ring i.
+ - Informative
+
+ * - `tx[i]_packets`
+ - The number of packets transmitted on ring i.
+ - Informative
+
+ * - `tx[i]_bytes`
+ - The number of bytes transmitted on ring i.
+ - Informative
+
+ * - `tx[i]_recover`
+ - The number of times the SQ was recovered.
+ - Error
+
+ * - `tx[i]_cqes`
+ - Number of CQEs events on SQ issued on ring i.
+ - Informative
+
+ * - `tx[i]_cqe_err`
+ - The number of error CQEs encountered on the SQ for ring i.
+ - Error
+
+ * - `tx[i]_tso_packets`
+ - The number of TSO packets transmitted on ring i [#accel]_.
+ - Acceleration
+
+ * - `tx[i]_tso_bytes`
+ - The number of TSO bytes transmitted on ring i [#accel]_.
+ - Acceleration
+
+ * - `tx[i]_tso_inner_packets`
+ - The number of TSO packets which are indicated to be carry internal
+ encapsulation transmitted on ring i [#accel]_.
+ - Acceleration
+
+ * - `tx[i]_tso_inner_bytes`
+ - The number of TSO bytes which are indicated to be carry internal
+ encapsulation transmitted on ring i [#accel]_.
+ - Acceleration
+
+ * - `rx[i]_gro_packets`
+ - Number of received packets processed using hardware-accelerated GRO. The
+ number of hardware GRO offloaded packets received on ring i.
+ - Acceleration
+
+ * - `rx[i]_gro_bytes`
+ - Number of received bytes processed using hardware-accelerated GRO. The
+ number of hardware GRO offloaded bytes received on ring i.
+ - Acceleration
+
+ * - `rx[i]_gro_skbs`
+ - The number of receive SKBs constructed while performing
+ hardware-accelerated GRO.
+ - Informative
+
+ * - `rx[i]_gro_match_packets`
+ - Number of received packets processed using hardware-accelerated GRO that
+ met the flow table match criteria.
+ - Informative
+
+ * - `rx[i]_gro_large_hds`
+ - Number of receive packets using hardware-accelerated GRO that have large
+ headers that require additional memory to be allocated.
+ - Informative
+
+ * - `rx[i]_lro_packets`
+ - The number of LRO packets received on ring i [#accel]_.
+ - Acceleration
+
+ * - `rx[i]_lro_bytes`
+ - The number of LRO bytes received on ring i [#accel]_.
+ - Acceleration
+
+ * - `rx[i]_ecn_mark`
+ - The number of received packets where the ECN mark was turned on.
+ - Informative
+
+ * - `rx_oversize_pkts_buffer`
+ - The number of dropped received packets due to length which arrived to RQ
+ and exceed software buffer size allocated by the device for incoming
+ traffic. It might imply that the device MTU is larger than the software
+ buffers size.
+ - Error
+
+ * - `rx_oversize_pkts_sw_drop`
+ - Number of received packets dropped in software because the CQE data is
+ larger than the MTU size.
+ - Error
+
+ * - `rx[i]_csum_unnecessary`
+ - Packets received with a `CHECKSUM_UNNECESSARY` on ring i [#accel]_.
+ - Acceleration
+
+ * - `rx[i]_csum_unnecessary_inner`
+ - Packets received with inner encapsulation with a `CHECKSUM_UNNECESSARY`
+ on ring i [#accel]_.
+ - Acceleration
+
+ * - `rx[i]_csum_none`
+ - Packets received with a `CHECKSUM_NONE` on ring i [#accel]_.
+ - Acceleration
+
+ * - `rx[i]_csum_complete`
+ - Packets received with a `CHECKSUM_COMPLETE` on ring i [#accel]_.
+ - Acceleration
+
+ * - `rx[i]_csum_complete_tail`
+ - Number of received packets that had checksum calculation computed,
+ potentially needed padding, and were able to do so with
+ `CHECKSUM_PARTIAL`.
+ - Informative
+
+ * - `rx[i]_csum_complete_tail_slow`
+ - Number of received packets that need padding larger than eight bytes for
+ the checksum.
+ - Informative
+
+ * - `tx[i]_csum_partial`
+ - Packets transmitted with a `CHECKSUM_PARTIAL` on ring i [#accel]_.
+ - Acceleration
+
+ * - `tx[i]_csum_partial_inner`
+ - Packets transmitted with inner encapsulation with a `CHECKSUM_PARTIAL` on
+ ring i [#accel]_.
+ - Acceleration
+
+ * - `tx[i]_csum_none`
+ - Packets transmitted with no hardware checksum acceleration on ring i.
+ - Informative
+
+ * - `tx[i]_stopped` / `tx_queue_stopped` [#ring_global]_
+ - Events where SQ was full on ring i. If this counter is increased, check
+ the amount of buffers allocated for transmission.
+ - Informative
+
+ * - `tx[i]_wake` / `tx_queue_wake` [#ring_global]_
+ - Events where SQ was full and has become not full on ring i.
+ - Informative
+
+ * - `tx[i]_dropped` / `tx_queue_dropped` [#ring_global]_
+ - Packets transmitted that were dropped due to DMA mapping failure on
+ ring i. If this counter is increased, check the amount of buffers
+ allocated for transmission.
+ - Error
+
+ * - `tx[i]_nop`
+ - The number of nop WQEs (empty WQEs) inserted to the SQ (related to
+ ring i) due to the reach of the end of the cyclic buffer. When reaching
+ near to the end of cyclic buffer the driver may add those empty WQEs to
+ avoid handling a state the a WQE start in the end of the queue and ends
+ in the beginning of the queue. This is a normal condition.
+ - Informative
+
+ * - `tx[i]_added_vlan_packets`
+ - The number of packets sent where vlan tag insertion was offloaded to the
+ hardware.
+ - Acceleration
+
+ * - `rx[i]_removed_vlan_packets`
+ - The number of packets received where vlan tag stripping was offloaded to
+ the hardware.
+ - Acceleration
+
+ * - `rx[i]_wqe_err`
+ - The number of wrong opcodes received on ring i.
+ - Error
+
+ * - `rx[i]_mpwqe_frag`
+ - The number of WQEs that failed to allocate compound page and hence
+ fragmented MPWQE’s (Multi Packet WQEs) were used on ring i. If this
+ counter raise, it may suggest that there is no enough memory for large
+ pages, the driver allocated fragmented pages. This is not abnormal
+ condition.
+ - Informative
+
+ * - `rx[i]_mpwqe_filler_cqes`
+ - The number of filler CQEs events that were issued on ring i.
+ - Informative
+
+ * - `rx[i]_mpwqe_filler_strides`
+ - The number of strides consumed by filler CQEs on ring i.
+ - Informative
+
+ * - `tx[i]_mpwqe_blks`
+ - The number of send blocks processed from Multi-Packet WQEs (mpwqe).
+ - Informative
+
+ * - `tx[i]_mpwqe_pkts`
+ - The number of send packets processed from Multi-Packet WQEs (mpwqe).
+ - Informative
+
+ * - `rx[i]_cqe_compress_blks`
+ - The number of receive blocks with CQE compression on ring i [#accel]_.
+ - Acceleration
+
+ * - `rx[i]_cqe_compress_pkts`
+ - The number of receive packets with CQE compression on ring i [#accel]_.
+ - Acceleration
+
+ * - `rx[i]_cache_reuse`
+ - The number of events of successful reuse of a page from a driver's
+ internal page cache.
+ - Acceleration
+
+ * - `rx[i]_cache_full`
+ - The number of events of full internal page cache where driver can't put a
+ page back to the cache for recycling (page will be freed).
+ - Acceleration
+
+ * - `rx[i]_cache_empty`
+ - The number of events where cache was empty - no page to give. Driver
+ shall allocate new page.
+ - Acceleration
+
+ * - `rx[i]_cache_busy`
+ - The number of events where cache head was busy and cannot be recycled.
+ Driver allocated new page.
+ - Acceleration
+
+ * - `rx[i]_cache_waive`
+ - The number of cache evacuation. This can occur due to page move to
+ another NUMA node or page was pfmemalloc-ed and should be freed as soon
+ as possible.
+ - Acceleration
+
+ * - `rx[i]_arfs_err`
+ - Number of flow rules that failed to be added to the flow table.
+ - Error
+
+ * - `rx[i]_recover`
+ - The number of times the RQ was recovered.
+ - Error
+
+ * - `tx[i]_xmit_more`
+ - The number of packets sent with `xmit_more` indication set on the skbuff
+ (no doorbell).
+ - Acceleration
+
+ * - `ch[i]_poll`
+ - The number of invocations of NAPI poll of channel i.
+ - Informative
+
+ * - `ch[i]_arm`
+ - The number of times the NAPI poll function completed and armed the
+ completion queues on channel i.
+ - Informative
+
+ * - `ch[i]_aff_change`
+ - The number of times the NAPI poll function explicitly stopped execution
+ on a CPU due to a change in affinity, on channel i.
+ - Informative
+
+ * - `ch[i]_events`
+ - The number of hard interrupt events on the completion queues of channel i.
+ - Informative
+
+ * - `ch[i]_eq_rearm`
+ - The number of times the EQ was recovered.
+ - Error
+
+ * - `ch[i]_force_irq`
+ - Number of times NAPI is triggered by XSK wakeups by posting a NOP to
+ ICOSQ.
+ - Acceleration
+
+ * - `rx[i]_congst_umr`
+ - The number of times an outstanding UMR request is delayed due to
+ congestion, on ring i.
+ - Informative
+
+ * - `rx_pp_alloc_fast`
+ - Number of successful fast path allocations.
+ - Informative
+
+ * - `rx_pp_alloc_slow`
+ - Number of slow path order-0 allocations.
+ - Informative
+
+ * - `rx_pp_alloc_slow_high_order`
+ - Number of slow path high order allocations.
+ - Informative
+
+ * - `rx_pp_alloc_empty`
+ - Counter is incremented when ptr ring is empty, so a slow path allocation
+ was forced.
+ - Informative
+
+ * - `rx_pp_alloc_refill`
+ - Counter is incremented when an allocation which triggered a refill of the
+ cache.
+ - Informative
+
+ * - `rx_pp_alloc_waive`
+ - Counter is incremented when pages obtained from the ptr ring that cannot
+ be added to the cache due to a NUMA mismatch.
+ - Informative
+
+ * - `rx_pp_recycle_cached`
+ - Counter is incremented when recycling placed page in the page pool cache.
+ - Informative
+
+ * - `rx_pp_recycle_cache_full`
+ - Counter is incremented when page pool cache was full.
+ - Informative
+
+ * - `rx_pp_recycle_ring`
+ - Counter is incremented when page placed into the ptr ring.
+ - Informative
+
+ * - `rx_pp_recycle_ring_full`
+ - Counter is incremented when page released from page pool because the ptr
+ ring was full.
+ - Informative
+
+ * - `rx_pp_recycle_released_ref`
+ - Counter is incremented when page released (and not recycled) because
+ refcnt > 1.
+ - Informative
+
+ * - `rx[i]_xsk_buff_alloc_err`
+ - The number of times allocating an skb or XSK buffer failed in the XSK RQ
+ context.
+ - Error
+
+ * - `rx[i]_xsk_arfs_err`
+ - aRFS (accelerated Receive Flow Steering) does not occur in the XSK RQ
+ context, so this counter should never increment.
+ - Error
+
+ * - `rx[i]_xdp_tx_xmit`
+ - The number of packets forwarded back to the port due to XDP program
+ `XDP_TX` action (bouncing). these packets are not counted by other
+ software counters. These packets are counted by physical port and vPort
+ counters.
+ - Informative
+
+ * - `rx[i]_xdp_tx_mpwqe`
+ - Number of multi-packet WQEs transmitted by the netdev and `XDP_TX`-ed by
+ the netdev during the RQ context.
+ - Acceleration
+
+ * - `rx[i]_xdp_tx_inlnw`
+ - Number of WQE data segments transmitted where the data could be inlined
+ in the WQE and then `XDP_TX`-ed during the RQ context.
+ - Acceleration
+
+ * - `rx[i]_xdp_tx_nops`
+ - Number of NOP WQEBBs (WQE building blocks) received posted to the XDP SQ.
+ - Acceleration
+
+ * - `rx[i]_xdp_tx_full`
+ - The number of packets that should have been forwarded back to the port
+ due to `XDP_TX` action but were dropped due to full tx queue. These packets
+ are not counted by other software counters. These packets are counted by
+ physical port and vPort counters. You may open more rx queues and spread
+ traffic rx over all queues and/or increase rx ring size.
+ - Error
+
+ * - `rx[i]_xdp_tx_err`
+ - The number of times an `XDP_TX` error such as frame too long and frame
+ too short occurred on `XDP_TX` ring of RX ring.
+ - Error
+
+ * - `rx[i]_xdp_tx_cqes` / `rx_xdp_tx_cqe` [#ring_global]_
+ - The number of completions received on the CQ of the `XDP_TX` ring.
+ - Informative
+
+ * - `rx[i]_xdp_drop`
+ - The number of packets dropped due to XDP program `XDP_DROP` action. these
+ packets are not counted by other software counters. These packets are
+ counted by physical port and vPort counters.
+ - Informative
+
+ * - `rx[i]_xdp_redirect`
+ - The number of times an XDP redirect action was triggered on ring i.
+ - Acceleration
+
+ * - `tx[i]_xdp_xmit`
+ - The number of packets redirected to the interface(due to XDP redirect).
+ These packets are not counted by other software counters. These packets
+ are counted by physical port and vPort counters.
+ - Informative
+
+ * - `tx[i]_xdp_full`
+ - The number of packets redirected to the interface(due to XDP redirect),
+ but were dropped due to full tx queue. these packets are not counted by
+ other software counters. you may enlarge tx queues.
+ - Informative
+
+ * - `tx[i]_xdp_mpwqe`
+ - Number of multi-packet WQEs offloaded onto the NIC that were
+ `XDP_REDIRECT`-ed from other netdevs.
+ - Acceleration
+
+ * - `tx[i]_xdp_inlnw`
+ - Number of WQE data segments where the data could be inlined in the WQE
+ where the data segments were `XDP_REDIRECT`-ed from other netdevs.
+ - Acceleration
+
+ * - `tx[i]_xdp_nops`
+ - Number of NOP WQEBBs (WQE building blocks) posted to the SQ that were
+ `XDP_REDIRECT`-ed from other netdevs.
+ - Acceleration
+
+ * - `tx[i]_xdp_err`
+ - The number of packets redirected to the interface(due to XDP redirect)
+ but were dropped due to error such as frame too long and frame too short.
+ - Error
+
+ * - `tx[i]_xdp_cqes`
+ - The number of completions received for packets redirected to the
+ interface(due to XDP redirect) on the CQ.
+ - Informative
+
+ * - `tx[i]_xsk_xmit`
+ - The number of packets transmitted using XSK zerocopy functionality.
+ - Acceleration
+
+ * - `tx[i]_xsk_mpwqe`
+ - Number of multi-packet WQEs offloaded onto the NIC that were
+ `XDP_REDIRECT`-ed from other netdevs.
+ - Acceleration
+
+ * - `tx[i]_xsk_inlnw`
+ - Number of WQE data segments where the data could be inlined in the WQE
+ that are transmitted using XSK zerocopy.
+ - Acceleration
+
+ * - `tx[i]_xsk_full`
+ - Number of times doorbell is rung in XSK zerocopy mode when SQ is full.
+ - Error
+
+ * - `tx[i]_xsk_err`
+ - Number of errors that occurred in XSK zerocopy mode such as if the data
+ size is larger than the MTU size.
+ - Error
+
+ * - `tx[i]_xsk_cqes`
+ - Number of CQEs processed in XSK zerocopy mode.
+ - Acceleration
+
+ * - `tx_tls_ctx`
+ - Number of TLS TX HW offload contexts added to device for encryption.
+ - Acceleration
+
+ * - `tx_tls_del`
+ - Number of TLS TX HW offload contexts removed from device (connection
+ closed).
+ - Acceleration
+
+ * - `tx_tls_pool_alloc`
+ - Number of times a unit of work is successfully allocated in the TLS HW
+ offload pool.
+ - Acceleration
+
+ * - `tx_tls_pool_free`
+ - Number of times a unit of work is freed in the TLS HW offload pool.
+ - Acceleration
+
+ * - `rx_tls_ctx`
+ - Number of TLS RX HW offload contexts added to device for decryption.
+ - Acceleration
+
+ * - `rx_tls_del`
+ - Number of TLS RX HW offload contexts deleted from device (connection has
+ finished).
+ - Acceleration
+
+ * - `rx[i]_tls_decrypted_packets`
+ - Number of successfully decrypted RX packets which were part of a TLS
+ stream.
+ - Acceleration
+
+ * - `rx[i]_tls_decrypted_bytes`
+ - Number of TLS payload bytes in RX packets which were successfully
+ decrypted.
+ - Acceleration
+
+ * - `rx[i]_tls_resync_req_pkt`
+ - Number of received TLS packets with a resync request.
+ - Acceleration
+
+ * - `rx[i]_tls_resync_req_start`
+ - Number of times the TLS async resync request was started.
+ - Acceleration
+
+ * - `rx[i]_tls_resync_req_end`
+ - Number of times the TLS async resync request properly ended with
+ providing the HW tracked tcp-seq.
+ - Acceleration
+
+ * - `rx[i]_tls_resync_req_skip`
+ - Number of times the TLS async resync request procedure was started but
+ not properly ended.
+ - Error
+
+ * - `rx[i]_tls_resync_res_ok`
+ - Number of times the TLS resync response call to the driver was
+ successfully handled.
+ - Acceleration
+
+ * - `rx[i]_tls_resync_res_retry`
+ - Number of times the TLS resync response call to the driver was
+ reattempted when ICOSQ is full.
+ - Error
+
+ * - `rx[i]_tls_resync_res_skip`
+ - Number of times the TLS resync response call to the driver was terminated
+ unsuccessfully.
+ - Error
+
+ * - `rx[i]_tls_err`
+ - Number of times when CQE TLS offload was problematic.
+ - Error
+
+ * - `tx[i]_tls_encrypted_packets`
+ - The number of send packets that are TLS encrypted by the kernel.
+ - Acceleration
+
+ * - `tx[i]_tls_encrypted_bytes`
+ - The number of send bytes that are TLS encrypted by the kernel.
+ - Acceleration
+
+ * - `tx[i]_tls_ooo`
+ - Number of times out of order TLS SQE fragments were handled on ring i.
+ - Acceleration
+
+ * - `tx[i]_tls_dump_packets`
+ - Number of TLS decrypted packets copied over from NIC over DMA.
+ - Acceleration
+
+ * - `tx[i]_tls_dump_bytes`
+ - Number of TLS decrypted bytes copied over from NIC over DMA.
+ - Acceleration
+
+ * - `tx[i]_tls_resync_bytes`
+ - Number of TLS bytes requested to be resynchronized in order to be
+ decrypted.
+ - Acceleration
+
+ * - `tx[i]_tls_skip_no_sync_data`
+ - Number of TLS send data that can safely be skipped / do not need to be
+ decrypted.
+ - Acceleration
+
+ * - `tx[i]_tls_drop_no_sync_data`
+ - Number of TLS send data that were dropped due to retransmission of TLS
+ data.
+ - Acceleration
+
+ * - `ptp_cq[i]_abort`
+ - Number of times a CQE has to be skipped in precision time protocol due to
+ a skew between the port timestamp and CQE timestamp being greater than
+ 128 seconds.
+ - Error
+
+ * - `ptp_cq[i]_abort_abs_diff_ns`
+ - Accumulation of time differences between the port timestamp and CQE
+ timestamp when the difference is greater than 128 seconds in precision
+ time protocol.
+ - Error
+
+.. [#ring_global] The corresponding ring and global counters do not share the
+ same name (i.e. do not follow the common naming scheme).
+
+vPort Counters
+--------------
+Counters on the NIC port that is connected to a eSwitch.
+
+.. flat-table:: vPort Counter Table
+ :widths: 2 3 1
+
+ * - Counter
+ - Description
+ - Type
+
+ * - `rx_vport_unicast_packets`
+ - Unicast packets received, steered to a port including Raw Ethernet
+ QP/DPDK traffic, excluding RDMA traffic.
+ - Informative
+
+ * - `rx_vport_unicast_bytes`
+ - Unicast bytes received, steered to a port including Raw Ethernet QP/DPDK
+ traffic, excluding RDMA traffic.
+ - Informative
+
+ * - `tx_vport_unicast_packets`
+ - Unicast packets transmitted, steered from a port including Raw Ethernet
+ QP/DPDK traffic, excluding RDMA traffic.
+ - Informative
+
+ * - `tx_vport_unicast_bytes`
+ - Unicast bytes transmitted, steered from a port including Raw Ethernet
+ QP/DPDK traffic, excluding RDMA traffic.
+ - Informative
+
+ * - `rx_vport_multicast_packets`
+ - Multicast packets received, steered to a port including Raw Ethernet
+ QP/DPDK traffic, excluding RDMA traffic.
+ - Informative
+
+ * - `rx_vport_multicast_bytes`
+ - Multicast bytes received, steered to a port including Raw Ethernet
+ QP/DPDK traffic, excluding RDMA traffic.
+ - Informative
+
+ * - `tx_vport_multicast_packets`
+ - Multicast packets transmitted, steered from a port including Raw Ethernet
+ QP/DPDK traffic, excluding RDMA traffic.
+ - Informative
+
+ * - `tx_vport_multicast_bytes`
+ - Multicast bytes transmitted, steered from a port including Raw Ethernet
+ QP/DPDK traffic, excluding RDMA traffic.
+ - Informative
+
+ * - `rx_vport_broadcast_packets`
+ - Broadcast packets received, steered to a port including Raw Ethernet
+ QP/DPDK traffic, excluding RDMA traffic.
+ - Informative
+
+ * - `rx_vport_broadcast_bytes`
+ - Broadcast bytes received, steered to a port including Raw Ethernet
+ QP/DPDK traffic, excluding RDMA traffic.
+ - Informative
+
+ * - `tx_vport_broadcast_packets`
+ - Broadcast packets transmitted, steered from a port including Raw Ethernet
+ QP/DPDK traffic, excluding RDMA traffic.
+ - Informative
+
+ * - `tx_vport_broadcast_bytes`
+ - Broadcast bytes transmitted, steered from a port including Raw Ethernet
+ QP/DPDK traffic, excluding RDMA traffic.
+ - Informative
+
+ * - `rx_vport_rdma_unicast_packets`
+ - RDMA unicast packets received, steered to a port (counters counts
+ RoCE/UD/RC traffic) [#accel]_.
+ - Acceleration
+
+ * - `rx_vport_rdma_unicast_bytes`
+ - RDMA unicast bytes received, steered to a port (counters counts
+ RoCE/UD/RC traffic) [#accel]_.
+ - Acceleration
+
+ * - `tx_vport_rdma_unicast_packets`
+ - RDMA unicast packets transmitted, steered from a port (counters counts
+ RoCE/UD/RC traffic) [#accel]_.
+ - Acceleration
+
+ * - `tx_vport_rdma_unicast_bytes`
+ - RDMA unicast bytes transmitted, steered from a port (counters counts
+ RoCE/UD/RC traffic) [#accel]_.
+ - Acceleration
+
+ * - `rx_vport_rdma_multicast_packets`
+ - RDMA multicast packets received, steered to a port (counters counts
+ RoCE/UD/RC traffic) [#accel]_.
+ - Acceleration
+
+ * - `rx_vport_rdma_multicast_bytes`
+ - RDMA multicast bytes received, steered to a port (counters counts
+ RoCE/UD/RC traffic) [#accel]_.
+ - Acceleration
+
+ * - `tx_vport_rdma_multicast_packets`
+ - RDMA multicast packets transmitted, steered from a port (counters counts
+ RoCE/UD/RC traffic) [#accel]_.
+ - Acceleration
+
+ * - `tx_vport_rdma_multicast_bytes`
+ - RDMA multicast bytes transmitted, steered from a port (counters counts
+ RoCE/UD/RC traffic) [#accel]_.
+ - Acceleration
+
+ * - `rx_steer_missed_packets`
+ - Number of packets that was received by the NIC, however was discarded
+ because it did not match any flow in the NIC flow table.
+ - Error
+
+ * - `rx_packets`
+ - Representor only: packets received, that were handled by the hypervisor.
+ - Informative
+
+ * - `rx_bytes`
+ - Representor only: bytes received, that were handled by the hypervisor.
+ - Informative
+
+ * - `tx_packets`
+ - Representor only: packets transmitted, that were handled by the
+ hypervisor.
+ - Informative
+
+ * - `tx_bytes`
+ - Representor only: bytes transmitted, that were handled by the hypervisor.
+ - Informative
+
+ * - `dev_internal_queue_oob`
+ - The number of dropped packets due to lack of receive WQEs for an internal
+ device RQ.
+ - Error
+
+Physical Port Counters
+----------------------
+The physical port counters are the counters on the external port connecting the
+adapter to the network. This measuring point holds information on standardized
+counters like IEEE 802.3, RFC2863, RFC 2819, RFC 3635 and additional counters
+like flow control, FEC and more.
+
+.. flat-table:: Physical Port Counter Table
+ :widths: 2 3 1
+
+ * - Counter
+ - Description
+ - Type
+
+ * - `rx_packets_phy`
+ - The number of packets received on the physical port. This counter doesn’t
+ include packets that were discarded due to FCS, frame size and similar
+ errors.
+ - Informative
+
+ * - `tx_packets_phy`
+ - The number of packets transmitted on the physical port.
+ - Informative
+
+ * - `rx_bytes_phy`
+ - The number of bytes received on the physical port, including Ethernet
+ header and FCS.
+ - Informative
+
+ * - `tx_bytes_phy`
+ - The number of bytes transmitted on the physical port.
+ - Informative
+
+ * - `rx_multicast_phy`
+ - The number of multicast packets received on the physical port.
+ - Informative
+
+ * - `tx_multicast_phy`
+ - The number of multicast packets transmitted on the physical port.
+ - Informative
+
+ * - `rx_broadcast_phy`
+ - The number of broadcast packets received on the physical port.
+ - Informative
+
+ * - `tx_broadcast_phy`
+ - The number of broadcast packets transmitted on the physical port.
+ - Informative
+
+ * - `rx_crc_errors_phy`
+ - The number of dropped received packets due to FCS (Frame Check Sequence)
+ error on the physical port. If this counter is increased in high rate,
+ check the link quality using `rx_symbol_error_phy` and
+ `rx_corrected_bits_phy` counters below.
+ - Error
+
+ * - `rx_in_range_len_errors_phy`
+ - The number of received packets dropped due to length/type errors on a
+ physical port.
+ - Error
+
+ * - `rx_out_of_range_len_phy`
+ - The number of received packets dropped due to length greater than allowed
+ on a physical port. If this counter is increasing, it implies that the
+ peer connected to the adapter has a larger MTU configured. Using same MTU
+ configuration shall resolve this issue.
+ - Error
+
+ * - `rx_oversize_pkts_phy`
+ - The number of dropped received packets due to length which exceed MTU
+ size on a physical port. If this counter is increasing, it implies that
+ the peer connected to the adapter has a larger MTU configured. Using same
+ MTU configuration shall resolve this issue.
+ - Error
+
+ * - `rx_symbol_err_phy`
+ - The number of received packets dropped due to physical coding errors
+ (symbol errors) on a physical port.
+ - Error
+
+ * - `rx_mac_control_phy`
+ - The number of MAC control packets received on the physical port.
+ - Informative
+
+ * - `tx_mac_control_phy`
+ - The number of MAC control packets transmitted on the physical port.
+ - Informative
+
+ * - `rx_pause_ctrl_phy`
+ - The number of link layer pause packets received on a physical port. If
+ this counter is increasing, it implies that the network is congested and
+ cannot absorb the traffic coming from to the adapter.
+ - Informative
+
+ * - `tx_pause_ctrl_phy`
+ - The number of link layer pause packets transmitted on a physical port. If
+ this counter is increasing, it implies that the NIC is congested and
+ cannot absorb the traffic coming from the network.
+ - Informative
+
+ * - `rx_unsupported_op_phy`
+ - The number of MAC control packets received with unsupported opcode on a
+ physical port.
+ - Error
+
+ * - `rx_discards_phy`
+ - The number of received packets dropped due to lack of buffers on a
+ physical port. If this counter is increasing, it implies that the adapter
+ is congested and cannot absorb the traffic coming from the network.
+ - Error
+
+ * - `tx_discards_phy`
+ - The number of packets which were discarded on transmission, even no
+ errors were detected. the drop might occur due to link in down state,
+ head of line drop, pause from the network, etc.
+ - Error
+
+ * - `tx_errors_phy`
+ - The number of transmitted packets dropped due to a length which exceed
+ MTU size on a physical port.
+ - Error
+
+ * - `rx_undersize_pkts_phy`
+ - The number of received packets dropped due to length which is shorter
+ than 64 bytes on a physical port. If this counter is increasing, it
+ implies that the peer connected to the adapter has a non-standard MTU
+ configured or malformed packet had arrived.
+ - Error
+
+ * - `rx_fragments_phy`
+ - The number of received packets dropped due to a length which is shorter
+ than 64 bytes and has FCS error on a physical port. If this counter is
+ increasing, it implies that the peer connected to the adapter has a
+ non-standard MTU configured.
+ - Error
+
+ * - `rx_jabbers_phy`
+ - The number of received packets d due to a length which is longer than 64
+ bytes and had FCS error on a physical port.
+ - Error
+
+ * - `rx_64_bytes_phy`
+ - The number of packets received on the physical port with size of 64 bytes.
+ - Informative
+
+ * - `rx_65_to_127_bytes_phy`
+ - The number of packets received on the physical port with size of 65 to
+ 127 bytes.
+ - Informative
+
+ * - `rx_128_to_255_bytes_phy`
+ - The number of packets received on the physical port with size of 128 to
+ 255 bytes.
+ - Informative
+
+ * - `rx_256_to_511_bytes_phy`
+ - The number of packets received on the physical port with size of 256 to
+ 512 bytes.
+ - Informative
+
+ * - `rx_512_to_1023_bytes_phy`
+ - The number of packets received on the physical port with size of 512 to
+ 1023 bytes.
+ - Informative
+
+ * - `rx_1024_to_1518_bytes_phy`
+ - The number of packets received on the physical port with size of 1024 to
+ 1518 bytes.
+ - Informative
+
+ * - `rx_1519_to_2047_bytes_phy`
+ - The number of packets received on the physical port with size of 1519 to
+ 2047 bytes.
+ - Informative
+
+ * - `rx_2048_to_4095_bytes_phy`
+ - The number of packets received on the physical port with size of 2048 to
+ 4095 bytes.
+ - Informative
+
+ * - `rx_4096_to_8191_bytes_phy`
+ - The number of packets received on the physical port with size of 4096 to
+ 8191 bytes.
+ - Informative
+
+ * - `rx_8192_to_10239_bytes_phy`
+ - The number of packets received on the physical port with size of 8192 to
+ 10239 bytes.
+ - Informative
+
+ * - `link_down_events_phy`
+ - The number of times where the link operative state changed to down. In
+ case this counter is increasing it may imply on port flapping. You may
+ need to replace the cable/transceiver.
+ - Error
+
+ * - `rx_out_of_buffer`
+ - Number of times receive queue had no software buffers allocated for the
+ adapter's incoming traffic.
+ - Error
+
+ * - `module_bus_stuck`
+ - The number of times that module's I\ :sup:`2`\C bus (data or clock)
+ short-wire was detected. You may need to replace the cable/transceiver.
+ - Error
+
+ * - `module_high_temp`
+ - The number of times that the module temperature was too high. If this
+ issue persist, you may need to check the ambient temperature or replace
+ the cable/transceiver module.
+ - Error
+
+ * - `module_bad_shorted`
+ - The number of times that the module cables were shorted. You may need to
+ replace the cable/transceiver module.
+ - Error
+
+ * - `module_unplug`
+ - The number of times that module was ejected.
+ - Informative
+
+ * - `rx_buffer_passed_thres_phy`
+ - The number of events where the port receive buffer was over 85% full.
+ - Informative
+
+ * - `tx_pause_storm_warning_events`
+ - The number of times the device was sending pauses for a long period of
+ time.
+ - Informative
+
+ * - `tx_pause_storm_error_events`
+ - The number of times the device was sending pauses for a long period of
+ time, reaching time out and disabling transmission of pause frames. on
+ the period where pause frames were disabled, drop could have been
+ occurred.
+ - Error
+
+ * - `rx[i]_buff_alloc_err`
+ - Failed to allocate a buffer to received packet (or SKB) on ring i.
+ - Error
+
+ * - `rx_bits_phy`
+ - This counter provides information on the total amount of traffic that
+ could have been received and can be used as a guideline to measure the
+ ratio of errored traffic in `rx_pcs_symbol_err_phy` and
+ `rx_corrected_bits_phy`.
+ - Informative
+
+ * - `rx_pcs_symbol_err_phy`
+ - This counter counts the number of symbol errors that wasn’t corrected by
+ FEC correction algorithm or that FEC algorithm was not active on this
+ interface. If this counter is increasing, it implies that the link
+ between the NIC and the network is suffering from high BER, and that
+ traffic is lost. You may need to replace the cable/transceiver. The error
+ rate is the number of `rx_pcs_symbol_err_phy` divided by the number of
+ `rx_bits_phy` on a specific time frame.
+ - Error
+
+ * - `rx_corrected_bits_phy`
+ - The number of corrected bits on this port according to active FEC
+ (RS/FC). If this counter is increasing, it implies that the link between
+ the NIC and the network is suffering from high BER. The corrected bit
+ rate is the number of `rx_corrected_bits_phy` divided by the number of
+ `rx_bits_phy` on a specific time frame.
+ - Error
+
+ * - `rx_err_lane_[l]_phy`
+ - This counter counts the number of physical raw errors per lane l index.
+ The counter counts errors before FEC corrections. If this counter is
+ increasing, it implies that the link between the NIC and the network is
+ suffering from high BER, and that traffic might be lost. You may need to
+ replace the cable/transceiver. Please check in accordance with
+ `rx_corrected_bits_phy`.
+ - Error
+
+ * - `rx_global_pause`
+ - The number of pause packets received on the physical port. If this
+ counter is increasing, it implies that the network is congested and
+ cannot absorb the traffic coming from the adapter. Note: This counter is
+ only enabled when global pause mode is enabled.
+ - Informative
+
+ * - `rx_global_pause_duration`
+ - The duration of pause received (in microSec) on the physical port. The
+ counter represents the time the port did not send any traffic. If this
+ counter is increasing, it implies that the network is congested and
+ cannot absorb the traffic coming from the adapter. Note: This counter is
+ only enabled when global pause mode is enabled.
+ - Informative
+
+ * - `tx_global_pause`
+ - The number of pause packets transmitted on a physical port. If this
+ counter is increasing, it implies that the adapter is congested and
+ cannot absorb the traffic coming from the network. Note: This counter is
+ only enabled when global pause mode is enabled.
+ - Informative
+
+ * - `tx_global_pause_duration`
+ - The duration of pause transmitter (in microSec) on the physical port.
+ Note: This counter is only enabled when global pause mode is enabled.
+ - Informative
+
+ * - `rx_global_pause_transition`
+ - The number of times a transition from Xoff to Xon on the physical port
+ has occurred. Note: This counter is only enabled when global pause mode
+ is enabled.
+ - Informative
+
+ * - `rx_if_down_packets`
+ - The number of received packets that were dropped due to interface down.
+ - Informative
+
+Priority Port Counters
+----------------------
+The following counters are physical port counters that are counted per L2
+priority (0-7).
+
+**Note:** `p` in the counter name represents the priority.
+
+.. flat-table:: Priority Port Counter Table
+ :widths: 2 3 1
+
+ * - Counter
+ - Description
+ - Type
+
+ * - `rx_prio[p]_bytes`
+ - The number of bytes received with priority p on the physical port.
+ - Informative
+
+ * - `rx_prio[p]_packets`
+ - The number of packets received with priority p on the physical port.
+ - Informative
+
+ * - `tx_prio[p]_bytes`
+ - The number of bytes transmitted on priority p on the physical port.
+ - Informative
+
+ * - `tx_prio[p]_packets`
+ - The number of packets transmitted on priority p on the physical port.
+ - Informative
+
+ * - `rx_prio[p]_pause`
+ - The number of pause packets received with priority p on a physical port.
+ If this counter is increasing, it implies that the network is congested
+ and cannot absorb the traffic coming from the adapter. Note: This counter
+ is available only if PFC was enabled on priority p.
+ - Informative
+
+ * - `rx_prio[p]_pause_duration`
+ - The duration of pause received (in microSec) on priority p on the
+ physical port. The counter represents the time the port did not send any
+ traffic on this priority. If this counter is increasing, it implies that
+ the network is congested and cannot absorb the traffic coming from the
+ adapter. Note: This counter is available only if PFC was enabled on
+ priority p.
+ - Informative
+
+ * - `rx_prio[p]_pause_transition`
+ - The number of times a transition from Xoff to Xon on priority p on the
+ physical port has occurred. Note: This counter is available only if PFC
+ was enabled on priority p.
+ - Informative
+
+ * - `tx_prio[p]_pause`
+ - The number of pause packets transmitted on priority p on a physical port.
+ If this counter is increasing, it implies that the adapter is congested
+ and cannot absorb the traffic coming from the network. Note: This counter
+ is available only if PFC was enabled on priority p.
+ - Informative
+
+ * - `tx_prio[p]_pause_duration`
+ - The duration of pause transmitter (in microSec) on priority p on the
+ physical port. Note: This counter is available only if PFC was enabled on
+ priority p.
+ - Informative
+
+ * - `rx_prio[p]_buf_discard`
+ - The number of packets discarded by device due to lack of per host receive
+ buffers.
+ - Informative
+
+ * - `rx_prio[p]_cong_discard`
+ - The number of packets discarded by device due to per host congestion.
+ - Informative
+
+ * - `rx_prio[p]_marked`
+ - The number of packets ecn marked by device due to per host congestion.
+ - Informative
+
+ * - `rx_prio[p]_discards`
+ - The number of packets discarded by device due to lack of receive buffers.
+ - Informative
+
+Device Counters
+---------------
+.. flat-table:: Device Counter Table
+ :widths: 2 3 1
+
+ * - Counter
+ - Description
+ - Type
+
+ * - `rx_pci_signal_integrity`
+ - Counts physical layer PCIe signal integrity errors, the number of
+ transitions to recovery due to Framing errors and CRC (dlp and tlp). If
+ this counter is raising, try moving the adapter card to a different slot
+ to rule out a bad PCI slot. Validate that you are running with the latest
+ firmware available and latest server BIOS version.
+ - Error
+
+ * - `tx_pci_signal_integrity`
+ - Counts physical layer PCIe signal integrity errors, the number of
+ transition to recovery initiated by the other side (moving to recovery
+ due to getting TS/EIEOS). If this counter is raising, try moving the
+ adapter card to a different slot to rule out a bad PCI slot. Validate
+ that you are running with the latest firmware available and latest server
+ BIOS version.
+ - Error
+
+ * - `outbound_pci_buffer_overflow`
+ - The number of packets dropped due to pci buffer overflow. If this counter
+ is raising in high rate, it might indicate that the receive traffic rate
+ for a host is larger than the PCIe bus and therefore a congestion occurs.
+ - Informative
+
+ * - `outbound_pci_stalled_rd`
+ - The percentage (in the range 0...100) of time within the last second that
+ the NIC had outbound non-posted reads requests but could not perform the
+ operation due to insufficient posted credits.
+ - Informative
+
+ * - `outbound_pci_stalled_wr`
+ - The percentage (in the range 0...100) of time within the last second that
+ the NIC had outbound posted writes requests but could not perform the
+ operation due to insufficient posted credits.
+ - Informative
+
+ * - `outbound_pci_stalled_rd_events`
+ - The number of seconds where `outbound_pci_stalled_rd` was above 30%.
+ - Informative
+
+ * - `outbound_pci_stalled_wr_events`
+ - The number of seconds where `outbound_pci_stalled_wr` was above 30%.
+ - Informative
+
+ * - `dev_out_of_buffer`
+ - The number of times the device owned queue had not enough buffers
+ allocated.
+ - Error
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
new file mode 100644
index 000000000000..9b5c40ba7f0d
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
@@ -0,0 +1,224 @@
+.. SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+.. include:: <isonum.txt>
+
+=======
+Devlink
+=======
+
+:Copyright: |copy| 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+Contents
+========
+
+- `Info`_
+- `Parameters`_
+- `Health reporters`_
+
+Info
+====
+
+The devlink info reports the running and stored firmware versions on device.
+It also prints the device PSID which represents the HCA board type ID.
+
+User command example::
+
+ $ devlink dev info pci/0000:00:06.0
+ pci/0000:00:06.0:
+ driver mlx5_core
+ versions:
+ fixed:
+ fw.psid MT_0000000009
+ running:
+ fw.version 16.26.0100
+ stored:
+ fw.version 16.26.0100
+
+Parameters
+==========
+
+flow_steering_mode: Device flow steering mode
+---------------------------------------------
+The flow steering mode parameter controls the flow steering mode of the driver.
+Two modes are supported:
+1. 'dmfs' - Device managed flow steering.
+2. 'smfs' - Software/Driver managed flow steering.
+
+In DMFS mode, the HW steering entities are created and managed through the
+Firmware.
+In SMFS mode, the HW steering entities are created and managed though by
+the driver directly into hardware without firmware intervention.
+
+SMFS mode is faster and provides better rule insertion rate compared to default DMFS mode.
+
+User command examples:
+
+- Set SMFS flow steering mode::
+
+ $ devlink dev param set pci/0000:06:00.0 name flow_steering_mode value "smfs" cmode runtime
+
+- Read device flow steering mode::
+
+ $ devlink dev param show pci/0000:06:00.0 name flow_steering_mode
+ pci/0000:06:00.0:
+ name flow_steering_mode type driver-specific
+ values:
+ cmode runtime value smfs
+
+enable_roce: RoCE enablement state
+----------------------------------
+If the device supports RoCE disablement, RoCE enablement state controls device
+support for RoCE capability. Otherwise, the control occurs in the driver stack.
+When RoCE is disabled at the driver level, only raw ethernet QPs are supported.
+
+To change RoCE enablement state, a user must change the driverinit cmode value
+and run devlink reload.
+
+User command examples:
+
+- Disable RoCE::
+
+ $ devlink dev param set pci/0000:06:00.0 name enable_roce value false cmode driverinit
+ $ devlink dev reload pci/0000:06:00.0
+
+- Read RoCE enablement state::
+
+ $ devlink dev param show pci/0000:06:00.0 name enable_roce
+ pci/0000:06:00.0:
+ name enable_roce type generic
+ values:
+ cmode driverinit value true
+
+esw_port_metadata: Eswitch port metadata state
+----------------------------------------------
+When applicable, disabling eswitch metadata can increase packet rate
+up to 20% depending on the use case and packet sizes.
+
+Eswitch port metadata state controls whether to internally tag packets with
+metadata. Metadata tagging must be enabled for multi-port RoCE, failover
+between representors and stacked devices.
+By default metadata is enabled on the supported devices in E-switch.
+Metadata is applicable only for E-switch in switchdev mode and
+users may disable it when NONE of the below use cases will be in use:
+1. HCA is in Dual/multi-port RoCE mode.
+2. VF/SF representor bonding (Usually used for Live migration)
+3. Stacked devices
+
+When metadata is disabled, the above use cases will fail to initialize if
+users try to enable them.
+
+- Show eswitch port metadata::
+
+ $ devlink dev param show pci/0000:06:00.0 name esw_port_metadata
+ pci/0000:06:00.0:
+ name esw_port_metadata type driver-specific
+ values:
+ cmode runtime value true
+
+- Disable eswitch port metadata::
+
+ $ devlink dev param set pci/0000:06:00.0 name esw_port_metadata value false cmode runtime
+
+- Change eswitch mode to switchdev mode where after choosing the metadata value::
+
+ $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev
+
+Health reporters
+================
+
+tx reporter
+-----------
+The tx reporter is responsible for reporting and recovering of the following two error scenarios:
+
+- tx timeout
+ Report on kernel tx timeout detection.
+ Recover by searching lost interrupts.
+- tx error completion
+ Report on error tx completion.
+ Recover by flushing the tx queue and reset it.
+
+tx reporter also support on demand diagnose callback, on which it provides
+real time information of its send queues status.
+
+User commands examples:
+
+- Diagnose send queues status::
+
+ $ devlink health diagnose pci/0000:82:00.0 reporter tx
+
+NOTE: This command has valid output only when interface is up, otherwise the command has empty output.
+
+- Show number of tx errors indicated, number of recover flows ended successfully,
+ is autorecover enabled and graceful period from last recover::
+
+ $ devlink health show pci/0000:82:00.0 reporter tx
+
+rx reporter
+-----------
+The rx reporter is responsible for reporting and recovering of the following two error scenarios:
+
+- rx queues' initialization (population) timeout
+ Population of rx queues' descriptors on ring initialization is done
+ in napi context via triggering an irq. In case of a failure to get
+ the minimum amount of descriptors, a timeout would occur, and
+ descriptors could be recovered by polling the EQ (Event Queue).
+- rx completions with errors (reported by HW on interrupt context)
+ Report on rx completion error.
+ Recover (if needed) by flushing the related queue and reset it.
+
+rx reporter also supports on demand diagnose callback, on which it
+provides real time information of its receive queues' status.
+
+- Diagnose rx queues' status and corresponding completion queue::
+
+ $ devlink health diagnose pci/0000:82:00.0 reporter rx
+
+NOTE: This command has valid output only when interface is up. Otherwise, the command has empty output.
+
+- Show number of rx errors indicated, number of recover flows ended successfully,
+ is autorecover enabled, and graceful period from last recover::
+
+ $ devlink health show pci/0000:82:00.0 reporter rx
+
+fw reporter
+-----------
+The fw reporter implements `diagnose` and `dump` callbacks.
+It follows symptoms of fw error such as fw syndrome by triggering
+fw core dump and storing it into the dump buffer.
+The fw reporter diagnose command can be triggered any time by the user to check
+current fw status.
+
+User commands examples:
+
+- Check fw heath status::
+
+ $ devlink health diagnose pci/0000:82:00.0 reporter fw
+
+- Read FW core dump if already stored or trigger new one::
+
+ $ devlink health dump show pci/0000:82:00.0 reporter fw
+
+NOTE: This command can run only on the PF which has fw tracer ownership,
+running it on other PF or any VF will return "Operation not permitted".
+
+fw fatal reporter
+-----------------
+The fw fatal reporter implements `dump` and `recover` callbacks.
+It follows fatal errors indications by CR-space dump and recover flow.
+The CR-space dump uses vsc interface which is valid even if the FW command
+interface is not functional, which is the case in most FW fatal errors.
+The recover function runs recover flow which reloads the driver and triggers fw
+reset if needed.
+On firmware error, the health buffer is dumped into the dmesg. The log
+level is derived from the error's severity (given in health buffer).
+
+User commands examples:
+
+- Run fw recover flow manually::
+
+ $ devlink health recover pci/0000:82:00.0 reporter fw_fatal
+
+- Read FW CR-space dump if already stored or trigger new one::
+
+ $ devlink health dump show pci/0000:82:00.1 reporter fw_fatal
+
+NOTE: This command can run only on PF.
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/index.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/index.rst
new file mode 100644
index 000000000000..3fdcd6b61ccf
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/index.rst
@@ -0,0 +1,26 @@
+.. SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+.. include:: <isonum.txt>
+
+Mellanox ConnectX(R) mlx5 core VPI Network Driver
+=================================================
+
+:Copyright: |copy| 2019, Mellanox Technologies LTD.
+:Copyright: |copy| 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+Contents:
+
+.. toctree::
+ :maxdepth: 2
+
+ kconfig
+ devlink
+ switchdev
+ tracepoints
+ counters
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/kconfig.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/kconfig.rst
new file mode 100644
index 000000000000..43b1f7e87ec4
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/kconfig.rst
@@ -0,0 +1,168 @@
+.. SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+.. include:: <isonum.txt>
+
+=======================================
+Enabling the driver and kconfig options
+=======================================
+
+:Copyright: |copy| 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+| mlx5 core is modular and most of the major mlx5 core driver features can be selected (compiled in/out)
+| at build time via kernel Kconfig flags.
+| Basic features, ethernet net device rx/tx offloads and XDP, are available with the most basic flags
+| CONFIG_MLX5_CORE=y/m and CONFIG_MLX5_CORE_EN=y.
+| For the list of advanced features, please see below.
+
+**CONFIG_MLX5_BRIDGE=(y/n)**
+
+| Enable :ref:`Ethernet Bridging (BRIDGE) offloading support <mlx5_bridge_offload>`.
+| This will provide the ability to add representors of mlx5 uplink and VF
+| ports to Bridge and offloading rules for traffic between such ports.
+| Supports VLANs (trunk and access modes).
+
+
+**CONFIG_MLX5_CORE=(y/m/n)** (module mlx5_core.ko)
+
+| The driver can be enabled by choosing CONFIG_MLX5_CORE=y/m in kernel config.
+| This will provide mlx5 core driver for mlx5 ulps to interface with (mlx5e, mlx5_ib).
+
+
+**CONFIG_MLX5_CORE_EN=(y/n)**
+
+| Choosing this option will allow basic ethernet netdevice support with all of the standard rx/tx offloads.
+| mlx5e is the mlx5 ulp driver which provides netdevice kernel interface, when chosen, mlx5e will be
+| built-in into mlx5_core.ko.
+
+
+**CONFIG_MLX5_CORE_EN_DCB=(y/n)**:
+
+| Enables `Data Center Bridging (DCB) Support <https://community.mellanox.com/s/article/howto-auto-config-pfc-and-ets-on-connectx-4-via-lldp-dcbx>`_.
+
+
+**CONFIG_MLX5_CORE_IPOIB=(y/n)**
+
+| IPoIB offloads & acceleration support.
+| Requires CONFIG_MLX5_CORE_EN to provide an accelerated interface for the rdma
+| IPoIB ulp netdevice.
+
+
+**CONFIG_MLX5_CLS_ACT=(y/n)**
+
+| Enables offload support for TC classifier action (NET_CLS_ACT).
+| Works in both native NIC mode and Switchdev SRIOV mode.
+| Flow-based classifiers, such as those registered through
+| `tc-flower(8)`, are processed by the device, rather than the
+| host. Actions that would then overwrite matching classification
+| results would then be instant due to the offload.
+
+
+**CONFIG_MLX5_EN_ARFS=(y/n)**
+
+| Enables Hardware-accelerated receive flow steering (arfs) support, and ntuple filtering.
+| https://community.mellanox.com/s/article/howto-configure-arfs-on-connectx-4
+
+
+**CONFIG_MLX5_EN_IPSEC=(y/n)**
+
+| Enables `IPSec XFRM cryptography-offload acceleration <https://support.mellanox.com/s/article/ConnectX-6DX-Bluefield-2-IPsec-HW-Full-Offload-Configuration-Guide>`_.
+
+
+**CONFIG_MLX5_EN_MACSEC=(y/n)**
+
+| Build support for MACsec cryptography-offload acceleration in the NIC.
+
+
+**CONFIG_MLX5_EN_RXNFC=(y/n)**
+
+| Enables ethtool receive network flow classification, which allows user defined
+| flow rules to direct traffic into arbitrary rx queue via ethtool set/get_rxnfc API.
+
+
+**CONFIG_MLX5_EN_TLS=(y/n)**
+
+| TLS cryptography-offload acceleration.
+
+
+**CONFIG_MLX5_ESWITCH=(y/n)**
+
+| Ethernet SRIOV E-Switch support in ConnectX NIC. E-Switch provides internal SRIOV packet steering
+| and switching for the enabled VFs and PF in two available modes:
+| 1) `Legacy SRIOV mode (L2 mac vlan steering based) <https://community.mellanox.com/s/article/howto-configure-sr-iov-for-connectx-4-connectx-5-with-kvm--ethernet-x>`_.
+| 2) `Switchdev mode (eswitch offloads) <https://www.mellanox.com/related-docs/prod_software/ASAP2_Hardware_Offloading_for_vSwitches_User_Manual_v4.4.pdf>`_.
+
+
+**CONFIG_MLX5_FPGA=(y/n)**
+
+| Build support for the Innova family of network cards by Mellanox Technologies.
+| Innova network cards are comprised of a ConnectX chip and an FPGA chip on one board.
+| If you select this option, the mlx5_core driver will include the Innova FPGA core and allow
+| building sandbox-specific client drivers.
+
+
+**CONFIG_MLX5_INFINIBAND=(y/n/m)** (module mlx5_ib.ko)
+
+| Provides low-level InfiniBand/RDMA and `RoCE <https://community.mellanox.com/s/article/recommended-network-configuration-examples-for-roce-deployment>`_ support.
+
+
+**CONFIG_MLX5_MPFS=(y/n)**
+
+| Ethernet Multi-Physical Function Switch (MPFS) support in ConnectX NIC.
+| MPFs is required for when `Multi-Host <http://www.mellanox.com/page/multihost>`_ configuration is enabled to allow passing
+| user configured unicast MAC addresses to the requesting PF.
+
+
+**CONFIG_MLX5_SF=(y/n)**
+
+| Build support for subfunction.
+| Subfunctons are more light weight than PCI SRIOV VFs. Choosing this option
+| will enable support for creating subfunction devices.
+
+
+**CONFIG_MLX5_SF_MANAGER=(y/n)**
+
+| Build support for subfuction port in the NIC. A Mellanox subfunction
+| port is managed through devlink. A subfunction supports RDMA, netdevice
+| and vdpa device. It is similar to a SRIOV VF but it doesn't require
+| SRIOV support.
+
+
+**CONFIG_MLX5_SW_STEERING=(y/n)**
+
+| Build support for software-managed steering in the NIC.
+
+
+**CONFIG_MLX5_TC_CT=(y/n)**
+
+| Support offloading connection tracking rules via tc ct action.
+
+
+**CONFIG_MLX5_TC_SAMPLE=(y/n)**
+
+| Support offloading sample rules via tc sample action.
+
+
+**CONFIG_MLX5_VDPA=(y/n)**
+
+| Support library for Mellanox VDPA drivers. Provides code that is
+| common for all types of VDPA drivers. The following drivers are planned:
+| net, block.
+
+
+**CONFIG_MLX5_VDPA_NET=(y/n)**
+
+| VDPA network driver for ConnectX6 and newer. Provides offloading
+| of virtio net datapath such that descriptors put on the ring will
+| be executed by the hardware. It also supports a variety of stateless
+| offloads depending on the actual device used and firmware version.
+
+
+**CONFIG_MLX5_VFIO_PCI=(y/n)**
+
+| This provides migration support for MLX5 devices using the VFIO framework.
+
+
+**External options** ( Choose if the corresponding mlx5 feature is required )
+
+- CONFIG_MLXFW: When chosen, mlx5 firmware flashing support will be enabled (via devlink and ethtool).
+- CONFIG_PTP_1588_CLOCK: When chosen, mlx5 ptp support will be enabled
+- CONFIG_VXLAN: When chosen, mlx5 vxlan support will be enabled.
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst
new file mode 100644
index 000000000000..01deedb71597
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst
@@ -0,0 +1,239 @@
+.. SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+.. include:: <isonum.txt>
+
+=========
+Switchdev
+=========
+
+:Copyright: |copy| 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+.. _mlx5_bridge_offload:
+
+Bridge offload
+==============
+
+The mlx5 driver implements support for offloading bridge rules when in switchdev
+mode. Linux bridge FDBs are automatically offloaded when mlx5 switchdev
+representor is attached to bridge.
+
+- Change device to switchdev mode::
+
+ $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev
+
+- Attach mlx5 switchdev representor 'enp8s0f0' to bridge netdev 'bridge1'::
+
+ $ ip link set enp8s0f0 master bridge1
+
+VLANs
+-----
+
+Following bridge VLAN functions are supported by mlx5:
+
+- VLAN filtering (including multiple VLANs per port)::
+
+ $ ip link set bridge1 type bridge vlan_filtering 1
+ $ bridge vlan add dev enp8s0f0 vid 2-3
+
+- VLAN push on bridge ingress::
+
+ $ bridge vlan add dev enp8s0f0 vid 3 pvid
+
+- VLAN pop on bridge egress::
+
+ $ bridge vlan add dev enp8s0f0 vid 3 untagged
+
+Subfunction
+===========
+
+mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst <devlink_port>`) interface.
+
+A subfunction has its own function capabilities and its own resources. This
+means a subfunction has its own dedicated queues (txq, rxq, cq, eq). These
+queues are neither shared nor stolen from the parent PCI function.
+
+When a subfunction is RDMA capable, it has its own QP1, GID table, and RDMA
+resources neither shared nor stolen from the parent PCI function.
+
+A subfunction has a dedicated window in PCI BAR space that is not shared
+with the other subfunctions or the parent PCI function. This ensures that all
+devices (netdev, rdma, vdpa, etc.) of the subfunction accesses only assigned
+PCI BAR space.
+
+A subfunction supports eswitch representation through which it supports tc
+offloads. The user configures eswitch to send/receive packets from/to
+the subfunction port.
+
+Subfunctions share PCI level resources such as PCI MSI-X IRQs with
+other subfunctions and/or with its parent PCI function.
+
+Example mlx5 software, system, and device view::
+
+ _______
+ | admin |
+ | user |----------
+ |_______| |
+ | |
+ ____|____ __|______ _________________
+ | | | | | |
+ | devlink | | tc tool | | user |
+ | tool | |_________| | applications |
+ |_________| | |_________________|
+ | | | |
+ | | | | Userspace
+ +---------|-------------|-------------------|----------|--------------------+
+ | | +----------+ +----------+ Kernel
+ | | | netdev | | rdma dev |
+ | | +----------+ +----------+
+ (devlink port add/del | ^ ^
+ port function set) | | |
+ | | +---------------|
+ _____|___ | | _______|_______
+ | | | | | mlx5 class |
+ | devlink | +------------+ | | drivers |
+ | kernel | | rep netdev | | |(mlx5_core,ib) |
+ |_________| +------------+ | |_______________|
+ | | | ^
+ (devlink ops) | | (probe/remove)
+ _________|________ | | ____|________
+ | subfunction | | +---------------+ | subfunction |
+ | management driver|----- | subfunction |---| driver |
+ | (mlx5_core) | | auxiliary dev | | (mlx5_core) |
+ |__________________| +---------------+ |_____________|
+ | ^
+ (sf add/del, vhca events) |
+ | (device add/del)
+ _____|____ ____|________
+ | | | subfunction |
+ | PCI NIC |--- activate/deactivate events--->| host driver |
+ |__________| | (mlx5_core) |
+ |_____________|
+
+Subfunction is created using devlink port interface.
+
+- Change device to switchdev mode::
+
+ $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev
+
+- Add a devlink port of subfunction flavour::
+
+ $ devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88
+ pci/0000:06:00.0/32768: type eth netdev eth6 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false
+ function:
+ hw_addr 00:00:00:00:00:00 state inactive opstate detached
+
+- Show a devlink port of the subfunction::
+
+ $ devlink port show pci/0000:06:00.0/32768
+ pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88
+ function:
+ hw_addr 00:00:00:00:00:00 state inactive opstate detached
+
+- Delete a devlink port of subfunction after use::
+
+ $ devlink port del pci/0000:06:00.0/32768
+
+Function attributes
+===================
+
+The mlx5 driver provides a mechanism to setup PCI VF/SF function attributes in
+a unified way for SmartNIC and non-SmartNIC.
+
+This is supported only when the eswitch mode is set to switchdev. Port function
+configuration of the PCI VF/SF is supported through devlink eswitch port.
+
+Port function attributes should be set before PCI VF/SF is enumerated by the
+driver.
+
+MAC address setup
+-----------------
+
+mlx5 driver support devlink port function attr mechanism to setup MAC
+address. (refer to Documentation/networking/devlink/devlink-port.rst)
+
+RoCE capability setup
+~~~~~~~~~~~~~~~~~~~~~
+Not all mlx5 PCI devices/SFs require RoCE capability.
+
+When RoCE capability is disabled, it saves 1 Mbytes worth of system memory per
+PCI devices/SF.
+
+mlx5 driver support devlink port function attr mechanism to setup RoCE
+capability. (refer to Documentation/networking/devlink/devlink-port.rst)
+
+migratable capability setup
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+User who wants mlx5 PCI VFs to be able to perform live migration need to
+explicitly enable the VF migratable capability.
+
+mlx5 driver support devlink port function attr mechanism to setup migratable
+capability. (refer to Documentation/networking/devlink/devlink-port.rst)
+
+SF state setup
+--------------
+
+To use the SF, the user must activate the SF using the SF function state
+attribute.
+
+- Get the state of the SF identified by its unique devlink port index::
+
+ $ devlink port show ens2f0npf0sf88
+ pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false
+ function:
+ hw_addr 00:00:00:00:88:88 state inactive opstate detached
+
+- Activate the function and verify its state is active::
+
+ $ devlink port function set ens2f0npf0sf88 state active
+
+ $ devlink port show ens2f0npf0sf88
+ pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false
+ function:
+ hw_addr 00:00:00:00:88:88 state active opstate detached
+
+Upon function activation, the PF driver instance gets the event from the device
+that a particular SF was activated. It's the cue to put the device on bus, probe
+it and instantiate the devlink instance and class specific auxiliary devices
+for it.
+
+- Show the auxiliary device and port of the subfunction::
+
+ $ devlink dev show
+ devlink dev show auxiliary/mlx5_core.sf.4
+
+ $ devlink port show auxiliary/mlx5_core.sf.4/1
+ auxiliary/mlx5_core.sf.4/1: type eth netdev p0sf88 flavour virtual port 0 splittable false
+
+ $ rdma link show mlx5_0/1
+ link mlx5_0/1 state ACTIVE physical_state LINK_UP netdev p0sf88
+
+ $ rdma dev show
+ 8: rocep6s0f1: node_type ca fw 16.29.0550 node_guid 248a:0703:00b3:d113 sys_image_guid 248a:0703:00b3:d112
+ 13: mlx5_0: node_type ca fw 16.29.0550 node_guid 0000:00ff:fe00:8888 sys_image_guid 248a:0703:00b3:d112
+
+- Subfunction auxiliary device and class device hierarchy::
+
+ mlx5_core.sf.4
+ (subfunction auxiliary device)
+ /\
+ / \
+ / \
+ / \
+ / \
+ mlx5_core.eth.4 mlx5_core.rdma.4
+ (sf eth aux dev) (sf rdma aux dev)
+ | |
+ | |
+ p0sf88 mlx5_0
+ (sf netdev) (sf rdma device)
+
+Additionally, the SF port also gets the event when the driver attaches to the
+auxiliary device of the subfunction. This results in changing the operational
+state of the function. This provides visibility to the user to decide when is it
+safe to delete the SF port for graceful termination of the subfunction.
+
+- Show the SF port operational state::
+
+ $ devlink port show ens2f0npf0sf88
+ pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false
+ function:
+ hw_addr 00:00:00:00:88:88 state active opstate attached
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/tracepoints.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/tracepoints.rst
new file mode 100644
index 000000000000..a9d3e123adc4
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/tracepoints.rst
@@ -0,0 +1,229 @@
+.. SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+.. include:: <isonum.txt>
+
+===========
+Tracepoints
+===========
+
+:Copyright: |copy| 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+mlx5 driver provides internal tracepoints for tracking and debugging using
+kernel tracepoints interfaces (refer to Documentation/trace/ftrace.rst).
+
+For the list of support mlx5 events, check `/sys/kernel/debug/tracing/events/mlx5/`.
+
+tc and eswitch offloads tracepoints:
+
+- mlx5e_configure_flower: trace flower filter actions and cookies offloaded to mlx5::
+
+ $ echo mlx5:mlx5e_configure_flower >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ tc-6535 [019] ...1 2672.404466: mlx5e_configure_flower: cookie=0000000067874a55 actions= REDIRECT
+
+- mlx5e_delete_flower: trace flower filter actions and cookies deleted from mlx5::
+
+ $ echo mlx5:mlx5e_delete_flower >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ tc-6569 [010] .N.1 2686.379075: mlx5e_delete_flower: cookie=0000000067874a55 actions= NULL
+
+- mlx5e_stats_flower: trace flower stats request::
+
+ $ echo mlx5:mlx5e_stats_flower >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ tc-6546 [010] ...1 2679.704889: mlx5e_stats_flower: cookie=0000000060eb3d6a bytes=0 packets=0 lastused=4295560217
+
+- mlx5e_tc_update_neigh_used_value: trace tunnel rule neigh update value offloaded to mlx5::
+
+ $ echo mlx5:mlx5e_tc_update_neigh_used_value >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ kworker/u48:4-8806 [009] ...1 55117.882428: mlx5e_tc_update_neigh_used_value: netdev: ens1f0 IPv4: 1.1.1.10 IPv6: ::ffff:1.1.1.10 neigh_used=1
+
+- mlx5e_rep_neigh_update: trace neigh update tasks scheduled due to neigh state change events::
+
+ $ echo mlx5:mlx5e_rep_neigh_update >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ kworker/u48:7-2221 [009] ...1 1475.387435: mlx5e_rep_neigh_update: netdev: ens1f0 MAC: 24:8a:07:9a:17:9a IPv4: 1.1.1.10 IPv6: ::ffff:1.1.1.10 neigh_connected=1
+
+Bridge offloads tracepoints:
+
+- mlx5_esw_bridge_fdb_entry_init: trace bridge FDB entry offloaded to mlx5::
+
+ $ echo mlx5:mlx5_esw_bridge_fdb_entry_init >> set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ kworker/u20:9-2217 [003] ...1 318.582243: mlx5_esw_bridge_fdb_entry_init: net_device=enp8s0f0_0 addr=e4:fd:05:08:00:02 vid=0 flags=0 used=0
+
+- mlx5_esw_bridge_fdb_entry_cleanup: trace bridge FDB entry deleted from mlx5::
+
+ $ echo mlx5:mlx5_esw_bridge_fdb_entry_cleanup >> set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ ip-2581 [005] ...1 318.629871: mlx5_esw_bridge_fdb_entry_cleanup: net_device=enp8s0f0_1 addr=e4:fd:05:08:00:03 vid=0 flags=0 used=16
+
+- mlx5_esw_bridge_fdb_entry_refresh: trace bridge FDB entry offload refreshed in
+ mlx5::
+
+ $ echo mlx5:mlx5_esw_bridge_fdb_entry_refresh >> set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ kworker/u20:8-3849 [003] ...1 466716: mlx5_esw_bridge_fdb_entry_refresh: net_device=enp8s0f0_0 addr=e4:fd:05:08:00:02 vid=3 flags=0 used=0
+
+- mlx5_esw_bridge_vlan_create: trace bridge VLAN object add on mlx5
+ representor::
+
+ $ echo mlx5:mlx5_esw_bridge_vlan_create >> set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ ip-2560 [007] ...1 318.460258: mlx5_esw_bridge_vlan_create: vid=1 flags=6
+
+- mlx5_esw_bridge_vlan_cleanup: trace bridge VLAN object delete from mlx5
+ representor::
+
+ $ echo mlx5:mlx5_esw_bridge_vlan_cleanup >> set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ bridge-2582 [007] ...1 318.653496: mlx5_esw_bridge_vlan_cleanup: vid=2 flags=8
+
+- mlx5_esw_bridge_vport_init: trace mlx5 vport assigned with bridge upper
+ device::
+
+ $ echo mlx5:mlx5_esw_bridge_vport_init >> set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ ip-2560 [007] ...1 318.458915: mlx5_esw_bridge_vport_init: vport_num=1
+
+- mlx5_esw_bridge_vport_cleanup: trace mlx5 vport removed from bridge upper
+ device::
+
+ $ echo mlx5:mlx5_esw_bridge_vport_cleanup >> set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ ip-5387 [000] ...1 573713: mlx5_esw_bridge_vport_cleanup: vport_num=1
+
+Eswitch QoS tracepoints:
+
+- mlx5_esw_vport_qos_create: trace creation of transmit scheduler arbiter for vport::
+
+ $ echo mlx5:mlx5_esw_vport_qos_create >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-23496 [018] .... 73136.838831: mlx5_esw_vport_qos_create: (0000:82:00.0) vport=2 tsar_ix=4 bw_share=0, max_rate=0 group=000000007b576bb3
+
+- mlx5_esw_vport_qos_config: trace configuration of transmit scheduler arbiter for vport::
+
+ $ echo mlx5:mlx5_esw_vport_qos_config >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-26548 [023] .... 75754.223823: mlx5_esw_vport_qos_config: (0000:82:00.0) vport=1 tsar_ix=3 bw_share=34, max_rate=10000 group=000000007b576bb3
+
+- mlx5_esw_vport_qos_destroy: trace deletion of transmit scheduler arbiter for vport::
+
+ $ echo mlx5:mlx5_esw_vport_qos_destroy >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-27418 [004] .... 76546.680901: mlx5_esw_vport_qos_destroy: (0000:82:00.0) vport=1 tsar_ix=3
+
+- mlx5_esw_group_qos_create: trace creation of transmit scheduler arbiter for rate group::
+
+ $ echo mlx5:mlx5_esw_group_qos_create >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-26578 [008] .... 75776.022112: mlx5_esw_group_qos_create: (0000:82:00.0) group=000000008dac63ea tsar_ix=5
+
+- mlx5_esw_group_qos_config: trace configuration of transmit scheduler arbiter for rate group::
+
+ $ echo mlx5:mlx5_esw_group_qos_config >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-27303 [020] .... 76461.455356: mlx5_esw_group_qos_config: (0000:82:00.0) group=000000008dac63ea tsar_ix=5 bw_share=100 max_rate=20000
+
+- mlx5_esw_group_qos_destroy: trace deletion of transmit scheduler arbiter for group::
+
+ $ echo mlx5:mlx5_esw_group_qos_destroy >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-27418 [006] .... 76547.187258: mlx5_esw_group_qos_destroy: (0000:82:00.0) group=000000007b576bb3 tsar_ix=1
+
+SF tracepoints:
+
+- mlx5_sf_add: trace addition of the SF port::
+
+ $ echo mlx5:mlx5_sf_add >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ devlink-9363 [031] ..... 24610.188722: mlx5_sf_add: (0000:06:00.0) port_index=32768 controller=0 hw_id=0x8000 sfnum=88
+
+- mlx5_sf_free: trace freeing of the SF port::
+
+ $ echo mlx5:mlx5_sf_free >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ devlink-9830 [038] ..... 26300.404749: mlx5_sf_free: (0000:06:00.0) port_index=32768 controller=0 hw_id=0x8000
+
+- mlx5_sf_activate: trace activation of the SF port::
+
+ $ echo mlx5:mlx5_sf_activate >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ devlink-29841 [008] ..... 3669.635095: mlx5_sf_activate: (0000:08:00.0) port_index=32768 controller=0 hw_id=0x8000
+
+- mlx5_sf_deactivate: trace deactivation of the SF port::
+
+ $ echo mlx5:mlx5_sf_deactivate >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ devlink-29994 [008] ..... 4015.969467: mlx5_sf_deactivate: (0000:08:00.0) port_index=32768 controller=0 hw_id=0x8000
+
+- mlx5_sf_hwc_alloc: trace allocating of the hardware SF context::
+
+ $ echo mlx5:mlx5_sf_hwc_alloc >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ devlink-9775 [031] ..... 26296.385259: mlx5_sf_hwc_alloc: (0000:06:00.0) controller=0 hw_id=0x8000 sfnum=88
+
+- mlx5_sf_hwc_free: trace freeing of the hardware SF context::
+
+ $ echo mlx5:mlx5_sf_hwc_free >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ kworker/u128:3-9093 [046] ..... 24625.365771: mlx5_sf_hwc_free: (0000:06:00.0) hw_id=0x8000
+
+- mlx5_sf_hwc_deferred_free: trace deferred freeing of the hardware SF context::
+
+ $ echo mlx5:mlx5_sf_hwc_deferred_free >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ devlink-9519 [046] ..... 24624.400271: mlx5_sf_hwc_deferred_free: (0000:06:00.0) hw_id=0x8000
+
+- mlx5_sf_update_state: trace state updates for SF contexts::
+
+ $ echo mlx5:mlx5_sf_update_state >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ kworker/u20:3-29490 [009] ..... 4141.453530: mlx5_sf_update_state: (0000:08:00.0) port_index=32768 controller=0 hw_id=0x8000 state=2
+
+- mlx5_sf_vhca_event: trace SF vhca event and state::
+
+ $ echo mlx5:mlx5_sf_vhca_event >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ kworker/u128:3-9093 [046] ..... 24625.365525: mlx5_sf_vhca_event: (0000:06:00.0) hw_id=0x8000 sfnum=88 vhca_state=1
+
+- mlx5_sf_dev_add: trace SF device add event::
+
+ $ echo mlx5:mlx5_sf_dev_add>> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ kworker/u128:3-9093 [000] ..... 24616.524495: mlx5_sf_dev_add: (0000:06:00.0) sfdev=00000000fc5d96fd aux_id=4 hw_id=0x8000 sfnum=88
+
+- mlx5_sf_dev_del: trace SF device delete event::
+
+ $ echo mlx5:mlx5_sf_dev_del >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ kworker/u128:3-9093 [044] ..... 24624.400749: mlx5_sf_dev_del: (0000:06:00.0) sfdev=00000000fc5d96fd aux_id=4 hw_id=0x8000 sfnum=88
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 159ae740ba3c..1c0aeaa13cde 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -28,11 +28,9 @@ EXPORT_SYMBOL_GPL(enetc_port_mac_wr);
static int enetc_num_stack_tx_queues(struct enetc_ndev_priv *priv)
{
int num_tx_rings = priv->num_tx_rings;
- int i;
- for (i = 0; i < priv->num_rx_rings; i++)
- if (priv->rx_ring[i]->xdp.prog)
- return num_tx_rings - num_possible_cpus();
+ if (priv->xdp_prog)
+ return num_tx_rings - num_possible_cpus();
return num_tx_rings;
}
@@ -2456,7 +2454,6 @@ int enetc_open(struct net_device *ndev)
{
struct enetc_ndev_priv *priv = netdev_priv(ndev);
struct enetc_bdr_resource *tx_res, *rx_res;
- int num_stack_tx_queues;
bool extended;
int err;
@@ -2482,16 +2479,6 @@ int enetc_open(struct net_device *ndev)
goto err_alloc_rx;
}
- num_stack_tx_queues = enetc_num_stack_tx_queues(priv);
-
- err = netif_set_real_num_tx_queues(ndev, num_stack_tx_queues);
- if (err)
- goto err_set_queues;
-
- err = netif_set_real_num_rx_queues(ndev, priv->num_rx_rings);
- if (err)
- goto err_set_queues;
-
enetc_tx_onestep_tstamp_init(priv);
enetc_assign_tx_resources(priv, tx_res);
enetc_assign_rx_resources(priv, rx_res);
@@ -2500,8 +2487,6 @@ int enetc_open(struct net_device *ndev)
return 0;
-err_set_queues:
- enetc_free_rx_resources(rx_res, priv->num_rx_rings);
err_alloc_rx:
enetc_free_tx_resources(tx_res, priv->num_tx_rings);
err_alloc_tx:
@@ -2576,8 +2561,11 @@ static int enetc_reconfigure(struct enetc_ndev_priv *priv, bool extended,
* without reconfiguration.
*/
if (!netif_running(priv->ndev)) {
- if (cb)
- cb(priv, ctx);
+ if (cb) {
+ err = cb(priv, ctx);
+ if (err)
+ return err;
+ }
return 0;
}
@@ -2598,8 +2586,11 @@ static int enetc_reconfigure(struct enetc_ndev_priv *priv, bool extended,
enetc_free_rxtx_rings(priv);
/* Interface is down, run optional callback now */
- if (cb)
- cb(priv, ctx);
+ if (cb) {
+ err = cb(priv, ctx);
+ if (err)
+ goto out_restart;
+ }
enetc_assign_tx_resources(priv, tx_res);
enetc_assign_rx_resources(priv, rx_res);
@@ -2608,6 +2599,10 @@ static int enetc_reconfigure(struct enetc_ndev_priv *priv, bool extended,
return 0;
+out_restart:
+ enetc_setup_bdrs(priv, extended);
+ enetc_start(priv->ndev);
+ enetc_free_rx_resources(rx_res, priv->num_rx_rings);
out_free_tx_res:
enetc_free_tx_resources(tx_res, priv->num_tx_rings);
out:
@@ -2631,6 +2626,7 @@ int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data)
if (!num_tc) {
netdev_reset_tc(ndev);
netif_set_real_num_tx_queues(ndev, num_stack_tx_queues);
+ priv->min_num_stack_tx_queues = num_possible_cpus();
/* Reset all ring priorities to 0 */
for (i = 0; i < priv->num_tx_rings; i++) {
@@ -2661,6 +2657,7 @@ int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data)
/* Reset the number of netdev queues based on the TC count */
netif_set_real_num_tx_queues(ndev, num_tc);
+ priv->min_num_stack_tx_queues = num_tc;
netdev_set_num_tc(ndev, num_tc);
@@ -2675,9 +2672,18 @@ EXPORT_SYMBOL_GPL(enetc_setup_tc_mqprio);
static int enetc_reconfigure_xdp_cb(struct enetc_ndev_priv *priv, void *ctx)
{
struct bpf_prog *old_prog, *prog = ctx;
- int i;
+ int num_stack_tx_queues;
+ int err, i;
old_prog = xchg(&priv->xdp_prog, prog);
+
+ num_stack_tx_queues = enetc_num_stack_tx_queues(priv);
+ err = netif_set_real_num_tx_queues(priv->ndev, num_stack_tx_queues);
+ if (err) {
+ xchg(&priv->xdp_prog, old_prog);
+ return err;
+ }
+
if (old_prog)
bpf_prog_put(old_prog);
@@ -2698,9 +2704,20 @@ static int enetc_reconfigure_xdp_cb(struct enetc_ndev_priv *priv, void *ctx)
static int enetc_setup_xdp_prog(struct net_device *ndev, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
+ int num_xdp_tx_queues = prog ? num_possible_cpus() : 0;
struct enetc_ndev_priv *priv = netdev_priv(ndev);
bool extended;
+ if (priv->min_num_stack_tx_queues + num_xdp_tx_queues >
+ priv->num_tx_rings) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Reserving %d XDP TXQs does not leave a minimum of %d TXQs for network stack (total %d available)",
+ num_xdp_tx_queues,
+ priv->min_num_stack_tx_queues,
+ priv->num_tx_rings);
+ return -EBUSY;
+ }
+
extended = !!(priv->active_offloads & ENETC_F_RX_TSTAMP);
/* The buffer layout is changing, so we need to drain the old
@@ -2898,6 +2915,7 @@ EXPORT_SYMBOL_GPL(enetc_ioctl);
int enetc_alloc_msix(struct enetc_ndev_priv *priv)
{
struct pci_dev *pdev = priv->si->pdev;
+ int num_stack_tx_queues;
int first_xdp_tx_ring;
int i, n, err, nvec;
int v_tx_rings;
@@ -2974,6 +2992,17 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv)
}
}
+ num_stack_tx_queues = enetc_num_stack_tx_queues(priv);
+
+ err = netif_set_real_num_tx_queues(priv->ndev, num_stack_tx_queues);
+ if (err)
+ goto fail;
+
+ err = netif_set_real_num_rx_queues(priv->ndev, priv->num_rx_rings);
+ if (err)
+ goto fail;
+
+ priv->min_num_stack_tx_queues = num_possible_cpus();
first_xdp_tx_ring = priv->num_tx_rings - num_possible_cpus();
priv->xdp_tx_ring = &priv->tx_ring[first_xdp_tx_ring];
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 1fe8dfd6b6d4..e21d096c5a90 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -369,6 +369,9 @@ struct enetc_ndev_priv {
struct psfp_cap psfp_cap;
+ /* Minimum number of TX queues required by the network stack */
+ unsigned int min_num_stack_tx_queues;
+
struct phylink *phylink;
int ic_mode;
u32 tx_ictt;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 6f8723cc6874..125c7cb7d839 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -454,6 +454,7 @@ struct mlx5e_txqsq {
struct mlx5_clock *clock;
struct net_device *netdev;
struct mlx5_core_dev *mdev;
+ struct mlx5e_channel *channel;
struct mlx5e_priv *priv;
/* control path */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 8469e9c38670..9a1bc93b7dc6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -771,8 +771,8 @@ void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c)
if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
mlx5e_ptp_rx_set_fs(c->priv);
mlx5e_activate_rq(&c->rq);
- mlx5e_trigger_napi_sched(&c->napi);
}
+ mlx5e_trigger_napi_sched(&c->napi);
}
void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index b195dbbf6c90..41e356d9d785 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -81,6 +81,10 @@ static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
sq->stats->recover++;
clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
mlx5e_activate_txqsq(sq);
+ if (sq->channel)
+ mlx5e_trigger_napi_icosq(sq->channel);
+ else
+ mlx5e_trigger_napi_sched(sq->cq.napi);
return 0;
out:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
index 78c427b38048..c095a12346de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
@@ -232,9 +232,9 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
parse_state->ifindexes[if_count] = out_dev->ifindex;
parse_state->if_count++;
is_uplink_rep = mlx5e_eswitch_uplink_rep(out_dev);
- err = mlx5_lag_do_mirred(priv->mdev, out_dev);
- if (err)
- return err;
+
+ if (mlx5_lag_mpesw_do_mirred(priv->mdev, out_dev, extack))
+ return -EOPNOTSUPP;
out_dev = get_fdb_out_dev(uplink_dev, out_dev);
if (!out_dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index e84c3400ba1d..7b0d3de0ec6c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -158,6 +158,11 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
attrs->family = x->props.family;
attrs->type = x->xso.type;
attrs->reqid = x->props.reqid;
+ attrs->upspec.dport = ntohs(x->sel.dport);
+ attrs->upspec.dport_mask = ntohs(x->sel.dport_mask);
+ attrs->upspec.sport = ntohs(x->sel.sport);
+ attrs->upspec.sport_mask = ntohs(x->sel.sport_mask);
+ attrs->upspec.proto = x->sel.proto;
mlx5e_ipsec_init_limits(sa_entry, attrs);
}
@@ -221,6 +226,13 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with geniv other than seqiv");
return -EINVAL;
}
+
+ if (x->sel.proto != IPPROTO_IP &&
+ (x->sel.proto != IPPROTO_UDP || x->xso.dir != XFRM_DEV_OFFLOAD_OUT)) {
+ NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than UDP, and only Tx direction");
+ return -EINVAL;
+ }
+
switch (x->xso.type) {
case XFRM_DEV_OFFLOAD_CRYPTO:
if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) {
@@ -517,6 +529,12 @@ static int mlx5e_xfrm_validate_policy(struct xfrm_policy *x,
return -EINVAL;
}
+ if (x->selector.proto != IPPROTO_IP &&
+ (x->selector.proto != IPPROTO_UDP || x->xdo.dir != XFRM_DEV_OFFLOAD_OUT)) {
+ NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than UDP, and only Tx direction");
+ return -EINVAL;
+ }
+
return 0;
}
@@ -537,6 +555,11 @@ mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry,
attrs->action = x->action;
attrs->type = XFRM_DEV_OFFLOAD_PACKET;
attrs->reqid = x->xfrm_vec[0].reqid;
+ attrs->upspec.dport = ntohs(sel->dport);
+ attrs->upspec.dport_mask = ntohs(sel->dport_mask);
+ attrs->upspec.sport = ntohs(sel->sport);
+ attrs->upspec.sport_mask = ntohs(sel->sport_mask);
+ attrs->upspec.proto = sel->proto;
}
static int mlx5e_xfrm_add_policy(struct xfrm_policy *x,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 8bed9c361075..b387adca9c20 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -52,6 +52,14 @@ struct aes_gcm_keymat {
u32 aes_key[256 / 32];
};
+struct upspec {
+ u16 dport;
+ u16 dport_mask;
+ u16 sport;
+ u16 sport_mask;
+ u8 proto;
+};
+
struct mlx5_accel_esp_xfrm_attrs {
u32 esn;
u32 spi;
@@ -68,6 +76,7 @@ struct mlx5_accel_esp_xfrm_attrs {
__be32 a6[4];
} daddr;
+ struct upspec upspec;
u8 dir : 2;
u8 esn_overlap : 1;
u8 esn_trigger : 1;
@@ -181,6 +190,7 @@ struct mlx5_accel_pol_xfrm_attrs {
__be32 a6[4];
} daddr;
+ struct upspec upspec;
u8 family;
u8 action;
u8 type : 2;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 9f19f4b59a70..5da6fe68eea6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -467,6 +467,27 @@ static void setup_fte_reg_c0(struct mlx5_flow_spec *spec, u32 reqid)
misc_parameters_2.metadata_reg_c_0, reqid);
}
+static void setup_fte_upper_proto_match(struct mlx5_flow_spec *spec, struct upspec *upspec)
+{
+ if (upspec->proto != IPPROTO_UDP)
+ return;
+
+ spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, spec->match_criteria, ip_protocol);
+ MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, ip_protocol, upspec->proto);
+ if (upspec->dport) {
+ MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_dport,
+ upspec->dport_mask);
+ MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_dport, upspec->dport);
+ }
+
+ if (upspec->sport) {
+ MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_dport,
+ upspec->sport_mask);
+ MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_dport, upspec->sport);
+ }
+}
+
static int setup_modify_header(struct mlx5_core_dev *mdev, u32 val, u8 dir,
struct mlx5_flow_act *flow_act)
{
@@ -654,6 +675,7 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6);
setup_fte_no_frags(spec);
+ setup_fte_upper_proto_match(spec, &attrs->upspec);
switch (attrs->type) {
case XFRM_DEV_OFFLOAD_CRYPTO:
@@ -728,6 +750,7 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6);
setup_fte_no_frags(spec);
+ setup_fte_upper_proto_match(spec, &attrs->upspec);
err = setup_modify_header(mdev, attrs->reqid, XFRM_DEV_OFFLOAD_OUT,
&flow_act);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0e87432ec6f1..27f90baac768 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1470,6 +1470,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
sq->mkey_be = c->mkey_be;
sq->netdev = c->netdev;
sq->mdev = c->mdev;
+ sq->channel = c;
sq->priv = c->priv;
sq->ch_ix = c->ix;
sq->txq_ix = txq_ix;
@@ -2482,8 +2483,6 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
mlx5e_activate_xsk(c);
else
mlx5e_activate_rq(&c->rq);
-
- mlx5e_trigger_napi_icosq(c);
}
static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
@@ -2575,13 +2574,19 @@ err_free:
return err;
}
-static void mlx5e_activate_channels(struct mlx5e_channels *chs)
+static void mlx5e_activate_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
{
int i;
for (i = 0; i < chs->num; i++)
mlx5e_activate_channel(chs->c[i]);
+ if (priv->htb)
+ mlx5e_qos_activate_queues(priv);
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_trigger_napi_icosq(chs->c[i]);
+
if (chs->ptp)
mlx5e_ptp_activate_channel(chs->ptp);
}
@@ -2888,9 +2893,7 @@ out:
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
{
mlx5e_build_txq_maps(priv);
- mlx5e_activate_channels(&priv->channels);
- if (priv->htb)
- mlx5e_qos_activate_queues(priv);
+ mlx5e_activate_channels(priv, &priv->channels);
mlx5e_xdp_tx_enable(priv);
/* dev_watchdog() wants all TX queues to be started when the carrier is
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
index eff92dc0927c..11fefb99d685 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -172,6 +172,7 @@ enum mlx5_ptys_rate {
MLX5_PTYS_RATE_EDR = 1 << 5,
MLX5_PTYS_RATE_HDR = 1 << 6,
MLX5_PTYS_RATE_NDR = 1 << 7,
+ MLX5_PTYS_RATE_XDR = 1 << 8,
};
static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate)
@@ -185,6 +186,7 @@ static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate)
case MLX5_PTYS_RATE_EDR: return 25000;
case MLX5_PTYS_RATE_HDR: return 50000;
case MLX5_PTYS_RATE_NDR: return 100000;
+ case MLX5_PTYS_RATE_XDR: return 200000;
default: return -1;
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
index b8feaf0f5c4c..f4b777d4e108 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
@@ -22,7 +22,7 @@ static int type_show(struct seq_file *file, void *priv)
struct mlx5_lag *ldev;
char *mode = NULL;
- ldev = dev->priv.lag;
+ ldev = mlx5_lag_dev(dev);
mutex_lock(&ldev->lock);
if (__mlx5_lag_is_active(ldev))
mode = get_str_mode_type(ldev);
@@ -41,7 +41,7 @@ static int port_sel_mode_show(struct seq_file *file, void *priv)
int ret = 0;
char *mode;
- ldev = dev->priv.lag;
+ ldev = mlx5_lag_dev(dev);
mutex_lock(&ldev->lock);
if (__mlx5_lag_is_active(ldev))
mode = mlx5_get_str_port_sel_mode(ldev->mode, ldev->mode_flags);
@@ -61,7 +61,7 @@ static int state_show(struct seq_file *file, void *priv)
struct mlx5_lag *ldev;
bool active;
- ldev = dev->priv.lag;
+ ldev = mlx5_lag_dev(dev);
mutex_lock(&ldev->lock);
active = __mlx5_lag_is_active(ldev);
mutex_unlock(&ldev->lock);
@@ -77,7 +77,7 @@ static int flags_show(struct seq_file *file, void *priv)
bool shared_fdb;
bool lag_active;
- ldev = dev->priv.lag;
+ ldev = mlx5_lag_dev(dev);
mutex_lock(&ldev->lock);
lag_active = __mlx5_lag_is_active(ldev);
if (!lag_active)
@@ -108,7 +108,7 @@ static int mapping_show(struct seq_file *file, void *priv)
int num_ports;
int i;
- ldev = dev->priv.lag;
+ ldev = mlx5_lag_dev(dev);
mutex_lock(&ldev->lock);
lag_active = __mlx5_lag_is_active(ldev);
if (lag_active) {
@@ -142,7 +142,7 @@ static int members_show(struct seq_file *file, void *priv)
struct mlx5_lag *ldev;
int i;
- ldev = dev->priv.lag;
+ ldev = mlx5_lag_dev(dev);
mutex_lock(&ldev->lock);
for (i = 0; i < ldev->ports; i++) {
if (!ldev->pf[i].dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index ad32b80e8501..dbf218cac535 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -1187,7 +1187,7 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
tmp_dev = mlx5_get_next_phys_dev_lag(dev);
if (tmp_dev)
- ldev = tmp_dev->priv.lag;
+ ldev = mlx5_lag_dev(tmp_dev);
if (!ldev) {
ldev = mlx5_lag_dev_alloc(dev);
@@ -1386,8 +1386,7 @@ bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
- res = ldev && __mlx5_lag_is_sriov(ldev) &&
- test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
+ res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
spin_unlock_irqrestore(&lag_lock, flags);
return res;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index f30ac2de639f..66013bef9939 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -50,19 +50,6 @@ struct lag_tracker {
enum netdev_lag_hash hash_type;
};
-enum mpesw_op {
- MLX5_MPESW_OP_ENABLE,
- MLX5_MPESW_OP_DISABLE,
-};
-
-struct mlx5_mpesw_work_st {
- struct work_struct work;
- struct mlx5_lag *lag;
- enum mpesw_op op;
- struct completion comp;
- int result;
-};
-
/* LAG data of a ConnectX card.
* It serves both its phys functions.
*/
@@ -124,8 +111,6 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
struct net_device *ndev);
bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev);
-void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev);
-int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev);
char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags);
void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
index d9fcb9ed726f..d85a8dfc153d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
@@ -28,13 +28,9 @@ static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
- struct mlx5_lag *ldev;
- bool res;
-
- ldev = mlx5_lag_dev(dev);
- res = ldev && __mlx5_lag_is_multipath(ldev);
+ struct mlx5_lag *ldev = mlx5_lag_dev(dev);
- return res;
+ return ldev && __mlx5_lag_is_multipath(ldev);
}
/**
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
index c17e8f1ec914..3799f89ed1a6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -58,7 +58,7 @@ static void mlx5_mpesw_work(struct work_struct *work)
static int mlx5_lag_mpesw_queue_work(struct mlx5_core_dev *dev,
enum mpesw_op op)
{
- struct mlx5_lag *ldev = dev->priv.lag;
+ struct mlx5_lag *ldev = mlx5_lag_dev(dev);
struct mlx5_mpesw_work_st *work;
int err = 0;
@@ -96,25 +96,27 @@ int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev)
return mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_ENABLE);
}
-int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev)
+int mlx5_lag_mpesw_do_mirred(struct mlx5_core_dev *mdev,
+ struct net_device *out_dev,
+ struct netlink_ext_ack *extack)
{
- struct mlx5_lag *ldev = mdev->priv.lag;
+ struct mlx5_lag *ldev = mlx5_lag_dev(mdev);
if (!netif_is_bond_master(out_dev) || !ldev)
return 0;
- if (ldev->mode == MLX5_LAG_MODE_MPESW)
- return -EOPNOTSUPP;
+ if (ldev->mode != MLX5_LAG_MODE_MPESW)
+ return 0;
- return 0;
+ NL_SET_ERR_MSG_MOD(extack, "can't forward to bond in mpesw mode");
+ return -EOPNOTSUPP;
}
bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev)
{
- bool ret;
+ struct mlx5_lag *ldev = mlx5_lag_dev(dev);
- ret = dev->priv.lag && dev->priv.lag->mode == MLX5_LAG_MODE_MPESW;
- return ret;
+ return ldev && ldev->mode == MLX5_LAG_MODE_MPESW;
}
void mlx5_lag_mpesw_init(struct mlx5_lag *ldev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
index 88e8daffcf92..818f19b5a984 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
@@ -12,8 +12,25 @@ struct lag_mpesw {
atomic_t mpesw_rule_count;
};
-int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev);
+enum mpesw_op {
+ MLX5_MPESW_OP_ENABLE,
+ MLX5_MPESW_OP_DISABLE,
+};
+
+struct mlx5_mpesw_work_st {
+ struct work_struct work;
+ struct mlx5_lag *lag;
+ enum mpesw_op op;
+ struct completion comp;
+ int result;
+};
+
+int mlx5_lag_mpesw_do_mirred(struct mlx5_core_dev *mdev,
+ struct net_device *out_dev,
+ struct netlink_ext_ack *extack);
bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev);
+void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev);
+int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev);
#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
void mlx5_lag_mpesw_init(struct mlx5_lag *ldev);
void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 75510a12ab02..4c9a40211059 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -362,7 +362,7 @@ static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta)
return mlx5_ptp_adjtime(ptp, delta);
}
-static int mlx5_ptp_adjfreq_real_time(struct mlx5_core_dev *mdev, s32 freq)
+static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm)
{
u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {};
@@ -370,7 +370,15 @@ static int mlx5_ptp_adjfreq_real_time(struct mlx5_core_dev *mdev, s32 freq)
return 0;
MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_ADJUST_FREQ_UTC);
- MLX5_SET(mtutc_reg, in, freq_adjustment, freq);
+
+ if (MLX5_CAP_MCAM_FEATURE(mdev, mtutc_freq_adj_units)) {
+ MLX5_SET(mtutc_reg, in, freq_adj_units,
+ MLX5_MTUTC_FREQ_ADJ_UNITS_SCALED_PPM);
+ MLX5_SET(mtutc_reg, in, freq_adjustment, scaled_ppm);
+ } else {
+ MLX5_SET(mtutc_reg, in, freq_adj_units, MLX5_MTUTC_FREQ_ADJ_UNITS_PPB);
+ MLX5_SET(mtutc_reg, in, freq_adjustment, scaled_ppm_to_ppb(scaled_ppm));
+ }
return mlx5_set_mtutc(mdev, in, sizeof(in));
}
@@ -385,7 +393,8 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
int err;
mdev = container_of(clock, struct mlx5_core_dev, clock);
- err = mlx5_ptp_adjfreq_real_time(mdev, scaled_ppm_to_ppb(scaled_ppm));
+
+ err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 60596357bfc7..96e57f1812a4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -211,7 +211,8 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u32 function)
n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
if (n >= MLX5_NUM_4K_IN_PAGE) {
- mlx5_core_warn(dev, "alloc 4k bug\n");
+ mlx5_core_warn(dev, "alloc 4k bug: fw page = 0x%llx, n = %u, bitmask: %lu, max num of 4K pages: %d\n",
+ fp->addr, n, fp->bitmask, MLX5_NUM_4K_IN_PAGE);
return -ENOENT;
}
clear_bit(n, &fp->bitmask);
diff --git a/drivers/net/ethernet/microchip/sparx5/Makefile b/drivers/net/ethernet/microchip/sparx5/Makefile
index d0ed7090aa54..1cb1cc3f1a85 100644
--- a/drivers/net/ethernet/microchip/sparx5/Makefile
+++ b/drivers/net/ethernet/microchip/sparx5/Makefile
@@ -9,7 +9,8 @@ sparx5-switch-y := sparx5_main.o sparx5_packet.o \
sparx5_netdev.o sparx5_phylink.o sparx5_port.o sparx5_mactable.o sparx5_vlan.o \
sparx5_switchdev.o sparx5_calendar.o sparx5_ethtool.o sparx5_fdma.o \
sparx5_ptp.o sparx5_pgid.o sparx5_tc.o sparx5_qos.o \
- sparx5_vcap_impl.o sparx5_vcap_ag_api.o sparx5_tc_flower.o sparx5_tc_matchall.o
+ sparx5_vcap_impl.o sparx5_vcap_ag_api.o sparx5_tc_flower.o \
+ sparx5_tc_matchall.o sparx5_pool.o sparx5_sdlb.o sparx5_police.o sparx5_psfp.o
sparx5-switch-$(CONFIG_SPARX5_DCB) += sparx5_dcb.o
sparx5-switch-$(CONFIG_DEBUG_FS) += sparx5_vcap_debugfs.o
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index 300fb7247bb3..c213a4414e65 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -205,6 +205,7 @@ static const struct sparx5_main_io_resource sparx5_main_iomap[] = {
{ TARGET_EACL, 0x112c0000, 2 }, /* 0x6112c0000 */
{ TARGET_ANA_CL, 0x11400000, 2 }, /* 0x611400000 */
{ TARGET_ANA_L3, 0x11480000, 2 }, /* 0x611480000 */
+ { TARGET_ANA_AC_SDLB, 0x11500000, 2 }, /* 0x611500000 */
{ TARGET_HSCH, 0x11580000, 2 }, /* 0x611580000 */
{ TARGET_REW, 0x11600000, 2 }, /* 0x611600000 */
{ TARGET_ANA_L2, 0x11800000, 2 }, /* 0x611800000 */
@@ -501,8 +502,8 @@ static int sparx5_init_coreclock(struct sparx5 *sparx5)
clk_period = sparx5_clk_period(freq);
- spx5_rmw(HSCH_SYS_CLK_PER_SYS_CLK_PER_100PS_SET(clk_period / 100),
- HSCH_SYS_CLK_PER_SYS_CLK_PER_100PS,
+ spx5_rmw(HSCH_SYS_CLK_PER_100PS_SET(clk_period / 100),
+ HSCH_SYS_CLK_PER_100PS,
sparx5,
HSCH_SYS_CLK_PER);
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
index 4a574cdcb584..72e7928912eb 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
@@ -396,6 +396,7 @@ int sparx5_ptp_txtstamp_request(struct sparx5_port *port,
void sparx5_ptp_txtstamp_release(struct sparx5_port *port,
struct sk_buff *skb);
irqreturn_t sparx5_ptp_irq_handler(int irq, void *args);
+int sparx5_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts);
/* sparx5_vcap_impl.c */
int sparx5_vcap_init(struct sparx5 *sparx5);
@@ -413,6 +414,129 @@ int sparx5_pgid_alloc_glag(struct sparx5 *spx5, u16 *idx);
int sparx5_pgid_alloc_mcast(struct sparx5 *spx5, u16 *idx);
int sparx5_pgid_free(struct sparx5 *spx5, u16 idx);
+/* sparx5_pool.c */
+struct sparx5_pool_entry {
+ u16 ref_cnt;
+ u32 idx; /* tc index */
+};
+
+u32 sparx5_pool_idx_to_id(u32 idx);
+int sparx5_pool_put(struct sparx5_pool_entry *pool, int size, u32 id);
+int sparx5_pool_get(struct sparx5_pool_entry *pool, int size, u32 *id);
+int sparx5_pool_get_with_idx(struct sparx5_pool_entry *pool, int size, u32 idx,
+ u32 *id);
+
+/* sparx5_sdlb.c */
+#define SPX5_SDLB_PUP_TOKEN_DISABLE 0x1FFF
+#define SPX5_SDLB_PUP_TOKEN_MAX (SPX5_SDLB_PUP_TOKEN_DISABLE - 1)
+#define SPX5_SDLB_GROUP_RATE_MAX 25000000000ULL
+#define SPX5_SDLB_2CYCLES_TYPE2_THRES_OFFSET 13
+#define SPX5_SDLB_CNT 4096
+#define SPX5_SDLB_GROUP_CNT 10
+#define SPX5_CLK_PER_100PS_DEFAULT 16
+
+struct sparx5_sdlb_group {
+ u64 max_rate;
+ u32 min_burst;
+ u32 frame_size;
+ u32 pup_interval;
+ u32 nsets;
+};
+
+extern struct sparx5_sdlb_group sdlb_groups[SPX5_SDLB_GROUP_CNT];
+int sparx5_sdlb_pup_token_get(struct sparx5 *sparx5, u32 pup_interval,
+ u64 rate);
+
+int sparx5_sdlb_clk_hz_get(struct sparx5 *sparx5);
+int sparx5_sdlb_group_get_by_rate(struct sparx5 *sparx5, u32 rate, u32 burst);
+int sparx5_sdlb_group_get_by_index(struct sparx5 *sparx5, u32 idx, u32 *group);
+
+int sparx5_sdlb_group_add(struct sparx5 *sparx5, u32 group, u32 idx);
+int sparx5_sdlb_group_del(struct sparx5 *sparx5, u32 group, u32 idx);
+
+void sparx5_sdlb_group_init(struct sparx5 *sparx5, u64 max_rate, u32 min_burst,
+ u32 frame_size, u32 idx);
+
+/* sparx5_police.c */
+enum {
+ /* More policer types will be added later */
+ SPX5_POL_SERVICE
+};
+
+struct sparx5_policer {
+ u32 type;
+ u32 idx;
+ u64 rate;
+ u32 burst;
+ u32 group;
+ u8 event_mask;
+};
+
+int sparx5_policer_conf_set(struct sparx5 *sparx5, struct sparx5_policer *pol);
+
+/* sparx5_psfp.c */
+#define SPX5_PSFP_GCE_CNT 4
+#define SPX5_PSFP_SG_CNT 1024
+#define SPX5_PSFP_SG_MIN_CYCLE_TIME_NS (1 * NSEC_PER_USEC)
+#define SPX5_PSFP_SG_MAX_CYCLE_TIME_NS ((1 * NSEC_PER_SEC) - 1)
+#define SPX5_PSFP_SG_MAX_IPV (SPX5_PRIOS - 1)
+#define SPX5_PSFP_SG_OPEN (SPX5_PSFP_SG_CNT - 1)
+#define SPX5_PSFP_SG_CYCLE_TIME_DEFAULT 1000000
+#define SPX5_PSFP_SF_MAX_SDU 16383
+
+struct sparx5_psfp_fm {
+ struct sparx5_policer pol;
+};
+
+struct sparx5_psfp_gce {
+ bool gate_state; /* StreamGateState */
+ u32 interval; /* TimeInterval */
+ u32 ipv; /* InternalPriorityValue */
+ u32 maxoctets; /* IntervalOctetMax */
+};
+
+struct sparx5_psfp_sg {
+ bool gate_state; /* PSFPAdminGateStates */
+ bool gate_enabled; /* PSFPGateEnabled */
+ u32 ipv; /* PSFPAdminIPV */
+ struct timespec64 basetime; /* PSFPAdminBaseTime */
+ u32 cycletime; /* PSFPAdminCycleTime */
+ u32 cycletimeext; /* PSFPAdminCycleTimeExtension */
+ u32 num_entries; /* PSFPAdminControlListLength */
+ struct sparx5_psfp_gce gce[SPX5_PSFP_GCE_CNT];
+};
+
+struct sparx5_psfp_sf {
+ bool sblock_osize_ena;
+ bool sblock_osize;
+ u32 max_sdu;
+ u32 sgid; /* Gate id */
+ u32 fmid; /* Flow meter id */
+};
+
+int sparx5_psfp_fm_add(struct sparx5 *sparx5, u32 uidx,
+ struct sparx5_psfp_fm *fm, u32 *id);
+int sparx5_psfp_fm_del(struct sparx5 *sparx5, u32 id);
+
+int sparx5_psfp_sg_add(struct sparx5 *sparx5, u32 uidx,
+ struct sparx5_psfp_sg *sg, u32 *id);
+int sparx5_psfp_sg_del(struct sparx5 *sparx5, u32 id);
+
+int sparx5_psfp_sf_add(struct sparx5 *sparx5, const struct sparx5_psfp_sf *sf,
+ u32 *id);
+int sparx5_psfp_sf_del(struct sparx5 *sparx5, u32 id);
+
+u32 sparx5_psfp_isdx_get_sf(struct sparx5 *sparx5, u32 isdx);
+u32 sparx5_psfp_isdx_get_fm(struct sparx5 *sparx5, u32 isdx);
+u32 sparx5_psfp_sf_get_sg(struct sparx5 *sparx5, u32 sfid);
+void sparx5_isdx_conf_set(struct sparx5 *sparx5, u32 isdx, u32 sfid, u32 fmid);
+
+void sparx5_psfp_init(struct sparx5 *sparx5);
+
+/* sparx5_qos.c */
+void sparx5_new_base_time(struct sparx5 *sparx5, const u32 cycle_time,
+ const ktime_t org_base_time, ktime_t *new_base_time);
+
/* Clock period in picoseconds */
static inline u32 sparx5_clk_period(enum sparx5_core_clockfreq cclock)
{
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h
index 4813433b435c..bd73742939d3 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h
@@ -19,6 +19,7 @@ enum sparx5_target {
TARGET_ANA_AC = 1,
TARGET_ANA_ACL = 2,
TARGET_ANA_AC_POL = 4,
+ TARGET_ANA_AC_SDLB = 5,
TARGET_ANA_CL = 6,
TARGET_ANA_L2 = 7,
TARGET_ANA_L3 = 8,
@@ -130,6 +131,254 @@ enum sparx5_target {
#define ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA_GET(x)\
FIELD_GET(ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA, x)
+/* ANA_AC:TSN_SF:TSN_SF */
+#define ANA_AC_TSN_SF \
+ __REG(TARGET_ANA_AC, 0, 1, 839136, 0, 1, 4, 0, 0, 1, 4)
+
+#define ANA_AC_TSN_SF_TSN_STREAM_BLOCK_OVERSIZE_STICKY BIT(9)
+#define ANA_AC_TSN_SF_TSN_STREAM_BLOCK_OVERSIZE_STICKY_SET(x)\
+ FIELD_PREP(ANA_AC_TSN_SF_TSN_STREAM_BLOCK_OVERSIZE_STICKY, x)
+#define ANA_AC_TSN_SF_TSN_STREAM_BLOCK_OVERSIZE_STICKY_GET(x)\
+ FIELD_GET(ANA_AC_TSN_SF_TSN_STREAM_BLOCK_OVERSIZE_STICKY, x)
+
+#define ANA_AC_TSN_SF_PORT_NUM GENMASK(8, 0)
+#define ANA_AC_TSN_SF_PORT_NUM_SET(x)\
+ FIELD_PREP(ANA_AC_TSN_SF_PORT_NUM, x)
+#define ANA_AC_TSN_SF_PORT_NUM_GET(x)\
+ FIELD_GET(ANA_AC_TSN_SF_PORT_NUM, x)
+
+/* ANA_AC:TSN_SF_CFG:TSN_SF_CFG */
+#define ANA_AC_TSN_SF_CFG(g) \
+ __REG(TARGET_ANA_AC, 0, 1, 839680, g, 1024, 4, 0, 0, 1, 4)
+
+#define ANA_AC_TSN_SF_CFG_TSN_SGID GENMASK(25, 16)
+#define ANA_AC_TSN_SF_CFG_TSN_SGID_SET(x)\
+ FIELD_PREP(ANA_AC_TSN_SF_CFG_TSN_SGID, x)
+#define ANA_AC_TSN_SF_CFG_TSN_SGID_GET(x)\
+ FIELD_GET(ANA_AC_TSN_SF_CFG_TSN_SGID, x)
+
+#define ANA_AC_TSN_SF_CFG_TSN_MAX_SDU GENMASK(15, 2)
+#define ANA_AC_TSN_SF_CFG_TSN_MAX_SDU_SET(x)\
+ FIELD_PREP(ANA_AC_TSN_SF_CFG_TSN_MAX_SDU, x)
+#define ANA_AC_TSN_SF_CFG_TSN_MAX_SDU_GET(x)\
+ FIELD_GET(ANA_AC_TSN_SF_CFG_TSN_MAX_SDU, x)
+
+#define ANA_AC_TSN_SF_CFG_BLOCK_OVERSIZE_ENA BIT(1)
+#define ANA_AC_TSN_SF_CFG_BLOCK_OVERSIZE_ENA_SET(x) \
+ FIELD_PREP(ANA_AC_TSN_SF_CFG_BLOCK_OVERSIZE_ENA, x)
+#define ANA_AC_TSN_SF_CFG_BLOCK_OVERSIZE_ENA_GET(x) \
+ FIELD_GET(ANA_AC_TSN_SF_CFG_BLOCK_OVERSIZE_ENA, x)
+
+#define ANA_AC_TSN_SF_CFG_BLOCK_OVERSIZE_STATE BIT(0)
+#define ANA_AC_TSN_SF_CFG_BLOCK_OVERSIZE_STATE_SET(x) \
+ FIELD_PREP(ANA_AC_TSN_SF_CFG_BLOCK_OVERSIZE_STATE, x)
+#define ANA_AC_TSN_SF_CFG_BLOCK_OVERSIZE_STATE_GET(x) \
+ FIELD_GET(ANA_AC_TSN_SF_CFG_BLOCK_OVERSIZE_STATE, x)
+
+/* ANA_AC:TSN_SF_STATUS:TSN_SF_STATUS */
+#define ANA_AC_TSN_SF_STATUS \
+ __REG(TARGET_ANA_AC, 0, 1, 839072, 0, 1, 16, 0, 0, 1, 4)
+
+#define ANA_AC_TSN_SF_STATUS_FRM_LEN GENMASK(25, 12)
+#define ANA_AC_TSN_SF_STATUS_FRM_LEN_SET(x)\
+ FIELD_PREP(ANA_AC_TSN_SF_STATUS_FRM_LEN, x)
+#define ANA_AC_TSN_SF_STATUS_FRM_LEN_GET(x)\
+ FIELD_GET(ANA_AC_TSN_SF_STATUS_FRM_LEN, x)
+
+#define ANA_AC_TSN_SF_STATUS_DLB_DROP BIT(11)
+#define ANA_AC_TSN_SF_STATUS_DLB_DROP_SET(x)\
+ FIELD_PREP(ANA_AC_TSN_SF_STATUS_DLB_DROP, x)
+#define ANA_AC_TSN_SF_STATUS_DLB_DROP_GET(x)\
+ FIELD_GET(ANA_AC_TSN_SF_STATUS_DLB_DROP, x)
+
+#define ANA_AC_TSN_SF_STATUS_TSN_SFID GENMASK(10, 1)
+#define ANA_AC_TSN_SF_STATUS_TSN_SFID_SET(x)\
+ FIELD_PREP(ANA_AC_TSN_SF_STATUS_TSN_SFID, x)
+#define ANA_AC_TSN_SF_STATUS_TSN_SFID_GET(x)\
+ FIELD_GET(ANA_AC_TSN_SF_STATUS_TSN_SFID, x)
+
+#define ANA_AC_TSN_SF_STATUS_TSTAMP_VLD BIT(0)
+#define ANA_AC_TSN_SF_STATUS_TSTAMP_VLD_SET(x)\
+ FIELD_PREP(ANA_AC_TSN_SF_STATUS_TSTAMP_VLD, x)
+#define ANA_AC_TSN_SF_STATUS_TSTAMP_VLD_GET(x)\
+ FIELD_GET(ANA_AC_TSN_SF_STATUS_TSTAMP_VLD, x)
+
+/* ANA_AC:SG_ACCESS:SG_ACCESS_CTRL */
+#define ANA_AC_SG_ACCESS_CTRL \
+ __REG(TARGET_ANA_AC, 0, 1, 839140, 0, 1, 12, 0, 0, 1, 4)
+
+#define ANA_AC_SG_ACCESS_CTRL_SGID GENMASK(9, 0)
+#define ANA_AC_SG_ACCESS_CTRL_SGID_SET(x)\
+ FIELD_PREP(ANA_AC_SG_ACCESS_CTRL_SGID, x)
+#define ANA_AC_SG_ACCESS_CTRL_SGID_GET(x)\
+ FIELD_GET(ANA_AC_SG_ACCESS_CTRL_SGID, x)
+
+#define ANA_AC_SG_ACCESS_CTRL_CONFIG_CHANGE BIT(28)
+#define ANA_AC_SG_ACCESS_CTRL_CONFIG_CHANGE_SET(x)\
+ FIELD_PREP(ANA_AC_SG_ACCESS_CTRL_CONFIG_CHANGE, x)
+#define ANA_AC_SG_ACCESS_CTRL_CONFIG_CHANGE_GET(x)\
+ FIELD_GET(ANA_AC_SG_ACCESS_CTRL_CONFIG_CHANGE, x)
+
+/* ANA_AC:SG_ACCESS:SG_CYCLETIME_UPDATE_PERIOD */
+#define ANA_AC_SG_CYCLETIME_UPDATE_PERIOD \
+ __REG(TARGET_ANA_AC, 0, 1, 839140, 0, 1, 12, 8, 0, 1, 4)
+
+#define ANA_AC_SG_CYCLETIME_UPDATE_PERIOD_SG_CT_CLKS GENMASK(15, 0)
+#define ANA_AC_SG_CYCLETIME_UPDATE_PERIOD_SG_CT_CLKS_SET(x)\
+ FIELD_PREP(ANA_AC_SG_CYCLETIME_UPDATE_PERIOD_SG_CT_CLKS, x)
+#define ANA_AC_SG_CYCLETIME_UPDATE_PERIOD_SG_CT_CLKS_GET(x)\
+ FIELD_GET(ANA_AC_SG_CYCLETIME_UPDATE_PERIOD_SG_CT_CLKS, x)
+
+#define ANA_AC_SG_CYCLETIME_UPDATE_PERIOD_SG_CT_UPDATE_ENA BIT(31)
+#define ANA_AC_SG_CYCLETIME_UPDATE_PERIOD_SG_CT_UPDATE_ENA_SET(x)\
+ FIELD_PREP(ANA_AC_SG_CYCLETIME_UPDATE_PERIOD_SG_CT_UPDATE_ENA, x)
+#define ANA_AC_SG_CYCLETIME_UPDATE_PERIOD_SG_CT_UPDATE_ENA_GET(x)\
+ FIELD_GET(ANA_AC_SG_CYCLETIME_UPDATE_PERIOD_SG_CT_UPDATE_ENA, x)
+
+/* ANA_AC:SG_CONFIG:SG_CONFIG_REG_1 */
+#define ANA_AC_SG_CONFIG_REG_1 \
+ __REG(TARGET_ANA_AC, 0, 1, 851584, 0, 1, 128, 48, 0, 1, 4)
+
+/* ANA_AC:SG_CONFIG:SG_CONFIG_REG_2 */
+#define ANA_AC_SG_CONFIG_REG_2 \
+ __REG(TARGET_ANA_AC, 0, 1, 851584, 0, 1, 128, 52, 0, 1, 4)
+
+/* ANA_AC:SG_CONFIG:SG_CONFIG_REG_3 */
+#define ANA_AC_SG_CONFIG_REG_3 \
+ __REG(TARGET_ANA_AC, 0, 1, 851584, 0, 1, 128, 56, 0, 1, 4)
+
+#define ANA_AC_SG_CONFIG_REG_3_BASE_TIME_SEC_MSB GENMASK(15, 0)
+#define ANA_AC_SG_CONFIG_REG_3_BASE_TIME_SEC_MSB_SET(x)\
+ FIELD_PREP(ANA_AC_SG_CONFIG_REG_3_BASE_TIME_SEC_MSB, x)
+#define ANA_AC_SG_CONFIG_REG_3_BASE_TIME_SEC_MSB_GET(x)\
+ FIELD_GET(ANA_AC_SG_CONFIG_REG_3_BASE_TIME_SEC_MSB, x)
+
+#define ANA_AC_SG_CONFIG_REG_3_LIST_LENGTH GENMASK(18, 16)
+#define ANA_AC_SG_CONFIG_REG_3_LIST_LENGTH_SET(x)\
+ FIELD_PREP(ANA_AC_SG_CONFIG_REG_3_LIST_LENGTH, x)
+#define ANA_AC_SG_CONFIG_REG_3_LIST_LENGTH_GET(x)\
+ FIELD_GET(ANA_AC_SG_CONFIG_REG_3_LIST_LENGTH, x)
+
+#define ANA_AC_SG_CONFIG_REG_3_GATE_ENABLE BIT(20)
+#define ANA_AC_SG_CONFIG_REG_3_GATE_ENABLE_SET(x)\
+ FIELD_PREP(ANA_AC_SG_CONFIG_REG_3_GATE_ENABLE, x)
+#define ANA_AC_SG_CONFIG_REG_3_GATE_ENABLE_GET(x)\
+ FIELD_GET(ANA_AC_SG_CONFIG_REG_3_GATE_ENABLE, x)
+
+#define ANA_AC_SG_CONFIG_REG_3_INIT_IPS GENMASK(24, 21)
+#define ANA_AC_SG_CONFIG_REG_3_INIT_IPS_SET(x)\
+ FIELD_PREP(ANA_AC_SG_CONFIG_REG_3_INIT_IPS, x)
+#define ANA_AC_SG_CONFIG_REG_3_INIT_IPS_GET(x)\
+ FIELD_GET(ANA_AC_SG_CONFIG_REG_3_INIT_IPS, x)
+
+#define ANA_AC_SG_CONFIG_REG_3_INIT_GATE_STATE BIT(25)
+#define ANA_AC_SG_CONFIG_REG_3_INIT_GATE_STATE_SET(x)\
+ FIELD_PREP(ANA_AC_SG_CONFIG_REG_3_INIT_GATE_STATE, x)
+#define ANA_AC_SG_CONFIG_REG_3_INIT_GATE_STATE_GET(x)\
+ FIELD_GET(ANA_AC_SG_CONFIG_REG_3_INIT_GATE_STATE, x)
+
+#define ANA_AC_SG_CONFIG_REG_3_INVALID_RX_ENA BIT(26)
+#define ANA_AC_SG_CONFIG_REG_3_INVALID_RX_ENA_SET(x)\
+ FIELD_PREP(ANA_AC_SG_CONFIG_REG_3_INVALID_RX_ENA, x)
+#define ANA_AC_SG_CONFIG_REG_3_INVALID_RX_ENA_GET(x)\
+ FIELD_GET(ANA_AC_SG_CONFIG_REG_3_INVALID_RX_ENA, x)
+
+#define ANA_AC_SG_CONFIG_REG_3_INVALID_RX BIT(27)
+#define ANA_AC_SG_CONFIG_REG_3_INVALID_RX_SET(x)\
+ FIELD_PREP(ANA_AC_SG_CONFIG_REG_3_INVALID_RX, x)
+#define ANA_AC_SG_CONFIG_REG_3_INVALID_RX_GET(x)\
+ FIELD_GET(ANA_AC_SG_CONFIG_REG_3_INVALID_RX, x)
+
+#define ANA_AC_SG_CONFIG_REG_3_OCTETS_EXCEEDED_ENA BIT(28)
+#define ANA_AC_SG_CONFIG_REG_3_OCTETS_EXCEEDED_ENA_SET(x)\
+ FIELD_PREP(ANA_AC_SG_CONFIG_REG_3_OCTETS_EXCEEDED_ENA, x)
+#define ANA_AC_SG_CONFIG_REG_3_OCTETS_EXCEEDED_ENA_GET(x)\
+ FIELD_GET(ANA_AC_SG_CONFIG_REG_3_OCTETS_EXCEEDED_ENA, x)
+
+#define ANA_AC_SG_CONFIG_REG_3_OCTETS_EXCEEDED BIT(29)
+#define ANA_AC_SG_CONFIG_REG_3_OCTETS_EXCEEDED_SET(x)\
+ FIELD_PREP(ANA_AC_SG_CONFIG_REG_3_OCTETS_EXCEEDED, x)
+#define ANA_AC_SG_CONFIG_REG_3_OCTETS_EXCEEDED_GET(x)\
+ FIELD_GET(ANA_AC_SG_CONFIG_REG_3_OCTETS_EXCEEDED, x)
+
+/* ANA_AC:SG_CONFIG:SG_CONFIG_REG_4 */
+#define ANA_AC_SG_CONFIG_REG_4 \
+ __REG(TARGET_ANA_AC, 0, 1, 851584, 0, 1, 128, 60, 0, 1, 4)
+
+/* ANA_AC:SG_CONFIG:SG_CONFIG_REG_5 */
+#define ANA_AC_SG_CONFIG_REG_5 \
+ __REG(TARGET_ANA_AC, 0, 1, 851584, 0, 1, 128, 64, 0, 1, 4)
+
+/* ANA_AC:SG_CONFIG:SG_GCL_GS_CONFIG */
+#define ANA_AC_SG_GCL_GS_CONFIG(r) \
+ __REG(TARGET_ANA_AC, 0, 1, 851584, 0, 1, 128, 0, r, 4, 4)
+
+#define ANA_AC_SG_GCL_GS_CONFIG_IPS GENMASK(3, 0)
+#define ANA_AC_SG_GCL_GS_CONFIG_IPS_SET(x)\
+ FIELD_PREP(ANA_AC_SG_GCL_GS_CONFIG_IPS, x)
+#define ANA_AC_SG_GCL_GS_CONFIG_IPS_GET(x)\
+ FIELD_GET(ANA_AC_SG_GCL_GS_CONFIG_IPS, x)
+
+#define ANA_AC_SG_GCL_GS_CONFIG_GATE_STATE BIT(4)
+#define ANA_AC_SG_GCL_GS_CONFIG_GATE_STATE_SET(x)\
+ FIELD_PREP(ANA_AC_SG_GCL_GS_CONFIG_GATE_STATE, x)
+#define ANA_AC_SG_GCL_GS_CONFIG_GATE_STATE_GET(x)\
+ FIELD_GET(ANA_AC_SG_GCL_GS_CONFIG_GATE_STATE, x)
+
+/* ANA_AC:SG_CONFIG:SG_GCL_TI_CONFIG */
+#define ANA_AC_SG_GCL_TI_CONFIG(r) \
+ __REG(TARGET_ANA_AC, 0, 1, 851584, 0, 1, 128, 16, r, 4, 4)
+
+/* ANA_AC:SG_CONFIG:SG_GCL_OCT_CONFIG */
+#define ANA_AC_SG_GCL_OCT_CONFIG(r) \
+ __REG(TARGET_ANA_AC, 0, 1, 851584, 0, 1, 128, 32, r, 4, 4)
+
+/* ANA_AC:SG_STATUS:SG_STATUS_REG_1 */
+#define ANA_AC_SG_STATUS_REG_1 \
+ __REG(TARGET_ANA_AC, 0, 1, 839088, 0, 1, 16, 0, 0, 1, 4)
+
+/* ANA_AC:SG_STATUS:SG_STATUS_REG_2 */
+#define ANA_AC_SG_STATUS_REG_2 \
+ __REG(TARGET_ANA_AC, 0, 1, 839088, 0, 1, 16, 4, 0, 1, 4)
+
+/* ANA_AC:SG_STATUS:SG_STATUS_REG_3 */
+#define ANA_AC_SG_STATUS_REG_3 \
+ __REG(TARGET_ANA_AC, 0, 1, 839088, 0, 1, 16, 8, 0, 1, 4)
+
+#define ANA_AC_SG_STATUS_REG_3_CFG_CHG_TIME_SEC_MSB GENMASK(15, 0)
+#define ANA_AC_SG_STATUS_REG_3_CFG_CHG_TIME_SEC_MSB_SET(x)\
+ FIELD_PREP(ANA_AC_SG_STATUS_REG_3_CFG_CHG_TIME_SEC_MSB, x)
+#define ANA_AC_SG_STATUS_REG_3_CFG_CHG_TIME_SEC_MSB_GET(x)\
+ FIELD_GET(ANA_AC_SG_STATUS_REG_3_CFG_CHG_TIME_SEC_MSB, x)
+
+#define ANA_AC_SG_STATUS_REG_3_GATE_STATE BIT(16)
+#define ANA_AC_SG_STATUS_REG_3_GATE_STATE_SET(x)\
+ FIELD_PREP(ANA_AC_SG_STATUS_REG_3_GATE_STATE, x)
+#define ANA_AC_SG_STATUS_REG_3_GATE_STATE_GET(x)\
+ FIELD_GET(ANA_AC_SG_STATUS_REG_3_GATE_STATE, x)
+
+#define ANA_AC_SG_STATUS_REG_3_IPS GENMASK(23, 20)
+#define ANA_AC_SG_STATUS_REG_3_IPS_SET(x)\
+ FIELD_PREP(ANA_AC_SG_STATUS_REG_3_IPS, x)
+#define ANA_AC_SG_STATUS_REG_3_IPS_GET(x)\
+ FIELD_GET(ANA_AC_SG_STATUS_REG_3_IPS, x)
+
+#define ANA_AC_SG_STATUS_REG_3_CONFIG_PENDING BIT(24)
+#define ANA_AC_SG_STATUS_REG_3_CONFIG_PENDING_SET(x)\
+ FIELD_PREP(ANA_AC_SG_STATUS_REG_3_CONFIG_PENDING, x)
+#define ANA_AC_SG_STATUS_REG_3_CONFIG_PENDING_GET(x)\
+ FIELD_GET(ANA_AC_SG_STATUS_REG_3_CONFIG_PENDING, x)
+
+#define ANA_AC_SG_STATUS_REG_3_GCL_OCTET_INDEX GENMASK(27, 25)
+#define ANA_AC_SG_STATUS_REG_3_GCL_OCTET_INDEX_SET(x)\
+ FIELD_PREP(ANA_AC_SG_STATUS_REG_3_GCL_OCTET_INDEX, x)
+#define ANA_AC_SG_STATUS_REG_3_GCL_OCTET_INDEX_GET(x)\
+ FIELD_GET(ANA_AC_SG_STATUS_REG_3_GCL_OCTET_INDEX, x)
+
+/* ANA_AC:SG_STATUS:SG_STATUS_REG_4 */
+#define ANA_AC_SG_STATUS_REG_4 \
+ __REG(TARGET_ANA_AC, 0, 1, 839088, 0, 1, 16, 12, 0, 1, 4)
+
/* ANA_AC:STAT_GLOBAL_CFG_PORT:STAT_GLOBAL_EVENT_MASK */
#define ANA_AC_PORT_SGE_CFG(r) __REG(TARGET_ANA_AC, 0, 1, 851552, 0, 1, 20, 0, r, 4, 4)
@@ -568,6 +817,232 @@ enum sparx5_target {
#define ANA_AC_POL_SLB_DLB_CTRL_DLB_ADD_ENA_GET(x)\
FIELD_GET(ANA_AC_POL_SLB_DLB_CTRL_DLB_ADD_ENA, x)
+/* ANA_AC_SDLB:LBGRP_TBL:XLB_START */
+#define ANA_AC_SDLB_XLB_START(g) \
+ __REG(TARGET_ANA_AC_SDLB, 0, 1, 295468, g, 10, 24, 0, 0, 1, 4)
+
+#define ANA_AC_SDLB_XLB_START_LBSET_START GENMASK(12, 0)
+#define ANA_AC_SDLB_XLB_START_LBSET_START_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_XLB_START_LBSET_START, x)
+#define ANA_AC_SDLB_XLB_START_LBSET_START_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_XLB_START_LBSET_START, x)
+
+/* ANA_AC_SDLB:LBGRP_TBL:PUP_INTERVAL */
+#define ANA_AC_SDLB_PUP_INTERVAL(g) \
+ __REG(TARGET_ANA_AC_SDLB, 0, 1, 295468, g, 10, 24, 4, 0, 1, 4)
+
+#define ANA_AC_SDLB_PUP_INTERVAL_PUP_INTERVAL GENMASK(19, 0)
+#define ANA_AC_SDLB_PUP_INTERVAL_PUP_INTERVAL_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_PUP_INTERVAL_PUP_INTERVAL, x)
+#define ANA_AC_SDLB_PUP_INTERVAL_PUP_INTERVAL_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_PUP_INTERVAL_PUP_INTERVAL, x)
+
+/* ANA_AC_SDLB:LBGRP_TBL:PUP_CTRL */
+#define ANA_AC_SDLB_PUP_CTRL(g) \
+ __REG(TARGET_ANA_AC_SDLB, 0, 1, 295468, g, 10, 24, 8, 0, 1, 4)
+
+#define ANA_AC_SDLB_PUP_CTRL_PUP_LB_DT GENMASK(18, 0)
+#define ANA_AC_SDLB_PUP_CTRL_PUP_LB_DT_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_PUP_CTRL_PUP_LB_DT, x)
+#define ANA_AC_SDLB_PUP_CTRL_PUP_LB_DT_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_PUP_CTRL_PUP_LB_DT, x)
+
+#define ANA_AC_SDLB_PUP_CTRL_PUP_ENA BIT(24)
+#define ANA_AC_SDLB_PUP_CTRL_PUP_ENA_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_PUP_CTRL_PUP_ENA, x)
+#define ANA_AC_SDLB_PUP_CTRL_PUP_ENA_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_PUP_CTRL_PUP_ENA, x)
+
+/* ANA_AC_SDLB:LBGRP_TBL:LBGRP_MISC */
+#define ANA_AC_SDLB_LBGRP_MISC(g)\
+ __REG(TARGET_ANA_AC_SDLB, 0, 1, 295468, g, 10, 24, 12, 0, 1, 4)
+
+#define ANA_AC_SDLB_LBGRP_MISC_THRES_SHIFT GENMASK(12, 8)
+#define ANA_AC_SDLB_LBGRP_MISC_THRES_SHIFT_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_LBGRP_MISC_THRES_SHIFT, x)
+#define ANA_AC_SDLB_LBGRP_MISC_THRES_SHIFT_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_LBGRP_MISC_THRES_SHIFT, x)
+
+/* ANA_AC_SDLB:LBGRP_TBL:FRM_RATE_TOKENS */
+#define ANA_AC_SDLB_FRM_RATE_TOKENS(g) \
+ __REG(TARGET_ANA_AC_SDLB, 0, 1, 295468, g, 10, 24, 16, 0, 1, 4)
+
+#define ANA_AC_SDLB_FRM_RATE_TOKENS_FRM_RATE_TOKENS GENMASK(12, 0)
+#define ANA_AC_SDLB_FRM_RATE_TOKENS_FRM_RATE_TOKENS_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_FRM_RATE_TOKENS_FRM_RATE_TOKENS, x)
+#define ANA_AC_SDLB_FRM_RATE_TOKENS_FRM_RATE_TOKENS_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_FRM_RATE_TOKENS_FRM_RATE_TOKENS, x)
+
+/* ANA_AC_SDLB:LBGRP_TBL:LBGRP_STATE_TBL */
+#define ANA_AC_SDLB_LBGRP_STATE_TBL(g) \
+ __REG(TARGET_ANA_AC_SDLB, 0, 1, 295468, g, 10, 24, 20, 0, 1, 4)
+
+#define ANA_AC_SDLB_LBGRP_STATE_TBL_PUP_ONGOING BIT(0)
+#define ANA_AC_SDLB_LBGRP_STATE_TBL_PUP_ONGOING_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_LBGRP_STATE_TBL_PUP_ONGOING, x)
+#define ANA_AC_SDLB_LBGRP_STATE_TBL_PUP_ONGOING_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_LBGRP_STATE_TBL_PUP_ONGOING, x)
+
+#define ANA_AC_SDLB_LBGRP_STATE_TBL_PUP_WAIT_ACK BIT(1)
+#define ANA_AC_SDLB_LBGRP_STATE_TBL_PUP_WAIT_ACK_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_LBGRP_STATE_TBL_PUP_WAIT_ACK, x)
+#define ANA_AC_SDLB_LBGRP_STATE_TBL_PUP_WAIT_ACK_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_LBGRP_STATE_TBL_PUP_WAIT_ACK, x)
+
+#define ANA_AC_SDLB_LBGRP_STATE_TBL_PUP_LBSET_NEXT GENMASK(28, 16)
+#define ANA_AC_SDLB_LBGRP_STATE_TBL_PUP_LBSET_NEXT_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_LBGRP_STATE_TBL_PUP_LBSET_NEXT, x)
+#define ANA_AC_SDLB_LBGRP_STATE_TBL_PUP_LBSET_NEXT_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_LBGRP_STATE_TBL_PUP_LBSET_NEXT, x)
+
+/* ANA_AC_SDLB:LBSET_TBL:PUP_TOKENS */
+#define ANA_AC_SDLB_PUP_TOKENS(g, r) \
+ __REG(TARGET_ANA_AC_SDLB, 0, 1, 0, g, 4616, 64, 0, r, 2, 4)
+
+#define ANA_AC_SDLB_PUP_TOKENS_PUP_TOKENS GENMASK(12, 0)
+#define ANA_AC_SDLB_PUP_TOKENS_PUP_TOKENS_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_PUP_TOKENS_PUP_TOKENS, x)
+#define ANA_AC_SDLB_PUP_TOKENS_PUP_TOKENS_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_PUP_TOKENS_PUP_TOKENS, x)
+
+/* ANA_AC_SDLB:LBSET_TBL:THRES */
+#define ANA_AC_SDLB_THRES(g, r) \
+ __REG(TARGET_ANA_AC_SDLB, 0, 1, 0, g, 4616, 64, 8, r, 2, 4)
+
+#define ANA_AC_SDLB_THRES_THRES GENMASK(9, 0)
+#define ANA_AC_SDLB_THRES_THRES_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_THRES_THRES, x)
+#define ANA_AC_SDLB_THRES_THRES_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_THRES_THRES, x)
+
+#define ANA_AC_SDLB_THRES_THRES_HYS GENMASK(25, 16)
+#define ANA_AC_SDLB_THRES_THRES_HYS_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_THRES_THRES_HYS, x)
+#define ANA_AC_SDLB_THRES_THRES_HYS_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_THRES_THRES_HYS, x)
+
+/* ANA_AC_SDLB:LBSET_TBL:XLB_NEXT */
+#define ANA_AC_SDLB_XLB_NEXT(g) \
+ __REG(TARGET_ANA_AC_SDLB, 0, 1, 0, g, 4616, 64, 16, 0, 1, 4)
+
+#define ANA_AC_SDLB_XLB_NEXT_LBSET_NEXT GENMASK(12, 0)
+#define ANA_AC_SDLB_XLB_NEXT_LBSET_NEXT_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_XLB_NEXT_LBSET_NEXT, x)
+#define ANA_AC_SDLB_XLB_NEXT_LBSET_NEXT_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_XLB_NEXT_LBSET_NEXT, x)
+
+#define ANA_AC_SDLB_XLB_NEXT_LBGRP GENMASK(27, 24)
+#define ANA_AC_SDLB_XLB_NEXT_LBGRP_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_XLB_NEXT_LBGRP, x)
+#define ANA_AC_SDLB_XLB_NEXT_LBGRP_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_XLB_NEXT_LBGRP, x)
+
+/* ANA_AC_SDLB:LBSET_TBL:INH_CTRL */
+#define ANA_AC_SDLB_INH_CTRL(g, r) \
+ __REG(TARGET_ANA_AC_SDLB, 0, 1, 0, g, 4616, 64, 20, r, 2, 4)
+
+#define ANA_AC_SDLB_INH_CTRL_PUP_TOKENS_MAX GENMASK(12, 0)
+#define ANA_AC_SDLB_INH_CTRL_PUP_TOKENS_MAX_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_INH_CTRL_PUP_TOKENS_MAX, x)
+#define ANA_AC_SDLB_INH_CTRL_PUP_TOKENS_MAX_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_INH_CTRL_PUP_TOKENS_MAX, x)
+
+#define ANA_AC_SDLB_INH_CTRL_INH_MODE GENMASK(21, 20)
+#define ANA_AC_SDLB_INH_CTRL_INH_MODE_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_INH_CTRL_INH_MODE, x)
+#define ANA_AC_SDLB_INH_CTRL_INH_MODE_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_INH_CTRL_INH_MODE, x)
+
+#define ANA_AC_SDLB_INH_CTRL_INH_LB BIT(24)
+#define ANA_AC_SDLB_INH_CTRL_INH_LB_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_INH_CTRL_INH_LB, x)
+#define ANA_AC_SDLB_INH_CTRL_INH_LB_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_INH_CTRL_INH_LB, x)
+
+/* ANA_AC_SDLB:LBSET_TBL:INH_LBSET_ADDR */
+#define ANA_AC_SDLB_INH_LBSET_ADDR(g) \
+ __REG(TARGET_ANA_AC_SDLB, 0, 1, 0, g, 4616, 64, 28, 0, 1, 4)
+
+#define ANA_AC_SDLB_INH_LBSET_ADDR_INH_LBSET_ADDR GENMASK(12, 0)
+#define ANA_AC_SDLB_INH_LBSET_ADDR_INH_LBSET_ADDR_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_INH_LBSET_ADDR_INH_LBSET_ADDR, x)
+#define ANA_AC_SDLB_INH_LBSET_ADDR_INH_LBSET_ADDR_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_INH_LBSET_ADDR_INH_LBSET_ADDR, x)
+
+/* ANA_AC_SDLB:LBSET_TBL:DLB_MISC */
+#define ANA_AC_SDLB_DLB_MISC(g) \
+ __REG(TARGET_ANA_AC_SDLB, 0, 1, 0, g, 4616, 64, 32, 0, 1, 4)
+
+#define ANA_AC_SDLB_DLB_MISC_DLB_FRM_RATE_ENA BIT(0)
+#define ANA_AC_SDLB_DLB_MISC_DLB_FRM_RATE_ENA_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_DLB_MISC_DLB_FRM_RATE_ENA, x)
+#define ANA_AC_SDLB_DLB_MISC_DLB_FRM_RATE_ENA_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_DLB_MISC_DLB_FRM_RATE_ENA, x)
+
+#define ANA_AC_SDLB_DLB_MISC_MARK_ALL_FRMS_RED_ENA BIT(6)
+#define ANA_AC_SDLB_DLB_MISC_MARK_ALL_FRMS_RED_ENA_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_DLB_MISC_MARK_ALL_FRMS_RED_ENA, x)
+#define ANA_AC_SDLB_DLB_MISC_MARK_ALL_FRMS_RED_ENA_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_DLB_MISC_MARK_ALL_FRMS_RED_ENA, x)
+
+#define ANA_AC_SDLB_DLB_MISC_DLB_FRM_ADJ GENMASK(14, 8)
+#define ANA_AC_SDLB_DLB_MISC_DLB_FRM_ADJ_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_DLB_MISC_DLB_FRM_ADJ, x)
+#define ANA_AC_SDLB_DLB_MISC_DLB_FRM_ADJ_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_DLB_MISC_DLB_FRM_ADJ, x)
+
+/* ANA_AC_SDLB:LBSET_TBL:DLB_CFG */
+#define ANA_AC_SDLB_DLB_CFG(g) \
+ __REG(TARGET_ANA_AC_SDLB, 0, 1, 0, g, 4616, 64, 36, 0, 1, 4)
+
+#define ANA_AC_SDLB_DLB_CFG_DROP_ON_YELLOW_ENA BIT(11)
+#define ANA_AC_SDLB_DLB_CFG_DROP_ON_YELLOW_ENA_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_DLB_CFG_DROP_ON_YELLOW_ENA, x)
+#define ANA_AC_SDLB_DLB_CFG_DROP_ON_YELLOW_ENA_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_DLB_CFG_DROP_ON_YELLOW_ENA, x)
+
+#define ANA_AC_SDLB_DLB_CFG_DP_BYPASS_LVL GENMASK(10, 9)
+#define ANA_AC_SDLB_DLB_CFG_DP_BYPASS_LVL_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_DLB_CFG_DP_BYPASS_LVL, x)
+#define ANA_AC_SDLB_DLB_CFG_DP_BYPASS_LVL_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_DLB_CFG_DP_BYPASS_LVL, x)
+
+#define ANA_AC_SDLB_DLB_CFG_HIER_DLB_DIS BIT(8)
+#define ANA_AC_SDLB_DLB_CFG_HIER_DLB_DIS_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_DLB_CFG_HIER_DLB_DIS, x)
+#define ANA_AC_SDLB_DLB_CFG_HIER_DLB_DIS_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_DLB_CFG_HIER_DLB_DIS, x)
+
+#define ANA_AC_SDLB_DLB_CFG_ENCAP_DATA_DIS BIT(7)
+#define ANA_AC_SDLB_DLB_CFG_ENCAP_DATA_DIS_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_DLB_CFG_ENCAP_DATA_DIS, x)
+#define ANA_AC_SDLB_DLB_CFG_ENCAP_DATA_DIS_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_DLB_CFG_ENCAP_DATA_DIS, x)
+
+#define ANA_AC_SDLB_DLB_CFG_COLOR_AWARE_LVL GENMASK(6, 5)
+#define ANA_AC_SDLB_DLB_CFG_COLOR_AWARE_LVL_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_DLB_CFG_COLOR_AWARE_LVL, x)
+#define ANA_AC_SDLB_DLB_CFG_COLOR_AWARE_LVL_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_DLB_CFG_COLOR_AWARE_LVL, x)
+
+#define ANA_AC_SDLB_DLB_CFG_CIR_INC_DP_VAL GENMASK(4, 3)
+#define ANA_AC_SDLB_DLB_CFG_CIR_INC_DP_VAL_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_DLB_CFG_CIR_INC_DP_VAL, x)
+#define ANA_AC_SDLB_DLB_CFG_CIR_INC_DP_VAL_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_DLB_CFG_CIR_INC_DP_VAL, x)
+
+#define ANA_AC_SDLB_DLB_CFG_DLB_MODE BIT(2)
+#define ANA_AC_SDLB_DLB_CFG_DLB_MODE_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_DLB_CFG_DLB_MODE, x)
+#define ANA_AC_SDLB_DLB_CFG_DLB_MODE_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_DLB_CFG_DLB_MODE, x)
+
+#define ANA_AC_SDLB_DLB_CFG_TRAFFIC_TYPE_MASK GENMASK(1, 0)
+#define ANA_AC_SDLB_DLB_CFG_TRAFFIC_TYPE_MASK_SET(x)\
+ FIELD_PREP(ANA_AC_SDLB_DLB_CFG_TRAFFIC_TYPE_MASK, x)
+#define ANA_AC_SDLB_DLB_CFG_TRAFFIC_TYPE_MASK_GET(x)\
+ FIELD_GET(ANA_AC_SDLB_DLB_CFG_TRAFFIC_TYPE_MASK, x)
+
/* ANA_CL:PORT:FILTER_CTRL */
#define ANA_CL_FILTER_CTRL(g) __REG(TARGET_ANA_CL, 0, 1, 131072, g, 70, 512, 4, 0, 1, 4)
@@ -956,6 +1431,82 @@ enum sparx5_target {
#define ANA_CL_QOS_MAP_CFG_DSCP_REWR_VAL_GET(x)\
FIELD_GET(ANA_CL_QOS_MAP_CFG_DSCP_REWR_VAL, x)
+/* ANA_L2:COMMON:FWD_CFG */
+#define ANA_L2_FWD_CFG \
+ __REG(TARGET_ANA_L2, 0, 1, 566024, 0, 1, 700, 0, 0, 1, 4)
+
+#define ANA_L2_FWD_CFG_MAC_TBL_SPLIT_SEL GENMASK(21, 20)
+#define ANA_L2_FWD_CFG_MAC_TBL_SPLIT_SEL_SET(x)\
+ FIELD_PREP(ANA_L2_FWD_CFG_MAC_TBL_SPLIT_SEL, x)
+#define ANA_L2_FWD_CFG_MAC_TBL_SPLIT_SEL_GET(x)\
+ FIELD_GET(ANA_L2_FWD_CFG_MAC_TBL_SPLIT_SEL, x)
+
+#define ANA_L2_FWD_CFG_PORT_DEFAULT_BDLB_ENA BIT(18)
+#define ANA_L2_FWD_CFG_PORT_DEFAULT_BDLB_ENA_SET(x)\
+ FIELD_PREP(ANA_L2_FWD_CFG_PORT_DEFAULT_BDLB_ENA, x)
+#define ANA_L2_FWD_CFG_PORT_DEFAULT_BDLB_ENA_GET(x)\
+ FIELD_GET(ANA_L2_FWD_CFG_PORT_DEFAULT_BDLB_ENA, x)
+
+#define ANA_L2_FWD_CFG_QUEUE_DEFAULT_SDLB_ENA BIT(17)
+#define ANA_L2_FWD_CFG_QUEUE_DEFAULT_SDLB_ENA_SET(x)\
+ FIELD_PREP(ANA_L2_FWD_CFG_QUEUE_DEFAULT_SDLB_ENA, x)
+#define ANA_L2_FWD_CFG_QUEUE_DEFAULT_SDLB_ENA_GET(x)\
+ FIELD_GET(ANA_L2_FWD_CFG_QUEUE_DEFAULT_SDLB_ENA, x)
+
+#define ANA_L2_FWD_CFG_ISDX_LOOKUP_ENA BIT(16)
+#define ANA_L2_FWD_CFG_ISDX_LOOKUP_ENA_SET(x)\
+ FIELD_PREP(ANA_L2_FWD_CFG_ISDX_LOOKUP_ENA, x)
+#define ANA_L2_FWD_CFG_ISDX_LOOKUP_ENA_GET(x)\
+ FIELD_GET(ANA_L2_FWD_CFG_ISDX_LOOKUP_ENA, x)
+
+#define ANA_L2_FWD_CFG_CPU_DMAC_QU GENMASK(10, 8)
+#define ANA_L2_FWD_CFG_CPU_DMAC_QU_SET(x)\
+ FIELD_PREP(ANA_L2_FWD_CFG_CPU_DMAC_QU, x)
+#define ANA_L2_FWD_CFG_CPU_DMAC_QU_GET(x)\
+ FIELD_GET(ANA_L2_FWD_CFG_CPU_DMAC_QU, x)
+
+#define ANA_L2_FWD_CFG_LOOPBACK_ENA BIT(7)
+#define ANA_L2_FWD_CFG_LOOPBACK_ENA_SET(x)\
+ FIELD_PREP(ANA_L2_FWD_CFG_LOOPBACK_ENA, x)
+#define ANA_L2_FWD_CFG_LOOPBACK_ENA_GET(x)\
+ FIELD_GET(ANA_L2_FWD_CFG_LOOPBACK_ENA, x)
+
+#define ANA_L2_FWD_CFG_CPU_DMAC_COPY_ENA BIT(6)
+#define ANA_L2_FWD_CFG_CPU_DMAC_COPY_ENA_SET(x)\
+ FIELD_PREP(ANA_L2_FWD_CFG_CPU_DMAC_COPY_ENA, x)
+#define ANA_L2_FWD_CFG_CPU_DMAC_COPY_ENA_GET(x)\
+ FIELD_GET(ANA_L2_FWD_CFG_CPU_DMAC_COPY_ENA, x)
+
+#define ANA_L2_FWD_CFG_FILTER_MODE_SEL BIT(4)
+#define ANA_L2_FWD_CFG_FILTER_MODE_SEL_SET(x)\
+ FIELD_PREP(ANA_L2_FWD_CFG_FILTER_MODE_SEL, x)
+#define ANA_L2_FWD_CFG_FILTER_MODE_SEL_GET(x)\
+ FIELD_GET(ANA_L2_FWD_CFG_FILTER_MODE_SEL, x)
+
+#define ANA_L2_FWD_CFG_FLOOD_MIRROR_ENA BIT(3)
+#define ANA_L2_FWD_CFG_FLOOD_MIRROR_ENA_SET(x)\
+ FIELD_PREP(ANA_L2_FWD_CFG_FLOOD_MIRROR_ENA, x)
+#define ANA_L2_FWD_CFG_FLOOD_MIRROR_ENA_GET(x)\
+ FIELD_GET(ANA_L2_FWD_CFG_FLOOD_MIRROR_ENA, x)
+
+#define ANA_L2_FWD_CFG_FLOOD_IGNORE_VLAN_ENA BIT(2)
+#define ANA_L2_FWD_CFG_FLOOD_IGNORE_VLAN_ENA_SET(x)\
+ FIELD_PREP(ANA_L2_FWD_CFG_FLOOD_IGNORE_VLAN_ENA, x)
+#define ANA_L2_FWD_CFG_FLOOD_IGNORE_VLAN_ENA_GET(x)\
+ FIELD_GET(ANA_L2_FWD_CFG_FLOOD_IGNORE_VLAN_ENA, x)
+
+#define ANA_L2_FWD_CFG_FLOOD_CPU_COPY_ENA BIT(1)
+#define ANA_L2_FWD_CFG_FLOOD_CPU_COPY_ENA_SET(x)\
+ FIELD_PREP(ANA_L2_FWD_CFG_FLOOD_CPU_COPY_ENA, x)
+#define ANA_L2_FWD_CFG_FLOOD_CPU_COPY_ENA_GET(x)\
+ FIELD_GET(ANA_L2_FWD_CFG_FLOOD_CPU_COPY_ENA, x)
+
+#define ANA_L2_FWD_CFG_FWD_ENA BIT(0)
+#define ANA_L2_FWD_CFG_FWD_ENA_SET(x)\
+ FIELD_PREP(ANA_L2_FWD_CFG_FWD_ENA, x)
+#define ANA_L2_FWD_CFG_FWD_ENA_GET(x)\
+ FIELD_GET(ANA_L2_FWD_CFG_FWD_ENA, x)
+
/* ANA_L2:COMMON:AUTO_LRN_CFG */
#define ANA_L2_AUTO_LRN_CFG __REG(TARGET_ANA_L2, 0, 1, 566024, 0, 1, 700, 24, 0, 1, 4)
@@ -980,6 +1531,26 @@ enum sparx5_target {
#define ANA_L2_OWN_UPSID_OWN_UPSID_GET(x)\
FIELD_GET(ANA_L2_OWN_UPSID_OWN_UPSID, x)
+/* ANA_L2:ISDX:DLB_CFG */
+#define ANA_L2_DLB_CFG(g) \
+ __REG(TARGET_ANA_L2, 0, 1, 0, g, 4096, 128, 56, 0, 1, 4)
+
+#define ANA_L2_DLB_CFG_DLB_IDX GENMASK(12, 0)
+#define ANA_L2_DLB_CFG_DLB_IDX_SET(x)\
+ FIELD_PREP(ANA_L2_DLB_CFG_DLB_IDX, x)
+#define ANA_L2_DLB_CFG_DLB_IDX_GET(x)\
+ FIELD_GET(ANA_L2_DLB_CFG_DLB_IDX, x)
+
+/* ANA_L2:ISDX:TSN_CFG */
+#define ANA_L2_TSN_CFG(g) \
+ __REG(TARGET_ANA_L2, 0, 1, 0, g, 4096, 128, 100, 0, 1, 4)
+
+#define ANA_L2_TSN_CFG_TSN_SFID GENMASK(9, 0)
+#define ANA_L2_TSN_CFG_TSN_SFID_SET(x)\
+ FIELD_PREP(ANA_L2_TSN_CFG_TSN_SFID, x)
+#define ANA_L2_TSN_CFG_TSN_SFID_GET(x)\
+ FIELD_GET(ANA_L2_TSN_CFG_TSN_SFID, x)
+
/* ANA_L3:COMMON:VLAN_CTRL */
#define ANA_L3_VLAN_CTRL __REG(TARGET_ANA_L3, 0, 1, 493632, 0, 1, 184, 4, 0, 1, 4)
@@ -3736,11 +4307,11 @@ enum sparx5_target {
/* HSCH:HSCH_MISC:SYS_CLK_PER */
#define HSCH_SYS_CLK_PER __REG(TARGET_HSCH, 0, 1, 163104, 0, 1, 648, 640, 0, 1, 4)
-#define HSCH_SYS_CLK_PER_SYS_CLK_PER_100PS GENMASK(7, 0)
-#define HSCH_SYS_CLK_PER_SYS_CLK_PER_100PS_SET(x)\
- FIELD_PREP(HSCH_SYS_CLK_PER_SYS_CLK_PER_100PS, x)
-#define HSCH_SYS_CLK_PER_SYS_CLK_PER_100PS_GET(x)\
- FIELD_GET(HSCH_SYS_CLK_PER_SYS_CLK_PER_100PS, x)
+#define HSCH_SYS_CLK_PER_100PS GENMASK(7, 0)
+#define HSCH_SYS_CLK_PER_100PS_SET(x)\
+ FIELD_PREP(HSCH_SYS_CLK_PER_100PS, x)
+#define HSCH_SYS_CLK_PER_100PS_GET(x)\
+ FIELD_GET(HSCH_SYS_CLK_PER_100PS, x)
/* HSCH:HSCH_LEAK_LISTS:HSCH_TIMER_CFG */
#define HSCH_HSCH_TIMER_CFG(g, r) __REG(TARGET_HSCH, 0, 1, 161664, g, 4, 32, 0, r, 4, 4)
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_police.c b/drivers/net/ethernet/microchip/sparx5/sparx5_police.c
new file mode 100644
index 000000000000..8ada5cee1342
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_police.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2023 Microchip Technology Inc. and its subsidiaries.
+ */
+
+#include "sparx5_main_regs.h"
+#include "sparx5_main.h"
+
+static int sparx5_policer_service_conf_set(struct sparx5 *sparx5,
+ struct sparx5_policer *pol)
+{
+ u32 idx, pup_tokens, max_pup_tokens, burst, thres;
+ struct sparx5_sdlb_group *g;
+ u64 rate;
+
+ g = &sdlb_groups[pol->group];
+ idx = pol->idx;
+
+ rate = pol->rate * 1000;
+ burst = pol->burst;
+
+ pup_tokens = sparx5_sdlb_pup_token_get(sparx5, g->pup_interval, rate);
+ max_pup_tokens =
+ sparx5_sdlb_pup_token_get(sparx5, g->pup_interval, g->max_rate);
+
+ thres = DIV_ROUND_UP(burst, g->min_burst);
+
+ spx5_wr(ANA_AC_SDLB_PUP_TOKENS_PUP_TOKENS_SET(pup_tokens), sparx5,
+ ANA_AC_SDLB_PUP_TOKENS(idx, 0));
+
+ spx5_rmw(ANA_AC_SDLB_INH_CTRL_PUP_TOKENS_MAX_SET(max_pup_tokens),
+ ANA_AC_SDLB_INH_CTRL_PUP_TOKENS_MAX, sparx5,
+ ANA_AC_SDLB_INH_CTRL(idx, 0));
+
+ spx5_rmw(ANA_AC_SDLB_THRES_THRES_SET(thres), ANA_AC_SDLB_THRES_THRES,
+ sparx5, ANA_AC_SDLB_THRES(idx, 0));
+
+ return 0;
+}
+
+int sparx5_policer_conf_set(struct sparx5 *sparx5, struct sparx5_policer *pol)
+{
+ /* More policer types will be added later */
+ switch (pol->type) {
+ case SPX5_POL_SERVICE:
+ return sparx5_policer_service_conf_set(sparx5, pol);
+ default:
+ break;
+ }
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_pool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_pool.c
new file mode 100644
index 000000000000..b4b280c6138b
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_pool.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2023 Microchip Technology Inc. and its subsidiaries.
+ */
+
+#include "sparx5_main_regs.h"
+#include "sparx5_main.h"
+
+static u32 sparx5_pool_id_to_idx(u32 id)
+{
+ return --id;
+}
+
+u32 sparx5_pool_idx_to_id(u32 idx)
+{
+ return ++idx;
+}
+
+/* Release resource from pool.
+ * Return reference count on success, otherwise return error.
+ */
+int sparx5_pool_put(struct sparx5_pool_entry *pool, int size, u32 id)
+{
+ struct sparx5_pool_entry *e_itr;
+
+ e_itr = (pool + sparx5_pool_id_to_idx(id));
+ if (e_itr->ref_cnt == 0)
+ return -EINVAL;
+
+ return --e_itr->ref_cnt;
+}
+
+/* Get resource from pool.
+ * Return reference count on success, otherwise return error.
+ */
+int sparx5_pool_get(struct sparx5_pool_entry *pool, int size, u32 *id)
+{
+ struct sparx5_pool_entry *e_itr;
+ int i;
+
+ for (i = 0, e_itr = pool; i < size; i++, e_itr++) {
+ if (e_itr->ref_cnt == 0) {
+ *id = sparx5_pool_idx_to_id(i);
+ return ++e_itr->ref_cnt;
+ }
+ }
+
+ return -ENOSPC;
+}
+
+/* Get resource from pool that matches index.
+ * Return reference count on success, otherwise return error.
+ */
+int sparx5_pool_get_with_idx(struct sparx5_pool_entry *pool, int size, u32 idx,
+ u32 *id)
+{
+ struct sparx5_pool_entry *e_itr;
+ int i, ret = -ENOSPC;
+
+ for (i = 0, e_itr = pool; i < size; i++, e_itr++) {
+ /* Pool index of first free entry */
+ if (e_itr->ref_cnt == 0 && ret == -ENOSPC)
+ ret = i;
+ /* Tc index already in use ? */
+ if (e_itr->idx == idx && e_itr->ref_cnt > 0) {
+ ret = i;
+ break;
+ }
+ }
+
+ /* Did we find a free entry? */
+ if (ret >= 0) {
+ *id = sparx5_pool_idx_to_id(ret);
+ e_itr = (pool + ret);
+ e_itr->idx = idx;
+ return ++e_itr->ref_cnt;
+ }
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_psfp.c b/drivers/net/ethernet/microchip/sparx5/sparx5_psfp.c
new file mode 100644
index 000000000000..8dee1ab1fa75
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_psfp.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2023 Microchip Technology Inc. and its subsidiaries.
+ */
+
+#include "sparx5_main_regs.h"
+#include "sparx5_main.h"
+
+#define SPX5_PSFP_SF_CNT 1024
+#define SPX5_PSFP_SG_CONFIG_CHANGE_SLEEP 1000
+#define SPX5_PSFP_SG_CONFIG_CHANGE_TIMEO 100000
+
+/* Pool of available service policers */
+static struct sparx5_pool_entry sparx5_psfp_fm_pool[SPX5_SDLB_CNT];
+
+/* Pool of available stream gates */
+static struct sparx5_pool_entry sparx5_psfp_sg_pool[SPX5_PSFP_SG_CNT];
+
+/* Pool of available stream filters */
+static struct sparx5_pool_entry sparx5_psfp_sf_pool[SPX5_PSFP_SF_CNT];
+
+static int sparx5_psfp_sf_get(u32 *id)
+{
+ return sparx5_pool_get(sparx5_psfp_sf_pool, SPX5_PSFP_SF_CNT, id);
+}
+
+static int sparx5_psfp_sf_put(u32 id)
+{
+ return sparx5_pool_put(sparx5_psfp_sf_pool, SPX5_PSFP_SF_CNT, id);
+}
+
+static int sparx5_psfp_sg_get(u32 idx, u32 *id)
+{
+ return sparx5_pool_get_with_idx(sparx5_psfp_sg_pool, SPX5_PSFP_SG_CNT,
+ idx, id);
+}
+
+static int sparx5_psfp_sg_put(u32 id)
+{
+ return sparx5_pool_put(sparx5_psfp_sg_pool, SPX5_PSFP_SG_CNT, id);
+}
+
+static int sparx5_psfp_fm_get(u32 idx, u32 *id)
+{
+ return sparx5_pool_get_with_idx(sparx5_psfp_fm_pool, SPX5_SDLB_CNT, idx,
+ id);
+}
+
+static int sparx5_psfp_fm_put(u32 id)
+{
+ return sparx5_pool_put(sparx5_psfp_fm_pool, SPX5_SDLB_CNT, id);
+}
+
+u32 sparx5_psfp_isdx_get_sf(struct sparx5 *sparx5, u32 isdx)
+{
+ return ANA_L2_TSN_CFG_TSN_SFID_GET(spx5_rd(sparx5,
+ ANA_L2_TSN_CFG(isdx)));
+}
+
+u32 sparx5_psfp_isdx_get_fm(struct sparx5 *sparx5, u32 isdx)
+{
+ return ANA_L2_DLB_CFG_DLB_IDX_GET(spx5_rd(sparx5,
+ ANA_L2_DLB_CFG(isdx)));
+}
+
+u32 sparx5_psfp_sf_get_sg(struct sparx5 *sparx5, u32 sfid)
+{
+ return ANA_AC_TSN_SF_CFG_TSN_SGID_GET(spx5_rd(sparx5,
+ ANA_AC_TSN_SF_CFG(sfid)));
+}
+
+void sparx5_isdx_conf_set(struct sparx5 *sparx5, u32 isdx, u32 sfid, u32 fmid)
+{
+ spx5_rmw(ANA_L2_TSN_CFG_TSN_SFID_SET(sfid), ANA_L2_TSN_CFG_TSN_SFID,
+ sparx5, ANA_L2_TSN_CFG(isdx));
+
+ spx5_rmw(ANA_L2_DLB_CFG_DLB_IDX_SET(fmid), ANA_L2_DLB_CFG_DLB_IDX,
+ sparx5, ANA_L2_DLB_CFG(isdx));
+}
+
+/* Internal priority value to internal priority selector */
+static u32 sparx5_psfp_ipv_to_ips(s32 ipv)
+{
+ return ipv > 0 ? (ipv | BIT(3)) : 0;
+}
+
+static int sparx5_psfp_sgid_get_status(struct sparx5 *sparx5)
+{
+ return spx5_rd(sparx5, ANA_AC_SG_ACCESS_CTRL);
+}
+
+static int sparx5_psfp_sgid_wait_for_completion(struct sparx5 *sparx5)
+{
+ u32 val;
+
+ return readx_poll_timeout(sparx5_psfp_sgid_get_status, sparx5, val,
+ !ANA_AC_SG_ACCESS_CTRL_CONFIG_CHANGE_GET(val),
+ SPX5_PSFP_SG_CONFIG_CHANGE_SLEEP,
+ SPX5_PSFP_SG_CONFIG_CHANGE_TIMEO);
+}
+
+static void sparx5_psfp_sg_config_change(struct sparx5 *sparx5, u32 id)
+{
+ spx5_wr(ANA_AC_SG_ACCESS_CTRL_SGID_SET(id), sparx5,
+ ANA_AC_SG_ACCESS_CTRL);
+
+ spx5_wr(ANA_AC_SG_ACCESS_CTRL_CONFIG_CHANGE_SET(1) |
+ ANA_AC_SG_ACCESS_CTRL_SGID_SET(id),
+ sparx5, ANA_AC_SG_ACCESS_CTRL);
+
+ if (sparx5_psfp_sgid_wait_for_completion(sparx5) < 0)
+ pr_debug("%s:%d timed out waiting for sgid completion",
+ __func__, __LINE__);
+}
+
+static void sparx5_psfp_sf_set(struct sparx5 *sparx5, u32 id,
+ const struct sparx5_psfp_sf *sf)
+{
+ /* Configure stream gate*/
+ spx5_rmw(ANA_AC_TSN_SF_CFG_TSN_SGID_SET(sf->sgid) |
+ ANA_AC_TSN_SF_CFG_TSN_MAX_SDU_SET(sf->max_sdu) |
+ ANA_AC_TSN_SF_CFG_BLOCK_OVERSIZE_STATE_SET(sf->sblock_osize) |
+ ANA_AC_TSN_SF_CFG_BLOCK_OVERSIZE_ENA_SET(sf->sblock_osize_ena),
+ ANA_AC_TSN_SF_CFG_TSN_SGID | ANA_AC_TSN_SF_CFG_TSN_MAX_SDU |
+ ANA_AC_TSN_SF_CFG_BLOCK_OVERSIZE_STATE |
+ ANA_AC_TSN_SF_CFG_BLOCK_OVERSIZE_ENA,
+ sparx5, ANA_AC_TSN_SF_CFG(id));
+}
+
+static int sparx5_psfp_sg_set(struct sparx5 *sparx5, u32 id,
+ const struct sparx5_psfp_sg *sg)
+{
+ u32 ips, base_lsb, base_msb, accum_time_interval = 0;
+ const struct sparx5_psfp_gce *gce;
+ int i;
+
+ ips = sparx5_psfp_ipv_to_ips(sg->ipv);
+ base_lsb = sg->basetime.tv_sec & 0xffffffff;
+ base_msb = sg->basetime.tv_sec >> 32;
+
+ /* Set stream gate id */
+ spx5_wr(ANA_AC_SG_ACCESS_CTRL_SGID_SET(id), sparx5,
+ ANA_AC_SG_ACCESS_CTRL);
+
+ /* Write AdminPSFP values */
+ spx5_wr(sg->basetime.tv_nsec, sparx5, ANA_AC_SG_CONFIG_REG_1);
+ spx5_wr(base_lsb, sparx5, ANA_AC_SG_CONFIG_REG_2);
+
+ spx5_rmw(ANA_AC_SG_CONFIG_REG_3_BASE_TIME_SEC_MSB_SET(base_msb) |
+ ANA_AC_SG_CONFIG_REG_3_INIT_IPS_SET(ips) |
+ ANA_AC_SG_CONFIG_REG_3_LIST_LENGTH_SET(sg->num_entries) |
+ ANA_AC_SG_CONFIG_REG_3_INIT_GATE_STATE_SET(sg->gate_state) |
+ ANA_AC_SG_CONFIG_REG_3_GATE_ENABLE_SET(1),
+ ANA_AC_SG_CONFIG_REG_3_BASE_TIME_SEC_MSB |
+ ANA_AC_SG_CONFIG_REG_3_INIT_IPS |
+ ANA_AC_SG_CONFIG_REG_3_LIST_LENGTH |
+ ANA_AC_SG_CONFIG_REG_3_INIT_GATE_STATE |
+ ANA_AC_SG_CONFIG_REG_3_GATE_ENABLE,
+ sparx5, ANA_AC_SG_CONFIG_REG_3);
+
+ spx5_wr(sg->cycletime, sparx5, ANA_AC_SG_CONFIG_REG_4);
+ spx5_wr(sg->cycletimeext, sparx5, ANA_AC_SG_CONFIG_REG_5);
+
+ /* For each scheduling entry */
+ for (i = 0; i < sg->num_entries; i++) {
+ gce = &sg->gce[i];
+ ips = sparx5_psfp_ipv_to_ips(gce->ipv);
+ /* hardware needs TimeInterval to be cumulative */
+ accum_time_interval += gce->interval;
+ /* Set gate state */
+ spx5_wr(ANA_AC_SG_GCL_GS_CONFIG_IPS_SET(ips) |
+ ANA_AC_SG_GCL_GS_CONFIG_GATE_STATE_SET(gce->gate_state),
+ sparx5, ANA_AC_SG_GCL_GS_CONFIG(i));
+
+ /* Set time interval */
+ spx5_wr(accum_time_interval, sparx5,
+ ANA_AC_SG_GCL_TI_CONFIG(i));
+
+ /* Set maximum octets */
+ spx5_wr(gce->maxoctets, sparx5, ANA_AC_SG_GCL_OCT_CONFIG(i));
+ }
+
+ return 0;
+}
+
+static int sparx5_sdlb_conf_set(struct sparx5 *sparx5,
+ struct sparx5_psfp_fm *fm)
+{
+ int (*sparx5_sdlb_group_action)(struct sparx5 *sparx5, u32 group,
+ u32 idx);
+
+ if (!fm->pol.rate && !fm->pol.burst)
+ sparx5_sdlb_group_action = &sparx5_sdlb_group_del;
+ else
+ sparx5_sdlb_group_action = &sparx5_sdlb_group_add;
+
+ sparx5_policer_conf_set(sparx5, &fm->pol);
+
+ return sparx5_sdlb_group_action(sparx5, fm->pol.group, fm->pol.idx);
+}
+
+int sparx5_psfp_sf_add(struct sparx5 *sparx5, const struct sparx5_psfp_sf *sf,
+ u32 *id)
+{
+ int ret;
+
+ ret = sparx5_psfp_sf_get(id);
+ if (ret < 0)
+ return ret;
+
+ sparx5_psfp_sf_set(sparx5, *id, sf);
+
+ return 0;
+}
+
+int sparx5_psfp_sf_del(struct sparx5 *sparx5, u32 id)
+{
+ const struct sparx5_psfp_sf sf = { 0 };
+
+ sparx5_psfp_sf_set(sparx5, id, &sf);
+
+ return sparx5_psfp_sf_put(id);
+}
+
+int sparx5_psfp_sg_add(struct sparx5 *sparx5, u32 uidx,
+ struct sparx5_psfp_sg *sg, u32 *id)
+{
+ ktime_t basetime;
+ int ret;
+
+ ret = sparx5_psfp_sg_get(uidx, id);
+ if (ret < 0)
+ return ret;
+ /* Was already in use, no need to reconfigure */
+ if (ret > 1)
+ return 0;
+
+ /* Calculate basetime for this stream gate */
+ sparx5_new_base_time(sparx5, sg->cycletime, 0, &basetime);
+ sg->basetime = ktime_to_timespec64(basetime);
+
+ sparx5_psfp_sg_set(sparx5, *id, sg);
+
+ /* Signal hardware to copy AdminPSFP values into OperPSFP values */
+ sparx5_psfp_sg_config_change(sparx5, *id);
+
+ return 0;
+}
+
+int sparx5_psfp_sg_del(struct sparx5 *sparx5, u32 id)
+{
+ const struct sparx5_psfp_sg sg = { 0 };
+ int ret;
+
+ ret = sparx5_psfp_sg_put(id);
+ if (ret < 0)
+ return ret;
+ /* Stream gate still in use ? */
+ if (ret > 0)
+ return 0;
+
+ return sparx5_psfp_sg_set(sparx5, id, &sg);
+}
+
+int sparx5_psfp_fm_add(struct sparx5 *sparx5, u32 uidx,
+ struct sparx5_psfp_fm *fm, u32 *id)
+{
+ struct sparx5_policer *pol = &fm->pol;
+ int ret;
+
+ /* Get flow meter */
+ ret = sparx5_psfp_fm_get(uidx, &fm->pol.idx);
+ if (ret < 0)
+ return ret;
+ /* Was already in use, no need to reconfigure */
+ if (ret > 1)
+ return 0;
+
+ ret = sparx5_sdlb_group_get_by_rate(sparx5, pol->rate, pol->burst);
+ if (ret < 0)
+ return ret;
+
+ fm->pol.group = ret;
+
+ ret = sparx5_sdlb_conf_set(sparx5, fm);
+ if (ret < 0)
+ return ret;
+
+ *id = fm->pol.idx;
+
+ return 0;
+}
+
+int sparx5_psfp_fm_del(struct sparx5 *sparx5, u32 id)
+{
+ struct sparx5_psfp_fm fm = { .pol.idx = id,
+ .pol.type = SPX5_POL_SERVICE };
+ int ret;
+
+ /* Find the group that this lb belongs to */
+ ret = sparx5_sdlb_group_get_by_index(sparx5, id, &fm.pol.group);
+ if (ret < 0)
+ return ret;
+
+ ret = sparx5_psfp_fm_put(id);
+ if (ret < 0)
+ return ret;
+ /* Do not reset flow-meter if still in use. */
+ if (ret > 0)
+ return 0;
+
+ return sparx5_sdlb_conf_set(sparx5, &fm);
+}
+
+void sparx5_psfp_init(struct sparx5 *sparx5)
+{
+ const struct sparx5_sdlb_group *group;
+ int i;
+
+ for (i = 0; i < SPX5_SDLB_GROUP_CNT; i++) {
+ group = &sdlb_groups[i];
+ sparx5_sdlb_group_init(sparx5, group->max_rate,
+ group->min_burst, group->frame_size, i);
+ }
+
+ spx5_wr(ANA_AC_SG_CYCLETIME_UPDATE_PERIOD_SG_CT_UPDATE_ENA_SET(1),
+ sparx5, ANA_AC_SG_CYCLETIME_UPDATE_PERIOD);
+
+ spx5_rmw(ANA_L2_FWD_CFG_ISDX_LOOKUP_ENA_SET(1),
+ ANA_L2_FWD_CFG_ISDX_LOOKUP_ENA, sparx5, ANA_L2_FWD_CFG);
+}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c
index 0ed1ea7727c5..af85d66248b2 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c
@@ -476,8 +476,7 @@ static int sparx5_ptp_settime64(struct ptp_clock_info *ptp,
return 0;
}
-static int sparx5_ptp_gettime64(struct ptp_clock_info *ptp,
- struct timespec64 *ts)
+int sparx5_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts)
{
struct sparx5_phc *phc = container_of(ptp, struct sparx5_phc, info);
struct sparx5 *sparx5 = phc->sparx5;
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_qos.c b/drivers/net/ethernet/microchip/sparx5/sparx5_qos.c
index 379e540e5e6a..5f34febaee6b 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_qos.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_qos.c
@@ -9,6 +9,63 @@
#include "sparx5_main.h"
#include "sparx5_qos.h"
+/* Calculate new base_time based on cycle_time.
+ *
+ * The hardware requires a base_time that is always in the future.
+ * We define threshold_time as current_time + (2 * cycle_time).
+ * If base_time is below threshold_time this function recalculates it to be in
+ * the interval:
+ * threshold_time <= base_time < (threshold_time + cycle_time)
+ *
+ * A very simple algorithm could be like this:
+ * new_base_time = org_base_time + N * cycle_time
+ * using the lowest N so (new_base_time >= threshold_time
+ */
+void sparx5_new_base_time(struct sparx5 *sparx5, const u32 cycle_time,
+ const ktime_t org_base_time, ktime_t *new_base_time)
+{
+ ktime_t current_time, threshold_time, new_time;
+ struct timespec64 ts;
+ u64 nr_of_cycles_p2;
+ u64 nr_of_cycles;
+ u64 diff_time;
+
+ new_time = org_base_time;
+
+ sparx5_ptp_gettime64(&sparx5->phc[SPARX5_PHC_PORT].info, &ts);
+ current_time = timespec64_to_ktime(ts);
+ threshold_time = current_time + (2 * cycle_time);
+ diff_time = threshold_time - new_time;
+ nr_of_cycles = div_u64(diff_time, cycle_time);
+ nr_of_cycles_p2 = 1; /* Use 2^0 as start value */
+
+ if (new_time >= threshold_time) {
+ *new_base_time = new_time;
+ return;
+ }
+
+ /* Calculate the smallest power of 2 (nr_of_cycles_p2)
+ * that is larger than nr_of_cycles.
+ */
+ while (nr_of_cycles_p2 < nr_of_cycles)
+ nr_of_cycles_p2 <<= 1; /* Next (higher) power of 2 */
+
+ /* Add as big chunks (power of 2 * cycle_time)
+ * as possible for each power of 2
+ */
+ while (nr_of_cycles_p2) {
+ if (new_time < threshold_time) {
+ new_time += cycle_time * nr_of_cycles_p2;
+ while (new_time < threshold_time)
+ new_time += cycle_time * nr_of_cycles_p2;
+ new_time -= cycle_time * nr_of_cycles_p2;
+ }
+ nr_of_cycles_p2 >>= 1; /* Next (lower) power of 2 */
+ }
+ new_time += cycle_time;
+ *new_base_time = new_time;
+}
+
/* Max rates for leak groups */
static const u32 spx5_hsch_max_group_rate[SPX5_HSCH_LEAK_GRP_CNT] = {
1048568, /* 1.049 Gbps */
@@ -393,6 +450,8 @@ int sparx5_qos_init(struct sparx5 *sparx5)
if (ret < 0)
return ret;
+ sparx5_psfp_init(sparx5);
+
return 0;
}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_sdlb.c b/drivers/net/ethernet/microchip/sparx5/sparx5_sdlb.c
new file mode 100644
index 000000000000..f5267218caeb
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_sdlb.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2023 Microchip Technology Inc. and its subsidiaries.
+ */
+
+#include "sparx5_main_regs.h"
+#include "sparx5_main.h"
+
+struct sparx5_sdlb_group sdlb_groups[SPX5_SDLB_GROUP_CNT] = {
+ { SPX5_SDLB_GROUP_RATE_MAX, 8192 / 1, 64 }, /* 25 G */
+ { 15000000000ULL, 8192 / 1, 64 }, /* 15 G */
+ { 10000000000ULL, 8192 / 1, 64 }, /* 10 G */
+ { 5000000000ULL, 8192 / 1, 64 }, /* 5 G */
+ { 2500000000ULL, 8192 / 1, 64 }, /* 2.5 G */
+ { 1000000000ULL, 8192 / 2, 64 }, /* 1 G */
+ { 500000000ULL, 8192 / 2, 64 }, /* 500 M */
+ { 100000000ULL, 8192 / 4, 64 }, /* 100 M */
+ { 50000000ULL, 8192 / 4, 64 }, /* 50 M */
+ { 5000000ULL, 8192 / 8, 64 } /* 5 M */
+};
+
+int sparx5_sdlb_clk_hz_get(struct sparx5 *sparx5)
+{
+ u32 clk_per_100ps;
+ u64 clk_hz;
+
+ clk_per_100ps = HSCH_SYS_CLK_PER_100PS_GET(spx5_rd(sparx5,
+ HSCH_SYS_CLK_PER));
+ if (!clk_per_100ps)
+ clk_per_100ps = SPX5_CLK_PER_100PS_DEFAULT;
+
+ clk_hz = (10 * 1000 * 1000) / clk_per_100ps;
+ return clk_hz *= 1000;
+}
+
+static int sparx5_sdlb_pup_interval_get(struct sparx5 *sparx5, u32 max_token,
+ u64 max_rate)
+{
+ u64 clk_hz;
+
+ clk_hz = sparx5_sdlb_clk_hz_get(sparx5);
+
+ return div64_u64((8 * clk_hz * max_token), max_rate);
+}
+
+int sparx5_sdlb_pup_token_get(struct sparx5 *sparx5, u32 pup_interval, u64 rate)
+{
+ u64 clk_hz;
+
+ if (!rate)
+ return SPX5_SDLB_PUP_TOKEN_DISABLE;
+
+ clk_hz = sparx5_sdlb_clk_hz_get(sparx5);
+
+ return DIV64_U64_ROUND_UP((rate * pup_interval), (clk_hz * 8));
+}
+
+static void sparx5_sdlb_group_disable(struct sparx5 *sparx5, u32 group)
+{
+ spx5_rmw(ANA_AC_SDLB_PUP_CTRL_PUP_ENA_SET(0),
+ ANA_AC_SDLB_PUP_CTRL_PUP_ENA, sparx5,
+ ANA_AC_SDLB_PUP_CTRL(group));
+}
+
+static void sparx5_sdlb_group_enable(struct sparx5 *sparx5, u32 group)
+{
+ spx5_rmw(ANA_AC_SDLB_PUP_CTRL_PUP_ENA_SET(1),
+ ANA_AC_SDLB_PUP_CTRL_PUP_ENA, sparx5,
+ ANA_AC_SDLB_PUP_CTRL(group));
+}
+
+static u32 sparx5_sdlb_group_get_first(struct sparx5 *sparx5, u32 group)
+{
+ u32 val;
+
+ val = spx5_rd(sparx5, ANA_AC_SDLB_XLB_START(group));
+
+ return ANA_AC_SDLB_XLB_START_LBSET_START_GET(val);
+}
+
+static u32 sparx5_sdlb_group_get_next(struct sparx5 *sparx5, u32 group,
+ u32 lb)
+{
+ u32 val;
+
+ val = spx5_rd(sparx5, ANA_AC_SDLB_XLB_NEXT(lb));
+
+ return ANA_AC_SDLB_XLB_NEXT_LBSET_NEXT_GET(val);
+}
+
+static bool sparx5_sdlb_group_is_first(struct sparx5 *sparx5, u32 group,
+ u32 lb)
+{
+ return lb == sparx5_sdlb_group_get_first(sparx5, group);
+}
+
+static bool sparx5_sdlb_group_is_last(struct sparx5 *sparx5, u32 group,
+ u32 lb)
+{
+ return lb == sparx5_sdlb_group_get_next(sparx5, group, lb);
+}
+
+static bool sparx5_sdlb_group_is_empty(struct sparx5 *sparx5, u32 group)
+{
+ u32 val;
+
+ val = spx5_rd(sparx5, ANA_AC_SDLB_PUP_CTRL(group));
+
+ return ANA_AC_SDLB_PUP_CTRL_PUP_ENA_GET(val) == 0;
+}
+
+static u32 sparx5_sdlb_group_get_last(struct sparx5 *sparx5, u32 group)
+{
+ u32 itr, next;
+
+ itr = sparx5_sdlb_group_get_first(sparx5, group);
+
+ for (;;) {
+ next = sparx5_sdlb_group_get_next(sparx5, group, itr);
+ if (itr == next)
+ return itr;
+
+ itr = next;
+ }
+}
+
+static bool sparx5_sdlb_group_is_singular(struct sparx5 *sparx5, u32 group)
+{
+ if (sparx5_sdlb_group_is_empty(sparx5, group))
+ return false;
+
+ return sparx5_sdlb_group_get_first(sparx5, group) ==
+ sparx5_sdlb_group_get_last(sparx5, group);
+}
+
+static int sparx5_sdlb_group_get_adjacent(struct sparx5 *sparx5, u32 group,
+ u32 idx, u32 *prev, u32 *next,
+ u32 *first)
+{
+ u32 itr;
+
+ *first = sparx5_sdlb_group_get_first(sparx5, group);
+ *prev = *first;
+ *next = *first;
+ itr = *first;
+
+ for (;;) {
+ *next = sparx5_sdlb_group_get_next(sparx5, group, itr);
+
+ if (itr == idx)
+ return 0; /* Found it */
+
+ if (itr == *next)
+ return -EINVAL; /* Was not found */
+
+ *prev = itr;
+ itr = *next;
+ }
+}
+
+static int sparx5_sdlb_group_get_count(struct sparx5 *sparx5, u32 group)
+{
+ u32 itr, next;
+ int count = 0;
+
+ itr = sparx5_sdlb_group_get_first(sparx5, group);
+
+ for (;;) {
+ next = sparx5_sdlb_group_get_next(sparx5, group, itr);
+ if (itr == next)
+ return count;
+
+ itr = next;
+ count++;
+ }
+}
+
+int sparx5_sdlb_group_get_by_rate(struct sparx5 *sparx5, u32 rate, u32 burst)
+{
+ const struct sparx5_sdlb_group *group;
+ u64 rate_bps;
+ int i, count;
+
+ rate_bps = rate * 1000;
+
+ for (i = SPX5_SDLB_GROUP_CNT - 1; i >= 0; i--) {
+ group = &sdlb_groups[i];
+
+ count = sparx5_sdlb_group_get_count(sparx5, i);
+
+ /* Check that this group is not full.
+ * According to LB group configuration rules: the number of XLBs
+ * in a group must not exceed PUP_INTERVAL/4 - 1.
+ */
+ if (count > ((group->pup_interval / 4) - 1))
+ continue;
+
+ if (rate_bps < group->max_rate)
+ return i;
+ }
+
+ return -ENOSPC;
+}
+
+int sparx5_sdlb_group_get_by_index(struct sparx5 *sparx5, u32 idx, u32 *group)
+{
+ u32 itr, next;
+ int i;
+
+ for (i = 0; i < SPX5_SDLB_GROUP_CNT; i++) {
+ if (sparx5_sdlb_group_is_empty(sparx5, i))
+ continue;
+
+ itr = sparx5_sdlb_group_get_first(sparx5, i);
+
+ for (;;) {
+ next = sparx5_sdlb_group_get_next(sparx5, i, itr);
+
+ if (itr == idx) {
+ *group = i;
+ return 0; /* Found it */
+ }
+ if (itr == next)
+ break; /* Was not found */
+
+ itr = next;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int sparx5_sdlb_group_link(struct sparx5 *sparx5, u32 group, u32 idx,
+ u32 first, u32 next, bool empty)
+{
+ /* Stop leaking */
+ sparx5_sdlb_group_disable(sparx5, group);
+
+ if (empty)
+ return 0;
+
+ /* Link insertion lb to next lb */
+ spx5_wr(ANA_AC_SDLB_XLB_NEXT_LBSET_NEXT_SET(next) |
+ ANA_AC_SDLB_XLB_NEXT_LBGRP_SET(group),
+ sparx5, ANA_AC_SDLB_XLB_NEXT(idx));
+
+ /* Set the first lb */
+ spx5_wr(ANA_AC_SDLB_XLB_START_LBSET_START_SET(first), sparx5,
+ ANA_AC_SDLB_XLB_START(group));
+
+ /* Start leaking */
+ sparx5_sdlb_group_enable(sparx5, group);
+
+ return 0;
+};
+
+int sparx5_sdlb_group_add(struct sparx5 *sparx5, u32 group, u32 idx)
+{
+ u32 first, next;
+
+ /* We always add to head of the list */
+ first = idx;
+
+ if (sparx5_sdlb_group_is_empty(sparx5, group))
+ next = idx;
+ else
+ next = sparx5_sdlb_group_get_first(sparx5, group);
+
+ return sparx5_sdlb_group_link(sparx5, group, idx, first, next, false);
+}
+
+int sparx5_sdlb_group_del(struct sparx5 *sparx5, u32 group, u32 idx)
+{
+ u32 first, next, prev;
+ bool empty = false;
+
+ if (sparx5_sdlb_group_get_adjacent(sparx5, group, idx, &prev, &next,
+ &first) < 0) {
+ pr_err("%s:%d Could not find idx: %d in group: %d", __func__,
+ __LINE__, idx, group);
+ return -EINVAL;
+ }
+
+ if (sparx5_sdlb_group_is_singular(sparx5, group)) {
+ empty = true;
+ } else if (sparx5_sdlb_group_is_last(sparx5, group, idx)) {
+ /* idx is removed, prev is now last */
+ idx = prev;
+ next = prev;
+ } else if (sparx5_sdlb_group_is_first(sparx5, group, idx)) {
+ /* idx is removed and points to itself, first is next */
+ first = next;
+ next = idx;
+ } else {
+ /* Next is not touched */
+ idx = prev;
+ }
+
+ return sparx5_sdlb_group_link(sparx5, group, idx, first, next, empty);
+}
+
+void sparx5_sdlb_group_init(struct sparx5 *sparx5, u64 max_rate, u32 min_burst,
+ u32 frame_size, u32 idx)
+{
+ u32 thres_shift, mask = 0x01, power = 0;
+ struct sparx5_sdlb_group *group;
+ u64 max_token;
+
+ group = &sdlb_groups[idx];
+
+ /* Number of positions to right-shift LB's threshold value. */
+ while ((min_burst & mask) == 0) {
+ power++;
+ mask <<= 1;
+ }
+ thres_shift = SPX5_SDLB_2CYCLES_TYPE2_THRES_OFFSET - power;
+
+ max_token = (min_burst > SPX5_SDLB_PUP_TOKEN_MAX) ?
+ SPX5_SDLB_PUP_TOKEN_MAX :
+ min_burst;
+ group->pup_interval =
+ sparx5_sdlb_pup_interval_get(sparx5, max_token, max_rate);
+
+ group->frame_size = frame_size;
+
+ spx5_wr(ANA_AC_SDLB_PUP_INTERVAL_PUP_INTERVAL_SET(group->pup_interval),
+ sparx5, ANA_AC_SDLB_PUP_INTERVAL(idx));
+
+ spx5_wr(ANA_AC_SDLB_FRM_RATE_TOKENS_FRM_RATE_TOKENS_SET(frame_size),
+ sparx5, ANA_AC_SDLB_FRM_RATE_TOKENS(idx));
+
+ spx5_wr(ANA_AC_SDLB_LBGRP_MISC_THRES_SHIFT_SET(thres_shift), sparx5,
+ ANA_AC_SDLB_LBGRP_MISC(idx));
+}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c
index 217ff127e3c7..f962304272c2 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c
@@ -4,6 +4,7 @@
* Copyright (c) 2022 Microchip Technology Inc. and its subsidiaries.
*/
+#include <net/tc_act/tc_gate.h>
#include <net/tcp.h>
#include "sparx5_tc.h"
@@ -989,19 +990,156 @@ out:
return err;
}
+static int sparx5_tc_flower_parse_act_gate(struct sparx5_psfp_sg *sg,
+ struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ int i;
+
+ if (act->gate.prio < -1 || act->gate.prio > SPX5_PSFP_SG_MAX_IPV) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid gate priority");
+ return -EINVAL;
+ }
+
+ if (act->gate.cycletime < SPX5_PSFP_SG_MIN_CYCLE_TIME_NS ||
+ act->gate.cycletime > SPX5_PSFP_SG_MAX_CYCLE_TIME_NS) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid gate cycletime");
+ return -EINVAL;
+ }
+
+ if (act->gate.cycletimeext > SPX5_PSFP_SG_MAX_CYCLE_TIME_NS) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid gate cycletimeext");
+ return -EINVAL;
+ }
+
+ if (act->gate.num_entries >= SPX5_PSFP_GCE_CNT) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid number of gate entries");
+ return -EINVAL;
+ }
+
+ sg->gate_state = true;
+ sg->ipv = act->gate.prio;
+ sg->num_entries = act->gate.num_entries;
+ sg->cycletime = act->gate.cycletime;
+ sg->cycletimeext = act->gate.cycletimeext;
+
+ for (i = 0; i < sg->num_entries; i++) {
+ sg->gce[i].gate_state = !!act->gate.entries[i].gate_state;
+ sg->gce[i].interval = act->gate.entries[i].interval;
+ sg->gce[i].ipv = act->gate.entries[i].ipv;
+ sg->gce[i].maxoctets = act->gate.entries[i].maxoctets;
+ }
+
+ return 0;
+}
+
+static int sparx5_tc_flower_parse_act_police(struct sparx5_policer *pol,
+ struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ pol->type = SPX5_POL_SERVICE;
+ pol->rate = div_u64(act->police.rate_bytes_ps, 1000) * 8;
+ pol->burst = act->police.burst;
+ pol->idx = act->hw_index;
+
+ /* rate is now in kbit */
+ if (pol->rate > DIV_ROUND_UP(SPX5_SDLB_GROUP_RATE_MAX, 1000)) {
+ NL_SET_ERR_MSG_MOD(extack, "Maximum rate exceeded");
+ return -EINVAL;
+ }
+
+ if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when exceed action is not drop");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.notexceed.act_id != FLOW_ACTION_PIPE &&
+ act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when conform action is not pipe or ok");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int sparx5_tc_flower_psfp_setup(struct sparx5 *sparx5,
+ struct vcap_rule *vrule, int sg_idx,
+ int pol_idx, struct sparx5_psfp_sg *sg,
+ struct sparx5_psfp_fm *fm,
+ struct sparx5_psfp_sf *sf)
+{
+ u32 psfp_sfid = 0, psfp_fmid = 0, psfp_sgid = 0;
+ int ret;
+
+ /* Must always have a stream gate - max sdu (filter option) is evaluated
+ * after frames have passed the gate, so in case of only a policer, we
+ * allocate a stream gate that is always open.
+ */
+ if (sg_idx < 0) {
+ sg_idx = sparx5_pool_idx_to_id(SPX5_PSFP_SG_OPEN);
+ sg->ipv = 0; /* Disabled */
+ sg->cycletime = SPX5_PSFP_SG_CYCLE_TIME_DEFAULT;
+ sg->num_entries = 1;
+ sg->gate_state = 1; /* Open */
+ sg->gate_enabled = 1;
+ sg->gce[0].gate_state = 1;
+ sg->gce[0].interval = SPX5_PSFP_SG_CYCLE_TIME_DEFAULT;
+ sg->gce[0].ipv = 0;
+ sg->gce[0].maxoctets = 0; /* Disabled */
+ }
+
+ ret = sparx5_psfp_sg_add(sparx5, sg_idx, sg, &psfp_sgid);
+ if (ret < 0)
+ return ret;
+
+ if (pol_idx >= 0) {
+ /* Add new flow-meter */
+ ret = sparx5_psfp_fm_add(sparx5, pol_idx, fm, &psfp_fmid);
+ if (ret < 0)
+ return ret;
+ }
+
+ /* Map stream filter to stream gate */
+ sf->sgid = psfp_sgid;
+
+ /* Add new stream-filter and map it to a steam gate */
+ ret = sparx5_psfp_sf_add(sparx5, sf, &psfp_sfid);
+ if (ret < 0)
+ return ret;
+
+ /* Streams are classified by ISDX - map ISDX 1:1 to sfid for now. */
+ sparx5_isdx_conf_set(sparx5, psfp_sfid, psfp_sfid, psfp_fmid);
+
+ ret = vcap_rule_add_action_bit(vrule, VCAP_AF_ISDX_ADD_REPLACE_SEL,
+ VCAP_BIT_1);
+ if (ret)
+ return ret;
+
+ ret = vcap_rule_add_action_u32(vrule, VCAP_AF_ISDX_VAL, psfp_sfid);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
static int sparx5_tc_flower_replace(struct net_device *ndev,
struct flow_cls_offload *fco,
struct vcap_admin *admin,
bool ingress)
{
+ struct sparx5_psfp_sf sf = { .max_sdu = SPX5_PSFP_SF_MAX_SDU };
+ struct netlink_ext_ack *extack = fco->common.extack;
+ int err, idx, tc_sg_idx = -1, tc_pol_idx = -1;
struct sparx5_port *port = netdev_priv(ndev);
struct sparx5_multiple_rules multi = {};
+ struct sparx5 *sparx5 = port->sparx5;
+ struct sparx5_psfp_sg sg = { 0 };
+ struct sparx5_psfp_fm fm = { 0 };
struct flow_action_entry *act;
struct vcap_control *vctrl;
struct flow_rule *frule;
struct vcap_rule *vrule;
u16 l3_proto;
- int err, idx;
vctrl = port->sparx5->vcap_ctrl;
@@ -1033,6 +1171,26 @@ static int sparx5_tc_flower_replace(struct net_device *ndev,
frule = flow_cls_offload_flow_rule(fco);
flow_action_for_each(idx, act, &frule->action) {
switch (act->id) {
+ case FLOW_ACTION_GATE: {
+ err = sparx5_tc_flower_parse_act_gate(&sg, act, extack);
+ if (err < 0)
+ goto out;
+
+ tc_sg_idx = act->hw_index;
+
+ break;
+ }
+ case FLOW_ACTION_POLICE: {
+ err = sparx5_tc_flower_parse_act_police(&fm.pol, act,
+ extack);
+ if (err < 0)
+ goto out;
+
+ tc_pol_idx = fm.pol.idx;
+ sf.max_sdu = act->police.mtu;
+
+ break;
+ }
case FLOW_ACTION_TRAP:
if (admin->vtype != VCAP_TYPE_IS2 &&
admin->vtype != VCAP_TYPE_ES2) {
@@ -1079,6 +1237,14 @@ static int sparx5_tc_flower_replace(struct net_device *ndev,
}
}
+ /* Setup PSFP */
+ if (tc_sg_idx >= 0 || tc_pol_idx >= 0) {
+ err = sparx5_tc_flower_psfp_setup(sparx5, vrule, tc_sg_idx,
+ tc_pol_idx, &sg, &fm, &sf);
+ if (err)
+ goto out;
+ }
+
err = sparx5_tc_select_protocol_keyset(ndev, vrule, admin, l3_proto,
&multi);
if (err) {
@@ -1107,19 +1273,86 @@ out:
return err;
}
+static void sparx5_tc_free_psfp_resources(struct sparx5 *sparx5,
+ struct vcap_rule *vrule)
+{
+ struct vcap_client_actionfield *afield;
+ u32 isdx, sfid, sgid, fmid;
+
+ /* Check if VCAP_AF_ISDX_VAL action is set for this rule - and if
+ * it is used for stream and/or flow-meter classification.
+ */
+ afield = vcap_find_actionfield(vrule, VCAP_AF_ISDX_VAL);
+ if (!afield)
+ return;
+
+ isdx = afield->data.u32.value;
+ sfid = sparx5_psfp_isdx_get_sf(sparx5, isdx);
+
+ if (!sfid)
+ return;
+
+ fmid = sparx5_psfp_isdx_get_fm(sparx5, isdx);
+ sgid = sparx5_psfp_sf_get_sg(sparx5, sfid);
+
+ if (fmid && sparx5_psfp_fm_del(sparx5, fmid) < 0)
+ pr_err("%s:%d Could not delete invalid fmid: %d", __func__,
+ __LINE__, fmid);
+
+ if (sgid && sparx5_psfp_sg_del(sparx5, sgid) < 0)
+ pr_err("%s:%d Could not delete invalid sgid: %d", __func__,
+ __LINE__, sgid);
+
+ if (sparx5_psfp_sf_del(sparx5, sfid) < 0)
+ pr_err("%s:%d Could not delete invalid sfid: %d", __func__,
+ __LINE__, sfid);
+
+ sparx5_isdx_conf_set(sparx5, isdx, 0, 0);
+}
+
+static int sparx5_tc_free_rule_resources(struct net_device *ndev,
+ struct vcap_control *vctrl,
+ int rule_id)
+{
+ struct sparx5_port *port = netdev_priv(ndev);
+ struct sparx5 *sparx5 = port->sparx5;
+ struct vcap_rule *vrule;
+ int ret = 0;
+
+ vrule = vcap_get_rule(vctrl, rule_id);
+ if (!vrule || IS_ERR(vrule))
+ return -EINVAL;
+
+ sparx5_tc_free_psfp_resources(sparx5, vrule);
+
+ vcap_free_rule(vrule);
+ return ret;
+}
+
static int sparx5_tc_flower_destroy(struct net_device *ndev,
struct flow_cls_offload *fco,
struct vcap_admin *admin)
{
struct sparx5_port *port = netdev_priv(ndev);
+ int err = -ENOENT, count = 0, rule_id;
struct vcap_control *vctrl;
- int err = -ENOENT, rule_id;
vctrl = port->sparx5->vcap_ctrl;
while (true) {
rule_id = vcap_lookup_rule_by_cookie(vctrl, fco->cookie);
if (rule_id <= 0)
break;
+ if (count == 0) {
+ /* Resources are attached to the first rule of
+ * a set of rules. Only works if the rules are
+ * in the correct order.
+ */
+ err = sparx5_tc_free_rule_resources(ndev, vctrl,
+ rule_id);
+ if (err)
+ pr_err("%s:%d: could not free resources %d\n",
+ __func__, __LINE__, rule_id);
+ }
err = vcap_del_rule(vctrl, ndev, rule_id);
if (err) {
pr_err("%s:%d: could not delete rule %d\n",
diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api.c b/drivers/net/ethernet/microchip/vcap/vcap_api.c
index 660d7cd92fcc..6307d59f23da 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api.c
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api.c
@@ -2755,7 +2755,7 @@ int vcap_rule_get_key_u32(struct vcap_rule *rule, enum vcap_key_field key,
EXPORT_SYMBOL_GPL(vcap_rule_get_key_u32);
/* Find a client action field in a rule */
-static struct vcap_client_actionfield *
+struct vcap_client_actionfield *
vcap_find_actionfield(struct vcap_rule *rule, enum vcap_action_field act)
{
struct vcap_rule_internal *ri = (struct vcap_rule_internal *)rule;
@@ -2766,6 +2766,7 @@ vcap_find_actionfield(struct vcap_rule *rule, enum vcap_action_field act)
return caf;
return NULL;
}
+EXPORT_SYMBOL_GPL(vcap_find_actionfield);
/* Check if the actionfield is already in the rule */
static bool vcap_actionfield_unique(struct vcap_rule *rule,
diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_client.h b/drivers/net/ethernet/microchip/vcap/vcap_api_client.h
index de29540fd190..417af9754bcc 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api_client.h
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api_client.h
@@ -268,4 +268,7 @@ int vcap_rule_mod_action_u32(struct vcap_rule *rule,
/* Get a 32 bit key field value and mask from the rule */
int vcap_rule_get_key_u32(struct vcap_rule *rule, enum vcap_key_field key,
u32 *value, u32 *mask);
+
+struct vcap_client_actionfield *
+vcap_find_actionfield(struct vcap_rule *rule, enum vcap_action_field act);
#endif /* __VCAP_API_CLIENT__ */
diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig
index 0922beac3ec0..c9d88673d306 100644
--- a/drivers/net/ethernet/wangxun/Kconfig
+++ b/drivers/net/ethernet/wangxun/Kconfig
@@ -18,6 +18,7 @@ if NET_VENDOR_WANGXUN
config LIBWX
tristate
+ select PAGE_POOL
help
Common library for Wangxun(R) Ethernet drivers.
diff --git a/drivers/net/ethernet/wangxun/libwx/Makefile b/drivers/net/ethernet/wangxun/libwx/Makefile
index 1ed5e23af944..850d1615cd18 100644
--- a/drivers/net/ethernet/wangxun/libwx/Makefile
+++ b/drivers/net/ethernet/wangxun/libwx/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_LIBWX) += libwx.o
-libwx-objs := wx_hw.o
+libwx-objs := wx_hw.o wx_lib.o
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index 3d7ba0c0df38..7db57f934a91 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -8,13 +8,14 @@
#include <linux/pci.h>
#include "wx_type.h"
+#include "wx_lib.h"
#include "wx_hw.h"
static void wx_intr_disable(struct wx *wx, u64 qmask)
{
u32 mask;
- mask = (qmask & 0xFFFFFFFF);
+ mask = (qmask & U32_MAX);
if (mask)
wr32(wx, WX_PX_IMS(0), mask);
@@ -25,6 +26,45 @@ static void wx_intr_disable(struct wx *wx, u64 qmask)
}
}
+void wx_intr_enable(struct wx *wx, u64 qmask)
+{
+ u32 mask;
+
+ mask = (qmask & U32_MAX);
+ if (mask)
+ wr32(wx, WX_PX_IMC(0), mask);
+ if (wx->mac.type == wx_mac_sp) {
+ mask = (qmask >> 32);
+ if (mask)
+ wr32(wx, WX_PX_IMC(1), mask);
+ }
+}
+EXPORT_SYMBOL(wx_intr_enable);
+
+/**
+ * wx_irq_disable - Mask off interrupt generation on the NIC
+ * @wx: board private structure
+ **/
+void wx_irq_disable(struct wx *wx)
+{
+ struct pci_dev *pdev = wx->pdev;
+
+ wr32(wx, WX_PX_MISC_IEN, 0);
+ wx_intr_disable(wx, WX_INTR_ALL);
+
+ if (pdev->msix_enabled) {
+ int vector;
+
+ for (vector = 0; vector < wx->num_q_vectors; vector++)
+ synchronize_irq(wx->msix_entries[vector].vector);
+
+ synchronize_irq(wx->msix_entries[vector].vector);
+ } else {
+ synchronize_irq(pdev->irq);
+ }
+}
+EXPORT_SYMBOL(wx_irq_disable);
+
/* cmd_addr is used for some special command:
* 1. to be sector address, when implemented erase sector command
* 2. to be flash address when implemented read, write flash address
@@ -765,6 +805,37 @@ void wx_flush_sw_mac_table(struct wx *wx)
}
EXPORT_SYMBOL(wx_flush_sw_mac_table);
+static int wx_add_mac_filter(struct wx *wx, u8 *addr, u16 pool)
+{
+ u32 i;
+
+ if (is_zero_ether_addr(addr))
+ return -EINVAL;
+
+ for (i = 0; i < wx->mac.num_rar_entries; i++) {
+ if (wx->mac_table[i].state & WX_MAC_STATE_IN_USE) {
+ if (ether_addr_equal(addr, wx->mac_table[i].addr)) {
+ if (wx->mac_table[i].pools != (1ULL << pool)) {
+ memcpy(wx->mac_table[i].addr, addr, ETH_ALEN);
+ wx->mac_table[i].pools |= (1ULL << pool);
+ wx_sync_mac_table(wx);
+ return i;
+ }
+ }
+ }
+
+ if (wx->mac_table[i].state & WX_MAC_STATE_IN_USE)
+ continue;
+ wx->mac_table[i].state |= (WX_MAC_STATE_MODIFIED |
+ WX_MAC_STATE_IN_USE);
+ memcpy(wx->mac_table[i].addr, addr, ETH_ALEN);
+ wx->mac_table[i].pools |= (1ULL << pool);
+ wx_sync_mac_table(wx);
+ return i;
+ }
+ return -ENOMEM;
+}
+
static int wx_del_mac_filter(struct wx *wx, u8 *addr, u16 pool)
{
u32 i;
@@ -789,6 +860,184 @@ static int wx_del_mac_filter(struct wx *wx, u8 *addr, u16 pool)
return -ENOMEM;
}
+static int wx_available_rars(struct wx *wx)
+{
+ u32 i, count = 0;
+
+ for (i = 0; i < wx->mac.num_rar_entries; i++) {
+ if (wx->mac_table[i].state == 0)
+ count++;
+ }
+
+ return count;
+}
+
+/**
+ * wx_write_uc_addr_list - write unicast addresses to RAR table
+ * @netdev: network interface device structure
+ * @pool: index for mac table
+ *
+ * Writes unicast address list to the RAR table.
+ * Returns: -ENOMEM on failure/insufficient address space
+ * 0 on no addresses written
+ * X on writing X addresses to the RAR table
+ **/
+static int wx_write_uc_addr_list(struct net_device *netdev, int pool)
+{
+ struct wx *wx = netdev_priv(netdev);
+ int count = 0;
+
+ /* return ENOMEM indicating insufficient memory for addresses */
+ if (netdev_uc_count(netdev) > wx_available_rars(wx))
+ return -ENOMEM;
+
+ if (!netdev_uc_empty(netdev)) {
+ struct netdev_hw_addr *ha;
+
+ netdev_for_each_uc_addr(ha, netdev) {
+ wx_del_mac_filter(wx, ha->addr, pool);
+ wx_add_mac_filter(wx, ha->addr, pool);
+ count++;
+ }
+ }
+ return count;
+}
+
+/**
+ * wx_mta_vector - Determines bit-vector in multicast table to set
+ * @wx: pointer to private structure
+ * @mc_addr: the multicast address
+ *
+ * Extracts the 12 bits, from a multicast address, to determine which
+ * bit-vector to set in the multicast table. The hardware uses 12 bits, from
+ * incoming rx multicast addresses, to determine the bit-vector to check in
+ * the MTA. Which of the 4 combination, of 12-bits, the hardware uses is set
+ * by the MO field of the MCSTCTRL. The MO field is set during initialization
+ * to mc_filter_type.
+ **/
+static u32 wx_mta_vector(struct wx *wx, u8 *mc_addr)
+{
+ u32 vector = 0;
+
+ switch (wx->mac.mc_filter_type) {
+ case 0: /* use bits [47:36] of the address */
+ vector = ((mc_addr[4] >> 4) | (((u16)mc_addr[5]) << 4));
+ break;
+ case 1: /* use bits [46:35] of the address */
+ vector = ((mc_addr[4] >> 3) | (((u16)mc_addr[5]) << 5));
+ break;
+ case 2: /* use bits [45:34] of the address */
+ vector = ((mc_addr[4] >> 2) | (((u16)mc_addr[5]) << 6));
+ break;
+ case 3: /* use bits [43:32] of the address */
+ vector = ((mc_addr[4]) | (((u16)mc_addr[5]) << 8));
+ break;
+ default: /* Invalid mc_filter_type */
+ wx_err(wx, "MC filter type param set incorrectly\n");
+ break;
+ }
+
+ /* vector can only be 12-bits or boundary will be exceeded */
+ vector &= 0xFFF;
+ return vector;
+}
+
+/**
+ * wx_set_mta - Set bit-vector in multicast table
+ * @wx: pointer to private structure
+ * @mc_addr: Multicast address
+ *
+ * Sets the bit-vector in the multicast table.
+ **/
+static void wx_set_mta(struct wx *wx, u8 *mc_addr)
+{
+ u32 vector, vector_bit, vector_reg;
+
+ wx->addr_ctrl.mta_in_use++;
+
+ vector = wx_mta_vector(wx, mc_addr);
+ wx_dbg(wx, " bit-vector = 0x%03X\n", vector);
+
+ /* The MTA is a register array of 128 32-bit registers. It is treated
+ * like an array of 4096 bits. We want to set bit
+ * BitArray[vector_value]. So we figure out what register the bit is
+ * in, read it, OR in the new bit, then write back the new value. The
+ * register is determined by the upper 7 bits of the vector value and
+ * the bit within that register are determined by the lower 5 bits of
+ * the value.
+ */
+ vector_reg = (vector >> 5) & 0x7F;
+ vector_bit = vector & 0x1F;
+ wx->mac.mta_shadow[vector_reg] |= (1 << vector_bit);
+}
+
+/**
+ * wx_update_mc_addr_list - Updates MAC list of multicast addresses
+ * @wx: pointer to private structure
+ * @netdev: pointer to net device structure
+ *
+ * The given list replaces any existing list. Clears the MC addrs from receive
+ * address registers and the multicast table. Uses unused receive address
+ * registers for the first multicast addresses, and hashes the rest into the
+ * multicast table.
+ **/
+static void wx_update_mc_addr_list(struct wx *wx, struct net_device *netdev)
+{
+ struct netdev_hw_addr *ha;
+ u32 i, psrctl;
+
+ /* Set the new number of MC addresses that we are being requested to
+ * use.
+ */
+ wx->addr_ctrl.num_mc_addrs = netdev_mc_count(netdev);
+ wx->addr_ctrl.mta_in_use = 0;
+
+ /* Clear mta_shadow */
+ wx_dbg(wx, " Clearing MTA\n");
+ memset(&wx->mac.mta_shadow, 0, sizeof(wx->mac.mta_shadow));
+
+ /* Update mta_shadow */
+ netdev_for_each_mc_addr(ha, netdev) {
+ wx_dbg(wx, " Adding the multicast addresses:\n");
+ wx_set_mta(wx, ha->addr);
+ }
+
+ /* Enable mta */
+ for (i = 0; i < wx->mac.mcft_size; i++)
+ wr32a(wx, WX_PSR_MC_TBL(0), i,
+ wx->mac.mta_shadow[i]);
+
+ if (wx->addr_ctrl.mta_in_use > 0) {
+ psrctl = rd32(wx, WX_PSR_CTL);
+ psrctl &= ~(WX_PSR_CTL_MO | WX_PSR_CTL_MFE);
+ psrctl |= WX_PSR_CTL_MFE |
+ (wx->mac.mc_filter_type << WX_PSR_CTL_MO_SHIFT);
+ wr32(wx, WX_PSR_CTL, psrctl);
+ }
+
+ wx_dbg(wx, "Update mc addr list Complete\n");
+}
+
+/**
+ * wx_write_mc_addr_list - write multicast addresses to MTA
+ * @netdev: network interface device structure
+ *
+ * Writes multicast address list to the MTA hash table.
+ * Returns: 0 on no addresses written
+ * X on writing X addresses to MTA
+ **/
+static int wx_write_mc_addr_list(struct net_device *netdev)
+{
+ struct wx *wx = netdev_priv(netdev);
+
+ if (!netif_running(netdev))
+ return 0;
+
+ wx_update_mc_addr_list(wx, netdev);
+
+ return netdev_mc_count(netdev);
+}
+
/**
* wx_set_mac - Change the Ethernet Address of the NIC
* @netdev: network interface device structure
@@ -844,6 +1093,430 @@ void wx_disable_rx(struct wx *wx)
}
EXPORT_SYMBOL(wx_disable_rx);
+static void wx_enable_rx(struct wx *wx)
+{
+ u32 psrctl;
+
+ /* enable mac receiver */
+ wr32m(wx, WX_MAC_RX_CFG,
+ WX_MAC_RX_CFG_RE, WX_MAC_RX_CFG_RE);
+
+ wr32m(wx, WX_RDB_PB_CTL,
+ WX_RDB_PB_CTL_RXEN, WX_RDB_PB_CTL_RXEN);
+
+ if (wx->mac.set_lben) {
+ psrctl = rd32(wx, WX_PSR_CTL);
+ psrctl |= WX_PSR_CTL_SW_EN;
+ wr32(wx, WX_PSR_CTL, psrctl);
+ wx->mac.set_lben = false;
+ }
+}
+
+/**
+ * wx_set_rxpba - Initialize Rx packet buffer
+ * @wx: pointer to private structure
+ **/
+static void wx_set_rxpba(struct wx *wx)
+{
+ u32 rxpktsize, txpktsize, txpbthresh;
+
+ rxpktsize = wx->mac.rx_pb_size << WX_RDB_PB_SZ_SHIFT;
+ wr32(wx, WX_RDB_PB_SZ(0), rxpktsize);
+
+ /* Only support an equally distributed Tx packet buffer strategy. */
+ txpktsize = wx->mac.tx_pb_size;
+ txpbthresh = (txpktsize / 1024) - WX_TXPKT_SIZE_MAX;
+ wr32(wx, WX_TDB_PB_SZ(0), txpktsize);
+ wr32(wx, WX_TDM_PB_THRE(0), txpbthresh);
+}
+
+static void wx_configure_port(struct wx *wx)
+{
+ u32 value, i;
+
+ value = WX_CFG_PORT_CTL_D_VLAN | WX_CFG_PORT_CTL_QINQ;
+ wr32m(wx, WX_CFG_PORT_CTL,
+ WX_CFG_PORT_CTL_D_VLAN |
+ WX_CFG_PORT_CTL_QINQ,
+ value);
+
+ wr32(wx, WX_CFG_TAG_TPID(0),
+ ETH_P_8021Q | ETH_P_8021AD << 16);
+ wx->tpid[0] = ETH_P_8021Q;
+ wx->tpid[1] = ETH_P_8021AD;
+ for (i = 1; i < 4; i++)
+ wr32(wx, WX_CFG_TAG_TPID(i),
+ ETH_P_8021Q | ETH_P_8021Q << 16);
+ for (i = 2; i < 8; i++)
+ wx->tpid[i] = ETH_P_8021Q;
+}
+
+/**
+ * wx_disable_sec_rx_path - Stops the receive data path
+ * @wx: pointer to private structure
+ *
+ * Stops the receive data path and waits for the HW to internally empty
+ * the Rx security block
+ **/
+static int wx_disable_sec_rx_path(struct wx *wx)
+{
+ u32 secrx;
+
+ wr32m(wx, WX_RSC_CTL,
+ WX_RSC_CTL_RX_DIS, WX_RSC_CTL_RX_DIS);
+
+ return read_poll_timeout(rd32, secrx, secrx & WX_RSC_ST_RSEC_RDY,
+ 1000, 40000, false, wx, WX_RSC_ST);
+}
+
+/**
+ * wx_enable_sec_rx_path - Enables the receive data path
+ * @wx: pointer to private structure
+ *
+ * Enables the receive data path.
+ **/
+static void wx_enable_sec_rx_path(struct wx *wx)
+{
+ wr32m(wx, WX_RSC_CTL, WX_RSC_CTL_RX_DIS, 0);
+ WX_WRITE_FLUSH(wx);
+}
+
+void wx_set_rx_mode(struct net_device *netdev)
+{
+ struct wx *wx = netdev_priv(netdev);
+ u32 fctrl, vmolr, vlnctrl;
+ int count;
+
+ /* Check for Promiscuous and All Multicast modes */
+ fctrl = rd32(wx, WX_PSR_CTL);
+ fctrl &= ~(WX_PSR_CTL_UPE | WX_PSR_CTL_MPE);
+ vmolr = rd32(wx, WX_PSR_VM_L2CTL(0));
+ vmolr &= ~(WX_PSR_VM_L2CTL_UPE |
+ WX_PSR_VM_L2CTL_MPE |
+ WX_PSR_VM_L2CTL_ROPE |
+ WX_PSR_VM_L2CTL_ROMPE);
+ vlnctrl = rd32(wx, WX_PSR_VLAN_CTL);
+ vlnctrl &= ~(WX_PSR_VLAN_CTL_VFE | WX_PSR_VLAN_CTL_CFIEN);
+
+ /* set all bits that we expect to always be set */
+ fctrl |= WX_PSR_CTL_BAM | WX_PSR_CTL_MFE;
+ vmolr |= WX_PSR_VM_L2CTL_BAM |
+ WX_PSR_VM_L2CTL_AUPE |
+ WX_PSR_VM_L2CTL_VACC;
+ vlnctrl |= WX_PSR_VLAN_CTL_VFE;
+
+ wx->addr_ctrl.user_set_promisc = false;
+ if (netdev->flags & IFF_PROMISC) {
+ wx->addr_ctrl.user_set_promisc = true;
+ fctrl |= WX_PSR_CTL_UPE | WX_PSR_CTL_MPE;
+ /* pf don't want packets routing to vf, so clear UPE */
+ vmolr |= WX_PSR_VM_L2CTL_MPE;
+ vlnctrl &= ~WX_PSR_VLAN_CTL_VFE;
+ }
+
+ if (netdev->flags & IFF_ALLMULTI) {
+ fctrl |= WX_PSR_CTL_MPE;
+ vmolr |= WX_PSR_VM_L2CTL_MPE;
+ }
+
+ if (netdev->features & NETIF_F_RXALL) {
+ vmolr |= (WX_PSR_VM_L2CTL_UPE | WX_PSR_VM_L2CTL_MPE);
+ vlnctrl &= ~WX_PSR_VLAN_CTL_VFE;
+ /* receive bad packets */
+ wr32m(wx, WX_RSC_CTL,
+ WX_RSC_CTL_SAVE_MAC_ERR,
+ WX_RSC_CTL_SAVE_MAC_ERR);
+ } else {
+ vmolr |= WX_PSR_VM_L2CTL_ROPE | WX_PSR_VM_L2CTL_ROMPE;
+ }
+
+ /* Write addresses to available RAR registers, if there is not
+ * sufficient space to store all the addresses then enable
+ * unicast promiscuous mode
+ */
+ count = wx_write_uc_addr_list(netdev, 0);
+ if (count < 0) {
+ vmolr &= ~WX_PSR_VM_L2CTL_ROPE;
+ vmolr |= WX_PSR_VM_L2CTL_UPE;
+ }
+
+ /* Write addresses to the MTA, if the attempt fails
+ * then we should just turn on promiscuous mode so
+ * that we can at least receive multicast traffic
+ */
+ count = wx_write_mc_addr_list(netdev);
+ if (count < 0) {
+ vmolr &= ~WX_PSR_VM_L2CTL_ROMPE;
+ vmolr |= WX_PSR_VM_L2CTL_MPE;
+ }
+
+ wr32(wx, WX_PSR_VLAN_CTL, vlnctrl);
+ wr32(wx, WX_PSR_CTL, fctrl);
+ wr32(wx, WX_PSR_VM_L2CTL(0), vmolr);
+}
+EXPORT_SYMBOL(wx_set_rx_mode);
+
+static void wx_set_rx_buffer_len(struct wx *wx)
+{
+ struct net_device *netdev = wx->netdev;
+ u32 mhadd, max_frame;
+
+ max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+ /* adjust max frame to be at least the size of a standard frame */
+ if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
+ max_frame = (ETH_FRAME_LEN + ETH_FCS_LEN);
+
+ mhadd = rd32(wx, WX_PSR_MAX_SZ);
+ if (max_frame != mhadd)
+ wr32(wx, WX_PSR_MAX_SZ, max_frame);
+}
+
+/* Disable the specified rx queue */
+void wx_disable_rx_queue(struct wx *wx, struct wx_ring *ring)
+{
+ u8 reg_idx = ring->reg_idx;
+ u32 rxdctl;
+ int ret;
+
+ /* write value back with RRCFG.EN bit cleared */
+ wr32m(wx, WX_PX_RR_CFG(reg_idx),
+ WX_PX_RR_CFG_RR_EN, 0);
+
+ /* the hardware may take up to 100us to really disable the rx queue */
+ ret = read_poll_timeout(rd32, rxdctl, !(rxdctl & WX_PX_RR_CFG_RR_EN),
+ 10, 100, true, wx, WX_PX_RR_CFG(reg_idx));
+
+ if (ret == -ETIMEDOUT) {
+ /* Just for information */
+ wx_err(wx,
+ "RRCFG.EN on Rx queue %d not cleared within the polling period\n",
+ reg_idx);
+ }
+}
+EXPORT_SYMBOL(wx_disable_rx_queue);
+
+static void wx_enable_rx_queue(struct wx *wx, struct wx_ring *ring)
+{
+ u8 reg_idx = ring->reg_idx;
+ u32 rxdctl;
+ int ret;
+
+ ret = read_poll_timeout(rd32, rxdctl, rxdctl & WX_PX_RR_CFG_RR_EN,
+ 1000, 10000, true, wx, WX_PX_RR_CFG(reg_idx));
+
+ if (ret == -ETIMEDOUT) {
+ /* Just for information */
+ wx_err(wx,
+ "RRCFG.EN on Rx queue %d not set within the polling period\n",
+ reg_idx);
+ }
+}
+
+static void wx_configure_srrctl(struct wx *wx,
+ struct wx_ring *rx_ring)
+{
+ u16 reg_idx = rx_ring->reg_idx;
+ u32 srrctl;
+
+ srrctl = rd32(wx, WX_PX_RR_CFG(reg_idx));
+ srrctl &= ~(WX_PX_RR_CFG_RR_HDR_SZ |
+ WX_PX_RR_CFG_RR_BUF_SZ |
+ WX_PX_RR_CFG_SPLIT_MODE);
+ /* configure header buffer length, needed for RSC */
+ srrctl |= WX_RXBUFFER_256 << WX_PX_RR_CFG_BHDRSIZE_SHIFT;
+
+ /* configure the packet buffer length */
+ srrctl |= WX_RX_BUFSZ >> WX_PX_RR_CFG_BSIZEPKT_SHIFT;
+
+ wr32(wx, WX_PX_RR_CFG(reg_idx), srrctl);
+}
+
+static void wx_configure_tx_ring(struct wx *wx,
+ struct wx_ring *ring)
+{
+ u32 txdctl = WX_PX_TR_CFG_ENABLE;
+ u8 reg_idx = ring->reg_idx;
+ u64 tdba = ring->dma;
+ int ret;
+
+ /* disable queue to avoid issues while updating state */
+ wr32(wx, WX_PX_TR_CFG(reg_idx), WX_PX_TR_CFG_SWFLSH);
+ WX_WRITE_FLUSH(wx);
+
+ wr32(wx, WX_PX_TR_BAL(reg_idx), tdba & DMA_BIT_MASK(32));
+ wr32(wx, WX_PX_TR_BAH(reg_idx), upper_32_bits(tdba));
+
+ /* reset head and tail pointers */
+ wr32(wx, WX_PX_TR_RP(reg_idx), 0);
+ wr32(wx, WX_PX_TR_WP(reg_idx), 0);
+ ring->tail = wx->hw_addr + WX_PX_TR_WP(reg_idx);
+
+ if (ring->count < WX_MAX_TXD)
+ txdctl |= ring->count / 128 << WX_PX_TR_CFG_TR_SIZE_SHIFT;
+ txdctl |= 0x20 << WX_PX_TR_CFG_WTHRESH_SHIFT;
+
+ /* reinitialize tx_buffer_info */
+ memset(ring->tx_buffer_info, 0,
+ sizeof(struct wx_tx_buffer) * ring->count);
+
+ /* enable queue */
+ wr32(wx, WX_PX_TR_CFG(reg_idx), txdctl);
+
+ /* poll to verify queue is enabled */
+ ret = read_poll_timeout(rd32, txdctl, txdctl & WX_PX_TR_CFG_ENABLE,
+ 1000, 10000, true, wx, WX_PX_TR_CFG(reg_idx));
+ if (ret == -ETIMEDOUT)
+ wx_err(wx, "Could not enable Tx Queue %d\n", reg_idx);
+}
+
+static void wx_configure_rx_ring(struct wx *wx,
+ struct wx_ring *ring)
+{
+ u16 reg_idx = ring->reg_idx;
+ union wx_rx_desc *rx_desc;
+ u64 rdba = ring->dma;
+ u32 rxdctl;
+
+ /* disable queue to avoid issues while updating state */
+ rxdctl = rd32(wx, WX_PX_RR_CFG(reg_idx));
+ wx_disable_rx_queue(wx, ring);
+
+ wr32(wx, WX_PX_RR_BAL(reg_idx), rdba & DMA_BIT_MASK(32));
+ wr32(wx, WX_PX_RR_BAH(reg_idx), upper_32_bits(rdba));
+
+ if (ring->count == WX_MAX_RXD)
+ rxdctl |= 0 << WX_PX_RR_CFG_RR_SIZE_SHIFT;
+ else
+ rxdctl |= (ring->count / 128) << WX_PX_RR_CFG_RR_SIZE_SHIFT;
+
+ rxdctl |= 0x1 << WX_PX_RR_CFG_RR_THER_SHIFT;
+ wr32(wx, WX_PX_RR_CFG(reg_idx), rxdctl);
+
+ /* reset head and tail pointers */
+ wr32(wx, WX_PX_RR_RP(reg_idx), 0);
+ wr32(wx, WX_PX_RR_WP(reg_idx), 0);
+ ring->tail = wx->hw_addr + WX_PX_RR_WP(reg_idx);
+
+ wx_configure_srrctl(wx, ring);
+
+ /* initialize rx_buffer_info */
+ memset(ring->rx_buffer_info, 0,
+ sizeof(struct wx_rx_buffer) * ring->count);
+
+ /* initialize Rx descriptor 0 */
+ rx_desc = WX_RX_DESC(ring, 0);
+ rx_desc->wb.upper.length = 0;
+
+ /* enable receive descriptor ring */
+ wr32m(wx, WX_PX_RR_CFG(reg_idx),
+ WX_PX_RR_CFG_RR_EN, WX_PX_RR_CFG_RR_EN);
+
+ wx_enable_rx_queue(wx, ring);
+ wx_alloc_rx_buffers(ring, wx_desc_unused(ring));
+}
+
+/**
+ * wx_configure_tx - Configure Transmit Unit after Reset
+ * @wx: pointer to private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ **/
+static void wx_configure_tx(struct wx *wx)
+{
+ u32 i;
+
+ /* TDM_CTL.TE must be before Tx queues are enabled */
+ wr32m(wx, WX_TDM_CTL,
+ WX_TDM_CTL_TE, WX_TDM_CTL_TE);
+
+ /* Setup the HW Tx Head and Tail descriptor pointers */
+ for (i = 0; i < wx->num_tx_queues; i++)
+ wx_configure_tx_ring(wx, wx->tx_ring[i]);
+
+ wr32m(wx, WX_TSC_BUF_AE, WX_TSC_BUF_AE_THR, 0x10);
+
+ if (wx->mac.type == wx_mac_em)
+ wr32m(wx, WX_TSC_CTL, WX_TSC_CTL_TX_DIS | WX_TSC_CTL_TSEC_DIS, 0x1);
+
+ /* enable mac transmitter */
+ wr32m(wx, WX_MAC_TX_CFG,
+ WX_MAC_TX_CFG_TE, WX_MAC_TX_CFG_TE);
+}
+
+/**
+ * wx_configure_rx - Configure Receive Unit after Reset
+ * @wx: pointer to private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+static void wx_configure_rx(struct wx *wx)
+{
+ u32 psrtype, i;
+ int ret;
+
+ wx_disable_rx(wx);
+
+ psrtype = WX_RDB_PL_CFG_L4HDR |
+ WX_RDB_PL_CFG_L3HDR |
+ WX_RDB_PL_CFG_L2HDR |
+ WX_RDB_PL_CFG_TUN_TUNHDR |
+ WX_RDB_PL_CFG_TUN_TUNHDR;
+ wr32(wx, WX_RDB_PL_CFG(0), psrtype);
+
+ /* enable hw crc stripping */
+ wr32m(wx, WX_RSC_CTL, WX_RSC_CTL_CRC_STRIP, WX_RSC_CTL_CRC_STRIP);
+
+ if (wx->mac.type == wx_mac_sp) {
+ u32 psrctl;
+
+ /* RSC Setup */
+ psrctl = rd32(wx, WX_PSR_CTL);
+ psrctl |= WX_PSR_CTL_RSC_ACK; /* Disable RSC for ACK packets */
+ psrctl |= WX_PSR_CTL_RSC_DIS;
+ wr32(wx, WX_PSR_CTL, psrctl);
+ }
+
+ /* set_rx_buffer_len must be called before ring initialization */
+ wx_set_rx_buffer_len(wx);
+
+ /* Setup the HW Rx Head and Tail Descriptor Pointers and
+ * the Base and Length of the Rx Descriptor Ring
+ */
+ for (i = 0; i < wx->num_rx_queues; i++)
+ wx_configure_rx_ring(wx, wx->rx_ring[i]);
+
+ /* Enable all receives, disable security engine prior to block traffic */
+ ret = wx_disable_sec_rx_path(wx);
+ if (ret < 0)
+ wx_err(wx, "The register status is abnormal, please check device.");
+
+ wx_enable_rx(wx);
+ wx_enable_sec_rx_path(wx);
+}
+
+static void wx_configure_isb(struct wx *wx)
+{
+ /* set ISB Address */
+ wr32(wx, WX_PX_ISB_ADDR_L, wx->isb_dma & DMA_BIT_MASK(32));
+ if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
+ wr32(wx, WX_PX_ISB_ADDR_H, upper_32_bits(wx->isb_dma));
+}
+
+void wx_configure(struct wx *wx)
+{
+ wx_set_rxpba(wx);
+ wx_configure_port(wx);
+
+ wx_set_rx_mode(wx->netdev);
+
+ wx_enable_sec_rx_path(wx);
+
+ wx_configure_tx(wx);
+ wx_configure_rx(wx);
+ wx_configure_isb(wx);
+}
+EXPORT_SYMBOL(wx_configure);
+
/**
* wx_disable_pcie_master - Disable PCI-express master access
* @wx: pointer to hardware structure
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h
index 803983546f3a..44dfd6ea442a 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h
@@ -4,6 +4,8 @@
#ifndef _WX_HW_H_
#define _WX_HW_H_
+void wx_intr_enable(struct wx *wx, u64 qmask);
+void wx_irq_disable(struct wx *wx);
int wx_check_flash_load(struct wx *wx, u32 check_bit);
void wx_control_hw(struct wx *wx, bool drv);
int wx_mng_present(struct wx *wx);
@@ -20,6 +22,9 @@ void wx_mac_set_default_filter(struct wx *wx, u8 *addr);
void wx_flush_sw_mac_table(struct wx *wx);
int wx_set_mac(struct net_device *netdev, void *p);
void wx_disable_rx(struct wx *wx);
+void wx_set_rx_mode(struct net_device *netdev);
+void wx_disable_rx_queue(struct wx *wx, struct wx_ring *ring);
+void wx_configure(struct wx *wx);
int wx_disable_pcie_master(struct wx *wx);
int wx_stop_adapter(struct wx *wx);
void wx_reset_misc(struct wx *wx);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
new file mode 100644
index 000000000000..57e1871ea0c6
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -0,0 +1,2004 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 - 2022 Beijing WangXun Technology Co., Ltd. */
+
+#include <linux/etherdevice.h>
+#include <net/page_pool.h>
+#include <linux/iopoll.h>
+#include <linux/pci.h>
+
+#include "wx_type.h"
+#include "wx_lib.h"
+#include "wx_hw.h"
+
+/* wx_test_staterr - tests bits in Rx descriptor status and error fields */
+static __le32 wx_test_staterr(union wx_rx_desc *rx_desc,
+ const u32 stat_err_bits)
+{
+ return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
+}
+
+static bool wx_can_reuse_rx_page(struct wx_rx_buffer *rx_buffer,
+ int rx_buffer_pgcnt)
+{
+ unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+ struct page *page = rx_buffer->page;
+
+ /* avoid re-using remote and pfmemalloc pages */
+ if (!dev_page_is_reusable(page))
+ return false;
+
+#if (PAGE_SIZE < 8192)
+ /* if we are only owner of page we can reuse it */
+ if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
+ return false;
+#endif
+
+ /* If we have drained the page fragment pool we need to update
+ * the pagecnt_bias and page count so that we fully restock the
+ * number of references the driver holds.
+ */
+ if (unlikely(pagecnt_bias == 1)) {
+ page_ref_add(page, USHRT_MAX - 1);
+ rx_buffer->pagecnt_bias = USHRT_MAX;
+ }
+
+ return true;
+}
+
+/**
+ * wx_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ **/
+static void wx_reuse_rx_page(struct wx_ring *rx_ring,
+ struct wx_rx_buffer *old_buff)
+{
+ u16 nta = rx_ring->next_to_alloc;
+ struct wx_rx_buffer *new_buff;
+
+ new_buff = &rx_ring->rx_buffer_info[nta];
+
+ /* update, and store next to alloc */
+ nta++;
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ /* transfer page from old buffer to new buffer */
+ new_buff->page = old_buff->page;
+ new_buff->page_dma = old_buff->page_dma;
+ new_buff->page_offset = old_buff->page_offset;
+ new_buff->pagecnt_bias = old_buff->pagecnt_bias;
+}
+
+static void wx_dma_sync_frag(struct wx_ring *rx_ring,
+ struct wx_rx_buffer *rx_buffer)
+{
+ struct sk_buff *skb = rx_buffer->skb;
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ WX_CB(skb)->dma,
+ skb_frag_off(frag),
+ skb_frag_size(frag),
+ DMA_FROM_DEVICE);
+
+ /* If the page was released, just unmap it. */
+ if (unlikely(WX_CB(skb)->page_released))
+ page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false);
+}
+
+static struct wx_rx_buffer *wx_get_rx_buffer(struct wx_ring *rx_ring,
+ union wx_rx_desc *rx_desc,
+ struct sk_buff **skb,
+ int *rx_buffer_pgcnt)
+{
+ struct wx_rx_buffer *rx_buffer;
+ unsigned int size;
+
+ rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+ size = le16_to_cpu(rx_desc->wb.upper.length);
+
+#if (PAGE_SIZE < 8192)
+ *rx_buffer_pgcnt = page_count(rx_buffer->page);
+#else
+ *rx_buffer_pgcnt = 0;
+#endif
+
+ prefetchw(rx_buffer->page);
+ *skb = rx_buffer->skb;
+
+ /* Delay unmapping of the first packet. It carries the header
+ * information, HW may still access the header after the writeback.
+ * Only unmap it when EOP is reached
+ */
+ if (!wx_test_staterr(rx_desc, WX_RXD_STAT_EOP)) {
+ if (!*skb)
+ goto skip_sync;
+ } else {
+ if (*skb)
+ wx_dma_sync_frag(rx_ring, rx_buffer);
+ }
+
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->dma,
+ rx_buffer->page_offset,
+ size,
+ DMA_FROM_DEVICE);
+skip_sync:
+ rx_buffer->pagecnt_bias--;
+
+ return rx_buffer;
+}
+
+static void wx_put_rx_buffer(struct wx_ring *rx_ring,
+ struct wx_rx_buffer *rx_buffer,
+ struct sk_buff *skb,
+ int rx_buffer_pgcnt)
+{
+ if (wx_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
+ /* hand second half of page back to the ring */
+ wx_reuse_rx_page(rx_ring, rx_buffer);
+ } else {
+ if (!IS_ERR(skb) && WX_CB(skb)->dma == rx_buffer->dma)
+ /* the page has been released from the ring */
+ WX_CB(skb)->page_released = true;
+ else
+ page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false);
+
+ __page_frag_cache_drain(rx_buffer->page,
+ rx_buffer->pagecnt_bias);
+ }
+
+ /* clear contents of rx_buffer */
+ rx_buffer->page = NULL;
+ rx_buffer->skb = NULL;
+}
+
+static struct sk_buff *wx_build_skb(struct wx_ring *rx_ring,
+ struct wx_rx_buffer *rx_buffer,
+ union wx_rx_desc *rx_desc)
+{
+ unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = WX_RX_BUFSZ;
+#else
+ unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+#endif
+ struct sk_buff *skb = rx_buffer->skb;
+
+ if (!skb) {
+ void *page_addr = page_address(rx_buffer->page) +
+ rx_buffer->page_offset;
+
+ /* prefetch first cache line of first page */
+ prefetch(page_addr);
+#if L1_CACHE_BYTES < 128
+ prefetch(page_addr + L1_CACHE_BYTES);
+#endif
+
+ /* allocate a skb to store the frags */
+ skb = napi_alloc_skb(&rx_ring->q_vector->napi, WX_RXBUFFER_256);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* we will be copying header into skb->data in
+ * pskb_may_pull so it is in our interest to prefetch
+ * it now to avoid a possible cache miss
+ */
+ prefetchw(skb->data);
+
+ if (size <= WX_RXBUFFER_256) {
+ memcpy(__skb_put(skb, size), page_addr,
+ ALIGN(size, sizeof(long)));
+ rx_buffer->pagecnt_bias++;
+
+ return skb;
+ }
+
+ if (!wx_test_staterr(rx_desc, WX_RXD_STAT_EOP))
+ WX_CB(skb)->dma = rx_buffer->dma;
+
+ skb_add_rx_frag(skb, 0, rx_buffer->page,
+ rx_buffer->page_offset,
+ size, truesize);
+ goto out;
+
+ } else {
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+ rx_buffer->page_offset, size, truesize);
+ }
+
+out:
+#if (PAGE_SIZE < 8192)
+ /* flip page offset to other buffer */
+ rx_buffer->page_offset ^= truesize;
+#else
+ /* move offset up to the next cache line */
+ rx_buffer->page_offset += truesize;
+#endif
+
+ return skb;
+}
+
+static bool wx_alloc_mapped_page(struct wx_ring *rx_ring,
+ struct wx_rx_buffer *bi)
+{
+ struct page *page = bi->page;
+ dma_addr_t dma;
+
+ /* since we are recycling buffers we should seldom need to alloc */
+ if (likely(page))
+ return true;
+
+ page = page_pool_dev_alloc_pages(rx_ring->page_pool);
+ WARN_ON(!page);
+ dma = page_pool_get_dma_addr(page);
+
+ bi->page_dma = dma;
+ bi->page = page;
+ bi->page_offset = 0;
+ page_ref_add(page, USHRT_MAX - 1);
+ bi->pagecnt_bias = USHRT_MAX;
+
+ return true;
+}
+
+/**
+ * wx_alloc_rx_buffers - Replace used receive buffers
+ * @rx_ring: ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ **/
+void wx_alloc_rx_buffers(struct wx_ring *rx_ring, u16 cleaned_count)
+{
+ u16 i = rx_ring->next_to_use;
+ union wx_rx_desc *rx_desc;
+ struct wx_rx_buffer *bi;
+
+ /* nothing to do */
+ if (!cleaned_count)
+ return;
+
+ rx_desc = WX_RX_DESC(rx_ring, i);
+ bi = &rx_ring->rx_buffer_info[i];
+ i -= rx_ring->count;
+
+ do {
+ if (!wx_alloc_mapped_page(rx_ring, bi))
+ break;
+
+ /* sync the buffer for use by the device */
+ dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+ bi->page_offset,
+ WX_RX_BUFSZ,
+ DMA_FROM_DEVICE);
+
+ rx_desc->read.pkt_addr =
+ cpu_to_le64(bi->page_dma + bi->page_offset);
+
+ rx_desc++;
+ bi++;
+ i++;
+ if (unlikely(!i)) {
+ rx_desc = WX_RX_DESC(rx_ring, 0);
+ bi = rx_ring->rx_buffer_info;
+ i -= rx_ring->count;
+ }
+
+ /* clear the status bits for the next_to_use descriptor */
+ rx_desc->wb.upper.status_error = 0;
+
+ cleaned_count--;
+ } while (cleaned_count);
+
+ i += rx_ring->count;
+
+ if (rx_ring->next_to_use != i) {
+ rx_ring->next_to_use = i;
+ /* update next to alloc since we have filled the ring */
+ rx_ring->next_to_alloc = i;
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64).
+ */
+ wmb();
+ writel(i, rx_ring->tail);
+ }
+}
+
+u16 wx_desc_unused(struct wx_ring *ring)
+{
+ u16 ntc = ring->next_to_clean;
+ u16 ntu = ring->next_to_use;
+
+ return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
+}
+
+/**
+ * wx_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ * @skb: Current socket buffer containing buffer in progress
+ *
+ * This function updates next to clean. If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ **/
+static bool wx_is_non_eop(struct wx_ring *rx_ring,
+ union wx_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ u32 ntc = rx_ring->next_to_clean + 1;
+
+ /* fetch, update, and store next to clean */
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+
+ prefetch(WX_RX_DESC(rx_ring, ntc));
+
+ /* if we are the last buffer then there is nothing else to do */
+ if (likely(wx_test_staterr(rx_desc, WX_RXD_STAT_EOP)))
+ return false;
+
+ rx_ring->rx_buffer_info[ntc].skb = skb;
+
+ return true;
+}
+
+static void wx_pull_tail(struct sk_buff *skb)
+{
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+ unsigned int pull_len;
+ unsigned char *va;
+
+ /* it is valid to use page_address instead of kmap since we are
+ * working with pages allocated out of the lomem pool per
+ * alloc_page(GFP_ATOMIC)
+ */
+ va = skb_frag_address(frag);
+
+ /* we need the header to contain the greater of either ETH_HLEN or
+ * 60 bytes if the skb->len is less than 60 for skb_pad.
+ */
+ pull_len = eth_get_headlen(skb->dev, va, WX_RXBUFFER_256);
+
+ /* align pull length to size of long to optimize memcpy performance */
+ skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
+
+ /* update all of the pointers */
+ skb_frag_size_sub(frag, pull_len);
+ skb_frag_off_add(frag, pull_len);
+ skb->data_len -= pull_len;
+ skb->tail += pull_len;
+}
+
+/**
+ * wx_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Check for corrupted packet headers caused by senders on the local L2
+ * embedded NIC switch not setting up their Tx Descriptors right. These
+ * should be very rare.
+ *
+ * Also address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ **/
+static bool wx_cleanup_headers(struct wx_ring *rx_ring,
+ union wx_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct net_device *netdev = rx_ring->netdev;
+
+ /* verify that the packet does not have any known errors */
+ if (!netdev ||
+ unlikely(wx_test_staterr(rx_desc, WX_RXD_ERR_RXE) &&
+ !(netdev->features & NETIF_F_RXALL))) {
+ dev_kfree_skb_any(skb);
+ return true;
+ }
+
+ /* place header in linear portion of buffer */
+ if (!skb_headlen(skb))
+ wx_pull_tail(skb);
+
+ /* if eth_skb_pad returns an error the skb was freed */
+ if (eth_skb_pad(skb))
+ return true;
+
+ return false;
+}
+
+/**
+ * wx_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+ * @q_vector: structure containing interrupt and ring information
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @budget: Total limit on number of packets to process
+ *
+ * This function provides a "bounce buffer" approach to Rx interrupt
+ * processing. The advantage to this is that on systems that have
+ * expensive overhead for IOMMU access this provides a means of avoiding
+ * it by maintaining the mapping of the page to the system.
+ *
+ * Returns amount of work completed.
+ **/
+static int wx_clean_rx_irq(struct wx_q_vector *q_vector,
+ struct wx_ring *rx_ring,
+ int budget)
+{
+ unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+ u16 cleaned_count = wx_desc_unused(rx_ring);
+
+ do {
+ struct wx_rx_buffer *rx_buffer;
+ union wx_rx_desc *rx_desc;
+ struct sk_buff *skb;
+ int rx_buffer_pgcnt;
+
+ /* return some buffers to hardware, one at a time is too slow */
+ if (cleaned_count >= WX_RX_BUFFER_WRITE) {
+ wx_alloc_rx_buffers(rx_ring, cleaned_count);
+ cleaned_count = 0;
+ }
+
+ rx_desc = WX_RX_DESC(rx_ring, rx_ring->next_to_clean);
+ if (!wx_test_staterr(rx_desc, WX_RXD_STAT_DD))
+ break;
+
+ /* This memory barrier is needed to keep us from reading
+ * any other fields out of the rx_desc until we know the
+ * descriptor has been written back
+ */
+ dma_rmb();
+
+ rx_buffer = wx_get_rx_buffer(rx_ring, rx_desc, &skb, &rx_buffer_pgcnt);
+
+ /* retrieve a buffer from the ring */
+ skb = wx_build_skb(rx_ring, rx_buffer, rx_desc);
+
+ /* exit if we failed to retrieve a buffer */
+ if (!skb) {
+ rx_buffer->pagecnt_bias++;
+ break;
+ }
+
+ wx_put_rx_buffer(rx_ring, rx_buffer, skb, rx_buffer_pgcnt);
+ cleaned_count++;
+
+ /* place incomplete frames back on ring for completion */
+ if (wx_is_non_eop(rx_ring, rx_desc, skb))
+ continue;
+
+ /* verify the packet layout is correct */
+ if (wx_cleanup_headers(rx_ring, rx_desc, skb))
+ continue;
+
+ /* probably a little skewed due to removing CRC */
+ total_rx_bytes += skb->len;
+
+ skb_record_rx_queue(skb, rx_ring->queue_index);
+ skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+ napi_gro_receive(&q_vector->napi, skb);
+
+ /* update budget accounting */
+ total_rx_packets++;
+ } while (likely(total_rx_packets < budget));
+
+ u64_stats_update_begin(&rx_ring->syncp);
+ rx_ring->stats.packets += total_rx_packets;
+ rx_ring->stats.bytes += total_rx_bytes;
+ u64_stats_update_end(&rx_ring->syncp);
+ q_vector->rx.total_packets += total_rx_packets;
+ q_vector->rx.total_bytes += total_rx_bytes;
+
+ return total_rx_packets;
+}
+
+static struct netdev_queue *wx_txring_txq(const struct wx_ring *ring)
+{
+ return netdev_get_tx_queue(ring->netdev, ring->queue_index);
+}
+
+/**
+ * wx_clean_tx_irq - Reclaim resources after transmit completes
+ * @q_vector: structure containing interrupt and ring information
+ * @tx_ring: tx ring to clean
+ * @napi_budget: Used to determine if we are in netpoll
+ **/
+static bool wx_clean_tx_irq(struct wx_q_vector *q_vector,
+ struct wx_ring *tx_ring, int napi_budget)
+{
+ unsigned int budget = q_vector->wx->tx_work_limit;
+ unsigned int total_bytes = 0, total_packets = 0;
+ unsigned int i = tx_ring->next_to_clean;
+ struct wx_tx_buffer *tx_buffer;
+ union wx_tx_desc *tx_desc;
+
+ if (!netif_carrier_ok(tx_ring->netdev))
+ return true;
+
+ tx_buffer = &tx_ring->tx_buffer_info[i];
+ tx_desc = WX_TX_DESC(tx_ring, i);
+ i -= tx_ring->count;
+
+ do {
+ union wx_tx_desc *eop_desc = tx_buffer->next_to_watch;
+
+ /* if next_to_watch is not set then there is no work pending */
+ if (!eop_desc)
+ break;
+
+ /* prevent any other reads prior to eop_desc */
+ smp_rmb();
+
+ /* if DD is not set pending work has not been completed */
+ if (!(eop_desc->wb.status & cpu_to_le32(WX_TXD_STAT_DD)))
+ break;
+
+ /* clear next_to_watch to prevent false hangs */
+ tx_buffer->next_to_watch = NULL;
+
+ /* update the statistics for this packet */
+ total_bytes += tx_buffer->bytecount;
+ total_packets += tx_buffer->gso_segs;
+
+ /* free the skb */
+ napi_consume_skb(tx_buffer->skb, napi_budget);
+
+ /* unmap skb header data */
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+
+ /* clear tx_buffer data */
+ dma_unmap_len_set(tx_buffer, len, 0);
+
+ /* unmap remaining buffers */
+ while (tx_desc != eop_desc) {
+ tx_buffer++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_buffer = tx_ring->tx_buffer_info;
+ tx_desc = WX_TX_DESC(tx_ring, 0);
+ }
+
+ /* unmap any remaining paged data */
+ if (dma_unmap_len(tx_buffer, len)) {
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer, len, 0);
+ }
+ }
+
+ /* move us one more past the eop_desc for start of next pkt */
+ tx_buffer++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_buffer = tx_ring->tx_buffer_info;
+ tx_desc = WX_TX_DESC(tx_ring, 0);
+ }
+
+ /* issue prefetch for next Tx descriptor */
+ prefetch(tx_desc);
+
+ /* update budget accounting */
+ budget--;
+ } while (likely(budget));
+
+ i += tx_ring->count;
+ tx_ring->next_to_clean = i;
+ u64_stats_update_begin(&tx_ring->syncp);
+ tx_ring->stats.bytes += total_bytes;
+ tx_ring->stats.packets += total_packets;
+ u64_stats_update_end(&tx_ring->syncp);
+ q_vector->tx.total_bytes += total_bytes;
+ q_vector->tx.total_packets += total_packets;
+
+ netdev_tx_completed_queue(wx_txring_txq(tx_ring),
+ total_packets, total_bytes);
+
+#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+ if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
+ (wx_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
+ /* Make sure that anybody stopping the queue after this
+ * sees the new next_to_clean.
+ */
+ smp_mb();
+
+ if (__netif_subqueue_stopped(tx_ring->netdev,
+ tx_ring->queue_index) &&
+ netif_running(tx_ring->netdev))
+ netif_wake_subqueue(tx_ring->netdev,
+ tx_ring->queue_index);
+ }
+
+ return !!budget;
+}
+
+/**
+ * wx_poll - NAPI polling RX/TX cleanup routine
+ * @napi: napi struct with our devices info in it
+ * @budget: amount of work driver is allowed to do this pass, in packets
+ *
+ * This function will clean all queues associated with a q_vector.
+ **/
+static int wx_poll(struct napi_struct *napi, int budget)
+{
+ struct wx_q_vector *q_vector = container_of(napi, struct wx_q_vector, napi);
+ int per_ring_budget, work_done = 0;
+ struct wx *wx = q_vector->wx;
+ bool clean_complete = true;
+ struct wx_ring *ring;
+
+ wx_for_each_ring(ring, q_vector->tx) {
+ if (!wx_clean_tx_irq(q_vector, ring, budget))
+ clean_complete = false;
+ }
+
+ /* Exit if we are called by netpoll */
+ if (budget <= 0)
+ return budget;
+
+ /* attempt to distribute budget to each queue fairly, but don't allow
+ * the budget to go below 1 because we'll exit polling
+ */
+ if (q_vector->rx.count > 1)
+ per_ring_budget = max(budget / q_vector->rx.count, 1);
+ else
+ per_ring_budget = budget;
+
+ wx_for_each_ring(ring, q_vector->rx) {
+ int cleaned = wx_clean_rx_irq(q_vector, ring, per_ring_budget);
+
+ work_done += cleaned;
+ if (cleaned >= per_ring_budget)
+ clean_complete = false;
+ }
+
+ /* If all work not completed, return budget and keep polling */
+ if (!clean_complete)
+ return budget;
+
+ /* all work done, exit the polling mode */
+ if (likely(napi_complete_done(napi, work_done))) {
+ if (netif_running(wx->netdev))
+ wx_intr_enable(wx, WX_INTR_Q(q_vector->v_idx));
+ };
+
+ return min(work_done, budget - 1);
+}
+
+static int wx_maybe_stop_tx(struct wx_ring *tx_ring, u16 size)
+{
+ if (likely(wx_desc_unused(tx_ring) >= size))
+ return 0;
+
+ netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+
+ /* For the next check */
+ smp_mb();
+
+ /* We need to check again in a case another CPU has just
+ * made room available.
+ */
+ if (likely(wx_desc_unused(tx_ring) < size))
+ return -EBUSY;
+
+ /* A reprieve! - use start_queue because it doesn't call schedule */
+ netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
+
+ return 0;
+}
+
+static void wx_tx_map(struct wx_ring *tx_ring,
+ struct wx_tx_buffer *first)
+{
+ struct sk_buff *skb = first->skb;
+ struct wx_tx_buffer *tx_buffer;
+ u16 i = tx_ring->next_to_use;
+ unsigned int data_len, size;
+ union wx_tx_desc *tx_desc;
+ skb_frag_t *frag;
+ dma_addr_t dma;
+ u32 cmd_type;
+
+ cmd_type = WX_TXD_DTYP_DATA | WX_TXD_IFCS;
+ tx_desc = WX_TX_DESC(tx_ring, i);
+
+ tx_desc->read.olinfo_status = cpu_to_le32(skb->len << WX_TXD_PAYLEN_SHIFT);
+
+ size = skb_headlen(skb);
+ data_len = skb->data_len;
+ dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+ tx_buffer = first;
+
+ for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+ if (dma_mapping_error(tx_ring->dev, dma))
+ goto dma_error;
+
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_buffer, len, size);
+ dma_unmap_addr_set(tx_buffer, dma, dma);
+
+ tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+ while (unlikely(size > WX_MAX_DATA_PER_TXD)) {
+ tx_desc->read.cmd_type_len =
+ cpu_to_le32(cmd_type ^ WX_MAX_DATA_PER_TXD);
+
+ i++;
+ tx_desc++;
+ if (i == tx_ring->count) {
+ tx_desc = WX_TX_DESC(tx_ring, 0);
+ i = 0;
+ }
+ tx_desc->read.olinfo_status = 0;
+
+ dma += WX_MAX_DATA_PER_TXD;
+ size -= WX_MAX_DATA_PER_TXD;
+
+ tx_desc->read.buffer_addr = cpu_to_le64(dma);
+ }
+
+ if (likely(!data_len))
+ break;
+
+ tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
+
+ i++;
+ tx_desc++;
+ if (i == tx_ring->count) {
+ tx_desc = WX_TX_DESC(tx_ring, 0);
+ i = 0;
+ }
+ tx_desc->read.olinfo_status = 0;
+
+ size = skb_frag_size(frag);
+
+ data_len -= size;
+
+ dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
+ DMA_TO_DEVICE);
+
+ tx_buffer = &tx_ring->tx_buffer_info[i];
+ }
+
+ /* write last descriptor with RS and EOP bits */
+ cmd_type |= size | WX_TXD_EOP | WX_TXD_RS;
+ tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+
+ netdev_tx_sent_queue(wx_txring_txq(tx_ring), first->bytecount);
+
+ skb_tx_timestamp(skb);
+
+ /* Force memory writes to complete before letting h/w know there
+ * are new descriptors to fetch. (Only applicable for weak-ordered
+ * memory model archs, such as IA-64).
+ *
+ * We also need this memory barrier to make certain all of the
+ * status bits have been updated before next_to_watch is written.
+ */
+ wmb();
+
+ /* set next_to_watch value indicating a packet is present */
+ first->next_to_watch = tx_desc;
+
+ i++;
+ if (i == tx_ring->count)
+ i = 0;
+
+ tx_ring->next_to_use = i;
+
+ wx_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+ if (netif_xmit_stopped(wx_txring_txq(tx_ring)) || !netdev_xmit_more())
+ writel(i, tx_ring->tail);
+
+ return;
+dma_error:
+ dev_err(tx_ring->dev, "TX DMA map failed\n");
+
+ /* clear dma mappings for failed tx_buffer_info map */
+ for (;;) {
+ tx_buffer = &tx_ring->tx_buffer_info[i];
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer, len, 0);
+ if (tx_buffer == first)
+ break;
+ if (i == 0)
+ i += tx_ring->count;
+ i--;
+ }
+
+ dev_kfree_skb_any(first->skb);
+ first->skb = NULL;
+
+ tx_ring->next_to_use = i;
+}
+
+static netdev_tx_t wx_xmit_frame_ring(struct sk_buff *skb,
+ struct wx_ring *tx_ring)
+{
+ u16 count = TXD_USE_COUNT(skb_headlen(skb));
+ struct wx_tx_buffer *first;
+ unsigned short f;
+
+ /* need: 1 descriptor per page * PAGE_SIZE/WX_MAX_DATA_PER_TXD,
+ * + 1 desc for skb_headlen/WX_MAX_DATA_PER_TXD,
+ * + 2 desc gap to keep tail from touching head,
+ * + 1 desc for context descriptor,
+ * otherwise try next time
+ */
+ for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+ count += TXD_USE_COUNT(skb_frag_size(&skb_shinfo(skb)->
+ frags[f]));
+
+ if (wx_maybe_stop_tx(tx_ring, count + 3))
+ return NETDEV_TX_BUSY;
+
+ /* record the location of the first descriptor for this packet */
+ first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+ first->skb = skb;
+ first->bytecount = skb->len;
+ first->gso_segs = 1;
+
+ wx_tx_map(tx_ring, first);
+
+ return NETDEV_TX_OK;
+}
+
+netdev_tx_t wx_xmit_frame(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ unsigned int r_idx = skb->queue_mapping;
+ struct wx *wx = netdev_priv(netdev);
+ struct wx_ring *tx_ring;
+
+ if (!netif_carrier_ok(netdev)) {
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+ /* The minimum packet size for olinfo paylen is 17 so pad the skb
+ * in order to meet this minimum size requirement.
+ */
+ if (skb_put_padto(skb, 17))
+ return NETDEV_TX_OK;
+
+ if (r_idx >= wx->num_tx_queues)
+ r_idx = r_idx % wx->num_tx_queues;
+ tx_ring = wx->tx_ring[r_idx];
+
+ return wx_xmit_frame_ring(skb, tx_ring);
+}
+EXPORT_SYMBOL(wx_xmit_frame);
+
+void wx_napi_enable_all(struct wx *wx)
+{
+ struct wx_q_vector *q_vector;
+ int q_idx;
+
+ for (q_idx = 0; q_idx < wx->num_q_vectors; q_idx++) {
+ q_vector = wx->q_vector[q_idx];
+ napi_enable(&q_vector->napi);
+ }
+}
+EXPORT_SYMBOL(wx_napi_enable_all);
+
+void wx_napi_disable_all(struct wx *wx)
+{
+ struct wx_q_vector *q_vector;
+ int q_idx;
+
+ for (q_idx = 0; q_idx < wx->num_q_vectors; q_idx++) {
+ q_vector = wx->q_vector[q_idx];
+ napi_disable(&q_vector->napi);
+ }
+}
+EXPORT_SYMBOL(wx_napi_disable_all);
+
+/**
+ * wx_set_rss_queues: Allocate queues for RSS
+ * @wx: board private structure to initialize
+ *
+ * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try
+ * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
+ *
+ **/
+static void wx_set_rss_queues(struct wx *wx)
+{
+ wx->num_rx_queues = wx->mac.max_rx_queues;
+ wx->num_tx_queues = wx->mac.max_tx_queues;
+}
+
+static void wx_set_num_queues(struct wx *wx)
+{
+ /* Start with base case */
+ wx->num_rx_queues = 1;
+ wx->num_tx_queues = 1;
+ wx->queues_per_pool = 1;
+
+ wx_set_rss_queues(wx);
+}
+
+/**
+ * wx_acquire_msix_vectors - acquire MSI-X vectors
+ * @wx: board private structure
+ *
+ * Attempts to acquire a suitable range of MSI-X vector interrupts. Will
+ * return a negative error code if unable to acquire MSI-X vectors for any
+ * reason.
+ */
+static int wx_acquire_msix_vectors(struct wx *wx)
+{
+ struct irq_affinity affd = {0, };
+ int nvecs, i;
+
+ nvecs = min_t(int, num_online_cpus(), wx->mac.max_msix_vectors);
+
+ wx->msix_entries = kcalloc(nvecs,
+ sizeof(struct msix_entry),
+ GFP_KERNEL);
+ if (!wx->msix_entries)
+ return -ENOMEM;
+
+ nvecs = pci_alloc_irq_vectors_affinity(wx->pdev, nvecs,
+ nvecs,
+ PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
+ &affd);
+ if (nvecs < 0) {
+ wx_err(wx, "Failed to allocate MSI-X interrupts. Err: %d\n", nvecs);
+ kfree(wx->msix_entries);
+ wx->msix_entries = NULL;
+ return nvecs;
+ }
+
+ for (i = 0; i < nvecs; i++) {
+ wx->msix_entries[i].entry = i;
+ wx->msix_entries[i].vector = pci_irq_vector(wx->pdev, i);
+ }
+
+ /* one for msix_other */
+ nvecs -= 1;
+ wx->num_q_vectors = nvecs;
+ wx->num_rx_queues = nvecs;
+ wx->num_tx_queues = nvecs;
+
+ return 0;
+}
+
+/**
+ * wx_set_interrupt_capability - set MSI-X or MSI if supported
+ * @wx: board private structure to initialize
+ *
+ * Attempt to configure the interrupts using the best available
+ * capabilities of the hardware and the kernel.
+ **/
+static int wx_set_interrupt_capability(struct wx *wx)
+{
+ struct pci_dev *pdev = wx->pdev;
+ int nvecs, ret;
+
+ /* We will try to get MSI-X interrupts first */
+ ret = wx_acquire_msix_vectors(wx);
+ if (ret == 0 || (ret == -ENOMEM))
+ return ret;
+
+ wx->num_rx_queues = 1;
+ wx->num_tx_queues = 1;
+ wx->num_q_vectors = 1;
+
+ /* minmum one for queue, one for misc*/
+ nvecs = 1;
+ nvecs = pci_alloc_irq_vectors(pdev, nvecs,
+ nvecs, PCI_IRQ_MSI | PCI_IRQ_LEGACY);
+ if (nvecs == 1) {
+ if (pdev->msi_enabled)
+ wx_err(wx, "Fallback to MSI.\n");
+ else
+ wx_err(wx, "Fallback to LEGACY.\n");
+ } else {
+ wx_err(wx, "Failed to allocate MSI/LEGACY interrupts. Error: %d\n", nvecs);
+ return nvecs;
+ }
+
+ pdev->irq = pci_irq_vector(pdev, 0);
+
+ return 0;
+}
+
+/**
+ * wx_cache_ring_rss - Descriptor ring to register mapping for RSS
+ * @wx: board private structure to initialize
+ *
+ * Cache the descriptor ring offsets for RSS, ATR, FCoE, and SR-IOV.
+ *
+ **/
+static void wx_cache_ring_rss(struct wx *wx)
+{
+ u16 i;
+
+ for (i = 0; i < wx->num_rx_queues; i++)
+ wx->rx_ring[i]->reg_idx = i;
+
+ for (i = 0; i < wx->num_tx_queues; i++)
+ wx->tx_ring[i]->reg_idx = i;
+}
+
+static void wx_add_ring(struct wx_ring *ring, struct wx_ring_container *head)
+{
+ ring->next = head->ring;
+ head->ring = ring;
+ head->count++;
+}
+
+/**
+ * wx_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @wx: board private structure to initialize
+ * @v_count: q_vectors allocated on wx, used for ring interleaving
+ * @v_idx: index of vector in wx struct
+ * @txr_count: total number of Tx rings to allocate
+ * @txr_idx: index of first Tx ring to allocate
+ * @rxr_count: total number of Rx rings to allocate
+ * @rxr_idx: index of first Rx ring to allocate
+ *
+ * We allocate one q_vector. If allocation fails we return -ENOMEM.
+ **/
+static int wx_alloc_q_vector(struct wx *wx,
+ unsigned int v_count, unsigned int v_idx,
+ unsigned int txr_count, unsigned int txr_idx,
+ unsigned int rxr_count, unsigned int rxr_idx)
+{
+ struct wx_q_vector *q_vector;
+ int ring_count, default_itr;
+ struct wx_ring *ring;
+
+ /* note this will allocate space for the ring structure as well! */
+ ring_count = txr_count + rxr_count;
+
+ q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
+ GFP_KERNEL);
+ if (!q_vector)
+ return -ENOMEM;
+
+ /* initialize NAPI */
+ netif_napi_add(wx->netdev, &q_vector->napi,
+ wx_poll);
+
+ /* tie q_vector and wx together */
+ wx->q_vector[v_idx] = q_vector;
+ q_vector->wx = wx;
+ q_vector->v_idx = v_idx;
+ if (cpu_online(v_idx))
+ q_vector->numa_node = cpu_to_node(v_idx);
+
+ /* initialize pointer to rings */
+ ring = q_vector->ring;
+
+ if (wx->mac.type == wx_mac_sp)
+ default_itr = WX_12K_ITR;
+ else
+ default_itr = WX_7K_ITR;
+ /* initialize ITR */
+ if (txr_count && !rxr_count)
+ /* tx only vector */
+ q_vector->itr = wx->tx_itr_setting ?
+ default_itr : wx->tx_itr_setting;
+ else
+ /* rx or rx/tx vector */
+ q_vector->itr = wx->rx_itr_setting ?
+ default_itr : wx->rx_itr_setting;
+
+ while (txr_count) {
+ /* assign generic ring traits */
+ ring->dev = &wx->pdev->dev;
+ ring->netdev = wx->netdev;
+
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Tx values */
+ wx_add_ring(ring, &q_vector->tx);
+
+ /* apply Tx specific ring traits */
+ ring->count = wx->tx_ring_count;
+
+ ring->queue_index = txr_idx;
+
+ /* assign ring to wx */
+ wx->tx_ring[txr_idx] = ring;
+
+ /* update count and index */
+ txr_count--;
+ txr_idx += v_count;
+
+ /* push pointer to next ring */
+ ring++;
+ }
+
+ while (rxr_count) {
+ /* assign generic ring traits */
+ ring->dev = &wx->pdev->dev;
+ ring->netdev = wx->netdev;
+
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Rx values */
+ wx_add_ring(ring, &q_vector->rx);
+
+ /* apply Rx specific ring traits */
+ ring->count = wx->rx_ring_count;
+ ring->queue_index = rxr_idx;
+
+ /* assign ring to wx */
+ wx->rx_ring[rxr_idx] = ring;
+
+ /* update count and index */
+ rxr_count--;
+ rxr_idx += v_count;
+
+ /* push pointer to next ring */
+ ring++;
+ }
+
+ return 0;
+}
+
+/**
+ * wx_free_q_vector - Free memory allocated for specific interrupt vector
+ * @wx: board private structure to initialize
+ * @v_idx: Index of vector to be freed
+ *
+ * This function frees the memory allocated to the q_vector. In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void wx_free_q_vector(struct wx *wx, int v_idx)
+{
+ struct wx_q_vector *q_vector = wx->q_vector[v_idx];
+ struct wx_ring *ring;
+
+ wx_for_each_ring(ring, q_vector->tx)
+ wx->tx_ring[ring->queue_index] = NULL;
+
+ wx_for_each_ring(ring, q_vector->rx)
+ wx->rx_ring[ring->queue_index] = NULL;
+
+ wx->q_vector[v_idx] = NULL;
+ netif_napi_del(&q_vector->napi);
+ kfree_rcu(q_vector, rcu);
+}
+
+/**
+ * wx_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @wx: board private structure to initialize
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ **/
+static int wx_alloc_q_vectors(struct wx *wx)
+{
+ unsigned int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+ unsigned int rxr_remaining = wx->num_rx_queues;
+ unsigned int txr_remaining = wx->num_tx_queues;
+ unsigned int q_vectors = wx->num_q_vectors;
+ int rqpv, tqpv;
+ int err;
+
+ for (; v_idx < q_vectors; v_idx++) {
+ rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
+ tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
+ err = wx_alloc_q_vector(wx, q_vectors, v_idx,
+ tqpv, txr_idx,
+ rqpv, rxr_idx);
+
+ if (err)
+ goto err_out;
+
+ /* update counts and index */
+ rxr_remaining -= rqpv;
+ txr_remaining -= tqpv;
+ rxr_idx++;
+ txr_idx++;
+ }
+
+ return 0;
+
+err_out:
+ wx->num_tx_queues = 0;
+ wx->num_rx_queues = 0;
+ wx->num_q_vectors = 0;
+
+ while (v_idx--)
+ wx_free_q_vector(wx, v_idx);
+
+ return -ENOMEM;
+}
+
+/**
+ * wx_free_q_vectors - Free memory allocated for interrupt vectors
+ * @wx: board private structure to initialize
+ *
+ * This function frees the memory allocated to the q_vectors. In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void wx_free_q_vectors(struct wx *wx)
+{
+ int v_idx = wx->num_q_vectors;
+
+ wx->num_tx_queues = 0;
+ wx->num_rx_queues = 0;
+ wx->num_q_vectors = 0;
+
+ while (v_idx--)
+ wx_free_q_vector(wx, v_idx);
+}
+
+void wx_reset_interrupt_capability(struct wx *wx)
+{
+ struct pci_dev *pdev = wx->pdev;
+
+ if (!pdev->msi_enabled && !pdev->msix_enabled)
+ return;
+
+ pci_free_irq_vectors(wx->pdev);
+ if (pdev->msix_enabled) {
+ kfree(wx->msix_entries);
+ wx->msix_entries = NULL;
+ }
+}
+EXPORT_SYMBOL(wx_reset_interrupt_capability);
+
+/**
+ * wx_clear_interrupt_scheme - Clear the current interrupt scheme settings
+ * @wx: board private structure to clear interrupt scheme on
+ *
+ * We go through and clear interrupt specific resources and reset the structure
+ * to pre-load conditions
+ **/
+void wx_clear_interrupt_scheme(struct wx *wx)
+{
+ wx_free_q_vectors(wx);
+ wx_reset_interrupt_capability(wx);
+}
+EXPORT_SYMBOL(wx_clear_interrupt_scheme);
+
+int wx_init_interrupt_scheme(struct wx *wx)
+{
+ int ret;
+
+ /* Number of supported queues */
+ wx_set_num_queues(wx);
+
+ /* Set interrupt mode */
+ ret = wx_set_interrupt_capability(wx);
+ if (ret) {
+ wx_err(wx, "Allocate irq vectors for failed.\n");
+ return ret;
+ }
+
+ /* Allocate memory for queues */
+ ret = wx_alloc_q_vectors(wx);
+ if (ret) {
+ wx_err(wx, "Unable to allocate memory for queue vectors.\n");
+ wx_reset_interrupt_capability(wx);
+ return ret;
+ }
+
+ wx_cache_ring_rss(wx);
+
+ return 0;
+}
+EXPORT_SYMBOL(wx_init_interrupt_scheme);
+
+irqreturn_t wx_msix_clean_rings(int __always_unused irq, void *data)
+{
+ struct wx_q_vector *q_vector = data;
+
+ /* EIAM disabled interrupts (on this vector) for us */
+ if (q_vector->rx.ring || q_vector->tx.ring)
+ napi_schedule_irqoff(&q_vector->napi);
+
+ return IRQ_HANDLED;
+}
+EXPORT_SYMBOL(wx_msix_clean_rings);
+
+void wx_free_irq(struct wx *wx)
+{
+ struct pci_dev *pdev = wx->pdev;
+ int vector;
+
+ if (!(pdev->msix_enabled)) {
+ free_irq(pdev->irq, wx);
+ return;
+ }
+
+ for (vector = 0; vector < wx->num_q_vectors; vector++) {
+ struct wx_q_vector *q_vector = wx->q_vector[vector];
+ struct msix_entry *entry = &wx->msix_entries[vector];
+
+ /* free only the irqs that were actually requested */
+ if (!q_vector->rx.ring && !q_vector->tx.ring)
+ continue;
+
+ free_irq(entry->vector, q_vector);
+ }
+
+ free_irq(wx->msix_entries[vector].vector, wx);
+}
+EXPORT_SYMBOL(wx_free_irq);
+
+/**
+ * wx_setup_isb_resources - allocate interrupt status resources
+ * @wx: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+int wx_setup_isb_resources(struct wx *wx)
+{
+ struct pci_dev *pdev = wx->pdev;
+
+ wx->isb_mem = dma_alloc_coherent(&pdev->dev,
+ sizeof(u32) * 4,
+ &wx->isb_dma,
+ GFP_KERNEL);
+ if (!wx->isb_mem) {
+ wx_err(wx, "Alloc isb_mem failed\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(wx_setup_isb_resources);
+
+/**
+ * wx_free_isb_resources - allocate all queues Rx resources
+ * @wx: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+void wx_free_isb_resources(struct wx *wx)
+{
+ struct pci_dev *pdev = wx->pdev;
+
+ dma_free_coherent(&pdev->dev, sizeof(u32) * 4,
+ wx->isb_mem, wx->isb_dma);
+ wx->isb_mem = NULL;
+}
+EXPORT_SYMBOL(wx_free_isb_resources);
+
+u32 wx_misc_isb(struct wx *wx, enum wx_isb_idx idx)
+{
+ u32 cur_tag = 0;
+
+ cur_tag = wx->isb_mem[WX_ISB_HEADER];
+ wx->isb_tag[idx] = cur_tag;
+
+ return (__force u32)cpu_to_le32(wx->isb_mem[idx]);
+}
+EXPORT_SYMBOL(wx_misc_isb);
+
+/**
+ * wx_set_ivar - set the IVAR registers, mapping interrupt causes to vectors
+ * @wx: pointer to wx struct
+ * @direction: 0 for Rx, 1 for Tx, -1 for other causes
+ * @queue: queue to map the corresponding interrupt to
+ * @msix_vector: the vector to map to the corresponding queue
+ *
+ **/
+static void wx_set_ivar(struct wx *wx, s8 direction,
+ u16 queue, u16 msix_vector)
+{
+ u32 ivar, index;
+
+ if (direction == -1) {
+ /* other causes */
+ msix_vector |= WX_PX_IVAR_ALLOC_VAL;
+ index = 0;
+ ivar = rd32(wx, WX_PX_MISC_IVAR);
+ ivar &= ~(0xFF << index);
+ ivar |= (msix_vector << index);
+ wr32(wx, WX_PX_MISC_IVAR, ivar);
+ } else {
+ /* tx or rx causes */
+ msix_vector |= WX_PX_IVAR_ALLOC_VAL;
+ index = ((16 * (queue & 1)) + (8 * direction));
+ ivar = rd32(wx, WX_PX_IVAR(queue >> 1));
+ ivar &= ~(0xFF << index);
+ ivar |= (msix_vector << index);
+ wr32(wx, WX_PX_IVAR(queue >> 1), ivar);
+ }
+}
+
+/**
+ * wx_write_eitr - write EITR register in hardware specific way
+ * @q_vector: structure containing interrupt and ring information
+ *
+ * This function is made to be called by ethtool and by the driver
+ * when it needs to update EITR registers at runtime. Hardware
+ * specific quirks/differences are taken care of here.
+ */
+static void wx_write_eitr(struct wx_q_vector *q_vector)
+{
+ struct wx *wx = q_vector->wx;
+ int v_idx = q_vector->v_idx;
+ u32 itr_reg;
+
+ if (wx->mac.type == wx_mac_sp)
+ itr_reg = q_vector->itr & WX_SP_MAX_EITR;
+ else
+ itr_reg = q_vector->itr & WX_EM_MAX_EITR;
+
+ itr_reg |= WX_PX_ITR_CNT_WDIS;
+
+ wr32(wx, WX_PX_ITR(v_idx), itr_reg);
+}
+
+/**
+ * wx_configure_vectors - Configure vectors for hardware
+ * @wx: board private structure
+ *
+ * wx_configure_vectors sets up the hardware to properly generate MSI-X/MSI/LEGACY
+ * interrupts.
+ **/
+void wx_configure_vectors(struct wx *wx)
+{
+ struct pci_dev *pdev = wx->pdev;
+ u32 eitrsel = 0;
+ u16 v_idx;
+
+ if (pdev->msix_enabled) {
+ /* Populate MSIX to EITR Select */
+ wr32(wx, WX_PX_ITRSEL, eitrsel);
+ /* use EIAM to auto-mask when MSI-X interrupt is asserted
+ * this saves a register write for every interrupt
+ */
+ wr32(wx, WX_PX_GPIE, WX_PX_GPIE_MODEL);
+ } else {
+ /* legacy interrupts, use EIAM to auto-mask when reading EICR,
+ * specifically only auto mask tx and rx interrupts.
+ */
+ wr32(wx, WX_PX_GPIE, 0);
+ }
+
+ /* Populate the IVAR table and set the ITR values to the
+ * corresponding register.
+ */
+ for (v_idx = 0; v_idx < wx->num_q_vectors; v_idx++) {
+ struct wx_q_vector *q_vector = wx->q_vector[v_idx];
+ struct wx_ring *ring;
+
+ wx_for_each_ring(ring, q_vector->rx)
+ wx_set_ivar(wx, 0, ring->reg_idx, v_idx);
+
+ wx_for_each_ring(ring, q_vector->tx)
+ wx_set_ivar(wx, 1, ring->reg_idx, v_idx);
+
+ wx_write_eitr(q_vector);
+ }
+
+ wx_set_ivar(wx, -1, 0, v_idx);
+ if (pdev->msix_enabled)
+ wr32(wx, WX_PX_ITR(v_idx), 1950);
+}
+EXPORT_SYMBOL(wx_configure_vectors);
+
+/**
+ * wx_clean_rx_ring - Free Rx Buffers per Queue
+ * @rx_ring: ring to free buffers from
+ **/
+static void wx_clean_rx_ring(struct wx_ring *rx_ring)
+{
+ struct wx_rx_buffer *rx_buffer;
+ u16 i = rx_ring->next_to_clean;
+
+ rx_buffer = &rx_ring->rx_buffer_info[i];
+
+ /* Free all the Rx ring sk_buffs */
+ while (i != rx_ring->next_to_alloc) {
+ if (rx_buffer->skb) {
+ struct sk_buff *skb = rx_buffer->skb;
+
+ if (WX_CB(skb)->page_released)
+ page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false);
+
+ dev_kfree_skb(skb);
+ }
+
+ /* Invalidate cache lines that may have been written to by
+ * device so that we avoid corrupting memory.
+ */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->dma,
+ rx_buffer->page_offset,
+ WX_RX_BUFSZ,
+ DMA_FROM_DEVICE);
+
+ /* free resources associated with mapping */
+ page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false);
+ __page_frag_cache_drain(rx_buffer->page,
+ rx_buffer->pagecnt_bias);
+
+ i++;
+ rx_buffer++;
+ if (i == rx_ring->count) {
+ i = 0;
+ rx_buffer = rx_ring->rx_buffer_info;
+ }
+ }
+
+ rx_ring->next_to_alloc = 0;
+ rx_ring->next_to_clean = 0;
+ rx_ring->next_to_use = 0;
+}
+
+/**
+ * wx_clean_all_rx_rings - Free Rx Buffers for all queues
+ * @wx: board private structure
+ **/
+void wx_clean_all_rx_rings(struct wx *wx)
+{
+ int i;
+
+ for (i = 0; i < wx->num_rx_queues; i++)
+ wx_clean_rx_ring(wx->rx_ring[i]);
+}
+EXPORT_SYMBOL(wx_clean_all_rx_rings);
+
+/**
+ * wx_free_rx_resources - Free Rx Resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ **/
+static void wx_free_rx_resources(struct wx_ring *rx_ring)
+{
+ wx_clean_rx_ring(rx_ring);
+ kvfree(rx_ring->rx_buffer_info);
+ rx_ring->rx_buffer_info = NULL;
+
+ /* if not set, then don't free */
+ if (!rx_ring->desc)
+ return;
+
+ dma_free_coherent(rx_ring->dev, rx_ring->size,
+ rx_ring->desc, rx_ring->dma);
+
+ rx_ring->desc = NULL;
+
+ if (rx_ring->page_pool) {
+ page_pool_destroy(rx_ring->page_pool);
+ rx_ring->page_pool = NULL;
+ }
+}
+
+/**
+ * wx_free_all_rx_resources - Free Rx Resources for All Queues
+ * @wx: pointer to hardware structure
+ *
+ * Free all receive software resources
+ **/
+static void wx_free_all_rx_resources(struct wx *wx)
+{
+ int i;
+
+ for (i = 0; i < wx->num_rx_queues; i++)
+ wx_free_rx_resources(wx->rx_ring[i]);
+}
+
+/**
+ * wx_clean_tx_ring - Free Tx Buffers
+ * @tx_ring: ring to be cleaned
+ **/
+static void wx_clean_tx_ring(struct wx_ring *tx_ring)
+{
+ struct wx_tx_buffer *tx_buffer;
+ u16 i = tx_ring->next_to_clean;
+
+ tx_buffer = &tx_ring->tx_buffer_info[i];
+
+ while (i != tx_ring->next_to_use) {
+ union wx_tx_desc *eop_desc, *tx_desc;
+
+ /* Free all the Tx ring sk_buffs */
+ dev_kfree_skb_any(tx_buffer->skb);
+
+ /* unmap skb header data */
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+
+ /* check for eop_desc to determine the end of the packet */
+ eop_desc = tx_buffer->next_to_watch;
+ tx_desc = WX_TX_DESC(tx_ring, i);
+
+ /* unmap remaining buffers */
+ while (tx_desc != eop_desc) {
+ tx_buffer++;
+ tx_desc++;
+ i++;
+ if (unlikely(i == tx_ring->count)) {
+ i = 0;
+ tx_buffer = tx_ring->tx_buffer_info;
+ tx_desc = WX_TX_DESC(tx_ring, 0);
+ }
+
+ /* unmap any remaining paged data */
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ }
+
+ /* move us one more past the eop_desc for start of next pkt */
+ tx_buffer++;
+ i++;
+ if (unlikely(i == tx_ring->count)) {
+ i = 0;
+ tx_buffer = tx_ring->tx_buffer_info;
+ }
+ }
+
+ netdev_tx_reset_queue(wx_txring_txq(tx_ring));
+
+ /* reset next_to_use and next_to_clean */
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
+}
+
+/**
+ * wx_clean_all_tx_rings - Free Tx Buffers for all queues
+ * @wx: board private structure
+ **/
+void wx_clean_all_tx_rings(struct wx *wx)
+{
+ int i;
+
+ for (i = 0; i < wx->num_tx_queues; i++)
+ wx_clean_tx_ring(wx->tx_ring[i]);
+}
+EXPORT_SYMBOL(wx_clean_all_tx_rings);
+
+/**
+ * wx_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ **/
+static void wx_free_tx_resources(struct wx_ring *tx_ring)
+{
+ wx_clean_tx_ring(tx_ring);
+ kvfree(tx_ring->tx_buffer_info);
+ tx_ring->tx_buffer_info = NULL;
+
+ /* if not set, then don't free */
+ if (!tx_ring->desc)
+ return;
+
+ dma_free_coherent(tx_ring->dev, tx_ring->size,
+ tx_ring->desc, tx_ring->dma);
+ tx_ring->desc = NULL;
+}
+
+/**
+ * wx_free_all_tx_resources - Free Tx Resources for All Queues
+ * @wx: pointer to hardware structure
+ *
+ * Free all transmit software resources
+ **/
+static void wx_free_all_tx_resources(struct wx *wx)
+{
+ int i;
+
+ for (i = 0; i < wx->num_tx_queues; i++)
+ wx_free_tx_resources(wx->tx_ring[i]);
+}
+
+void wx_free_resources(struct wx *wx)
+{
+ wx_free_isb_resources(wx);
+ wx_free_all_rx_resources(wx);
+ wx_free_all_tx_resources(wx);
+}
+EXPORT_SYMBOL(wx_free_resources);
+
+static int wx_alloc_page_pool(struct wx_ring *rx_ring)
+{
+ int ret = 0;
+
+ struct page_pool_params pp_params = {
+ .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+ .order = 0,
+ .pool_size = rx_ring->size,
+ .nid = dev_to_node(rx_ring->dev),
+ .dev = rx_ring->dev,
+ .dma_dir = DMA_FROM_DEVICE,
+ .offset = 0,
+ .max_len = PAGE_SIZE,
+ };
+
+ rx_ring->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(rx_ring->page_pool)) {
+ rx_ring->page_pool = NULL;
+ ret = PTR_ERR(rx_ring->page_pool);
+ }
+
+ return ret;
+}
+
+/**
+ * wx_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @rx_ring: rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int wx_setup_rx_resources(struct wx_ring *rx_ring)
+{
+ struct device *dev = rx_ring->dev;
+ int orig_node = dev_to_node(dev);
+ int numa_node = NUMA_NO_NODE;
+ int size, ret;
+
+ size = sizeof(struct wx_rx_buffer) * rx_ring->count;
+
+ if (rx_ring->q_vector)
+ numa_node = rx_ring->q_vector->numa_node;
+
+ rx_ring->rx_buffer_info = kvmalloc_node(size, GFP_KERNEL, numa_node);
+ if (!rx_ring->rx_buffer_info)
+ rx_ring->rx_buffer_info = kvmalloc(size, GFP_KERNEL);
+ if (!rx_ring->rx_buffer_info)
+ goto err;
+
+ /* Round up to nearest 4K */
+ rx_ring->size = rx_ring->count * sizeof(union wx_rx_desc);
+ rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+ set_dev_node(dev, numa_node);
+ rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+ &rx_ring->dma, GFP_KERNEL);
+ if (!rx_ring->desc) {
+ set_dev_node(dev, orig_node);
+ rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+ &rx_ring->dma, GFP_KERNEL);
+ }
+
+ if (!rx_ring->desc)
+ goto err;
+
+ rx_ring->next_to_clean = 0;
+ rx_ring->next_to_use = 0;
+
+ ret = wx_alloc_page_pool(rx_ring);
+ if (ret < 0) {
+ dev_err(rx_ring->dev, "Page pool creation failed: %d\n", ret);
+ goto err;
+ }
+
+ return 0;
+err:
+ kvfree(rx_ring->rx_buffer_info);
+ rx_ring->rx_buffer_info = NULL;
+ dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
+ return -ENOMEM;
+}
+
+/**
+ * wx_setup_all_rx_resources - allocate all queues Rx resources
+ * @wx: pointer to hardware structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not). It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int wx_setup_all_rx_resources(struct wx *wx)
+{
+ int i, err = 0;
+
+ for (i = 0; i < wx->num_rx_queues; i++) {
+ err = wx_setup_rx_resources(wx->rx_ring[i]);
+ if (!err)
+ continue;
+
+ wx_err(wx, "Allocation for Rx Queue %u failed\n", i);
+ goto err_setup_rx;
+ }
+
+ return 0;
+err_setup_rx:
+ /* rewind the index freeing the rings as we go */
+ while (i--)
+ wx_free_rx_resources(wx->rx_ring[i]);
+ return err;
+}
+
+/**
+ * wx_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @tx_ring: tx descriptor ring (for a specific queue) to setup
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int wx_setup_tx_resources(struct wx_ring *tx_ring)
+{
+ struct device *dev = tx_ring->dev;
+ int orig_node = dev_to_node(dev);
+ int numa_node = NUMA_NO_NODE;
+ int size;
+
+ size = sizeof(struct wx_tx_buffer) * tx_ring->count;
+
+ if (tx_ring->q_vector)
+ numa_node = tx_ring->q_vector->numa_node;
+
+ tx_ring->tx_buffer_info = kvmalloc_node(size, GFP_KERNEL, numa_node);
+ if (!tx_ring->tx_buffer_info)
+ tx_ring->tx_buffer_info = kvmalloc(size, GFP_KERNEL);
+ if (!tx_ring->tx_buffer_info)
+ goto err;
+
+ /* round up to nearest 4K */
+ tx_ring->size = tx_ring->count * sizeof(union wx_tx_desc);
+ tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+ set_dev_node(dev, numa_node);
+ tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+ &tx_ring->dma, GFP_KERNEL);
+ if (!tx_ring->desc) {
+ set_dev_node(dev, orig_node);
+ tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+ &tx_ring->dma, GFP_KERNEL);
+ }
+
+ if (!tx_ring->desc)
+ goto err;
+
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
+
+ return 0;
+
+err:
+ kvfree(tx_ring->tx_buffer_info);
+ tx_ring->tx_buffer_info = NULL;
+ dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n");
+ return -ENOMEM;
+}
+
+/**
+ * wx_setup_all_tx_resources - allocate all queues Tx resources
+ * @wx: pointer to private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not). It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int wx_setup_all_tx_resources(struct wx *wx)
+{
+ int i, err = 0;
+
+ for (i = 0; i < wx->num_tx_queues; i++) {
+ err = wx_setup_tx_resources(wx->tx_ring[i]);
+ if (!err)
+ continue;
+
+ wx_err(wx, "Allocation for Tx Queue %u failed\n", i);
+ goto err_setup_tx;
+ }
+
+ return 0;
+err_setup_tx:
+ /* rewind the index freeing the rings as we go */
+ while (i--)
+ wx_free_tx_resources(wx->tx_ring[i]);
+ return err;
+}
+
+int wx_setup_resources(struct wx *wx)
+{
+ int err;
+
+ /* allocate transmit descriptors */
+ err = wx_setup_all_tx_resources(wx);
+ if (err)
+ return err;
+
+ /* allocate receive descriptors */
+ err = wx_setup_all_rx_resources(wx);
+ if (err)
+ goto err_free_tx;
+
+ err = wx_setup_isb_resources(wx);
+ if (err)
+ goto err_free_rx;
+
+ return 0;
+
+err_free_rx:
+ wx_free_all_rx_resources(wx);
+err_free_tx:
+ wx_free_all_tx_resources(wx);
+
+ return err;
+}
+EXPORT_SYMBOL(wx_setup_resources);
+
+/**
+ * wx_get_stats64 - Get System Network Statistics
+ * @netdev: network interface device structure
+ * @stats: storage space for 64bit statistics
+ */
+void wx_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct wx *wx = netdev_priv(netdev);
+ int i;
+
+ rcu_read_lock();
+ for (i = 0; i < wx->num_rx_queues; i++) {
+ struct wx_ring *ring = READ_ONCE(wx->rx_ring[i]);
+ u64 bytes, packets;
+ unsigned int start;
+
+ if (ring) {
+ do {
+ start = u64_stats_fetch_begin(&ring->syncp);
+ packets = ring->stats.packets;
+ bytes = ring->stats.bytes;
+ } while (u64_stats_fetch_retry(&ring->syncp, start));
+ stats->rx_packets += packets;
+ stats->rx_bytes += bytes;
+ }
+ }
+
+ for (i = 0; i < wx->num_tx_queues; i++) {
+ struct wx_ring *ring = READ_ONCE(wx->tx_ring[i]);
+ u64 bytes, packets;
+ unsigned int start;
+
+ if (ring) {
+ do {
+ start = u64_stats_fetch_begin(&ring->syncp);
+ packets = ring->stats.packets;
+ bytes = ring->stats.bytes;
+ } while (u64_stats_fetch_retry(&ring->syncp,
+ start));
+ stats->tx_packets += packets;
+ stats->tx_bytes += bytes;
+ }
+ }
+
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(wx_get_stats64);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_lib.h
new file mode 100644
index 000000000000..50ee41f1fa10
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2019 - 2022 Beijing WangXun Technology Co., Ltd.
+ */
+
+#ifndef _WX_LIB_H_
+#define _WX_LIB_H_
+
+void wx_alloc_rx_buffers(struct wx_ring *rx_ring, u16 cleaned_count);
+u16 wx_desc_unused(struct wx_ring *ring);
+netdev_tx_t wx_xmit_frame(struct sk_buff *skb,
+ struct net_device *netdev);
+void wx_napi_enable_all(struct wx *wx);
+void wx_napi_disable_all(struct wx *wx);
+void wx_reset_interrupt_capability(struct wx *wx);
+void wx_clear_interrupt_scheme(struct wx *wx);
+int wx_init_interrupt_scheme(struct wx *wx);
+irqreturn_t wx_msix_clean_rings(int __always_unused irq, void *data);
+void wx_free_irq(struct wx *wx);
+int wx_setup_isb_resources(struct wx *wx);
+void wx_free_isb_resources(struct wx *wx);
+u32 wx_misc_isb(struct wx *wx, enum wx_isb_idx idx);
+void wx_configure_vectors(struct wx *wx);
+void wx_clean_all_rx_rings(struct wx *wx);
+void wx_clean_all_tx_rings(struct wx *wx);
+void wx_free_resources(struct wx *wx);
+int wx_setup_resources(struct wx *wx);
+void wx_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats);
+
+#endif /* _NGBE_LIB_H_ */
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index c86a37914d43..eede93d4120d 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -5,6 +5,7 @@
#define _WX_TYPE_H_
#include <linux/bitfield.h>
+#include <linux/netdevice.h>
/* Vendor ID */
#ifndef PCI_VENDOR_ID_WANGXUN
@@ -65,21 +66,50 @@
/* port cfg Registers */
#define WX_CFG_PORT_CTL 0x14400
#define WX_CFG_PORT_CTL_DRV_LOAD BIT(3)
+#define WX_CFG_PORT_CTL_QINQ BIT(2)
+#define WX_CFG_PORT_CTL_D_VLAN BIT(0) /* double vlan*/
+#define WX_CFG_TAG_TPID(_i) (0x14430 + ((_i) * 4))
+
+/* GPIO Registers */
+#define WX_GPIO_DR 0x14800
+#define WX_GPIO_DR_0 BIT(0) /* SDP0 Data Value */
+#define WX_GPIO_DR_1 BIT(1) /* SDP1 Data Value */
+#define WX_GPIO_DDR 0x14804
+#define WX_GPIO_DDR_0 BIT(0) /* SDP0 IO direction */
+#define WX_GPIO_DDR_1 BIT(1) /* SDP1 IO direction */
+#define WX_GPIO_CTL 0x14808
+#define WX_GPIO_INTEN 0x14830
+#define WX_GPIO_INTEN_0 BIT(0)
+#define WX_GPIO_INTEN_1 BIT(1)
+#define WX_GPIO_INTMASK 0x14834
+#define WX_GPIO_INTTYPE_LEVEL 0x14838
+#define WX_GPIO_POLARITY 0x1483C
+#define WX_GPIO_EOI 0x1484C
/*********************** Transmit DMA registers **************************/
/* transmit global control */
#define WX_TDM_CTL 0x18000
/* TDM CTL BIT */
#define WX_TDM_CTL_TE BIT(0) /* Transmit Enable */
+#define WX_TDM_PB_THRE(_i) (0x18020 + ((_i) * 4))
/***************************** RDB registers *********************************/
/* receive packet buffer */
#define WX_RDB_PB_CTL 0x19000
#define WX_RDB_PB_CTL_RXEN BIT(31) /* Enable Receiver */
#define WX_RDB_PB_CTL_DISABLED BIT(0)
+#define WX_RDB_PB_SZ(_i) (0x19020 + ((_i) * 4))
+#define WX_RDB_PB_SZ_SHIFT 10
/* statistic */
#define WX_RDB_PFCMACDAL 0x19210
#define WX_RDB_PFCMACDAH 0x19214
+/* ring assignment */
+#define WX_RDB_PL_CFG(_i) (0x19300 + ((_i) * 4))
+#define WX_RDB_PL_CFG_L4HDR BIT(1)
+#define WX_RDB_PL_CFG_L3HDR BIT(2)
+#define WX_RDB_PL_CFG_L2HDR BIT(3)
+#define WX_RDB_PL_CFG_TUN_TUNHDR BIT(4)
+#define WX_RDB_PL_CFG_TUN_OUTL2HDR BIT(5)
/******************************* PSR Registers *******************************/
/* psr control */
@@ -97,10 +127,24 @@
#define WX_PSR_CTL_MO_SHIFT 5
#define WX_PSR_CTL_MO (0x3 << WX_PSR_CTL_MO_SHIFT)
#define WX_PSR_CTL_TPE BIT(4)
+#define WX_PSR_MAX_SZ 0x15020
+#define WX_PSR_VLAN_CTL 0x15088
+#define WX_PSR_VLAN_CTL_CFIEN BIT(29) /* bit 29 */
+#define WX_PSR_VLAN_CTL_VFE BIT(30) /* bit 30 */
/* mcasst/ucast overflow tbl */
#define WX_PSR_MC_TBL(_i) (0x15200 + ((_i) * 4))
#define WX_PSR_UC_TBL(_i) (0x15400 + ((_i) * 4))
+/* VM L2 contorl */
+#define WX_PSR_VM_L2CTL(_i) (0x15600 + ((_i) * 4))
+#define WX_PSR_VM_L2CTL_UPE BIT(4) /* unicast promiscuous */
+#define WX_PSR_VM_L2CTL_VACC BIT(6) /* accept nomatched vlan */
+#define WX_PSR_VM_L2CTL_AUPE BIT(8) /* accept untagged packets */
+#define WX_PSR_VM_L2CTL_ROMPE BIT(9) /* accept packets in MTA tbl */
+#define WX_PSR_VM_L2CTL_ROPE BIT(10) /* accept packets in UC tbl */
+#define WX_PSR_VM_L2CTL_BAM BIT(11) /* accept broadcast packets */
+#define WX_PSR_VM_L2CTL_MPE BIT(12) /* multicast promiscuous */
+
/* Management */
#define WX_PSR_MNG_FLEX_SEL 0x1582C
#define WX_PSR_MNG_FLEX_DW_L(_i) (0x15A00 + ((_i) * 16))
@@ -122,6 +166,27 @@
#define WX_PSR_MAC_SWC_IDX 0x16210
#define WX_CLEAR_VMDQ_ALL 0xFFFFFFFFU
+/********************************* RSEC **************************************/
+/* general rsec */
+#define WX_RSC_CTL 0x17000
+#define WX_RSC_CTL_SAVE_MAC_ERR BIT(6)
+#define WX_RSC_CTL_CRC_STRIP BIT(2)
+#define WX_RSC_CTL_RX_DIS BIT(1)
+#define WX_RSC_ST 0x17004
+#define WX_RSC_ST_RSEC_RDY BIT(0)
+
+/****************************** TDB ******************************************/
+#define WX_TDB_PB_SZ(_i) (0x1CC00 + ((_i) * 4))
+#define WX_TXPKT_SIZE_MAX 0xA /* Max Tx Packet size */
+
+/****************************** TSEC *****************************************/
+/* Security Control Registers */
+#define WX_TSC_CTL 0x1D000
+#define WX_TSC_CTL_TX_DIS BIT(1)
+#define WX_TSC_CTL_TSEC_DIS BIT(0)
+#define WX_TSC_BUF_AE 0x1D00C
+#define WX_TSC_BUF_AE_THR GENMASK(9, 0)
+
/************************************** MNG ********************************/
#define WX_MNG_SWFW_SYNC 0x1E008
#define WX_MNG_SWFW_SYNC_SW_MB BIT(2)
@@ -135,6 +200,7 @@
#define WX_MAC_TX_CFG 0x11000
#define WX_MAC_TX_CFG_TE BIT(0)
#define WX_MAC_TX_CFG_SPEED_MASK GENMASK(30, 29)
+#define WX_MAC_TX_CFG_SPEED_10G FIELD_PREP(WX_MAC_TX_CFG_SPEED_MASK, 0)
#define WX_MAC_TX_CFG_SPEED_1G FIELD_PREP(WX_MAC_TX_CFG_SPEED_MASK, 3)
#define WX_MAC_RX_CFG 0x11004
#define WX_MAC_RX_CFG_RE BIT(0)
@@ -151,10 +217,34 @@
/* Interrupt Registers */
#define WX_BME_CTL 0x12020
#define WX_PX_MISC_IC 0x100
+#define WX_PX_MISC_ICS 0x104
+#define WX_PX_MISC_IEN 0x108
+#define WX_PX_INTA 0x110
+#define WX_PX_GPIE 0x118
+#define WX_PX_GPIE_MODEL BIT(0)
+#define WX_PX_IC 0x120
#define WX_PX_IMS(_i) (0x140 + (_i) * 4)
+#define WX_PX_IMC(_i) (0x150 + (_i) * 4)
+#define WX_PX_ISB_ADDR_L 0x160
+#define WX_PX_ISB_ADDR_H 0x164
#define WX_PX_TRANSACTION_PENDING 0x168
+#define WX_PX_ITRSEL 0x180
+#define WX_PX_ITR(_i) (0x200 + (_i) * 4)
+#define WX_PX_ITR_CNT_WDIS BIT(31)
+#define WX_PX_MISC_IVAR 0x4FC
+#define WX_PX_IVAR(_i) (0x500 + (_i) * 4)
+
+#define WX_PX_IVAR_ALLOC_VAL 0x80 /* Interrupt Allocation valid */
+#define WX_7K_ITR 595
+#define WX_12K_ITR 336
+#define WX_SP_MAX_EITR 0x00000FF8U
+#define WX_EM_MAX_EITR 0x00007FFCU
/* transmit DMA Registers */
+#define WX_PX_TR_BAL(_i) (0x03000 + ((_i) * 0x40))
+#define WX_PX_TR_BAH(_i) (0x03004 + ((_i) * 0x40))
+#define WX_PX_TR_WP(_i) (0x03008 + ((_i) * 0x40))
+#define WX_PX_TR_RP(_i) (0x0300C + ((_i) * 0x40))
#define WX_PX_TR_CFG(_i) (0x03010 + ((_i) * 0x40))
/* Transmit Config masks */
#define WX_PX_TR_CFG_ENABLE BIT(0) /* Ena specific Tx Queue */
@@ -164,8 +254,22 @@
#define WX_PX_TR_CFG_THRE_SHIFT 8
/* Receive DMA Registers */
+#define WX_PX_RR_BAL(_i) (0x01000 + ((_i) * 0x40))
+#define WX_PX_RR_BAH(_i) (0x01004 + ((_i) * 0x40))
+#define WX_PX_RR_WP(_i) (0x01008 + ((_i) * 0x40))
+#define WX_PX_RR_RP(_i) (0x0100C + ((_i) * 0x40))
#define WX_PX_RR_CFG(_i) (0x01010 + ((_i) * 0x40))
/* PX_RR_CFG bit definitions */
+#define WX_PX_RR_CFG_SPLIT_MODE BIT(26)
+#define WX_PX_RR_CFG_RR_THER_SHIFT 16
+#define WX_PX_RR_CFG_RR_HDR_SZ GENMASK(15, 12)
+#define WX_PX_RR_CFG_RR_BUF_SZ GENMASK(11, 8)
+#define WX_PX_RR_CFG_BHDRSIZE_SHIFT 6 /* 64byte resolution (>> 6)
+ * + at bit 8 offset (<< 12)
+ * = (<< 6)
+ */
+#define WX_PX_RR_CFG_BSIZEPKT_SHIFT 2 /* so many KBs */
+#define WX_PX_RR_CFG_RR_SIZE_SHIFT 1
#define WX_PX_RR_CFG_RR_EN BIT(0)
/* Number of 80 microseconds we wait for PCI Express master disable */
@@ -193,8 +297,46 @@
#define WX_MAC_STATE_MODIFIED 0x2
#define WX_MAC_STATE_IN_USE 0x4
+#define WX_MAX_RXD 8192
+#define WX_MAX_TXD 8192
+
+/* Supported Rx Buffer Sizes */
+#define WX_RXBUFFER_256 256 /* Used for skb receive header */
+#define WX_RXBUFFER_2K 2048
+#define WX_MAX_RXBUFFER 16384 /* largest size for single descriptor */
+
+#if MAX_SKB_FRAGS < 8
+#define WX_RX_BUFSZ ALIGN(WX_MAX_RXBUFFER / MAX_SKB_FRAGS, 1024)
+#else
+#define WX_RX_BUFSZ WX_RXBUFFER_2K
+#endif
+
+#define WX_RX_BUFFER_WRITE 16 /* Must be power of 2 */
+
+#define WX_MAX_DATA_PER_TXD BIT(14)
+/* Tx Descriptors needed, worst case */
+#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), WX_MAX_DATA_PER_TXD)
+#define DESC_NEEDED (MAX_SKB_FRAGS + 4)
+
+/* Ether Types */
+#define WX_ETH_P_CNM 0x22E7
+
#define WX_CFG_PORT_ST 0x14404
+/******************* Receive Descriptor bit definitions **********************/
+#define WX_RXD_STAT_DD BIT(0) /* Done */
+#define WX_RXD_STAT_EOP BIT(1) /* End of Packet */
+
+#define WX_RXD_ERR_RXE BIT(29) /* Any MAC Error */
+
+/*********************** Transmit Descriptor Config Masks ****************/
+#define WX_TXD_STAT_DD BIT(0) /* Descriptor Done */
+#define WX_TXD_DTYP_DATA 0 /* Adv Data Descriptor */
+#define WX_TXD_PAYLEN_SHIFT 13 /* Desc PAYLEN shift */
+#define WX_TXD_EOP BIT(24) /* End of Packet */
+#define WX_TXD_IFCS BIT(25) /* Insert FCS */
+#define WX_TXD_RS BIT(27) /* Report Status */
+
/* Host Interface Command Structures */
struct wx_hic_hdr {
u8 cmd;
@@ -270,9 +412,12 @@ struct wx_mac_info {
bool set_lben;
u8 addr[ETH_ALEN];
u8 perm_addr[ETH_ALEN];
+ u32 mta_shadow[128];
s32 mc_filter_type;
u32 mcft_size;
u32 num_rar_entries;
+ u32 rx_pb_size;
+ u32 tx_pb_size;
u32 max_tx_queues;
u32 max_rx_queues;
@@ -312,6 +457,161 @@ enum wx_reset_type {
WX_GLOBAL_RESET
};
+struct wx_cb {
+ dma_addr_t dma;
+ u16 append_cnt; /* number of skb's appended */
+ bool page_released;
+ bool dma_released;
+};
+
+#define WX_CB(skb) ((struct wx_cb *)(skb)->cb)
+
+/* Transmit Descriptor */
+union wx_tx_desc {
+ struct {
+ __le64 buffer_addr; /* Address of descriptor's data buf */
+ __le32 cmd_type_len;
+ __le32 olinfo_status;
+ } read;
+ struct {
+ __le64 rsvd; /* Reserved */
+ __le32 nxtseq_seed;
+ __le32 status;
+ } wb;
+};
+
+/* Receive Descriptor */
+union wx_rx_desc {
+ struct {
+ __le64 pkt_addr; /* Packet buffer address */
+ __le64 hdr_addr; /* Header buffer address */
+ } read;
+ struct {
+ struct {
+ union {
+ __le32 data;
+ struct {
+ __le16 pkt_info; /* RSS, Pkt type */
+ __le16 hdr_info; /* Splithdr, hdrlen */
+ } hs_rss;
+ } lo_dword;
+ union {
+ __le32 rss; /* RSS Hash */
+ struct {
+ __le16 ip_id; /* IP id */
+ __le16 csum; /* Packet Checksum */
+ } csum_ip;
+ } hi_dword;
+ } lower;
+ struct {
+ __le32 status_error; /* ext status/error */
+ __le16 length; /* Packet length */
+ __le16 vlan; /* VLAN tag */
+ } upper;
+ } wb; /* writeback */
+};
+
+#define WX_RX_DESC(R, i) \
+ (&(((union wx_rx_desc *)((R)->desc))[i]))
+#define WX_TX_DESC(R, i) \
+ (&(((union wx_tx_desc *)((R)->desc))[i]))
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer
+ */
+struct wx_tx_buffer {
+ union wx_tx_desc *next_to_watch;
+ struct sk_buff *skb;
+ unsigned int bytecount;
+ unsigned short gso_segs;
+ DEFINE_DMA_UNMAP_ADDR(dma);
+ DEFINE_DMA_UNMAP_LEN(len);
+};
+
+struct wx_rx_buffer {
+ struct sk_buff *skb;
+ dma_addr_t dma;
+ dma_addr_t page_dma;
+ struct page *page;
+ unsigned int page_offset;
+ u16 pagecnt_bias;
+};
+
+struct wx_queue_stats {
+ u64 packets;
+ u64 bytes;
+};
+
+/* iterator for handling rings in ring container */
+#define wx_for_each_ring(posm, headm) \
+ for (posm = (headm).ring; posm; posm = posm->next)
+
+struct wx_ring_container {
+ struct wx_ring *ring; /* pointer to linked list of rings */
+ unsigned int total_bytes; /* total bytes processed this int */
+ unsigned int total_packets; /* total packets processed this int */
+ u8 count; /* total number of rings in vector */
+ u8 itr; /* current ITR setting for ring */
+};
+
+struct wx_ring {
+ struct wx_ring *next; /* pointer to next ring in q_vector */
+ struct wx_q_vector *q_vector; /* backpointer to host q_vector */
+ struct net_device *netdev; /* netdev ring belongs to */
+ struct device *dev; /* device for DMA mapping */
+ struct page_pool *page_pool;
+ void *desc; /* descriptor ring memory */
+ union {
+ struct wx_tx_buffer *tx_buffer_info;
+ struct wx_rx_buffer *rx_buffer_info;
+ };
+ u8 __iomem *tail;
+ dma_addr_t dma; /* phys. address of descriptor ring */
+ unsigned int size; /* length in bytes */
+
+ u16 count; /* amount of descriptors */
+
+ u8 queue_index; /* needed for multiqueue queue management */
+ u8 reg_idx; /* holds the special value that gets
+ * the hardware register offset
+ * associated with this ring, which is
+ * different for DCB and RSS modes
+ */
+ u16 next_to_use;
+ u16 next_to_clean;
+ u16 next_to_alloc;
+
+ struct wx_queue_stats stats;
+ struct u64_stats_sync syncp;
+} ____cacheline_internodealigned_in_smp;
+
+struct wx_q_vector {
+ struct wx *wx;
+ int cpu; /* CPU for DCA */
+ int numa_node;
+ u16 v_idx; /* index of q_vector within array, also used for
+ * finding the bit in EICR and friends that
+ * represents the vector for this ring
+ */
+ u16 itr; /* Interrupt throttle rate written to EITR */
+ struct wx_ring_container rx, tx;
+ struct napi_struct napi;
+ struct rcu_head rcu; /* to avoid race with update stats on free */
+
+ char name[IFNAMSIZ + 17];
+
+ /* for dynamic allocation of rings associated with this q_vector */
+ struct wx_ring ring[0] ____cacheline_internodealigned_in_smp;
+};
+
+enum wx_isb_idx {
+ WX_ISB_HEADER,
+ WX_ISB_MISC,
+ WX_ISB_VEC0,
+ WX_ISB_VEC1,
+ WX_ISB_MAX
+};
+
struct wx {
u8 __iomem *hw_addr;
struct pci_dev *pdev;
@@ -331,6 +631,7 @@ struct wx {
u16 oem_svid;
u16 msg_enable;
bool adapter_stopped;
+ u16 tpid[8];
char eeprom_id[32];
enum wx_reset_type reset_type;
@@ -360,6 +661,18 @@ struct wx {
u32 tx_ring_count;
u32 rx_ring_count;
+ struct wx_ring *tx_ring[64] ____cacheline_aligned_in_smp;
+ struct wx_ring *rx_ring[64];
+ struct wx_q_vector *q_vector[64];
+
+ unsigned int queues_per_pool;
+ struct msix_entry *msix_entries;
+
+ /* misc interrupt status block */
+ dma_addr_t isb_dma;
+ u32 *isb_mem;
+ u32 isb_tag[WX_ISB_MAX];
+
#define WX_MAX_RETA_ENTRIES 128
u8 rss_indir_tbl[WX_MAX_RETA_ENTRIES];
@@ -371,6 +684,7 @@ struct wx {
};
#define WX_INTR_ALL (~0ULL)
+#define WX_INTR_Q(i) BIT(i)
/* register operations */
#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg)))
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index ed52f80b5475..f94d415daf3c 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -13,6 +13,7 @@
#include "../libwx/wx_type.h"
#include "../libwx/wx_hw.h"
+#include "../libwx/wx_lib.h"
#include "ngbe_type.h"
#include "ngbe_mdio.h"
#include "ngbe_hw.h"
@@ -112,6 +113,9 @@ static int ngbe_sw_init(struct wx *wx)
wx->mac.num_rar_entries = NGBE_RAR_ENTRIES;
wx->mac.max_rx_queues = NGBE_MAX_RX_QUEUES;
wx->mac.max_tx_queues = NGBE_MAX_TX_QUEUES;
+ wx->mac.mcft_size = NGBE_MC_TBL_SIZE;
+ wx->mac.rx_pb_size = NGBE_RX_PB_SIZE;
+ wx->mac.tx_pb_size = NGBE_TDB_PB_SZ;
/* PCI config space info */
err = wx_sw_init(wx);
@@ -148,27 +152,211 @@ static int ngbe_sw_init(struct wx *wx)
return 0;
}
+/**
+ * ngbe_irq_enable - Enable default interrupt generation settings
+ * @wx: board private structure
+ * @queues: enable all queues interrupts
+ **/
+static void ngbe_irq_enable(struct wx *wx, bool queues)
+{
+ u32 mask;
+
+ /* enable misc interrupt */
+ mask = NGBE_PX_MISC_IEN_MASK;
+
+ wr32(wx, WX_GPIO_DDR, WX_GPIO_DDR_0);
+ wr32(wx, WX_GPIO_INTEN, WX_GPIO_INTEN_0 | WX_GPIO_INTEN_1);
+ wr32(wx, WX_GPIO_INTTYPE_LEVEL, 0x0);
+ wr32(wx, WX_GPIO_POLARITY, wx->gpio_ctrl ? 0 : 0x3);
+
+ wr32(wx, WX_PX_MISC_IEN, mask);
+
+ /* mask interrupt */
+ if (queues)
+ wx_intr_enable(wx, NGBE_INTR_ALL);
+ else
+ wx_intr_enable(wx, NGBE_INTR_MISC(wx));
+}
+
+/**
+ * ngbe_intr - msi/legacy mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ **/
+static irqreturn_t ngbe_intr(int __always_unused irq, void *data)
+{
+ struct wx_q_vector *q_vector;
+ struct wx *wx = data;
+ struct pci_dev *pdev;
+ u32 eicr;
+
+ q_vector = wx->q_vector[0];
+ pdev = wx->pdev;
+
+ eicr = wx_misc_isb(wx, WX_ISB_VEC0);
+ if (!eicr) {
+ /* shared interrupt alert!
+ * the interrupt that we masked before the EICR read.
+ */
+ if (netif_running(wx->netdev))
+ ngbe_irq_enable(wx, true);
+ return IRQ_NONE; /* Not our interrupt */
+ }
+ wx->isb_mem[WX_ISB_VEC0] = 0;
+ if (!(pdev->msi_enabled))
+ wr32(wx, WX_PX_INTA, 1);
+
+ wx->isb_mem[WX_ISB_MISC] = 0;
+ /* would disable interrupts here but it is auto disabled */
+ napi_schedule_irqoff(&q_vector->napi);
+
+ if (netif_running(wx->netdev))
+ ngbe_irq_enable(wx, false);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ngbe_msix_other(int __always_unused irq, void *data)
+{
+ struct wx *wx = data;
+
+ /* re-enable the original interrupt state, no lsc, no queues */
+ if (netif_running(wx->netdev))
+ ngbe_irq_enable(wx, false);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * ngbe_request_msix_irqs - Initialize MSI-X interrupts
+ * @wx: board private structure
+ *
+ * ngbe_request_msix_irqs allocates MSI-X vectors and requests
+ * interrupts from the kernel.
+ **/
+static int ngbe_request_msix_irqs(struct wx *wx)
+{
+ struct net_device *netdev = wx->netdev;
+ int vector, err;
+
+ for (vector = 0; vector < wx->num_q_vectors; vector++) {
+ struct wx_q_vector *q_vector = wx->q_vector[vector];
+ struct msix_entry *entry = &wx->msix_entries[vector];
+
+ if (q_vector->tx.ring && q_vector->rx.ring)
+ snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+ "%s-TxRx-%d", netdev->name, entry->entry);
+ else
+ /* skip this unused q_vector */
+ continue;
+
+ err = request_irq(entry->vector, wx_msix_clean_rings, 0,
+ q_vector->name, q_vector);
+ if (err) {
+ wx_err(wx, "request_irq failed for MSIX interrupt %s Error: %d\n",
+ q_vector->name, err);
+ goto free_queue_irqs;
+ }
+ }
+
+ err = request_irq(wx->msix_entries[vector].vector,
+ ngbe_msix_other, 0, netdev->name, wx);
+
+ if (err) {
+ wx_err(wx, "request_irq for msix_other failed: %d\n", err);
+ goto free_queue_irqs;
+ }
+
+ return 0;
+
+free_queue_irqs:
+ while (vector) {
+ vector--;
+ free_irq(wx->msix_entries[vector].vector,
+ wx->q_vector[vector]);
+ }
+ wx_reset_interrupt_capability(wx);
+ return err;
+}
+
+/**
+ * ngbe_request_irq - initialize interrupts
+ * @wx: board private structure
+ *
+ * Attempts to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+static int ngbe_request_irq(struct wx *wx)
+{
+ struct net_device *netdev = wx->netdev;
+ struct pci_dev *pdev = wx->pdev;
+ int err;
+
+ if (pdev->msix_enabled)
+ err = ngbe_request_msix_irqs(wx);
+ else if (pdev->msi_enabled)
+ err = request_irq(pdev->irq, ngbe_intr, 0,
+ netdev->name, wx);
+ else
+ err = request_irq(pdev->irq, ngbe_intr, IRQF_SHARED,
+ netdev->name, wx);
+
+ if (err)
+ wx_err(wx, "request_irq failed, Error %d\n", err);
+
+ return err;
+}
+
static void ngbe_disable_device(struct wx *wx)
{
struct net_device *netdev = wx->netdev;
+ u32 i;
+ /* disable all enabled rx queues */
+ for (i = 0; i < wx->num_rx_queues; i++)
+ /* this call also flushes the previous write */
+ wx_disable_rx_queue(wx, wx->rx_ring[i]);
/* disable receives */
wx_disable_rx(wx);
+ wx_napi_disable_all(wx);
+ netif_tx_stop_all_queues(netdev);
netif_tx_disable(netdev);
if (wx->gpio_ctrl)
ngbe_sfp_modules_txrx_powerctl(wx, false);
+ wx_irq_disable(wx);
+ /* disable transmits in the hardware now that interrupts are off */
+ for (i = 0; i < wx->num_tx_queues; i++) {
+ u8 reg_idx = wx->tx_ring[i]->reg_idx;
+
+ wr32(wx, WX_PX_TR_CFG(reg_idx), WX_PX_TR_CFG_SWFLSH);
+ }
}
static void ngbe_down(struct wx *wx)
{
phy_stop(wx->phydev);
ngbe_disable_device(wx);
+ wx_clean_all_tx_rings(wx);
+ wx_clean_all_rx_rings(wx);
}
static void ngbe_up(struct wx *wx)
{
+ wx_configure_vectors(wx);
+
+ /* make sure to complete pre-operations */
+ smp_mb__before_atomic();
+ wx_napi_enable_all(wx);
+ /* enable transmits */
+ netif_tx_start_all_queues(wx->netdev);
+
+ /* clear any pending interrupts, may auto mask */
+ rd32(wx, WX_PX_IC);
+ rd32(wx, WX_PX_MISC_IC);
+ ngbe_irq_enable(wx, true);
if (wx->gpio_ctrl)
ngbe_sfp_modules_txrx_powerctl(wx, true);
+
phy_start(wx->phydev);
}
@@ -187,12 +375,39 @@ static int ngbe_open(struct net_device *netdev)
int err;
wx_control_hw(wx, true);
- err = ngbe_phy_connect(wx);
+
+ err = wx_setup_resources(wx);
if (err)
return err;
+
+ wx_configure(wx);
+
+ err = ngbe_request_irq(wx);
+ if (err)
+ goto err_free_resources;
+
+ err = ngbe_phy_connect(wx);
+ if (err)
+ goto err_free_irq;
+
+ err = netif_set_real_num_tx_queues(netdev, wx->num_tx_queues);
+ if (err)
+ goto err_dis_phy;
+
+ err = netif_set_real_num_rx_queues(netdev, wx->num_rx_queues);
+ if (err)
+ goto err_dis_phy;
+
ngbe_up(wx);
return 0;
+err_dis_phy:
+ phy_disconnect(wx->phydev);
+err_free_irq:
+ wx_free_irq(wx);
+err_free_resources:
+ wx_free_resources(wx);
+ return err;
}
/**
@@ -211,18 +426,14 @@ static int ngbe_close(struct net_device *netdev)
struct wx *wx = netdev_priv(netdev);
ngbe_down(wx);
+ wx_free_irq(wx);
+ wx_free_resources(wx);
phy_disconnect(wx->phydev);
wx_control_hw(wx, false);
return 0;
}
-static netdev_tx_t ngbe_xmit_frame(struct sk_buff *skb,
- struct net_device *netdev)
-{
- return NETDEV_TX_OK;
-}
-
static void ngbe_dev_shutdown(struct pci_dev *pdev, bool *enable_wake)
{
struct wx *wx = pci_get_drvdata(pdev);
@@ -258,9 +469,11 @@ static void ngbe_shutdown(struct pci_dev *pdev)
static const struct net_device_ops ngbe_netdev_ops = {
.ndo_open = ngbe_open,
.ndo_stop = ngbe_close,
- .ndo_start_xmit = ngbe_xmit_frame,
+ .ndo_start_xmit = wx_xmit_frame,
+ .ndo_set_rx_mode = wx_set_rx_mode,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = wx_set_mac,
+ .ndo_get_stats64 = wx_get_stats64,
};
/**
@@ -336,6 +549,17 @@ static int ngbe_probe(struct pci_dev *pdev,
netdev->netdev_ops = &ngbe_netdev_ops;
netdev->features |= NETIF_F_HIGHDMA;
+ netdev->features = NETIF_F_SG;
+
+ /* copy netdev features into list of user selectable features */
+ netdev->hw_features |= netdev->features |
+ NETIF_F_RXALL;
+
+ netdev->priv_flags |= IFF_UNICAST_FLT;
+ netdev->priv_flags |= IFF_SUPP_NOFCS;
+
+ netdev->min_mtu = ETH_MIN_MTU;
+ netdev->max_mtu = NGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);
wx->bd_number = func_nums;
/* setup the private structure */
@@ -411,10 +635,14 @@ static int ngbe_probe(struct pci_dev *pdev,
eth_hw_addr_set(netdev, wx->mac.perm_addr);
wx_mac_set_default_filter(wx, wx->mac.perm_addr);
+ err = wx_init_interrupt_scheme(wx);
+ if (err)
+ goto err_free_mac_table;
+
/* phy Interface Configuration */
err = ngbe_mdio_init(wx);
if (err)
- goto err_free_mac_table;
+ goto err_clear_interrupt_scheme;
err = register_netdev(netdev);
if (err)
@@ -431,6 +659,8 @@ static int ngbe_probe(struct pci_dev *pdev,
err_register:
wx_control_hw(wx, false);
+err_clear_interrupt_scheme:
+ wx_clear_interrupt_scheme(wx);
err_free_mac_table:
kfree(wx->mac_table);
err_pci_release_regions:
@@ -462,6 +692,7 @@ static void ngbe_remove(struct pci_dev *pdev)
pci_select_bars(pdev, IORESOURCE_MEM));
kfree(wx->mac_table);
+ wx_clear_interrupt_scheme(wx);
pci_disable_pcie_error_reporting(pdev);
pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
index fd71260f73de..a2351349785e 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
@@ -90,6 +90,20 @@ enum NGBE_MSCA_CMD_value {
#define NGBE_GPIO_DDR_0 BIT(0) /* SDP0 IO direction */
#define NGBE_GPIO_DDR_1 BIT(1) /* SDP1 IO direction */
+/* Extended Interrupt Enable Set */
+#define NGBE_PX_MISC_IEN_DEV_RST BIT(10)
+#define NGBE_PX_MISC_IEN_ETH_LK BIT(18)
+#define NGBE_PX_MISC_IEN_INT_ERR BIT(20)
+#define NGBE_PX_MISC_IEN_GPIO BIT(26)
+#define NGBE_PX_MISC_IEN_MASK ( \
+ NGBE_PX_MISC_IEN_DEV_RST | \
+ NGBE_PX_MISC_IEN_ETH_LK | \
+ NGBE_PX_MISC_IEN_INT_ERR | \
+ NGBE_PX_MISC_IEN_GPIO)
+
+#define NGBE_INTR_ALL 0x1FF
+#define NGBE_INTR_MISC(A) BIT((A)->num_q_vectors)
+
#define NGBE_PHY_CONFIG(reg_offset) (0x14000 + ((reg_offset) * 4))
#define NGBE_CFG_LAN_SPEED 0x14440
#define NGBE_CFG_PORT_ST 0x14404
@@ -120,6 +134,10 @@ enum NGBE_MSCA_CMD_value {
#define NGBE_ETH_LENGTH_OF_ADDRESS 6
#define NGBE_MAX_MSIX_VECTORS 0x09
#define NGBE_RAR_ENTRIES 32
+#define NGBE_RX_PB_SIZE 42
+#define NGBE_MC_TBL_SIZE 128
+#define NGBE_TDB_PB_SZ (20 * 1024) /* 160KB Packet Buffer */
+#define NGBE_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */
/* TX/RX descriptor defines */
#define NGBE_DEFAULT_TXD 512 /* default ring size */
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
index aa4d09df3b01..094df377726b 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -11,6 +11,7 @@
#include <net/ip.h>
#include "../libwx/wx_type.h"
+#include "../libwx/wx_lib.h"
#include "../libwx/wx_hw.h"
#include "txgbe_type.h"
#include "txgbe_hw.h"
@@ -72,9 +73,177 @@ static int txgbe_enumerate_functions(struct wx *wx)
return physfns;
}
+/**
+ * txgbe_irq_enable - Enable default interrupt generation settings
+ * @wx: pointer to private structure
+ * @queues: enable irqs for queues
+ **/
+static void txgbe_irq_enable(struct wx *wx, bool queues)
+{
+ /* unmask interrupt */
+ wx_intr_enable(wx, TXGBE_INTR_MISC(wx));
+ if (queues)
+ wx_intr_enable(wx, TXGBE_INTR_QALL(wx));
+}
+
+/**
+ * txgbe_intr - msi/legacy mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ **/
+static irqreturn_t txgbe_intr(int __always_unused irq, void *data)
+{
+ struct wx_q_vector *q_vector;
+ struct wx *wx = data;
+ struct pci_dev *pdev;
+ u32 eicr;
+
+ q_vector = wx->q_vector[0];
+ pdev = wx->pdev;
+
+ eicr = wx_misc_isb(wx, WX_ISB_VEC0);
+ if (!eicr) {
+ /* shared interrupt alert!
+ * the interrupt that we masked before the ICR read.
+ */
+ if (netif_running(wx->netdev))
+ txgbe_irq_enable(wx, true);
+ return IRQ_NONE; /* Not our interrupt */
+ }
+ wx->isb_mem[WX_ISB_VEC0] = 0;
+ if (!(pdev->msi_enabled))
+ wr32(wx, WX_PX_INTA, 1);
+
+ wx->isb_mem[WX_ISB_MISC] = 0;
+ /* would disable interrupts here but it is auto disabled */
+ napi_schedule_irqoff(&q_vector->napi);
+
+ /* re-enable link(maybe) and non-queue interrupts, no flush.
+ * txgbe_poll will re-enable the queue interrupts
+ */
+ if (netif_running(wx->netdev))
+ txgbe_irq_enable(wx, false);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t txgbe_msix_other(int __always_unused irq, void *data)
+{
+ struct wx *wx = data;
+
+ /* re-enable the original interrupt state */
+ if (netif_running(wx->netdev))
+ txgbe_irq_enable(wx, false);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * txgbe_request_msix_irqs - Initialize MSI-X interrupts
+ * @wx: board private structure
+ *
+ * Allocate MSI-X vectors and request interrupts from the kernel.
+ **/
+static int txgbe_request_msix_irqs(struct wx *wx)
+{
+ struct net_device *netdev = wx->netdev;
+ int vector, err;
+
+ for (vector = 0; vector < wx->num_q_vectors; vector++) {
+ struct wx_q_vector *q_vector = wx->q_vector[vector];
+ struct msix_entry *entry = &wx->msix_entries[vector];
+
+ if (q_vector->tx.ring && q_vector->rx.ring)
+ snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+ "%s-TxRx-%d", netdev->name, entry->entry);
+ else
+ /* skip this unused q_vector */
+ continue;
+
+ err = request_irq(entry->vector, wx_msix_clean_rings, 0,
+ q_vector->name, q_vector);
+ if (err) {
+ wx_err(wx, "request_irq failed for MSIX interrupt %s Error: %d\n",
+ q_vector->name, err);
+ goto free_queue_irqs;
+ }
+ }
+
+ err = request_irq(wx->msix_entries[vector].vector,
+ txgbe_msix_other, 0, netdev->name, wx);
+ if (err) {
+ wx_err(wx, "request_irq for msix_other failed: %d\n", err);
+ goto free_queue_irqs;
+ }
+
+ return 0;
+
+free_queue_irqs:
+ while (vector) {
+ vector--;
+ free_irq(wx->msix_entries[vector].vector,
+ wx->q_vector[vector]);
+ }
+ wx_reset_interrupt_capability(wx);
+ return err;
+}
+
+/**
+ * txgbe_request_irq - initialize interrupts
+ * @wx: board private structure
+ *
+ * Attempt to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+static int txgbe_request_irq(struct wx *wx)
+{
+ struct net_device *netdev = wx->netdev;
+ struct pci_dev *pdev = wx->pdev;
+ int err;
+
+ if (pdev->msix_enabled)
+ err = txgbe_request_msix_irqs(wx);
+ else if (pdev->msi_enabled)
+ err = request_irq(wx->pdev->irq, &txgbe_intr, 0,
+ netdev->name, wx);
+ else
+ err = request_irq(wx->pdev->irq, &txgbe_intr, IRQF_SHARED,
+ netdev->name, wx);
+
+ if (err)
+ wx_err(wx, "request_irq failed, Error %d\n", err);
+
+ return err;
+}
+
static void txgbe_up_complete(struct wx *wx)
{
+ u32 reg;
+
wx_control_hw(wx, true);
+ wx_configure_vectors(wx);
+
+ /* make sure to complete pre-operations */
+ smp_mb__before_atomic();
+ wx_napi_enable_all(wx);
+
+ /* clear any pending interrupts, may auto mask */
+ rd32(wx, WX_PX_IC);
+ rd32(wx, WX_PX_MISC_IC);
+ txgbe_irq_enable(wx, true);
+
+ /* Configure MAC Rx and Tx when link is up */
+ reg = rd32(wx, WX_MAC_RX_CFG);
+ wr32(wx, WX_MAC_RX_CFG, reg);
+ wr32(wx, WX_MAC_PKT_FLT, WX_MAC_PKT_FLT_PR);
+ reg = rd32(wx, WX_MAC_WDG_TIMEOUT);
+ wr32(wx, WX_MAC_WDG_TIMEOUT, reg);
+ reg = rd32(wx, WX_MAC_TX_CFG);
+ wr32(wx, WX_MAC_TX_CFG, (reg & ~WX_MAC_TX_CFG_SPEED_MASK) | WX_MAC_TX_CFG_SPEED_10G);
+
+ /* enable transmits */
+ netif_tx_start_all_queues(wx->netdev);
+ netif_carrier_on(wx->netdev);
}
static void txgbe_reset(struct wx *wx)
@@ -96,14 +265,24 @@ static void txgbe_reset(struct wx *wx)
static void txgbe_disable_device(struct wx *wx)
{
struct net_device *netdev = wx->netdev;
+ u32 i;
wx_disable_pcie_master(wx);
/* disable receives */
wx_disable_rx(wx);
+ /* disable all enabled rx queues */
+ for (i = 0; i < wx->num_rx_queues; i++)
+ /* this call also flushes the previous write */
+ wx_disable_rx_queue(wx, wx->rx_ring[i]);
+
+ netif_tx_stop_all_queues(netdev);
netif_carrier_off(netdev);
netif_tx_disable(netdev);
+ wx_irq_disable(wx);
+ wx_napi_disable_all(wx);
+
if (wx->bus.func < 2)
wr32m(wx, TXGBE_MIS_PRB_CTL, TXGBE_MIS_PRB_CTL_LAN_UP(wx->bus.func), 0);
else
@@ -116,6 +295,13 @@ static void txgbe_disable_device(struct wx *wx)
wr32m(wx, WX_MAC_TX_CFG, WX_MAC_TX_CFG_TE, 0);
}
+ /* disable transmits in the hardware now that interrupts are off */
+ for (i = 0; i < wx->num_tx_queues; i++) {
+ u8 reg_idx = wx->tx_ring[i]->reg_idx;
+
+ wr32(wx, WX_PX_TR_CFG(reg_idx), WX_PX_TR_CFG_SWFLSH);
+ }
+
/* Disable the Tx DMA engine */
wr32m(wx, WX_TDM_CTL, WX_TDM_CTL_TE, 0);
}
@@ -124,6 +310,9 @@ static void txgbe_down(struct wx *wx)
{
txgbe_disable_device(wx);
txgbe_reset(wx);
+
+ wx_clean_all_tx_rings(wx);
+ wx_clean_all_rx_rings(wx);
}
/**
@@ -132,12 +321,15 @@ static void txgbe_down(struct wx *wx)
**/
static int txgbe_sw_init(struct wx *wx)
{
+ u16 msix_count = 0;
int err;
wx->mac.num_rar_entries = TXGBE_SP_RAR_ENTRIES;
wx->mac.max_tx_queues = TXGBE_SP_MAX_TX_QUEUES;
wx->mac.max_rx_queues = TXGBE_SP_MAX_RX_QUEUES;
wx->mac.mcft_size = TXGBE_SP_MC_TBL_SIZE;
+ wx->mac.rx_pb_size = TXGBE_SP_RX_PB_SIZE;
+ wx->mac.tx_pb_size = TXGBE_SP_TDB_PB_SZ;
/* PCI config space info */
err = wx_sw_init(wx);
@@ -156,6 +348,25 @@ static int txgbe_sw_init(struct wx *wx)
break;
}
+ /* Set common capability flags and settings */
+ wx->max_q_vectors = TXGBE_MAX_MSIX_VECTORS;
+ err = wx_get_pcie_msix_counts(wx, &msix_count, TXGBE_MAX_MSIX_VECTORS);
+ if (err)
+ wx_err(wx, "Do not support MSI-X\n");
+ wx->mac.max_msix_vectors = msix_count;
+
+ /* enable itr by default in dynamic mode */
+ wx->rx_itr_setting = 1;
+ wx->tx_itr_setting = 1;
+
+ /* set default ring sizes */
+ wx->tx_ring_count = TXGBE_DEFAULT_TXD;
+ wx->rx_ring_count = TXGBE_DEFAULT_RXD;
+
+ /* set default work limits */
+ wx->tx_work_limit = TXGBE_DEFAULT_TX_WORK;
+ wx->rx_work_limit = TXGBE_DEFAULT_RX_WORK;
+
return 0;
}
@@ -171,10 +382,39 @@ static int txgbe_sw_init(struct wx *wx)
static int txgbe_open(struct net_device *netdev)
{
struct wx *wx = netdev_priv(netdev);
+ int err;
+
+ err = wx_setup_resources(wx);
+ if (err)
+ goto err_reset;
+
+ wx_configure(wx);
+
+ err = txgbe_request_irq(wx);
+ if (err)
+ goto err_free_isb;
+
+ /* Notify the stack of the actual queue counts. */
+ err = netif_set_real_num_tx_queues(netdev, wx->num_tx_queues);
+ if (err)
+ goto err_free_irq;
+
+ err = netif_set_real_num_rx_queues(netdev, wx->num_rx_queues);
+ if (err)
+ goto err_free_irq;
txgbe_up_complete(wx);
return 0;
+
+err_free_irq:
+ wx_free_irq(wx);
+err_free_isb:
+ wx_free_isb_resources(wx);
+err_reset:
+ txgbe_reset(wx);
+
+ return err;
}
/**
@@ -187,6 +427,7 @@ static int txgbe_open(struct net_device *netdev)
static void txgbe_close_suspend(struct wx *wx)
{
txgbe_disable_device(wx);
+ wx_free_resources(wx);
}
/**
@@ -205,6 +446,8 @@ static int txgbe_close(struct net_device *netdev)
struct wx *wx = netdev_priv(netdev);
txgbe_down(wx);
+ wx_free_irq(wx);
+ wx_free_resources(wx);
wx_control_hw(wx, false);
return 0;
@@ -240,18 +483,14 @@ static void txgbe_shutdown(struct pci_dev *pdev)
}
}
-static netdev_tx_t txgbe_xmit_frame(struct sk_buff *skb,
- struct net_device *netdev)
-{
- return NETDEV_TX_OK;
-}
-
static const struct net_device_ops txgbe_netdev_ops = {
.ndo_open = txgbe_open,
.ndo_stop = txgbe_close,
- .ndo_start_xmit = txgbe_xmit_frame,
+ .ndo_start_xmit = wx_xmit_frame,
+ .ndo_set_rx_mode = wx_set_rx_mode,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = wx_set_mac,
+ .ndo_get_stats64 = wx_get_stats64,
};
/**
@@ -354,6 +593,16 @@ static int txgbe_probe(struct pci_dev *pdev,
}
netdev->features |= NETIF_F_HIGHDMA;
+ netdev->features = NETIF_F_SG;
+
+ /* copy netdev features into list of user selectable features */
+ netdev->hw_features |= netdev->features | NETIF_F_RXALL;
+
+ netdev->priv_flags |= IFF_UNICAST_FLT;
+ netdev->priv_flags |= IFF_SUPP_NOFCS;
+
+ netdev->min_mtu = ETH_MIN_MTU;
+ netdev->max_mtu = TXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);
/* make sure the EEPROM is good */
err = txgbe_validate_eeprom_checksum(wx, NULL);
@@ -367,6 +616,10 @@ static int txgbe_probe(struct pci_dev *pdev,
eth_hw_addr_set(netdev, wx->mac.perm_addr);
wx_mac_set_default_filter(wx, wx->mac.perm_addr);
+ err = wx_init_interrupt_scheme(wx);
+ if (err)
+ goto err_free_mac_table;
+
/* Save off EEPROM version number and Option Rom version which
* together make a unique identify for the eeprom
*/
@@ -411,6 +664,8 @@ static int txgbe_probe(struct pci_dev *pdev,
pci_set_drvdata(pdev, wx);
+ netif_tx_stop_all_queues(netdev);
+
/* calculate the expected PCIe bandwidth required for optimal
* performance. Note that some older parts will never have enough
* bandwidth due to being older generation PCIe parts. We clamp these
@@ -435,6 +690,7 @@ static int txgbe_probe(struct pci_dev *pdev,
return 0;
err_release_hw:
+ wx_clear_interrupt_scheme(wx);
wx_control_hw(wx, false);
err_free_mac_table:
kfree(wx->mac_table);
@@ -468,6 +724,7 @@ static void txgbe_remove(struct pci_dev *pdev)
pci_select_bars(pdev, IORESOURCE_MEM));
kfree(wx->mac_table);
+ wx_clear_interrupt_scheme(wx);
pci_disable_pcie_error_reporting(pdev);
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
index cbd705a9f4bd..563ea51deca6 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
@@ -67,6 +67,7 @@
#define TXGBE_PBANUM1_PTR 0x06
#define TXGBE_PBANUM_PTR_GUARD 0xFAFA
+#define TXGBE_MAX_MSIX_VECTORS 64
#define TXGBE_MAX_FDIR_INDICES 63
#define TXGBE_MAX_RX_QUEUES (TXGBE_MAX_FDIR_INDICES + 1)
@@ -76,6 +77,26 @@
#define TXGBE_SP_MAX_RX_QUEUES 128
#define TXGBE_SP_RAR_ENTRIES 128
#define TXGBE_SP_MC_TBL_SIZE 128
+#define TXGBE_SP_RX_PB_SIZE 512
+#define TXGBE_SP_TDB_PB_SZ (160 * 1024) /* 160KB Packet Buffer */
+#define TXGBE_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */
+
+/* TX/RX descriptor defines */
+#define TXGBE_DEFAULT_TXD 512
+#define TXGBE_DEFAULT_TX_WORK 256
+
+#if (PAGE_SIZE < 8192)
+#define TXGBE_DEFAULT_RXD 512
+#define TXGBE_DEFAULT_RX_WORK 256
+#else
+#define TXGBE_DEFAULT_RXD 256
+#define TXGBE_DEFAULT_RX_WORK 128
+#endif
+
+#define TXGBE_INTR_MISC(A) BIT((A)->num_q_vectors)
+#define TXGBE_INTR_QALL(A) (TXGBE_INTR_MISC(A) - 1)
+
+#define TXGBE_MAX_EITR GENMASK(11, 3)
extern char txgbe_driver_name[];
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c6c62d0596bc..5256fdd55547 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2282,9 +2282,9 @@ static int virtnet_close(struct net_device *dev)
cancel_delayed_work_sync(&vi->refill);
for (i = 0; i < vi->max_queue_pairs; i++) {
+ virtnet_napi_tx_disable(&vi->sq[i].napi);
napi_disable(&vi->rq[i].napi);
xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
- virtnet_napi_tx_disable(&vi->sq[i].napi);
}
return 0;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1b6201bb04c1..7cf6a78fea07 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -9926,13 +9926,20 @@ struct mlx5_ifc_mpegc_reg_bits {
};
enum {
+ MLX5_MTUTC_FREQ_ADJ_UNITS_PPB = 0x0,
+ MLX5_MTUTC_FREQ_ADJ_UNITS_SCALED_PPM = 0x1,
+};
+
+enum {
MLX5_MTUTC_OPERATION_SET_TIME_IMMEDIATE = 0x1,
MLX5_MTUTC_OPERATION_ADJUST_TIME = 0x2,
MLX5_MTUTC_OPERATION_ADJUST_FREQ_UTC = 0x3,
};
struct mlx5_ifc_mtutc_reg_bits {
- u8 reserved_at_0[0x1c];
+ u8 reserved_at_0[0x5];
+ u8 freq_adj_units[0x3];
+ u8 reserved_at_8[0x14];
u8 operation[0x4];
u8 freq_adjustment[0x20];
@@ -10005,7 +10012,8 @@ struct mlx5_ifc_pcam_reg_bits {
};
struct mlx5_ifc_mcam_enhanced_features_bits {
- u8 reserved_at_0[0x51];
+ u8 reserved_at_0[0x50];
+ u8 mtutc_freq_adj_units[0x1];
u8 mtutc_time_adjustment_extended_range[0x1];
u8 reserved_at_52[0xb];
u8 mcia_32dwords[0x1];
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 2d3249eb0e62..2823f90fdab4 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -81,6 +81,9 @@
/********** net/core/page_pool.c **********/
#define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA)
+/********** net/core/skbuff.c **********/
+#define SKB_LIST_POISON_NEXT ((void *)(0x800 + POISON_POINTER_DELTA))
+
/********** kernel/bpf/ **********/
#define BPF_PTR_POISON ((void *)(0xeB9FUL + POISON_POINTER_DELTA))
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5ba12185f43e..1fa95b916342 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1738,6 +1738,13 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb)
skb->next = NULL;
}
+static inline void skb_poison_list(struct sk_buff *skb)
+{
+#ifdef CONFIG_DEBUG_NET
+ skb->next = SKB_LIST_POISON_NEXT;
+#endif
+}
+
/* Iterate through singly-linked GSO fragments of an skb. */
#define skb_list_walk_safe(first, skb, next_skb) \
for ((skb) = (first), (next_skb) = (skb) ? (skb)->next : NULL; (skb); \
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 813c93499f20..34bf531ffc8d 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -386,7 +386,7 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
static inline void page_pool_ring_lock(struct page_pool *pool)
__acquires(&pool->ring.producer_lock)
{
- if (in_serving_softirq())
+ if (in_softirq())
spin_lock(&pool->ring.producer_lock);
else
spin_lock_bh(&pool->ring.producer_lock);
@@ -395,7 +395,7 @@ static inline void page_pool_ring_lock(struct page_pool *pool)
static inline void page_pool_ring_unlock(struct page_pool *pool)
__releases(&pool->ring.producer_lock)
{
- if (in_serving_softirq())
+ if (in_softirq())
spin_unlock(&pool->ring.producer_lock);
else
spin_unlock_bh(&pool->ring.producer_lock);
diff --git a/include/net/raw.h b/include/net/raw.h
index 5e665934ebc7..2c004c20ed99 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -15,6 +15,8 @@
#include <net/inet_sock.h>
#include <net/protocol.h>
+#include <net/netns/hash.h>
+#include <linux/hash.h>
#include <linux/icmp.h>
extern struct proto raw_prot;
@@ -29,13 +31,20 @@ int raw_local_deliver(struct sk_buff *, int);
int raw_rcv(struct sock *, struct sk_buff *);
-#define RAW_HTABLE_SIZE MAX_INET_PROTOS
+#define RAW_HTABLE_LOG 8
+#define RAW_HTABLE_SIZE (1U << RAW_HTABLE_LOG)
struct raw_hashinfo {
spinlock_t lock;
- struct hlist_nulls_head ht[RAW_HTABLE_SIZE];
+
+ struct hlist_nulls_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned;
};
+static inline u32 raw_hashfunc(const struct net *net, u32 proto)
+{
+ return hash_32(net_hash_mix(net) ^ proto, RAW_HTABLE_LOG);
+}
+
static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo)
{
int i;
diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h
index 6b200059c2c5..a6b3a4e409f0 100644
--- a/include/trace/events/bridge.h
+++ b/include/trace/events/bridge.h
@@ -122,6 +122,64 @@ TRACE_EVENT(br_fdb_update,
__entry->flags)
);
+TRACE_EVENT(br_mdb_full,
+
+ TP_PROTO(const struct net_device *dev,
+ const struct br_ip *group),
+
+ TP_ARGS(dev, group),
+
+ TP_STRUCT__entry(
+ __string(dev, dev->name)
+ __field(int, af)
+ __field(u16, vid)
+ __array(__u8, src, 16)
+ __array(__u8, grp, 16)
+ __array(__u8, grpmac, ETH_ALEN) /* For af == 0. */
+ ),
+
+ TP_fast_assign(
+ struct in6_addr *in6;
+
+ __assign_str(dev, dev->name);
+ __entry->vid = group->vid;
+
+ if (!group->proto) {
+ __entry->af = 0;
+
+ memset(__entry->src, 0, sizeof(__entry->src));
+ memset(__entry->grp, 0, sizeof(__entry->grp));
+ memcpy(__entry->grpmac, group->dst.mac_addr, ETH_ALEN);
+ } else if (group->proto == htons(ETH_P_IP)) {
+ __entry->af = AF_INET;
+
+ in6 = (struct in6_addr *)__entry->src;
+ ipv6_addr_set_v4mapped(group->src.ip4, in6);
+
+ in6 = (struct in6_addr *)__entry->grp;
+ ipv6_addr_set_v4mapped(group->dst.ip4, in6);
+
+ memset(__entry->grpmac, 0, ETH_ALEN);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ __entry->af = AF_INET6;
+
+ in6 = (struct in6_addr *)__entry->src;
+ *in6 = group->src.ip6;
+
+ in6 = (struct in6_addr *)__entry->grp;
+ *in6 = group->dst.ip6;
+
+ memset(__entry->grpmac, 0, ETH_ALEN);
+#endif
+ }
+ ),
+
+ TP_printk("dev %s af %u src %pI6c grp %pI6c/%pM vid %u",
+ __get_str(dev), __entry->af, __entry->src, __entry->grp,
+ __entry->grpmac, __entry->vid)
+);
#endif /* _TRACE_BRIDGE_H */
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index d9de241d90f9..d60c456710b3 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -523,6 +523,8 @@ enum {
BRIDGE_VLANDB_ENTRY_TUNNEL_INFO,
BRIDGE_VLANDB_ENTRY_STATS,
BRIDGE_VLANDB_ENTRY_MCAST_ROUTER,
+ BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS,
+ BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS,
__BRIDGE_VLANDB_ENTRY_MAX,
};
#define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 02b87e4c65be..57ceb788250f 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -567,6 +567,8 @@ enum {
IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
IFLA_BRPORT_LOCKED,
IFLA_BRPORT_MAB,
+ IFLA_BRPORT_MCAST_N_GROUPS,
+ IFLA_BRPORT_MCAST_MAX_GROUPS,
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 00e5743647b0..9f22ebfdc518 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -849,11 +849,10 @@ static int br_mdb_add_group_sg(const struct br_mdb_config *cfg,
}
p = br_multicast_new_port_group(cfg->p, &cfg->group, *pp, flags, NULL,
- MCAST_INCLUDE, cfg->rt_protocol);
- if (unlikely(!p)) {
- NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new (S, G) port group");
+ MCAST_INCLUDE, cfg->rt_protocol, extack);
+ if (unlikely(!p))
return -ENOMEM;
- }
+
rcu_assign_pointer(*pp, p);
if (!(flags & MDB_PG_FLAGS_PERMANENT) && !cfg->src_entry)
mod_timer(&p->timer,
@@ -1075,11 +1074,10 @@ static int br_mdb_add_group_star_g(const struct br_mdb_config *cfg,
}
p = br_multicast_new_port_group(cfg->p, &cfg->group, *pp, flags, NULL,
- cfg->filter_mode, cfg->rt_protocol);
- if (unlikely(!p)) {
- NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new (*, G) port group");
+ cfg->filter_mode, cfg->rt_protocol,
+ extack);
+ if (unlikely(!p))
return -ENOMEM;
- }
err = br_mdb_add_group_srcs(cfg, p, brmctx, extack);
if (err)
@@ -1101,8 +1099,7 @@ static int br_mdb_add_group_star_g(const struct br_mdb_config *cfg,
return 0;
err_del_port_group:
- hlist_del_init(&p->mglist);
- kfree(p);
+ br_multicast_del_port_group(p);
return err;
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index dea1ee1bd095..96d1fc78dd39 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -31,6 +31,7 @@
#include <net/ip6_checksum.h>
#include <net/addrconf.h>
#endif
+#include <trace/events/bridge.h>
#include "br_private.h"
#include "br_private_mcast_eht.h"
@@ -234,6 +235,29 @@ out:
return pmctx;
}
+static struct net_bridge_mcast_port *
+br_multicast_port_vid_to_port_ctx(struct net_bridge_port *port, u16 vid)
+{
+ struct net_bridge_mcast_port *pmctx = NULL;
+ struct net_bridge_vlan *vlan;
+
+ lockdep_assert_held_once(&port->br->multicast_lock);
+
+ if (!br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
+ return NULL;
+
+ /* Take RCU to access the vlan. */
+ rcu_read_lock();
+
+ vlan = br_vlan_find(nbp_vlan_group_rcu(port), vid);
+ if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx))
+ pmctx = &vlan->port_mcast_ctx;
+
+ rcu_read_unlock();
+
+ return pmctx;
+}
+
/* when snooping we need to check if the contexts should be used
* in the following order:
* - if pmctx is non-NULL (port), check if it should be used
@@ -668,6 +692,101 @@ void br_multicast_del_group_src(struct net_bridge_group_src *src,
__br_multicast_del_group_src(src);
}
+static int
+br_multicast_port_ngroups_inc_one(struct net_bridge_mcast_port *pmctx,
+ struct netlink_ext_ack *extack,
+ const char *what)
+{
+ u32 max = READ_ONCE(pmctx->mdb_max_entries);
+ u32 n = READ_ONCE(pmctx->mdb_n_entries);
+
+ if (max && n >= max) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "%s is already in %u groups, and mcast_max_groups=%u",
+ what, n, max);
+ return -E2BIG;
+ }
+
+ WRITE_ONCE(pmctx->mdb_n_entries, n + 1);
+ return 0;
+}
+
+static void br_multicast_port_ngroups_dec_one(struct net_bridge_mcast_port *pmctx)
+{
+ u32 n = READ_ONCE(pmctx->mdb_n_entries);
+
+ WARN_ON_ONCE(n == 0);
+ WRITE_ONCE(pmctx->mdb_n_entries, n - 1);
+}
+
+static int br_multicast_port_ngroups_inc(struct net_bridge_port *port,
+ const struct br_ip *group,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_mcast_port *pmctx;
+ int err;
+
+ lockdep_assert_held_once(&port->br->multicast_lock);
+
+ /* Always count on the port context. */
+ err = br_multicast_port_ngroups_inc_one(&port->multicast_ctx, extack,
+ "Port");
+ if (err) {
+ trace_br_mdb_full(port->dev, group);
+ return err;
+ }
+
+ /* Only count on the VLAN context if VID is given, and if snooping on
+ * that VLAN is enabled.
+ */
+ if (!group->vid)
+ return 0;
+
+ pmctx = br_multicast_port_vid_to_port_ctx(port, group->vid);
+ if (!pmctx)
+ return 0;
+
+ err = br_multicast_port_ngroups_inc_one(pmctx, extack, "Port-VLAN");
+ if (err) {
+ trace_br_mdb_full(port->dev, group);
+ goto dec_one_out;
+ }
+
+ return 0;
+
+dec_one_out:
+ br_multicast_port_ngroups_dec_one(&port->multicast_ctx);
+ return err;
+}
+
+static void br_multicast_port_ngroups_dec(struct net_bridge_port *port, u16 vid)
+{
+ struct net_bridge_mcast_port *pmctx;
+
+ lockdep_assert_held_once(&port->br->multicast_lock);
+
+ if (vid) {
+ pmctx = br_multicast_port_vid_to_port_ctx(port, vid);
+ if (pmctx)
+ br_multicast_port_ngroups_dec_one(pmctx);
+ }
+ br_multicast_port_ngroups_dec_one(&port->multicast_ctx);
+}
+
+u32 br_multicast_ngroups_get(const struct net_bridge_mcast_port *pmctx)
+{
+ return READ_ONCE(pmctx->mdb_n_entries);
+}
+
+void br_multicast_ngroups_set_max(struct net_bridge_mcast_port *pmctx, u32 max)
+{
+ WRITE_ONCE(pmctx->mdb_max_entries, max);
+}
+
+u32 br_multicast_ngroups_get_max(const struct net_bridge_mcast_port *pmctx)
+{
+ return READ_ONCE(pmctx->mdb_max_entries);
+}
+
static void br_multicast_destroy_port_group(struct net_bridge_mcast_gc *gc)
{
struct net_bridge_port_group *pg;
@@ -702,6 +821,7 @@ void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
} else {
br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE);
}
+ br_multicast_port_ngroups_dec(pg->key.port, pg->key.addr.vid);
hlist_add_head(&pg->mcast_gc.gc_node, &br->mcast_gc_list);
queue_work(system_long_wq, &br->mcast_gc_work);
@@ -1165,6 +1285,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
return mp;
if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
+ trace_br_mdb_full(br->dev, group);
br_mc_disabled_update(br->dev, false, NULL);
br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
return ERR_PTR(-E2BIG);
@@ -1284,14 +1405,22 @@ struct net_bridge_port_group *br_multicast_new_port_group(
unsigned char flags,
const unsigned char *src,
u8 filter_mode,
- u8 rt_protocol)
+ u8 rt_protocol,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_port_group *p;
+ int err;
- p = kzalloc(sizeof(*p), GFP_ATOMIC);
- if (unlikely(!p))
+ err = br_multicast_port_ngroups_inc(port, group, extack);
+ if (err)
return NULL;
+ p = kzalloc(sizeof(*p), GFP_ATOMIC);
+ if (unlikely(!p)) {
+ NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new port group");
+ goto dec_out;
+ }
+
p->key.addr = *group;
p->key.port = port;
p->flags = flags;
@@ -1305,8 +1434,8 @@ struct net_bridge_port_group *br_multicast_new_port_group(
if (!br_multicast_is_star_g(group) &&
rhashtable_lookup_insert_fast(&port->br->sg_port_tbl, &p->rhnode,
br_sg_port_rht_params)) {
- kfree(p);
- return NULL;
+ NL_SET_ERR_MSG_MOD(extack, "Couldn't insert new port group");
+ goto free_out;
}
rcu_assign_pointer(p->next, next);
@@ -1320,6 +1449,25 @@ struct net_bridge_port_group *br_multicast_new_port_group(
eth_broadcast_addr(p->eth_addr);
return p;
+
+free_out:
+ kfree(p);
+dec_out:
+ br_multicast_port_ngroups_dec(port, group->vid);
+ return NULL;
+}
+
+void br_multicast_del_port_group(struct net_bridge_port_group *p)
+{
+ struct net_bridge_port *port = p->key.port;
+ __u16 vid = p->key.addr.vid;
+
+ hlist_del_init(&p->mglist);
+ if (!br_multicast_is_star_g(&p->key.addr))
+ rhashtable_remove_fast(&port->br->sg_port_tbl, &p->rhnode,
+ br_sg_port_rht_params);
+ kfree(p);
+ br_multicast_port_ngroups_dec(port, vid);
}
void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
@@ -1387,7 +1535,7 @@ __br_multicast_add_group(struct net_bridge_mcast *brmctx,
}
p = br_multicast_new_port_group(pmctx->port, group, *pp, 0, src,
- filter_mode, RTPROT_KERNEL);
+ filter_mode, RTPROT_KERNEL, NULL);
if (unlikely(!p)) {
p = ERR_PTR(-ENOMEM);
goto out;
@@ -1933,6 +2081,25 @@ static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx)
br_ip4_multicast_add_router(brmctx, pmctx);
br_ip6_multicast_add_router(brmctx, pmctx);
}
+
+ if (br_multicast_port_ctx_is_vlan(pmctx)) {
+ struct net_bridge_port_group *pg;
+ u32 n = 0;
+
+ /* The mcast_n_groups counter might be wrong. First,
+ * BR_VLFLAG_MCAST_ENABLED is toggled before temporary entries
+ * are flushed, thus mcast_n_groups after the toggle does not
+ * reflect the true values. And second, permanent entries added
+ * while BR_VLFLAG_MCAST_ENABLED was disabled, are not reflected
+ * either. Thus we have to refresh the counter.
+ */
+
+ hlist_for_each_entry(pg, &pmctx->port->mglist, mglist) {
+ if (pg->key.addr.vid == pmctx->vlan->vid)
+ n++;
+ }
+ WRITE_ONCE(pmctx->mdb_n_entries, n);
+ }
}
void br_multicast_enable_port(struct net_bridge_port *port)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 4316cc82ae17..9173e52b89e2 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -202,6 +202,8 @@ static inline size_t br_port_info_size(void)
+ nla_total_size_64bit(sizeof(u64)) /* IFLA_BRPORT_HOLD_TIMER */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */
+ + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_N_GROUPS */
+ + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_MAX_GROUPS */
#endif
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_RING_OPEN */
@@ -298,7 +300,11 @@ static int br_port_fill_attrs(struct sk_buff *skb,
nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
p->multicast_eht_hosts_limit) ||
nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
- p->multicast_eht_hosts_cnt))
+ p->multicast_eht_hosts_cnt) ||
+ nla_put_u32(skb, IFLA_BRPORT_MCAST_N_GROUPS,
+ br_multicast_ngroups_get(&p->multicast_ctx)) ||
+ nla_put_u32(skb, IFLA_BRPORT_MCAST_MAX_GROUPS,
+ br_multicast_ngroups_get_max(&p->multicast_ctx)))
return -EMSGSIZE;
#endif
@@ -858,6 +864,8 @@ static int br_afspec(struct net_bridge *br,
}
static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
+ [IFLA_BRPORT_UNSPEC] = { .strict_start_type =
+ IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT + 1 },
[IFLA_BRPORT_STATE] = { .type = NLA_U8 },
[IFLA_BRPORT_COST] = { .type = NLA_U32 },
[IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 },
@@ -881,6 +889,8 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_MAB] = { .type = NLA_U8 },
[IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 },
[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 },
+ [IFLA_BRPORT_MCAST_N_GROUPS] = { .type = NLA_REJECT },
+ [IFLA_BRPORT_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
};
/* Change the state of the port and notify spanning tree */
@@ -1015,6 +1025,13 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
if (err)
return err;
}
+
+ if (tb[IFLA_BRPORT_MCAST_MAX_GROUPS]) {
+ u32 max_groups;
+
+ max_groups = nla_get_u32(tb[IFLA_BRPORT_MCAST_MAX_GROUPS]);
+ br_multicast_ngroups_set_max(&p->multicast_ctx, max_groups);
+ }
#endif
if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) {
diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c
index 8914290c75d4..17abf092f7ca 100644
--- a/net/bridge/br_netlink_tunnel.c
+++ b/net/bridge/br_netlink_tunnel.c
@@ -188,6 +188,9 @@ initvars:
}
static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX + 1] = {
+ [IFLA_BRIDGE_VLAN_TUNNEL_UNSPEC] = {
+ .strict_start_type = IFLA_BRIDGE_VLAN_TUNNEL_FLAGS + 1
+ },
[IFLA_BRIDGE_VLAN_TUNNEL_ID] = { .type = NLA_U32 },
[IFLA_BRIDGE_VLAN_TUNNEL_VID] = { .type = NLA_U16 },
[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS] = { .type = NLA_U16 },
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 15ef7fd508ee..cef5f6ea850c 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -126,6 +126,8 @@ struct net_bridge_mcast_port {
struct hlist_node ip6_rlist;
#endif /* IS_ENABLED(CONFIG_IPV6) */
unsigned char multicast_router;
+ u32 mdb_n_entries;
+ u32 mdb_max_entries;
#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
};
@@ -956,7 +958,9 @@ br_multicast_new_port_group(struct net_bridge_port *port,
const struct br_ip *group,
struct net_bridge_port_group __rcu *next,
unsigned char flags, const unsigned char *src,
- u8 filter_mode, u8 rt_protocol);
+ u8 filter_mode, u8 rt_protocol,
+ struct netlink_ext_ack *extack);
+void br_multicast_del_port_group(struct net_bridge_port_group *p);
int br_mdb_hash_init(struct net_bridge *br);
void br_mdb_hash_fini(struct net_bridge *br);
void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp,
@@ -974,6 +978,9 @@ void br_multicast_uninit_stats(struct net_bridge *br);
void br_multicast_get_stats(const struct net_bridge *br,
const struct net_bridge_port *p,
struct br_mcast_stats *dest);
+u32 br_multicast_ngroups_get(const struct net_bridge_mcast_port *pmctx);
+void br_multicast_ngroups_set_max(struct net_bridge_mcast_port *pmctx, u32 max);
+u32 br_multicast_ngroups_get_max(const struct net_bridge_mcast_port *pmctx);
void br_mdb_init(void);
void br_mdb_uninit(void);
void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
@@ -1757,7 +1764,8 @@ static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid)
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr,
const struct net_bridge_vlan *range_end);
-bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v);
+bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v,
+ const struct net_bridge_port *p);
size_t br_vlan_opts_nl_size(void);
int br_vlan_process_options(const struct net_bridge *br,
const struct net_bridge_port *p,
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index bc75fa1e4666..8a3dbc09ba38 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1816,6 +1816,7 @@ out_err:
/* v_opts is used to dump the options which must be equal in the whole range */
static bool br_vlan_fill_vids(struct sk_buff *skb, u16 vid, u16 vid_range,
const struct net_bridge_vlan *v_opts,
+ const struct net_bridge_port *p,
u16 flags,
bool dump_stats)
{
@@ -1842,7 +1843,7 @@ static bool br_vlan_fill_vids(struct sk_buff *skb, u16 vid, u16 vid_range,
goto out_err;
if (v_opts) {
- if (!br_vlan_opts_fill(skb, v_opts))
+ if (!br_vlan_opts_fill(skb, v_opts, p))
goto out_err;
if (dump_stats && !br_vlan_stats_fill(skb, v_opts))
@@ -1925,7 +1926,7 @@ void br_vlan_notify(const struct net_bridge *br,
goto out_kfree;
}
- if (!br_vlan_fill_vids(skb, vid, vid_range, v, flags, false))
+ if (!br_vlan_fill_vids(skb, vid, vid_range, v, p, flags, false))
goto out_err;
nlmsg_end(skb, nlh);
@@ -2030,7 +2031,7 @@ static int br_vlan_dump_dev(const struct net_device *dev,
if (!br_vlan_fill_vids(skb, range_start->vid,
range_end->vid, range_start,
- vlan_flags, dump_stats)) {
+ p, vlan_flags, dump_stats)) {
err = -EMSGSIZE;
break;
}
@@ -2056,7 +2057,7 @@ update_end:
else if (!dump_global &&
!br_vlan_fill_vids(skb, range_start->vid,
range_end->vid, range_start,
- br_vlan_flags(range_start, pvid),
+ p, br_vlan_flags(range_start, pvid),
dump_stats))
err = -EMSGSIZE;
}
@@ -2131,6 +2132,8 @@ static const struct nla_policy br_vlan_db_policy[BRIDGE_VLANDB_ENTRY_MAX + 1] =
[BRIDGE_VLANDB_ENTRY_STATE] = { .type = NLA_U8 },
[BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { .type = NLA_NESTED },
[BRIDGE_VLANDB_ENTRY_MCAST_ROUTER] = { .type = NLA_U8 },
+ [BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS] = { .type = NLA_REJECT },
+ [BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
};
static int br_vlan_rtm_process_one(struct net_device *dev,
diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
index a2724d03278c..e378c2f3a9e2 100644
--- a/net/bridge/br_vlan_options.c
+++ b/net/bridge/br_vlan_options.c
@@ -48,7 +48,8 @@ bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr,
curr_mc_rtr == range_mc_rtr;
}
-bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v)
+bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v,
+ const struct net_bridge_port *p)
{
if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE, br_vlan_get_state(v)) ||
!__vlan_tun_put(skb, v))
@@ -58,6 +59,12 @@ bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v)
if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_MCAST_ROUTER,
br_vlan_multicast_router(v)))
return false;
+ if (p && !br_multicast_port_ctx_vlan_disabled(&v->port_mcast_ctx) &&
+ (nla_put_u32(skb, BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS,
+ br_multicast_ngroups_get(&v->port_mcast_ctx)) ||
+ nla_put_u32(skb, BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS,
+ br_multicast_ngroups_get_max(&v->port_mcast_ctx))))
+ return false;
#endif
return true;
@@ -70,6 +77,8 @@ size_t br_vlan_opts_nl_size(void)
+ nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_TINFO_ID */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_ENTRY_MCAST_ROUTER */
+ + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS */
+ + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS */
#endif
+ 0;
}
@@ -212,6 +221,22 @@ static int br_vlan_process_one_opts(const struct net_bridge *br,
return err;
*changed = true;
}
+ if (tb[BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS]) {
+ u32 val;
+
+ if (!p) {
+ NL_SET_ERR_MSG_MOD(extack, "Can't set mcast_max_groups for non-port vlans");
+ return -EINVAL;
+ }
+ if (br_multicast_port_ctx_vlan_disabled(&v->port_mcast_ctx)) {
+ NL_SET_ERR_MSG_MOD(extack, "Multicast snooping disabled on this VLAN");
+ return -EINVAL;
+ }
+
+ val = nla_get_u32(tb[BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS]);
+ br_multicast_ngroups_set_max(&v->port_mcast_ctx, val);
+ *changed = true;
+ }
#endif
return 0;
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index ee7006bbe49b..805b7385dd8d 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -41,6 +41,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add);
EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add);
EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete);
EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
+EXPORT_TRACEPOINT_SYMBOL_GPL(br_mdb_full);
#endif
#if IS_ENABLED(CONFIG_PAGE_POOL)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 9b203d8660e4..193c18799865 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -511,8 +511,8 @@ static void page_pool_return_page(struct page_pool *pool, struct page *page)
static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
{
int ret;
- /* BH protection not needed if current is serving softirq */
- if (in_serving_softirq())
+ /* BH protection not needed if current is softirq */
+ if (in_softirq())
ret = ptr_ring_produce(&pool->ring, page);
else
ret = ptr_ring_produce_bh(&pool->ring, page);
@@ -570,7 +570,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
page_pool_dma_sync_for_device(pool, page,
dma_sync_size);
- if (allow_direct && in_serving_softirq() &&
+ if (allow_direct && in_softirq() &&
page_pool_recycle_in_cache(page, pool))
return NULL;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b9f584955b77..5d8eb57867a9 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -58,7 +58,7 @@
#include "dev.h"
#define RTNL_MAX_TYPE 50
-#define RTNL_SLAVE_MAX_TYPE 40
+#define RTNL_SLAVE_MAX_TYPE 42
struct rtnl_link {
rtnl_doit_func doit;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 44a19805c355..624e9e4ec116 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1000,8 +1000,10 @@ kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason)
while (segs) {
struct sk_buff *next = segs->next;
- if (__kfree_skb_reason(segs, reason))
+ if (__kfree_skb_reason(segs, reason)) {
+ skb_poison_list(segs);
kfree_skb_add_bulk(segs, &sa, reason);
+ }
segs = next;
}
diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index 1b1eeac59cb3..daad4521c61e 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y := leftover.o core.o netlink.o
+obj-y := leftover.o core.o netlink.o dev.o
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
new file mode 100644
index 000000000000..78d824eda5ec
--- /dev/null
+++ b/net/devlink/dev.c
@@ -0,0 +1,1343 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include <net/genetlink.h>
+#include <net/sock.h>
+#include "devl_internal.h"
+
+struct devlink_info_req {
+ struct sk_buff *msg;
+ void (*version_cb)(const char *version_name,
+ enum devlink_info_version_type version_type,
+ void *version_cb_priv);
+ void *version_cb_priv;
+};
+
+struct devlink_reload_combination {
+ enum devlink_reload_action action;
+ enum devlink_reload_limit limit;
+};
+
+static const struct devlink_reload_combination devlink_reload_invalid_combinations[] = {
+ {
+ /* can't reinitialize driver with no down time */
+ .action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+ .limit = DEVLINK_RELOAD_LIMIT_NO_RESET,
+ },
+};
+
+static bool
+devlink_reload_combination_is_invalid(enum devlink_reload_action action,
+ enum devlink_reload_limit limit)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++)
+ if (devlink_reload_invalid_combinations[i].action == action &&
+ devlink_reload_invalid_combinations[i].limit == limit)
+ return true;
+ return false;
+}
+
+static bool
+devlink_reload_action_is_supported(struct devlink *devlink, enum devlink_reload_action action)
+{
+ return test_bit(action, &devlink->ops->reload_actions);
+}
+
+static bool
+devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_limit limit)
+{
+ return test_bit(limit, &devlink->ops->reload_limits);
+}
+
+static int devlink_reload_stat_put(struct sk_buff *msg,
+ enum devlink_reload_limit limit, u32 value)
+{
+ struct nlattr *reload_stats_entry;
+
+ reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY);
+ if (!reload_stats_entry)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
+ nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
+ goto nla_put_failure;
+ nla_nest_end(msg, reload_stats_entry);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, reload_stats_entry);
+ return -EMSGSIZE;
+}
+
+static int
+devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote)
+{
+ struct nlattr *reload_stats_attr, *act_info, *act_stats;
+ int i, j, stat_idx;
+ u32 value;
+
+ if (!is_remote)
+ reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
+ else
+ reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_REMOTE_RELOAD_STATS);
+
+ if (!reload_stats_attr)
+ return -EMSGSIZE;
+
+ for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
+ if ((!is_remote &&
+ !devlink_reload_action_is_supported(devlink, i)) ||
+ i == DEVLINK_RELOAD_ACTION_UNSPEC)
+ continue;
+ act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO);
+ if (!act_info)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i))
+ goto action_info_nest_cancel;
+ act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS);
+ if (!act_stats)
+ goto action_info_nest_cancel;
+
+ for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
+ /* Remote stats are shown even if not locally supported.
+ * Stats of actions with unspecified limit are shown
+ * though drivers don't need to register unspecified
+ * limit.
+ */
+ if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
+ !devlink_reload_limit_is_supported(devlink, j)) ||
+ devlink_reload_combination_is_invalid(i, j))
+ continue;
+
+ stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
+ if (!is_remote)
+ value = devlink->stats.reload_stats[stat_idx];
+ else
+ value = devlink->stats.remote_reload_stats[stat_idx];
+ if (devlink_reload_stat_put(msg, j, value))
+ goto action_stats_nest_cancel;
+ }
+ nla_nest_end(msg, act_stats);
+ nla_nest_end(msg, act_info);
+ }
+ nla_nest_end(msg, reload_stats_attr);
+ return 0;
+
+action_stats_nest_cancel:
+ nla_nest_cancel(msg, act_stats);
+action_info_nest_cancel:
+ nla_nest_cancel(msg, act_info);
+nla_put_failure:
+ nla_nest_cancel(msg, reload_stats_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
+{
+ struct nlattr *dev_stats;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
+ goto nla_put_failure;
+
+ dev_stats = nla_nest_start(msg, DEVLINK_ATTR_DEV_STATS);
+ if (!dev_stats)
+ goto nla_put_failure;
+
+ if (devlink_reload_stats_put(msg, devlink, false))
+ goto dev_stats_nest_cancel;
+ if (devlink_reload_stats_put(msg, devlink, true))
+ goto dev_stats_nest_cancel;
+
+ nla_nest_end(msg, dev_stats);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+dev_stats_nest_cancel:
+ nla_nest_cancel(msg, dev_stats);
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
+ WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_fill(msg, devlink, cmd, 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_cmd_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI);
+}
+
+const struct devlink_cmd devl_cmd_get = {
+ .dump_one = devlink_nl_cmd_get_dump_one,
+};
+
+static void devlink_reload_failed_set(struct devlink *devlink,
+ bool reload_failed)
+{
+ if (devlink->reload_failed == reload_failed)
+ return;
+ devlink->reload_failed = reload_failed;
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+bool devlink_is_reload_failed(const struct devlink *devlink)
+{
+ return devlink->reload_failed;
+}
+EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
+
+static void
+__devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats,
+ enum devlink_reload_limit limit, u32 actions_performed)
+{
+ unsigned long actions = actions_performed;
+ int stat_idx;
+ int action;
+
+ for_each_set_bit(action, &actions, __DEVLINK_RELOAD_ACTION_MAX) {
+ stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
+ reload_stats[stat_idx]++;
+ }
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+static void
+devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit limit,
+ u32 actions_performed)
+{
+ __devlink_reload_stats_update(devlink, devlink->stats.reload_stats, limit,
+ actions_performed);
+}
+
+/**
+ * devlink_remote_reload_actions_performed - Update devlink on reload actions
+ * performed which are not a direct result of devlink reload call.
+ *
+ * This should be called by a driver after performing reload actions in case it was not
+ * a result of devlink reload call. For example fw_activate was performed as a result
+ * of devlink reload triggered fw_activate on another host.
+ * The motivation for this function is to keep data on reload actions performed on this
+ * function whether it was done due to direct devlink reload call or not.
+ *
+ * @devlink: devlink
+ * @limit: reload limit
+ * @actions_performed: bitmask of actions performed
+ */
+void devlink_remote_reload_actions_performed(struct devlink *devlink,
+ enum devlink_reload_limit limit,
+ u32 actions_performed)
+{
+ if (WARN_ON(!actions_performed ||
+ actions_performed & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
+ actions_performed >= BIT(__DEVLINK_RELOAD_ACTION_MAX) ||
+ limit > DEVLINK_RELOAD_LIMIT_MAX))
+ return;
+
+ __devlink_reload_stats_update(devlink, devlink->stats.remote_reload_stats, limit,
+ actions_performed);
+}
+EXPORT_SYMBOL_GPL(devlink_remote_reload_actions_performed);
+
+static struct net *devlink_netns_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID];
+ struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD];
+ struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID];
+ struct net *net;
+
+ if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) {
+ NL_SET_ERR_MSG_MOD(info->extack, "multiple netns identifying attributes specified");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (netns_pid_attr) {
+ net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr));
+ } else if (netns_fd_attr) {
+ net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr));
+ } else if (netns_id_attr) {
+ net = get_net_ns_by_id(sock_net(skb->sk),
+ nla_get_u32(netns_id_attr));
+ if (!net)
+ net = ERR_PTR(-EINVAL);
+ } else {
+ WARN_ON(1);
+ net = ERR_PTR(-EINVAL);
+ }
+ if (IS_ERR(net)) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Unknown network namespace");
+ return ERR_PTR(-EINVAL);
+ }
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ put_net(net);
+ return ERR_PTR(-EPERM);
+ }
+ return net;
+}
+
+static void devlink_reload_netns_change(struct devlink *devlink,
+ struct net *curr_net,
+ struct net *dest_net)
+{
+ /* Userspace needs to be notified about devlink objects
+ * removed from original and entering new network namespace.
+ * The rest of the devlink objects are re-created during
+ * reload process so the notifications are generated separatelly.
+ */
+ devlink_notify_unregister(devlink);
+ move_netdevice_notifier_net(curr_net, dest_net,
+ &devlink->netdevice_nb);
+ write_pnet(&devlink->_net, dest_net);
+ devlink_notify_register(devlink);
+}
+
+int devlink_reload(struct devlink *devlink, struct net *dest_net,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
+ u32 *actions_performed, struct netlink_ext_ack *extack)
+{
+ u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+ struct net *curr_net;
+ int err;
+
+ memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
+ sizeof(remote_reload_stats));
+
+ err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack);
+ if (err)
+ return err;
+
+ curr_net = devlink_net(devlink);
+ if (dest_net && !net_eq(dest_net, curr_net))
+ devlink_reload_netns_change(devlink, curr_net, dest_net);
+
+ err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
+ devlink_reload_failed_set(devlink, !!err);
+ if (err)
+ return err;
+
+ WARN_ON(!(*actions_performed & BIT(action)));
+ /* Catch driver on updating the remote action within devlink reload */
+ WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats,
+ sizeof(remote_reload_stats)));
+ devlink_reload_stats_update(devlink, limit, *actions_performed);
+ return 0;
+}
+
+static int
+devlink_nl_reload_actions_performed_snd(struct devlink *devlink, u32 actions_performed,
+ enum devlink_command cmd, struct genl_info *info)
+{
+ struct sk_buff *msg;
+ void *hdr;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &devlink_nl_family, 0, cmd);
+ if (!hdr)
+ goto free_msg;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_bitfield32(msg, DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, actions_performed,
+ actions_performed))
+ goto nla_put_failure;
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_reply(msg, info);
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+free_msg:
+ nlmsg_free(msg);
+ return -EMSGSIZE;
+}
+
+int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ enum devlink_reload_action action;
+ enum devlink_reload_limit limit;
+ struct net *dest_net = NULL;
+ u32 actions_performed;
+ int err;
+
+ err = devlink_resources_validate(devlink, NULL, info);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
+ return err;
+ }
+
+ if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION])
+ action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]);
+ else
+ action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT;
+
+ if (!devlink_reload_action_is_supported(devlink, action)) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Requested reload action is not supported by the driver");
+ return -EOPNOTSUPP;
+ }
+
+ limit = DEVLINK_RELOAD_LIMIT_UNSPEC;
+ if (info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]) {
+ struct nla_bitfield32 limits;
+ u32 limits_selected;
+
+ limits = nla_get_bitfield32(info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]);
+ limits_selected = limits.value & limits.selector;
+ if (!limits_selected) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Invalid limit selected");
+ return -EINVAL;
+ }
+ for (limit = 0 ; limit <= DEVLINK_RELOAD_LIMIT_MAX ; limit++)
+ if (limits_selected & BIT(limit))
+ break;
+ /* UAPI enables multiselection, but currently it is not used */
+ if (limits_selected != BIT(limit)) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Multiselection of limit is not supported");
+ return -EOPNOTSUPP;
+ }
+ if (!devlink_reload_limit_is_supported(devlink, limit)) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Requested limit is not supported by the driver");
+ return -EOPNOTSUPP;
+ }
+ if (devlink_reload_combination_is_invalid(action, limit)) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Requested limit is invalid for this action");
+ return -EINVAL;
+ }
+ }
+ if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
+ info->attrs[DEVLINK_ATTR_NETNS_FD] ||
+ info->attrs[DEVLINK_ATTR_NETNS_ID]) {
+ dest_net = devlink_netns_get(skb, info);
+ if (IS_ERR(dest_net))
+ return PTR_ERR(dest_net);
+ }
+
+ err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack);
+
+ if (dest_net)
+ put_net(dest_net);
+
+ if (err)
+ return err;
+ /* For backward compatibility generate reply only if attributes used by user */
+ if (!info->attrs[DEVLINK_ATTR_RELOAD_ACTION] && !info->attrs[DEVLINK_ATTR_RELOAD_LIMITS])
+ return 0;
+
+ return devlink_nl_reload_actions_performed_snd(devlink, actions_performed,
+ DEVLINK_CMD_RELOAD, info);
+}
+
+bool devlink_reload_actions_valid(const struct devlink_ops *ops)
+{
+ const struct devlink_reload_combination *comb;
+ int i;
+
+ if (!devlink_reload_supported(ops)) {
+ if (WARN_ON(ops->reload_actions))
+ return false;
+ return true;
+ }
+
+ if (WARN_ON(!ops->reload_actions ||
+ ops->reload_actions & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
+ ops->reload_actions >= BIT(__DEVLINK_RELOAD_ACTION_MAX)))
+ return false;
+
+ if (WARN_ON(ops->reload_limits & BIT(DEVLINK_RELOAD_LIMIT_UNSPEC) ||
+ ops->reload_limits >= BIT(__DEVLINK_RELOAD_LIMIT_MAX)))
+ return false;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++) {
+ comb = &devlink_reload_invalid_combinations[i];
+ if (ops->reload_actions == BIT(comb->action) &&
+ ops->reload_limits == BIT(comb->limit))
+ return false;
+ }
+ return true;
+}
+
+static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
+{
+ const struct devlink_ops *ops = devlink->ops;
+ enum devlink_eswitch_encap_mode encap_mode;
+ u8 inline_mode;
+ void *hdr;
+ int err = 0;
+ u16 mode;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ err = devlink_nl_put_handle(msg, devlink);
+ if (err)
+ goto nla_put_failure;
+
+ if (ops->eswitch_mode_get) {
+ err = ops->eswitch_mode_get(devlink, &mode);
+ if (err)
+ goto nla_put_failure;
+ err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ if (ops->eswitch_inline_mode_get) {
+ err = ops->eswitch_inline_mode_get(devlink, &inline_mode);
+ if (err)
+ goto nla_put_failure;
+ err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE,
+ inline_mode);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ if (ops->eswitch_encap_mode_get) {
+ err = ops->eswitch_encap_mode_get(devlink, &encap_mode);
+ if (err)
+ goto nla_put_failure;
+ err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_GET,
+ info->snd_portid, info->snd_seq, 0);
+
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const struct devlink_ops *ops = devlink->ops;
+ enum devlink_eswitch_encap_mode encap_mode;
+ u8 inline_mode;
+ int err = 0;
+ u16 mode;
+
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) {
+ if (!ops->eswitch_mode_set)
+ return -EOPNOTSUPP;
+ mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+ err = devlink_rate_nodes_check(devlink, mode, info->extack);
+ if (err)
+ return err;
+ err = ops->eswitch_mode_set(devlink, mode, info->extack);
+ if (err)
+ return err;
+ }
+
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) {
+ if (!ops->eswitch_inline_mode_set)
+ return -EOPNOTSUPP;
+ inline_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]);
+ err = ops->eswitch_inline_mode_set(devlink, inline_mode,
+ info->extack);
+ if (err)
+ return err;
+ }
+
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) {
+ if (!ops->eswitch_encap_mode_set)
+ return -EOPNOTSUPP;
+ encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]);
+ err = ops->eswitch_encap_mode_set(devlink, encap_mode,
+ info->extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
+{
+ if (!req->msg)
+ return 0;
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn);
+}
+EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
+
+int devlink_info_board_serial_number_put(struct devlink_info_req *req,
+ const char *bsn)
+{
+ if (!req->msg)
+ return 0;
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER,
+ bsn);
+}
+EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put);
+
+static int devlink_info_version_put(struct devlink_info_req *req, int attr,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type)
+{
+ struct nlattr *nest;
+ int err;
+
+ if (req->version_cb)
+ req->version_cb(version_name, version_type,
+ req->version_cb_priv);
+
+ if (!req->msg)
+ return 0;
+
+ nest = nla_nest_start_noflag(req->msg, attr);
+ if (!nest)
+ return -EMSGSIZE;
+
+ err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_NAME,
+ version_name);
+ if (err)
+ goto nla_put_failure;
+
+ err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_VALUE,
+ version_value);
+ if (err)
+ goto nla_put_failure;
+
+ nla_nest_end(req->msg, nest);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(req->msg, nest);
+ return err;
+}
+
+int devlink_info_version_fixed_put(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED,
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put);
+
+int devlink_info_version_stored_put(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_stored_put);
+
+int devlink_info_version_stored_put_ext(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
+ version_name, version_value,
+ version_type);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_stored_put_ext);
+
+int devlink_info_version_running_put(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_running_put);
+
+int devlink_info_version_running_put_ext(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
+ version_name, version_value,
+ version_type);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext);
+
+static int devlink_nl_driver_info_get(struct device_driver *drv,
+ struct devlink_info_req *req)
+{
+ if (!drv)
+ return 0;
+
+ if (drv->name[0])
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME,
+ drv->name);
+
+ return 0;
+}
+
+static int
+devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags, struct netlink_ext_ack *extack)
+{
+ struct device *dev = devlink_to_dev(devlink);
+ struct devlink_info_req req = {};
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ err = -EMSGSIZE;
+ if (devlink_nl_put_handle(msg, devlink))
+ goto err_cancel_msg;
+
+ req.msg = msg;
+ if (devlink->ops->info_get) {
+ err = devlink->ops->info_get(devlink, &req, extack);
+ if (err)
+ goto err_cancel_msg;
+ }
+
+ err = devlink_nl_driver_info_get(dev->driver, &req);
+ if (err)
+ goto err_cancel_msg;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+err_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_cmd_info_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
+{
+ int err;
+
+ err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->extack);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+ return err;
+}
+
+const struct devlink_cmd devl_cmd_info_get = {
+ .dump_one = devlink_nl_cmd_info_get_dump_one,
+};
+
+static int devlink_nl_flash_update_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ enum devlink_command cmd,
+ struct devlink_flash_notify *params)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS)
+ goto out;
+
+ if (params->status_msg &&
+ nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG,
+ params->status_msg))
+ goto nla_put_failure;
+ if (params->component &&
+ nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT,
+ params->component))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,
+ params->done, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,
+ params->total, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,
+ params->timeout, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+out:
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void __devlink_flash_update_notify(struct devlink *devlink,
+ enum devlink_command cmd,
+ struct devlink_flash_notify *params)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE &&
+ cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
+ cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
+
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_flash_update_fill(msg, devlink, cmd, params);
+ if (err)
+ goto out_free_msg;
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ return;
+
+out_free_msg:
+ nlmsg_free(msg);
+}
+
+static void devlink_flash_update_begin_notify(struct devlink *devlink)
+{
+ struct devlink_flash_notify params = {};
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE,
+ &params);
+}
+
+static void devlink_flash_update_end_notify(struct devlink *devlink)
+{
+ struct devlink_flash_notify params = {};
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE_END,
+ &params);
+}
+
+void devlink_flash_update_status_notify(struct devlink *devlink,
+ const char *status_msg,
+ const char *component,
+ unsigned long done,
+ unsigned long total)
+{
+ struct devlink_flash_notify params = {
+ .status_msg = status_msg,
+ .component = component,
+ .done = done,
+ .total = total,
+ };
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE_STATUS,
+ &params);
+}
+EXPORT_SYMBOL_GPL(devlink_flash_update_status_notify);
+
+void devlink_flash_update_timeout_notify(struct devlink *devlink,
+ const char *status_msg,
+ const char *component,
+ unsigned long timeout)
+{
+ struct devlink_flash_notify params = {
+ .status_msg = status_msg,
+ .component = component,
+ .timeout = timeout,
+ };
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE_STATUS,
+ &params);
+}
+EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify);
+
+struct devlink_flash_component_lookup_ctx {
+ const char *lookup_name;
+ bool lookup_name_found;
+};
+
+static void
+devlink_flash_component_lookup_cb(const char *version_name,
+ enum devlink_info_version_type version_type,
+ void *version_cb_priv)
+{
+ struct devlink_flash_component_lookup_ctx *lookup_ctx = version_cb_priv;
+
+ if (version_type != DEVLINK_INFO_VERSION_TYPE_COMPONENT ||
+ lookup_ctx->lookup_name_found)
+ return;
+
+ lookup_ctx->lookup_name_found =
+ !strcmp(lookup_ctx->lookup_name, version_name);
+}
+
+static int devlink_flash_component_get(struct devlink *devlink,
+ struct nlattr *nla_component,
+ const char **p_component,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_flash_component_lookup_ctx lookup_ctx = {};
+ struct devlink_info_req req = {};
+ const char *component;
+ int ret;
+
+ if (!nla_component)
+ return 0;
+
+ component = nla_data(nla_component);
+
+ if (!devlink->ops->info_get) {
+ NL_SET_ERR_MSG_ATTR(extack, nla_component,
+ "component update is not supported by this device");
+ return -EOPNOTSUPP;
+ }
+
+ lookup_ctx.lookup_name = component;
+ req.version_cb = devlink_flash_component_lookup_cb;
+ req.version_cb_priv = &lookup_ctx;
+
+ ret = devlink->ops->info_get(devlink, &req, NULL);
+ if (ret)
+ return ret;
+
+ if (!lookup_ctx.lookup_name_found) {
+ NL_SET_ERR_MSG_ATTR(extack, nla_component,
+ "selected component is not supported by this device");
+ return -EINVAL;
+ }
+ *p_component = component;
+ return 0;
+}
+
+int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *nla_overwrite_mask, *nla_file_name;
+ struct devlink_flash_update_params params = {};
+ struct devlink *devlink = info->user_ptr[0];
+ const char *file_name;
+ u32 supported_params;
+ int ret;
+
+ if (!devlink->ops->flash_update)
+ return -EOPNOTSUPP;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME))
+ return -EINVAL;
+
+ ret = devlink_flash_component_get(devlink,
+ info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT],
+ &params.component, info->extack);
+ if (ret)
+ return ret;
+
+ supported_params = devlink->ops->supported_flash_update_params;
+
+ nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK];
+ if (nla_overwrite_mask) {
+ struct nla_bitfield32 sections;
+
+ if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK)) {
+ NL_SET_ERR_MSG_ATTR(info->extack, nla_overwrite_mask,
+ "overwrite settings are not supported by this device");
+ return -EOPNOTSUPP;
+ }
+ sections = nla_get_bitfield32(nla_overwrite_mask);
+ params.overwrite_mask = sections.value & sections.selector;
+ }
+
+ nla_file_name = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME];
+ file_name = nla_data(nla_file_name);
+ ret = request_firmware(&params.fw, file_name, devlink->dev);
+ if (ret) {
+ NL_SET_ERR_MSG_ATTR(info->extack, nla_file_name,
+ "failed to locate the requested firmware file");
+ return ret;
+ }
+
+ devlink_flash_update_begin_notify(devlink);
+ ret = devlink->ops->flash_update(devlink, &params, info->extack);
+ devlink_flash_update_end_notify(devlink);
+
+ release_firmware(params.fw);
+
+ return ret;
+}
+
+static void __devlink_compat_running_version(struct devlink *devlink,
+ char *buf, size_t len)
+{
+ struct devlink_info_req req = {};
+ const struct nlattr *nlattr;
+ struct sk_buff *msg;
+ int rem, err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ req.msg = msg;
+ err = devlink->ops->info_get(devlink, &req, NULL);
+ if (err)
+ goto free_msg;
+
+ nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) {
+ const struct nlattr *kv;
+ int rem_kv;
+
+ if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING)
+ continue;
+
+ nla_for_each_nested(kv, nlattr, rem_kv) {
+ if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE)
+ continue;
+
+ strlcat(buf, nla_data(kv), len);
+ strlcat(buf, " ", len);
+ }
+ }
+free_msg:
+ nlmsg_free(msg);
+}
+
+void devlink_compat_running_version(struct devlink *devlink,
+ char *buf, size_t len)
+{
+ if (!devlink->ops->info_get)
+ return;
+
+ devl_lock(devlink);
+ if (devl_is_registered(devlink))
+ __devlink_compat_running_version(devlink, buf, len);
+ devl_unlock(devlink);
+}
+
+int devlink_compat_flash_update(struct devlink *devlink, const char *file_name)
+{
+ struct devlink_flash_update_params params = {};
+ int ret;
+
+ devl_lock(devlink);
+ if (!devl_is_registered(devlink)) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+
+ if (!devlink->ops->flash_update) {
+ ret = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
+ ret = request_firmware(&params.fw, file_name, devlink->dev);
+ if (ret)
+ goto out_unlock;
+
+ devlink_flash_update_begin_notify(devlink);
+ ret = devlink->ops->flash_update(devlink, &params, NULL);
+ devlink_flash_update_end_notify(devlink);
+
+ release_firmware(params.fw);
+out_unlock:
+ devl_unlock(devlink);
+
+ return ret;
+}
+
+static int
+devlink_nl_selftests_fill(struct sk_buff *msg, struct devlink *devlink,
+ u32 portid, u32 seq, int flags,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *selftests;
+ void *hdr;
+ int err;
+ int i;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags,
+ DEVLINK_CMD_SELFTESTS_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ err = -EMSGSIZE;
+ if (devlink_nl_put_handle(msg, devlink))
+ goto err_cancel_msg;
+
+ selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
+ if (!selftests)
+ goto err_cancel_msg;
+
+ for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
+ i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
+ if (devlink->ops->selftest_check(devlink, i, extack)) {
+ err = nla_put_flag(msg, i);
+ if (err)
+ goto err_cancel_msg;
+ }
+ }
+
+ nla_nest_end(msg, selftests);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+err_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ if (!devlink->ops->selftest_check)
+ return -EOPNOTSUPP;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_selftests_fill(msg, devlink, info->snd_portid,
+ info->snd_seq, 0, info->extack);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_cmd_selftests_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
+{
+ if (!devlink->ops->selftest_check)
+ return 0;
+
+ return devlink_nl_selftests_fill(msg, devlink,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->extack);
+}
+
+const struct devlink_cmd devl_cmd_selftests_get = {
+ .dump_one = devlink_nl_cmd_selftests_get_dump_one,
+};
+
+static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id,
+ enum devlink_selftest_status test_status)
+{
+ struct nlattr *result_attr;
+
+ result_attr = nla_nest_start(skb, DEVLINK_ATTR_SELFTEST_RESULT);
+ if (!result_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_SELFTEST_RESULT_ID, id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_SELFTEST_RESULT_STATUS,
+ test_status))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, result_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, result_attr);
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
+ [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG },
+};
+
+int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1];
+ struct devlink *devlink = info->user_ptr[0];
+ struct nlattr *attrs, *selftests;
+ struct sk_buff *msg;
+ void *hdr;
+ int err;
+ int i;
+
+ if (!devlink->ops->selftest_run || !devlink->ops->selftest_check)
+ return -EOPNOTSUPP;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SELFTESTS))
+ return -EINVAL;
+
+ attrs = info->attrs[DEVLINK_ATTR_SELFTESTS];
+
+ err = nla_parse_nested(tb, DEVLINK_ATTR_SELFTEST_ID_MAX, attrs,
+ devlink_selftest_nl_policy, info->extack);
+ if (err < 0)
+ return err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = -EMSGSIZE;
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, 0, DEVLINK_CMD_SELFTESTS_RUN);
+ if (!hdr)
+ goto free_msg;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto genlmsg_cancel;
+
+ selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
+ if (!selftests)
+ goto genlmsg_cancel;
+
+ for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
+ i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
+ enum devlink_selftest_status test_status;
+
+ if (nla_get_flag(tb[i])) {
+ if (!devlink->ops->selftest_check(devlink, i,
+ info->extack)) {
+ if (devlink_selftest_result_put(msg, i,
+ DEVLINK_SELFTEST_STATUS_SKIP))
+ goto selftests_nest_cancel;
+ continue;
+ }
+
+ test_status = devlink->ops->selftest_run(devlink, i,
+ info->extack);
+ if (devlink_selftest_result_put(msg, i, test_status))
+ goto selftests_nest_cancel;
+ }
+ }
+
+ nla_nest_end(msg, selftests);
+ genlmsg_end(msg, hdr);
+ return genlmsg_reply(msg, info);
+
+selftests_nest_cancel:
+ nla_nest_cancel(msg, selftests);
+genlmsg_cancel:
+ genlmsg_cancel(msg, hdr);
+free_msg:
+ nlmsg_free(msg);
+ return err;
+}
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index bdd7ad25c7e8..941174e157d4 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -139,6 +139,16 @@ devlink_dump_state(struct netlink_callback *cb)
return (struct devlink_nl_dump_state *)cb->ctx;
}
+static inline int
+devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
+{
+ if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name))
+ return -EMSGSIZE;
+ if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev)))
+ return -EMSGSIZE;
+ return 0;
+}
+
/* Commands */
extern const struct devlink_cmd devl_cmd_get;
extern const struct devlink_cmd devl_cmd_port_get;
@@ -157,6 +167,9 @@ extern const struct devlink_cmd devl_cmd_rate_get;
extern const struct devlink_cmd devl_cmd_linecard_get;
extern const struct devlink_cmd devl_cmd_selftests_get;
+/* Notify */
+void devlink_notify(struct devlink *devlink, enum devlink_command cmd);
+
/* Ports */
int devlink_port_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr);
@@ -176,6 +189,12 @@ static inline bool devlink_reload_supported(const struct devlink_ops *ops)
return ops->reload_down && ops->reload_up;
}
+/* Resources */
+struct devlink_resource;
+int devlink_resources_validate(struct devlink *devlink,
+ struct devlink_resource *resource,
+ struct genl_info *info);
+
/* Line cards */
struct devlink_linecard;
@@ -183,8 +202,19 @@ struct devlink_linecard *
devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info);
/* Rates */
+int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack);
struct devlink_rate *
devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info);
struct devlink_rate *
devlink_rate_node_get_from_info(struct devlink *devlink,
struct genl_info *info);
+/* Devlink nl cmds */
+int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 056d9ca14a3d..97d30ea98b00 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -143,10 +143,6 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
};
-static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
- [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG },
-};
-
#define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) \
WARN_ON_ONCE(!(devlink_port)->registered)
#define ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port) \
@@ -596,15 +592,6 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
return NULL;
}
-static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
-{
- if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name))
- return -EMSGSIZE;
- if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev)))
- return -EMSGSIZE;
- return 0;
-}
-
static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink)
{
struct nlattr *nested_attr;
@@ -641,185 +628,6 @@ size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port)
+ nla_total_size(4); /* DEVLINK_ATTR_PORT_INDEX */
}
-struct devlink_reload_combination {
- enum devlink_reload_action action;
- enum devlink_reload_limit limit;
-};
-
-static const struct devlink_reload_combination devlink_reload_invalid_combinations[] = {
- {
- /* can't reinitialize driver with no down time */
- .action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
- .limit = DEVLINK_RELOAD_LIMIT_NO_RESET,
- },
-};
-
-static bool
-devlink_reload_combination_is_invalid(enum devlink_reload_action action,
- enum devlink_reload_limit limit)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++)
- if (devlink_reload_invalid_combinations[i].action == action &&
- devlink_reload_invalid_combinations[i].limit == limit)
- return true;
- return false;
-}
-
-static bool
-devlink_reload_action_is_supported(struct devlink *devlink, enum devlink_reload_action action)
-{
- return test_bit(action, &devlink->ops->reload_actions);
-}
-
-static bool
-devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_limit limit)
-{
- return test_bit(limit, &devlink->ops->reload_limits);
-}
-
-static int devlink_reload_stat_put(struct sk_buff *msg,
- enum devlink_reload_limit limit, u32 value)
-{
- struct nlattr *reload_stats_entry;
-
- reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY);
- if (!reload_stats_entry)
- return -EMSGSIZE;
-
- if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
- nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
- goto nla_put_failure;
- nla_nest_end(msg, reload_stats_entry);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, reload_stats_entry);
- return -EMSGSIZE;
-}
-
-static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote)
-{
- struct nlattr *reload_stats_attr, *act_info, *act_stats;
- int i, j, stat_idx;
- u32 value;
-
- if (!is_remote)
- reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
- else
- reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_REMOTE_RELOAD_STATS);
-
- if (!reload_stats_attr)
- return -EMSGSIZE;
-
- for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
- if ((!is_remote &&
- !devlink_reload_action_is_supported(devlink, i)) ||
- i == DEVLINK_RELOAD_ACTION_UNSPEC)
- continue;
- act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO);
- if (!act_info)
- goto nla_put_failure;
-
- if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i))
- goto action_info_nest_cancel;
- act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS);
- if (!act_stats)
- goto action_info_nest_cancel;
-
- for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
- /* Remote stats are shown even if not locally supported.
- * Stats of actions with unspecified limit are shown
- * though drivers don't need to register unspecified
- * limit.
- */
- if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
- !devlink_reload_limit_is_supported(devlink, j)) ||
- devlink_reload_combination_is_invalid(i, j))
- continue;
-
- stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
- if (!is_remote)
- value = devlink->stats.reload_stats[stat_idx];
- else
- value = devlink->stats.remote_reload_stats[stat_idx];
- if (devlink_reload_stat_put(msg, j, value))
- goto action_stats_nest_cancel;
- }
- nla_nest_end(msg, act_stats);
- nla_nest_end(msg, act_info);
- }
- nla_nest_end(msg, reload_stats_attr);
- return 0;
-
-action_stats_nest_cancel:
- nla_nest_cancel(msg, act_stats);
-action_info_nest_cancel:
- nla_nest_cancel(msg, act_info);
-nla_put_failure:
- nla_nest_cancel(msg, reload_stats_attr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
-{
- struct nlattr *dev_stats;
- void *hdr;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
- goto nla_put_failure;
-
- dev_stats = nla_nest_start(msg, DEVLINK_ATTR_DEV_STATS);
- if (!dev_stats)
- goto nla_put_failure;
-
- if (devlink_reload_stats_put(msg, devlink, false))
- goto dev_stats_nest_cancel;
- if (devlink_reload_stats_put(msg, devlink, true))
- goto dev_stats_nest_cancel;
-
- nla_nest_end(msg, dev_stats);
- genlmsg_end(msg, hdr);
- return 0;
-
-dev_stats_nest_cancel:
- nla_nest_cancel(msg, dev_stats);
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
-{
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
- WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_fill(msg, devlink, cmd, 0, 0, 0);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
static int devlink_nl_port_attrs_put(struct sk_buff *msg,
struct devlink_port *devlink_port)
{
@@ -1274,39 +1082,6 @@ devlink_rate_is_parent_node(struct devlink_rate *devlink_rate,
return false;
}
-static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
- info->snd_portid, info->snd_seq, 0);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int
-devlink_nl_cmd_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
-{
- return devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI);
-}
-
-const struct devlink_cmd devl_cmd_get = {
- .dump_one = devlink_nl_cmd_get_dump_one,
-};
-
static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
@@ -3064,85 +2839,8 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
return -EOPNOTSUPP;
}
-static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
-{
- const struct devlink_ops *ops = devlink->ops;
- enum devlink_eswitch_encap_mode encap_mode;
- u8 inline_mode;
- void *hdr;
- int err = 0;
- u16 mode;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- err = devlink_nl_put_handle(msg, devlink);
- if (err)
- goto nla_put_failure;
-
- if (ops->eswitch_mode_get) {
- err = ops->eswitch_mode_get(devlink, &mode);
- if (err)
- goto nla_put_failure;
- err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode);
- if (err)
- goto nla_put_failure;
- }
-
- if (ops->eswitch_inline_mode_get) {
- err = ops->eswitch_inline_mode_get(devlink, &inline_mode);
- if (err)
- goto nla_put_failure;
- err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE,
- inline_mode);
- if (err)
- goto nla_put_failure;
- }
-
- if (ops->eswitch_encap_mode_get) {
- err = ops->eswitch_encap_mode_get(devlink, &encap_mode);
- if (err)
- goto nla_put_failure;
- err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode);
- if (err)
- goto nla_put_failure;
- }
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_GET,
- info->snd_portid, info->snd_seq, 0);
-
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
- struct netlink_ext_ack *extack)
+int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack)
{
struct devlink_rate *devlink_rate;
@@ -3154,52 +2852,6 @@ static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
return 0;
}
-static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- const struct devlink_ops *ops = devlink->ops;
- enum devlink_eswitch_encap_mode encap_mode;
- u8 inline_mode;
- int err = 0;
- u16 mode;
-
- if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) {
- if (!ops->eswitch_mode_set)
- return -EOPNOTSUPP;
- mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
- err = devlink_rate_nodes_check(devlink, mode, info->extack);
- if (err)
- return err;
- err = ops->eswitch_mode_set(devlink, mode, info->extack);
- if (err)
- return err;
- }
-
- if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) {
- if (!ops->eswitch_inline_mode_set)
- return -EOPNOTSUPP;
- inline_mode = nla_get_u8(
- info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]);
- err = ops->eswitch_inline_mode_set(devlink, inline_mode,
- info->extack);
- if (err)
- return err;
- }
-
- if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) {
- if (!ops->eswitch_encap_mode_set)
- return -EOPNOTSUPP;
- encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]);
- err = ops->eswitch_encap_mode_set(devlink, encap_mode,
- info->extack);
- if (err)
- return err;
- }
-
- return 0;
-}
-
int devlink_dpipe_match_put(struct sk_buff *skb,
struct devlink_dpipe_match *match)
{
@@ -4170,10 +3822,9 @@ static int devlink_nl_cmd_resource_dump(struct sk_buff *skb,
return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0);
}
-static int
-devlink_resources_validate(struct devlink *devlink,
- struct devlink_resource *resource,
- struct genl_info *info)
+int devlink_resources_validate(struct devlink *devlink,
+ struct devlink_resource *resource,
+ struct genl_info *info)
{
struct list_head *resource_list;
int err = 0;
@@ -4193,698 +3844,6 @@ devlink_resources_validate(struct devlink *devlink,
return err;
}
-static struct net *devlink_netns_get(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID];
- struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD];
- struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID];
- struct net *net;
-
- if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) {
- NL_SET_ERR_MSG_MOD(info->extack, "multiple netns identifying attributes specified");
- return ERR_PTR(-EINVAL);
- }
-
- if (netns_pid_attr) {
- net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr));
- } else if (netns_fd_attr) {
- net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr));
- } else if (netns_id_attr) {
- net = get_net_ns_by_id(sock_net(skb->sk),
- nla_get_u32(netns_id_attr));
- if (!net)
- net = ERR_PTR(-EINVAL);
- } else {
- WARN_ON(1);
- net = ERR_PTR(-EINVAL);
- }
- if (IS_ERR(net)) {
- NL_SET_ERR_MSG_MOD(info->extack, "Unknown network namespace");
- return ERR_PTR(-EINVAL);
- }
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
- put_net(net);
- return ERR_PTR(-EPERM);
- }
- return net;
-}
-
-static void devlink_reload_netns_change(struct devlink *devlink,
- struct net *curr_net,
- struct net *dest_net)
-{
- /* Userspace needs to be notified about devlink objects
- * removed from original and entering new network namespace.
- * The rest of the devlink objects are re-created during
- * reload process so the notifications are generated separatelly.
- */
- devlink_notify_unregister(devlink);
- move_netdevice_notifier_net(curr_net, dest_net,
- &devlink->netdevice_nb);
- write_pnet(&devlink->_net, dest_net);
- devlink_notify_register(devlink);
-}
-
-static void devlink_reload_failed_set(struct devlink *devlink,
- bool reload_failed)
-{
- if (devlink->reload_failed == reload_failed)
- return;
- devlink->reload_failed = reload_failed;
- devlink_notify(devlink, DEVLINK_CMD_NEW);
-}
-
-bool devlink_is_reload_failed(const struct devlink *devlink)
-{
- return devlink->reload_failed;
-}
-EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
-
-static void
-__devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats,
- enum devlink_reload_limit limit, u32 actions_performed)
-{
- unsigned long actions = actions_performed;
- int stat_idx;
- int action;
-
- for_each_set_bit(action, &actions, __DEVLINK_RELOAD_ACTION_MAX) {
- stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
- reload_stats[stat_idx]++;
- }
- devlink_notify(devlink, DEVLINK_CMD_NEW);
-}
-
-static void
-devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit limit,
- u32 actions_performed)
-{
- __devlink_reload_stats_update(devlink, devlink->stats.reload_stats, limit,
- actions_performed);
-}
-
-/**
- * devlink_remote_reload_actions_performed - Update devlink on reload actions
- * performed which are not a direct result of devlink reload call.
- *
- * This should be called by a driver after performing reload actions in case it was not
- * a result of devlink reload call. For example fw_activate was performed as a result
- * of devlink reload triggered fw_activate on another host.
- * The motivation for this function is to keep data on reload actions performed on this
- * function whether it was done due to direct devlink reload call or not.
- *
- * @devlink: devlink
- * @limit: reload limit
- * @actions_performed: bitmask of actions performed
- */
-void devlink_remote_reload_actions_performed(struct devlink *devlink,
- enum devlink_reload_limit limit,
- u32 actions_performed)
-{
- if (WARN_ON(!actions_performed ||
- actions_performed & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
- actions_performed >= BIT(__DEVLINK_RELOAD_ACTION_MAX) ||
- limit > DEVLINK_RELOAD_LIMIT_MAX))
- return;
-
- __devlink_reload_stats_update(devlink, devlink->stats.remote_reload_stats, limit,
- actions_performed);
-}
-EXPORT_SYMBOL_GPL(devlink_remote_reload_actions_performed);
-
-int devlink_reload(struct devlink *devlink, struct net *dest_net,
- enum devlink_reload_action action,
- enum devlink_reload_limit limit,
- u32 *actions_performed, struct netlink_ext_ack *extack)
-{
- u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
- struct net *curr_net;
- int err;
-
- memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
- sizeof(remote_reload_stats));
-
- err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack);
- if (err)
- return err;
-
- curr_net = devlink_net(devlink);
- if (dest_net && !net_eq(dest_net, curr_net))
- devlink_reload_netns_change(devlink, curr_net, dest_net);
-
- err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
- devlink_reload_failed_set(devlink, !!err);
- if (err)
- return err;
-
- WARN_ON(!(*actions_performed & BIT(action)));
- /* Catch driver on updating the remote action within devlink reload */
- WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats,
- sizeof(remote_reload_stats)));
- devlink_reload_stats_update(devlink, limit, *actions_performed);
- return 0;
-}
-
-static int
-devlink_nl_reload_actions_performed_snd(struct devlink *devlink, u32 actions_performed,
- enum devlink_command cmd, struct genl_info *info)
-{
- struct sk_buff *msg;
- void *hdr;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &devlink_nl_family, 0, cmd);
- if (!hdr)
- goto free_msg;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- if (nla_put_bitfield32(msg, DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, actions_performed,
- actions_performed))
- goto nla_put_failure;
- genlmsg_end(msg, hdr);
-
- return genlmsg_reply(msg, info);
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
-free_msg:
- nlmsg_free(msg);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- enum devlink_reload_action action;
- enum devlink_reload_limit limit;
- struct net *dest_net = NULL;
- u32 actions_performed;
- int err;
-
- err = devlink_resources_validate(devlink, NULL, info);
- if (err) {
- NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
- return err;
- }
-
- if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION])
- action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]);
- else
- action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT;
-
- if (!devlink_reload_action_is_supported(devlink, action)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Requested reload action is not supported by the driver");
- return -EOPNOTSUPP;
- }
-
- limit = DEVLINK_RELOAD_LIMIT_UNSPEC;
- if (info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]) {
- struct nla_bitfield32 limits;
- u32 limits_selected;
-
- limits = nla_get_bitfield32(info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]);
- limits_selected = limits.value & limits.selector;
- if (!limits_selected) {
- NL_SET_ERR_MSG_MOD(info->extack, "Invalid limit selected");
- return -EINVAL;
- }
- for (limit = 0 ; limit <= DEVLINK_RELOAD_LIMIT_MAX ; limit++)
- if (limits_selected & BIT(limit))
- break;
- /* UAPI enables multiselection, but currently it is not used */
- if (limits_selected != BIT(limit)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Multiselection of limit is not supported");
- return -EOPNOTSUPP;
- }
- if (!devlink_reload_limit_is_supported(devlink, limit)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Requested limit is not supported by the driver");
- return -EOPNOTSUPP;
- }
- if (devlink_reload_combination_is_invalid(action, limit)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Requested limit is invalid for this action");
- return -EINVAL;
- }
- }
- if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
- info->attrs[DEVLINK_ATTR_NETNS_FD] ||
- info->attrs[DEVLINK_ATTR_NETNS_ID]) {
- dest_net = devlink_netns_get(skb, info);
- if (IS_ERR(dest_net))
- return PTR_ERR(dest_net);
- }
-
- err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack);
-
- if (dest_net)
- put_net(dest_net);
-
- if (err)
- return err;
- /* For backward compatibility generate reply only if attributes used by user */
- if (!info->attrs[DEVLINK_ATTR_RELOAD_ACTION] && !info->attrs[DEVLINK_ATTR_RELOAD_LIMITS])
- return 0;
-
- return devlink_nl_reload_actions_performed_snd(devlink, actions_performed,
- DEVLINK_CMD_RELOAD, info);
-}
-
-static int devlink_nl_flash_update_fill(struct sk_buff *msg,
- struct devlink *devlink,
- enum devlink_command cmd,
- struct devlink_flash_notify *params)
-{
- void *hdr;
-
- hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- if (cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS)
- goto out;
-
- if (params->status_msg &&
- nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG,
- params->status_msg))
- goto nla_put_failure;
- if (params->component &&
- nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT,
- params->component))
- goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,
- params->done, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,
- params->total, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,
- params->timeout, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
-out:
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static void __devlink_flash_update_notify(struct devlink *devlink,
- enum devlink_command cmd,
- struct devlink_flash_notify *params)
-{
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE &&
- cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
- cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
-
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_flash_update_fill(msg, devlink, cmd, params);
- if (err)
- goto out_free_msg;
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
- return;
-
-out_free_msg:
- nlmsg_free(msg);
-}
-
-static void devlink_flash_update_begin_notify(struct devlink *devlink)
-{
- struct devlink_flash_notify params = {};
-
- __devlink_flash_update_notify(devlink,
- DEVLINK_CMD_FLASH_UPDATE,
- &params);
-}
-
-static void devlink_flash_update_end_notify(struct devlink *devlink)
-{
- struct devlink_flash_notify params = {};
-
- __devlink_flash_update_notify(devlink,
- DEVLINK_CMD_FLASH_UPDATE_END,
- &params);
-}
-
-void devlink_flash_update_status_notify(struct devlink *devlink,
- const char *status_msg,
- const char *component,
- unsigned long done,
- unsigned long total)
-{
- struct devlink_flash_notify params = {
- .status_msg = status_msg,
- .component = component,
- .done = done,
- .total = total,
- };
-
- __devlink_flash_update_notify(devlink,
- DEVLINK_CMD_FLASH_UPDATE_STATUS,
- &params);
-}
-EXPORT_SYMBOL_GPL(devlink_flash_update_status_notify);
-
-void devlink_flash_update_timeout_notify(struct devlink *devlink,
- const char *status_msg,
- const char *component,
- unsigned long timeout)
-{
- struct devlink_flash_notify params = {
- .status_msg = status_msg,
- .component = component,
- .timeout = timeout,
- };
-
- __devlink_flash_update_notify(devlink,
- DEVLINK_CMD_FLASH_UPDATE_STATUS,
- &params);
-}
-EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify);
-
-struct devlink_info_req {
- struct sk_buff *msg;
- void (*version_cb)(const char *version_name,
- enum devlink_info_version_type version_type,
- void *version_cb_priv);
- void *version_cb_priv;
-};
-
-struct devlink_flash_component_lookup_ctx {
- const char *lookup_name;
- bool lookup_name_found;
-};
-
-static void
-devlink_flash_component_lookup_cb(const char *version_name,
- enum devlink_info_version_type version_type,
- void *version_cb_priv)
-{
- struct devlink_flash_component_lookup_ctx *lookup_ctx = version_cb_priv;
-
- if (version_type != DEVLINK_INFO_VERSION_TYPE_COMPONENT ||
- lookup_ctx->lookup_name_found)
- return;
-
- lookup_ctx->lookup_name_found =
- !strcmp(lookup_ctx->lookup_name, version_name);
-}
-
-static int devlink_flash_component_get(struct devlink *devlink,
- struct nlattr *nla_component,
- const char **p_component,
- struct netlink_ext_ack *extack)
-{
- struct devlink_flash_component_lookup_ctx lookup_ctx = {};
- struct devlink_info_req req = {};
- const char *component;
- int ret;
-
- if (!nla_component)
- return 0;
-
- component = nla_data(nla_component);
-
- if (!devlink->ops->info_get) {
- NL_SET_ERR_MSG_ATTR(extack, nla_component,
- "component update is not supported by this device");
- return -EOPNOTSUPP;
- }
-
- lookup_ctx.lookup_name = component;
- req.version_cb = devlink_flash_component_lookup_cb;
- req.version_cb_priv = &lookup_ctx;
-
- ret = devlink->ops->info_get(devlink, &req, NULL);
- if (ret)
- return ret;
-
- if (!lookup_ctx.lookup_name_found) {
- NL_SET_ERR_MSG_ATTR(extack, nla_component,
- "selected component is not supported by this device");
- return -EINVAL;
- }
- *p_component = component;
- return 0;
-}
-
-static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct nlattr *nla_overwrite_mask, *nla_file_name;
- struct devlink_flash_update_params params = {};
- struct devlink *devlink = info->user_ptr[0];
- const char *file_name;
- u32 supported_params;
- int ret;
-
- if (!devlink->ops->flash_update)
- return -EOPNOTSUPP;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME))
- return -EINVAL;
-
- ret = devlink_flash_component_get(devlink,
- info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT],
- &params.component, info->extack);
- if (ret)
- return ret;
-
- supported_params = devlink->ops->supported_flash_update_params;
-
- nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK];
- if (nla_overwrite_mask) {
- struct nla_bitfield32 sections;
-
- if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK)) {
- NL_SET_ERR_MSG_ATTR(info->extack, nla_overwrite_mask,
- "overwrite settings are not supported by this device");
- return -EOPNOTSUPP;
- }
- sections = nla_get_bitfield32(nla_overwrite_mask);
- params.overwrite_mask = sections.value & sections.selector;
- }
-
- nla_file_name = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME];
- file_name = nla_data(nla_file_name);
- ret = request_firmware(&params.fw, file_name, devlink->dev);
- if (ret) {
- NL_SET_ERR_MSG_ATTR(info->extack, nla_file_name, "failed to locate the requested firmware file");
- return ret;
- }
-
- devlink_flash_update_begin_notify(devlink);
- ret = devlink->ops->flash_update(devlink, &params, info->extack);
- devlink_flash_update_end_notify(devlink);
-
- release_firmware(params.fw);
-
- return ret;
-}
-
-static int
-devlink_nl_selftests_fill(struct sk_buff *msg, struct devlink *devlink,
- u32 portid, u32 seq, int flags,
- struct netlink_ext_ack *extack)
-{
- struct nlattr *selftests;
- void *hdr;
- int err;
- int i;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags,
- DEVLINK_CMD_SELFTESTS_GET);
- if (!hdr)
- return -EMSGSIZE;
-
- err = -EMSGSIZE;
- if (devlink_nl_put_handle(msg, devlink))
- goto err_cancel_msg;
-
- selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
- if (!selftests)
- goto err_cancel_msg;
-
- for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
- i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
- if (devlink->ops->selftest_check(devlink, i, extack)) {
- err = nla_put_flag(msg, i);
- if (err)
- goto err_cancel_msg;
- }
- }
-
- nla_nest_end(msg, selftests);
- genlmsg_end(msg, hdr);
- return 0;
-
-err_cancel_msg:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- if (!devlink->ops->selftest_check)
- return -EOPNOTSUPP;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_selftests_fill(msg, devlink, info->snd_portid,
- info->snd_seq, 0, info->extack);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int
-devlink_nl_cmd_selftests_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
-{
- if (!devlink->ops->selftest_check)
- return 0;
-
- return devlink_nl_selftests_fill(msg, devlink,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- cb->extack);
-}
-
-const struct devlink_cmd devl_cmd_selftests_get = {
- .dump_one = devlink_nl_cmd_selftests_get_dump_one,
-};
-
-static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id,
- enum devlink_selftest_status test_status)
-{
- struct nlattr *result_attr;
-
- result_attr = nla_nest_start(skb, DEVLINK_ATTR_SELFTEST_RESULT);
- if (!result_attr)
- return -EMSGSIZE;
-
- if (nla_put_u32(skb, DEVLINK_ATTR_SELFTEST_RESULT_ID, id) ||
- nla_put_u8(skb, DEVLINK_ATTR_SELFTEST_RESULT_STATUS,
- test_status))
- goto nla_put_failure;
-
- nla_nest_end(skb, result_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, result_attr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_selftests_run(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1];
- struct devlink *devlink = info->user_ptr[0];
- struct nlattr *attrs, *selftests;
- struct sk_buff *msg;
- void *hdr;
- int err;
- int i;
-
- if (!devlink->ops->selftest_run || !devlink->ops->selftest_check)
- return -EOPNOTSUPP;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SELFTESTS))
- return -EINVAL;
-
- attrs = info->attrs[DEVLINK_ATTR_SELFTESTS];
-
- err = nla_parse_nested(tb, DEVLINK_ATTR_SELFTEST_ID_MAX, attrs,
- devlink_selftest_nl_policy, info->extack);
- if (err < 0)
- return err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = -EMSGSIZE;
- hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
- &devlink_nl_family, 0, DEVLINK_CMD_SELFTESTS_RUN);
- if (!hdr)
- goto free_msg;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto genlmsg_cancel;
-
- selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
- if (!selftests)
- goto genlmsg_cancel;
-
- for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
- i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
- enum devlink_selftest_status test_status;
-
- if (nla_get_flag(tb[i])) {
- if (!devlink->ops->selftest_check(devlink, i,
- info->extack)) {
- if (devlink_selftest_result_put(msg, i,
- DEVLINK_SELFTEST_STATUS_SKIP))
- goto selftests_nest_cancel;
- continue;
- }
-
- test_status = devlink->ops->selftest_run(devlink, i,
- info->extack);
- if (devlink_selftest_result_put(msg, i, test_status))
- goto selftests_nest_cancel;
- }
- }
-
- nla_nest_end(msg, selftests);
- genlmsg_end(msg, hdr);
- return genlmsg_reply(msg, info);
-
-selftests_nest_cancel:
- nla_nest_cancel(msg, selftests);
-genlmsg_cancel:
- genlmsg_cancel(msg, hdr);
-free_msg:
- nlmsg_free(msg);
- return err;
-}
-
static const struct devlink_param devlink_param_generic[] = {
{
.id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
@@ -6430,205 +5389,6 @@ out_unlock:
return err;
}
-int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
-{
- if (!req->msg)
- return 0;
- return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn);
-}
-EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
-
-int devlink_info_board_serial_number_put(struct devlink_info_req *req,
- const char *bsn)
-{
- if (!req->msg)
- return 0;
- return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER,
- bsn);
-}
-EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put);
-
-static int devlink_info_version_put(struct devlink_info_req *req, int attr,
- const char *version_name,
- const char *version_value,
- enum devlink_info_version_type version_type)
-{
- struct nlattr *nest;
- int err;
-
- if (req->version_cb)
- req->version_cb(version_name, version_type,
- req->version_cb_priv);
-
- if (!req->msg)
- return 0;
-
- nest = nla_nest_start_noflag(req->msg, attr);
- if (!nest)
- return -EMSGSIZE;
-
- err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_NAME,
- version_name);
- if (err)
- goto nla_put_failure;
-
- err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_VALUE,
- version_value);
- if (err)
- goto nla_put_failure;
-
- nla_nest_end(req->msg, nest);
-
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(req->msg, nest);
- return err;
-}
-
-int devlink_info_version_fixed_put(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED,
- version_name, version_value,
- DEVLINK_INFO_VERSION_TYPE_NONE);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put);
-
-int devlink_info_version_stored_put(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
- version_name, version_value,
- DEVLINK_INFO_VERSION_TYPE_NONE);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_stored_put);
-
-int devlink_info_version_stored_put_ext(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value,
- enum devlink_info_version_type version_type)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
- version_name, version_value,
- version_type);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_stored_put_ext);
-
-int devlink_info_version_running_put(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
- version_name, version_value,
- DEVLINK_INFO_VERSION_TYPE_NONE);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_running_put);
-
-int devlink_info_version_running_put_ext(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value,
- enum devlink_info_version_type version_type)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
- version_name, version_value,
- version_type);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext);
-
-static int devlink_nl_driver_info_get(struct device_driver *drv,
- struct devlink_info_req *req)
-{
- if (!drv)
- return 0;
-
- if (drv->name[0])
- return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME,
- drv->name);
-
- return 0;
-}
-
-static int
-devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags, struct netlink_ext_ack *extack)
-{
- struct device *dev = devlink_to_dev(devlink);
- struct devlink_info_req req = {};
- void *hdr;
- int err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- err = -EMSGSIZE;
- if (devlink_nl_put_handle(msg, devlink))
- goto err_cancel_msg;
-
- req.msg = msg;
- if (devlink->ops->info_get) {
- err = devlink->ops->info_get(devlink, &req, extack);
- if (err)
- goto err_cancel_msg;
- }
-
- err = devlink_nl_driver_info_get(dev->driver, &req);
- if (err)
- goto err_cancel_msg;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-err_cancel_msg:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static int devlink_nl_cmd_info_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
- info->snd_portid, info->snd_seq, 0,
- info->extack);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int
-devlink_nl_cmd_info_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
-{
- int err;
-
- err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- cb->extack);
- if (err == -EOPNOTSUPP)
- err = 0;
- return err;
-}
-
-const struct devlink_cmd devl_cmd_info_get = {
- .dump_one = devlink_nl_cmd_info_get_dump_one,
-};
-
struct devlink_fmsg_item {
struct list_head list;
int attrtype;
@@ -9257,35 +8017,6 @@ const struct genl_small_ops devlink_nl_ops[56] = {
/* -- No new ops here! Use split ops going forward! -- */
};
-bool devlink_reload_actions_valid(const struct devlink_ops *ops)
-{
- const struct devlink_reload_combination *comb;
- int i;
-
- if (!devlink_reload_supported(ops)) {
- if (WARN_ON(ops->reload_actions))
- return false;
- return true;
- }
-
- if (WARN_ON(!ops->reload_actions ||
- ops->reload_actions & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
- ops->reload_actions >= BIT(__DEVLINK_RELOAD_ACTION_MAX)))
- return false;
-
- if (WARN_ON(ops->reload_limits & BIT(DEVLINK_RELOAD_LIMIT_UNSPEC) ||
- ops->reload_limits >= BIT(__DEVLINK_RELOAD_LIMIT_MAX)))
- return false;
-
- for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++) {
- comb = &devlink_reload_invalid_combinations[i];
- if (ops->reload_actions == BIT(comb->action) &&
- ops->reload_limits == BIT(comb->limit))
- return false;
- }
- return true;
-}
-
static void
devlink_trap_policer_notify(struct devlink *devlink,
const struct devlink_trap_policer_item *policer_item,
@@ -12068,85 +10799,6 @@ devl_trap_policers_unregister(struct devlink *devlink,
}
EXPORT_SYMBOL_GPL(devl_trap_policers_unregister);
-static void __devlink_compat_running_version(struct devlink *devlink,
- char *buf, size_t len)
-{
- struct devlink_info_req req = {};
- const struct nlattr *nlattr;
- struct sk_buff *msg;
- int rem, err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- req.msg = msg;
- err = devlink->ops->info_get(devlink, &req, NULL);
- if (err)
- goto free_msg;
-
- nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) {
- const struct nlattr *kv;
- int rem_kv;
-
- if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING)
- continue;
-
- nla_for_each_nested(kv, nlattr, rem_kv) {
- if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE)
- continue;
-
- strlcat(buf, nla_data(kv), len);
- strlcat(buf, " ", len);
- }
- }
-free_msg:
- nlmsg_free(msg);
-}
-
-void devlink_compat_running_version(struct devlink *devlink,
- char *buf, size_t len)
-{
- if (!devlink->ops->info_get)
- return;
-
- devl_lock(devlink);
- if (devl_is_registered(devlink))
- __devlink_compat_running_version(devlink, buf, len);
- devl_unlock(devlink);
-}
-
-int devlink_compat_flash_update(struct devlink *devlink, const char *file_name)
-{
- struct devlink_flash_update_params params = {};
- int ret;
-
- devl_lock(devlink);
- if (!devl_is_registered(devlink)) {
- ret = -ENODEV;
- goto out_unlock;
- }
-
- if (!devlink->ops->flash_update) {
- ret = -EOPNOTSUPP;
- goto out_unlock;
- }
-
- ret = request_firmware(&params.fw, file_name, devlink->dev);
- if (ret)
- goto out_unlock;
-
- devlink_flash_update_begin_notify(devlink);
- ret = devlink->ops->flash_update(devlink, &params, NULL);
- devlink_flash_update_end_notify(devlink);
-
- release_firmware(params.fw);
-out_unlock:
- devl_unlock(devlink);
-
- return ret;
-}
-
int devlink_compat_phys_port_name_get(struct net_device *dev,
char *name, size_t len)
{
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 26c458f50ac6..6957971c2db2 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2692,7 +2692,8 @@ static int dsa_slave_changeupper(struct net_device *dev,
if (!err)
dsa_bridge_mtu_normalization(dp);
if (err == -EOPNOTSUPP) {
- NL_SET_ERR_MSG_WEAK_MOD(extack, "Offloading not supported");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "Offloading not supported");
err = 0;
}
err = notifier_from_errno(err);
@@ -2705,8 +2706,8 @@ static int dsa_slave_changeupper(struct net_device *dev,
err = dsa_port_lag_join(dp, info->upper_dev,
info->upper_info, extack);
if (err == -EOPNOTSUPP) {
- NL_SET_ERR_MSG_MOD(info->info.extack,
- "Offloading not supported");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "Offloading not supported");
err = 0;
}
err = notifier_from_errno(err);
@@ -2718,8 +2719,8 @@ static int dsa_slave_changeupper(struct net_device *dev,
if (info->linking) {
err = dsa_port_hsr_join(dp, info->upper_dev);
if (err == -EOPNOTSUPP) {
- NL_SET_ERR_MSG_MOD(info->info.extack,
- "Offloading not supported");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "Offloading not supported");
err = 0;
}
err = notifier_from_errno(err);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 006c1f0ed8b4..94df935ee0c5 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -93,7 +93,7 @@ int raw_hash_sk(struct sock *sk)
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
struct hlist_nulls_head *hlist;
- hlist = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];
+ hlist = &h->ht[raw_hashfunc(sock_net(sk), inet_sk(sk)->inet_num)];
spin_lock(&h->lock);
__sk_nulls_add_node_rcu(sk, hlist);
@@ -160,9 +160,9 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
* RFC 1122: SHOULD pass TOS value up to the transport layer.
* -> It does. And not only TOS, but all IP header.
*/
-static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
+static int raw_v4_input(struct net *net, struct sk_buff *skb,
+ const struct iphdr *iph, int hash)
{
- struct net *net = dev_net(skb->dev);
struct hlist_nulls_head *hlist;
struct hlist_nulls_node *hnode;
int sdif = inet_sdif(skb);
@@ -193,9 +193,10 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
int raw_local_deliver(struct sk_buff *skb, int protocol)
{
- int hash = protocol & (RAW_HTABLE_SIZE - 1);
+ struct net *net = dev_net(skb->dev);
- return raw_v4_input(skb, ip_hdr(skb), hash);
+ return raw_v4_input(net, skb, ip_hdr(skb),
+ raw_hashfunc(net, protocol));
}
static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
@@ -271,7 +272,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
struct sock *sk;
int hash;
- hash = protocol & (RAW_HTABLE_SIZE - 1);
+ hash = raw_hashfunc(net, protocol);
hlist = &raw_v4_hashinfo.ht[hash];
rcu_read_lock();
@@ -287,11 +288,13 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
+
/* Charge it to the socket. */
ipv4_pktinfo_prepare(sk, skb);
- if (sock_queue_rcv_skb(sk, skb) < 0) {
- kfree_skb(skb);
+ if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
@@ -302,7 +305,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
{
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY);
return NET_RX_DROP;
}
nf_reset_ct(skb);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ada087b50541..bac9ba747bde 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -152,7 +152,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
saddr = &ipv6_hdr(skb)->saddr;
daddr = saddr + 1;
- hash = nexthdr & (RAW_HTABLE_SIZE - 1);
+ hash = raw_hashfunc(net, nexthdr);
hlist = &raw_v6_hashinfo.ht[hash];
rcu_read_lock();
sk_nulls_for_each(sk, hnode, hlist) {
@@ -338,7 +338,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
struct sock *sk;
int hash;
- hash = nexthdr & (RAW_HTABLE_SIZE - 1);
+ hash = raw_hashfunc(net, nexthdr);
hlist = &raw_v6_hashinfo.ht[hash];
rcu_read_lock();
sk_nulls_for_each(sk, hnode, hlist) {
@@ -355,17 +355,19 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
+
if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) &&
skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
/* Charge it to the socket. */
skb_dst_drop(skb);
- if (sock_queue_rcv_skb(sk, skb) < 0) {
- kfree_skb(skb);
+ if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
@@ -386,7 +388,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY);
return NET_RX_DROP;
}
@@ -410,7 +412,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (inet->hdrincl) {
if (skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 1c0fe9ba5358..b163266e581a 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -502,7 +502,7 @@ static int smcr_lgr_reg_sndbufs(struct smc_link *link,
return -EINVAL;
/* protect against parallel smcr_link_reg_buf() */
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&lgr->lnk[i]))
continue;
@@ -510,7 +510,7 @@ static int smcr_lgr_reg_sndbufs(struct smc_link *link,
if (rc)
break;
}
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
return rc;
}
@@ -519,15 +519,30 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
struct smc_buf_desc *rmb_desc)
{
struct smc_link_group *lgr = link->lgr;
+ bool do_slow = false;
int i, rc = 0;
rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
if (rc)
return rc;
+
+ down_read(&lgr->llc_conf_mutex);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (!smc_link_active(&lgr->lnk[i]))
+ continue;
+ if (!rmb_desc->is_reg_mr[link->link_idx]) {
+ up_read(&lgr->llc_conf_mutex);
+ goto slow_path;
+ }
+ }
+ /* mr register already */
+ goto fast_path;
+slow_path:
+ do_slow = true;
/* protect against parallel smc_llc_cli_rkey_exchange() and
* parallel smcr_link_reg_buf()
*/
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&lgr->lnk[i]))
continue;
@@ -535,7 +550,7 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
if (rc)
goto out;
}
-
+fast_path:
/* exchange confirm_rkey msg with peer */
rc = smc_llc_do_confirm_rkey(link, rmb_desc);
if (rc) {
@@ -544,7 +559,7 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
}
rmb_desc->is_conf_rkey = true;
out:
- mutex_unlock(&lgr->llc_conf_mutex);
+ do_slow ? up_write(&lgr->llc_conf_mutex) : up_read(&lgr->llc_conf_mutex);
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
return rc;
}
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 7642b16c41d1..b330a1fa453e 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -854,8 +854,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->freeing = 0;
lgr->vlan_id = ini->vlan_id;
refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
- mutex_init(&lgr->sndbufs_lock);
- mutex_init(&lgr->rmbs_lock);
+ init_rwsem(&lgr->sndbufs_lock);
+ init_rwsem(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
INIT_LIST_HEAD(&lgr->sndbufs[i]);
@@ -1098,7 +1098,7 @@ err_out:
static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
struct smc_link_group *lgr)
{
- struct mutex *lock; /* lock buffer list */
+ struct rw_semaphore *lock; /* lock buffer list */
int rc;
if (is_rmb && buf_desc->is_conf_rkey && !list_empty(&lgr->list)) {
@@ -1106,10 +1106,10 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
if (!rc) {
/* protect against smc_llc_cli_rkey_exchange() */
- mutex_lock(&lgr->llc_conf_mutex);
+ down_read(&lgr->llc_conf_mutex);
smc_llc_do_delete_rkey(lgr, buf_desc);
buf_desc->is_conf_rkey = false;
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_read(&lgr->llc_conf_mutex);
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}
}
@@ -1118,9 +1118,9 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
/* buf registration failed, reuse not possible */
lock = is_rmb ? &lgr->rmbs_lock :
&lgr->sndbufs_lock;
- mutex_lock(lock);
+ down_write(lock);
list_del(&buf_desc->list);
- mutex_unlock(lock);
+ up_write(lock);
smc_buf_free(lgr, is_rmb, buf_desc);
} else {
@@ -1224,15 +1224,16 @@ static void smcr_buf_unmap_lgr(struct smc_link *lnk)
int i;
for (i = 0; i < SMC_RMBE_SIZES; i++) {
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
smcr_buf_unmap_link(buf_desc, true, lnk);
- mutex_unlock(&lgr->rmbs_lock);
- mutex_lock(&lgr->sndbufs_lock);
+ up_write(&lgr->rmbs_lock);
+
+ down_write(&lgr->sndbufs_lock);
list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
list)
smcr_buf_unmap_link(buf_desc, false, lnk);
- mutex_unlock(&lgr->sndbufs_lock);
+ up_write(&lgr->sndbufs_lock);
}
}
@@ -1377,12 +1378,12 @@ static void smc_lgr_free(struct smc_link_group *lgr)
int i;
if (!lgr->is_smcd) {
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].state != SMC_LNK_UNUSED)
smcr_link_clear(&lgr->lnk[i], false);
}
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
smc_llc_lgr_clear(lgr);
}
@@ -1696,12 +1697,12 @@ static void smcr_link_down(struct smc_link *lnk)
} else {
if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
/* another llc task is ongoing */
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
wait_event_timeout(lgr->llc_flow_waiter,
(list_empty(&lgr->list) ||
lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
SMC_LLC_WAIT_TIME);
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
}
if (!list_empty(&lgr->list)) {
smc_llc_send_delete_link(to_lnk, del_link_id,
@@ -1761,9 +1762,9 @@ static void smc_link_down_work(struct work_struct *work)
if (list_empty(&lgr->list))
return;
wake_up_all(&lgr->llc_msg_waiter);
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
smcr_link_down(link);
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
}
static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
@@ -1990,19 +1991,19 @@ int smc_uncompress_bufsize(u8 compressed)
* buffer size; if not available, return NULL
*/
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
- struct mutex *lock,
+ struct rw_semaphore *lock,
struct list_head *buf_list)
{
struct smc_buf_desc *buf_slot;
- mutex_lock(lock);
+ down_read(lock);
list_for_each_entry(buf_slot, buf_list, list) {
if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
- mutex_unlock(lock);
+ up_read(lock);
return buf_slot;
}
}
- mutex_unlock(lock);
+ up_read(lock);
return NULL;
}
@@ -2111,13 +2112,13 @@ int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc)
return 0;
}
-static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
+static int _smcr_buf_map_lgr(struct smc_link *lnk, struct rw_semaphore *lock,
struct list_head *lst, bool is_rmb)
{
struct smc_buf_desc *buf_desc, *bf;
int rc = 0;
- mutex_lock(lock);
+ down_write(lock);
list_for_each_entry_safe(buf_desc, bf, lst, list) {
if (!buf_desc->used)
continue;
@@ -2126,7 +2127,7 @@ static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
goto out;
}
out:
- mutex_unlock(lock);
+ up_write(lock);
return rc;
}
@@ -2159,37 +2160,37 @@ int smcr_buf_reg_lgr(struct smc_link *lnk)
int i, rc = 0;
/* reg all RMBs for a new link */
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
if (!buf_desc->used)
continue;
rc = smcr_link_reg_buf(lnk, buf_desc);
if (rc) {
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
return rc;
}
}
}
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
return rc;
/* reg all vzalloced sndbufs for a new link */
- mutex_lock(&lgr->sndbufs_lock);
+ down_write(&lgr->sndbufs_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) {
if (!buf_desc->used || !buf_desc->is_vm)
continue;
rc = smcr_link_reg_buf(lnk, buf_desc);
if (rc) {
- mutex_unlock(&lgr->sndbufs_lock);
+ up_write(&lgr->sndbufs_lock);
return rc;
}
}
}
- mutex_unlock(&lgr->sndbufs_lock);
+ up_write(&lgr->sndbufs_lock);
return rc;
}
@@ -2247,7 +2248,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
int i, rc = 0, cnt = 0;
/* protect against parallel link reconfiguration */
- mutex_lock(&lgr->llc_conf_mutex);
+ down_read(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &lgr->lnk[i];
@@ -2260,7 +2261,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
cnt++;
}
out:
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_read(&lgr->llc_conf_mutex);
if (!rc && !cnt)
rc = -EINVAL;
return rc;
@@ -2309,8 +2310,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
struct smc_link_group *lgr = conn->lgr;
struct list_head *buf_list;
int bufsize, bufsize_short;
+ struct rw_semaphore *lock; /* lock buffer list */
bool is_dgraded = false;
- struct mutex *lock; /* lock buffer list */
int sk_buf_size;
if (is_rmb)
@@ -2358,9 +2359,9 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
buf_desc->used = 1;
- mutex_lock(lock);
+ down_write(lock);
list_add(&buf_desc->list, buf_list);
- mutex_unlock(lock);
+ up_write(lock);
break; /* found */
}
@@ -2434,9 +2435,9 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
/* create rmb */
rc = __smc_buf_create(smc, is_smcd, true);
if (rc) {
- mutex_lock(&smc->conn.lgr->sndbufs_lock);
+ down_write(&smc->conn.lgr->sndbufs_lock);
list_del(&smc->conn.sndbuf_desc->list);
- mutex_unlock(&smc->conn.lgr->sndbufs_lock);
+ up_write(&smc->conn.lgr->sndbufs_lock);
smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
smc->conn.sndbuf_desc = NULL;
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 285f9bd8e232..08b457c2d294 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -252,9 +252,9 @@ struct smc_link_group {
unsigned short vlan_id; /* vlan id of link group */
struct list_head sndbufs[SMC_RMBE_SIZES];/* tx buffers */
- struct mutex sndbufs_lock; /* protects tx buffers */
+ struct rw_semaphore sndbufs_lock; /* protects tx buffers */
struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
- struct mutex rmbs_lock; /* protects rx buffers */
+ struct rw_semaphore rmbs_lock; /* protects rx buffers */
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
@@ -298,7 +298,7 @@ struct smc_link_group {
/* queue for llc events */
spinlock_t llc_event_q_lock;
/* protects llc_event_q */
- struct mutex llc_conf_mutex;
+ struct rw_semaphore llc_conf_mutex;
/* protects lgr reconfig. */
struct work_struct llc_add_link_work;
struct work_struct llc_del_link_work;
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 524649d0ab65..a0840b8c935b 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -608,7 +608,7 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
prim_lnk_idx = link->link_idx;
lnk_idx = link_new->link_idx;
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
ext->num_rkeys = lgr->conns_num;
if (!ext->num_rkeys)
goto out;
@@ -628,7 +628,7 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
}
len += i * sizeof(ext->rt[0]);
out:
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
return len;
}
@@ -889,7 +889,7 @@ static int smc_llc_cli_rkey_exchange(struct smc_link *link,
int rc = 0;
int i;
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
num_rkeys_send = lgr->conns_num;
buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
do {
@@ -916,7 +916,7 @@ static int smc_llc_cli_rkey_exchange(struct smc_link *link,
break;
} while (num_rkeys_send || num_rkeys_recv);
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
return rc;
}
@@ -999,14 +999,14 @@ static void smc_llc_save_add_link_rkeys(struct smc_link *link,
ext = (struct smc_llc_msg_add_link_v2_ext *)((u8 *)lgr->wr_rx_buf_v2 +
SMC_WR_TX_SIZE);
max = min_t(u8, ext->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
for (i = 0; i < max; i++) {
smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
ext->rt[i].rmb_key,
ext->rt[i].rmb_vaddr_new,
ext->rt[i].rmb_key_new);
}
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
}
static void smc_llc_save_add_link_info(struct smc_link *link,
@@ -1202,12 +1202,12 @@ static void smc_llc_process_cli_add_link(struct smc_link_group *lgr)
qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
if (smc_llc_is_local_add_link(&qentry->msg))
smc_llc_cli_add_link_invite(qentry->link, qentry);
else
smc_llc_cli_add_link(qentry->link, qentry);
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
}
static int smc_llc_active_link_count(struct smc_link_group *lgr)
@@ -1313,7 +1313,7 @@ static int smc_llc_srv_rkey_exchange(struct smc_link *link,
int rc = 0;
int i;
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
num_rkeys_send = lgr->conns_num;
buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
do {
@@ -1338,7 +1338,7 @@ static int smc_llc_srv_rkey_exchange(struct smc_link *link,
smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
} while (num_rkeys_send || num_rkeys_recv);
out:
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
return rc;
}
@@ -1509,13 +1509,13 @@ static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
rc = smc_llc_srv_add_link(link, qentry);
if (!rc && lgr->type == SMC_LGR_SYMMETRIC) {
/* delete any asymmetric link */
smc_llc_delete_asym_link(lgr);
}
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
kfree(qentry);
}
@@ -1582,7 +1582,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
smc_lgr_terminate_sched(lgr);
goto out;
}
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
/* delete single link */
for (lnk_idx = 0; lnk_idx < SMC_LINKS_PER_LGR_MAX; lnk_idx++) {
if (lgr->lnk[lnk_idx].link_id != del_llc->link_num)
@@ -1616,7 +1616,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
smc_lgr_terminate_sched(lgr);
}
out_unlock:
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
out:
kfree(qentry);
}
@@ -1652,7 +1652,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
int active_links;
int i;
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
lnk = qentry->link;
del_llc = &qentry->msg.delete_link;
@@ -1708,7 +1708,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
smc_llc_add_link_local(lnk);
}
out:
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
kfree(qentry);
}
@@ -2126,7 +2126,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
spin_lock_init(&lgr->llc_flow_lock);
init_waitqueue_head(&lgr->llc_flow_waiter);
init_waitqueue_head(&lgr->llc_msg_waiter);
- mutex_init(&lgr->llc_conf_mutex);
+ init_rwsem(&lgr->llc_conf_mutex);
lgr->llc_testlink_time = READ_ONCE(net->smc.sysctl_smcr_testlink_time);
}
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 453ae006fbcf..91201ab3c4fc 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -4,6 +4,7 @@ TEST_PROGS = bridge_igmp.sh \
bridge_locked_port.sh \
bridge_mdb.sh \
bridge_mdb_host.sh \
+ bridge_mdb_max.sh \
bridge_mdb_port_down.sh \
bridge_mld.sh \
bridge_port_isolation.sh \
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index 2fa5973c0c28..b48867d8cadf 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -1018,26 +1018,6 @@ fwd_test()
ip -6 address del fe80::1/64 dev br0
}
-igmpv3_is_in_get()
-{
- local igmpv3
-
- igmpv3=$(:
- )"22:"$( : Type - Membership Report
- )"00:"$( : Reserved
- )"2a:f8:"$( : Checksum
- )"00:00:"$( : Reserved
- )"00:01:"$( : Number of Group Records
- )"01:"$( : Record Type - IS_IN
- )"00:"$( : Aux Data Len
- )"00:01:"$( : Number of Sources
- )"ef:01:01:01:"$( : Multicast Address - 239.1.1.1
- )"c0:00:02:02"$( : Source Address - 192.0.2.2
- )
-
- echo $igmpv3
-}
-
ctrl_igmpv3_is_in_test()
{
RET=0
@@ -1049,7 +1029,7 @@ ctrl_igmpv3_is_in_test()
# IS_IN ( 192.0.2.2 )
$MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
- -t ip proto=2,p=$(igmpv3_is_in_get) -q
+ -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2
check_fail $? "Permanent entry affected by IGMP packet"
@@ -1062,7 +1042,7 @@ ctrl_igmpv3_is_in_test()
# IS_IN ( 192.0.2.2 )
$MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
- -t ip proto=2,p=$(igmpv3_is_in_get) -q
+ -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \
grep -q 192.0.2.2
@@ -1074,36 +1054,7 @@ ctrl_igmpv3_is_in_test()
bridge mdb del dev br0 port $swp1 grp 239.1.1.1 vid 10
- log_test "IGMPv3 MODE_IS_INCLUE tests"
-}
-
-mldv2_is_in_get()
-{
- local hbh
- local icmpv6
-
- hbh=$(:
- )"3a:"$( : Next Header - ICMPv6
- )"00:"$( : Hdr Ext Len
- )"00:00:00:00:00:00:"$( : Options and Padding
- )
-
- icmpv6=$(:
- )"8f:"$( : Type - MLDv2 Report
- )"00:"$( : Code
- )"45:39:"$( : Checksum
- )"00:00:"$( : Reserved
- )"00:01:"$( : Number of Group Records
- )"01:"$( : Record Type - IS_IN
- )"00:"$( : Aux Data Len
- )"00:01:"$( : Number of Sources
- )"ff:0e:00:00:00:00:00:00:"$( : Multicast address - ff0e::1
- )"00:00:00:00:00:00:00:01:"$( :
- )"20:01:0d:b8:00:01:00:00:"$( : Source Address - 2001:db8:1::2
- )"00:00:00:00:00:00:00:02:"$( :
- )
-
- echo ${hbh}${icmpv6}
+ log_test "IGMPv3 MODE_IS_INCLUDE tests"
}
ctrl_mldv2_is_in_test()
@@ -1116,8 +1067,9 @@ ctrl_mldv2_is_in_test()
filter_mode include source_list 2001:db8:1::1
# IS_IN ( 2001:db8:1::2 )
+ local p=$(mldv2_is_in_get fe80::1 ff0e::1 2001:db8:1::2)
$MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
- -t ip hop=1,next=0,p=$(mldv2_is_in_get) -q
+ -t ip hop=1,next=0,p="$p" -q
bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
grep -q 2001:db8:1::2
@@ -1131,7 +1083,7 @@ ctrl_mldv2_is_in_test()
# IS_IN ( 2001:db8:1::2 )
$MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
- -t ip hop=1,next=0,p=$(mldv2_is_in_get) -q
+ -t ip hop=1,next=0,p="$p" -q
bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \
grep -q 2001:db8:1::2
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
new file mode 100755
index 000000000000..ae255b662ba3
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
@@ -0,0 +1,1336 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +------------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1.10 | | + $h2.10 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 |
+# | | | | | |
+# | | + $h1.20 | | | + $h2.20 |
+# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 |
+# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 |
+# | \| | | \| |
+# | + $h1 | | + $h2 |
+# +----|------------------+ +----|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|--------------------------------------------------|-----------------+ |
+# | | + $swp1 BR0 (802.1q) + $swp2 | |
+# | | vid 10 vid 10 | |
+# | | vid 20 vid 20 | |
+# | | | |
+# | +-----------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ test_8021d
+ test_8021q
+ test_8021qvs
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 20
+ vlan_destroy $h1 10
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 10 v$h2 192.0.2.2/28
+ vlan_create $h2 20 v$h2 198.51.100.2/24
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 20
+ vlan_destroy $h2 10
+ simple_if_fini $h2
+}
+
+switch_create_8021d()
+{
+ log_info "802.1d tests"
+
+ ip link add name br0 type bridge vlan_filtering 0 \
+ mcast_snooping 1 \
+ mcast_igmp_version 3 mcast_mld_version 2
+ ip link set dev br0 up
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp1 up
+ bridge link set dev $swp1 fastleave on
+
+ ip link set dev $swp2 master br0
+ ip link set dev $swp2 up
+}
+
+switch_create_8021q()
+{
+ local br_flags=$1; shift
+
+ log_info "802.1q $br_flags${br_flags:+ }tests"
+
+ ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 1 $br_flags \
+ mcast_igmp_version 3 mcast_mld_version 2
+ bridge vlan add vid 10 dev br0 self
+ bridge vlan add vid 20 dev br0 self
+ ip link set dev br0 up
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp1 up
+ bridge link set dev $swp1 fastleave on
+ bridge vlan add vid 10 dev $swp1
+ bridge vlan add vid 20 dev $swp1
+
+ ip link set dev $swp2 master br0
+ ip link set dev $swp2 up
+ bridge vlan add vid 10 dev $swp2
+ bridge vlan add vid 20 dev $swp2
+}
+
+switch_create_8021qvs()
+{
+ switch_create_8021q "mcast_vlan_snooping 1"
+ bridge vlan global set dev br0 vid 10 mcast_igmp_version 3
+ bridge vlan global set dev br0 vid 10 mcast_mld_version 2
+ bridge vlan global set dev br0 vid 20 mcast_igmp_version 3
+ bridge vlan global set dev br0 vid 20 mcast_mld_version 2
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link set dev br0 down
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy 2>/dev/null
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+cfg_src_list()
+{
+ local IPs=("$@")
+ local IPstr=$(echo ${IPs[@]} | tr '[:space:]' , | sed 's/,$//')
+
+ echo ${IPstr:+source_list }${IPstr}
+}
+
+cfg_group_op()
+{
+ local op=$1; shift
+ local locus=$1; shift
+ local GRP=$1; shift
+ local state=$1; shift
+ local IPs=("$@")
+
+ local source_list=$(cfg_src_list ${IPs[@]})
+
+ # Everything besides `bridge mdb' uses the "dev X vid Y" syntax,
+ # so we use it here as well and convert.
+ local br_locus=$(echo "$locus" | sed 's/^dev /port /')
+
+ bridge mdb $op dev br0 $br_locus grp $GRP $state \
+ filter_mode include $source_list
+}
+
+cfg4_entries_op()
+{
+ local op=$1; shift
+ local locus=$1; shift
+ local state=$1; shift
+ local n=$1; shift
+ local grp=${1:-1}; shift
+
+ local GRP=239.1.1.${grp}
+ local IPs=$(seq -f 192.0.2.%g 1 $((n - 1)))
+ cfg_group_op "$op" "$locus" "$GRP" "$state" ${IPs[@]}
+}
+
+cfg4_entries_add()
+{
+ cfg4_entries_op add "$@"
+}
+
+cfg4_entries_del()
+{
+ cfg4_entries_op del "$@"
+}
+
+cfg6_entries_op()
+{
+ local op=$1; shift
+ local locus=$1; shift
+ local state=$1; shift
+ local n=$1; shift
+ local grp=${1:-1}; shift
+
+ local GRP=ff0e::${grp}
+ local IPs=$(printf "2001:db8:1::%x\n" $(seq 1 $((n - 1))))
+ cfg_group_op "$op" "$locus" "$GRP" "$state" ${IPs[@]}
+}
+
+cfg6_entries_add()
+{
+ cfg6_entries_op add "$@"
+}
+
+cfg6_entries_del()
+{
+ cfg6_entries_op del "$@"
+}
+
+locus_dev_peer()
+{
+ local dev_kw=$1; shift
+ local dev=$1; shift
+ local vid_kw=$1; shift
+ local vid=$1; shift
+
+ echo "$h1.${vid:-10}"
+}
+
+locus_dev()
+{
+ local dev_kw=$1; shift
+ local dev=$1; shift
+
+ echo $dev
+}
+
+ctl4_entries_add()
+{
+ local locus=$1; shift
+ local state=$1; shift
+ local n=$1; shift
+ local grp=${1:-1}; shift
+
+ local IPs=$(seq -f 192.0.2.%g 1 $((n - 1)))
+ local peer=$(locus_dev_peer $locus)
+ local GRP=239.1.1.${grp}
+ $MZ $peer -c 1 -A 192.0.2.1 -B $GRP \
+ -t ip proto=2,p=$(igmpv3_is_in_get $GRP $IPs) -q
+ sleep 1
+
+ local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l)
+ if ((nn != n)); then
+ echo mcast_max_groups > /dev/stderr
+ false
+ fi
+}
+
+ctl4_entries_del()
+{
+ local locus=$1; shift
+ local state=$1; shift
+ local n=$1; shift
+ local grp=${1:-1}; shift
+
+ local peer=$(locus_dev_peer $locus)
+ local GRP=239.1.1.${grp}
+ $MZ $peer -c 1 -A 192.0.2.1 -B 224.0.0.2 \
+ -t ip proto=2,p=$(igmpv2_leave_get $GRP) -q
+ sleep 1
+ ! bridge mdb show dev br0 | grep -q $GRP
+}
+
+ctl6_entries_add()
+{
+ local locus=$1; shift
+ local state=$1; shift
+ local n=$1; shift
+ local grp=${1:-1}; shift
+
+ local IPs=$(printf "2001:db8:1::%x\n" $(seq 1 $((n - 1))))
+ local peer=$(locus_dev_peer $locus)
+ local SIP=fe80::1
+ local GRP=ff0e::${grp}
+ local p=$(mldv2_is_in_get $SIP $GRP $IPs)
+ $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q
+ sleep 1
+
+ local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l)
+ if ((nn != n)); then
+ echo mcast_max_groups > /dev/stderr
+ false
+ fi
+}
+
+ctl6_entries_del()
+{
+ local locus=$1; shift
+ local state=$1; shift
+ local n=$1; shift
+ local grp=${1:-1}; shift
+
+ local peer=$(locus_dev_peer $locus)
+ local SIP=fe80::1
+ local GRP=ff0e::${grp}
+ local p=$(mldv1_done_get $SIP $GRP)
+ $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q
+ sleep 1
+ ! bridge mdb show dev br0 | grep -q $GRP
+}
+
+bridge_maxgroups_errmsg_check_cfg()
+{
+ local msg=$1; shift
+ local needle=$1; shift
+
+ echo "$msg" | grep -q mcast_max_groups
+ check_err $? "Adding MDB entries failed for the wrong reason: $msg"
+}
+
+bridge_maxgroups_errmsg_check_cfg4()
+{
+ bridge_maxgroups_errmsg_check_cfg "$@"
+}
+
+bridge_maxgroups_errmsg_check_cfg6()
+{
+ bridge_maxgroups_errmsg_check_cfg "$@"
+}
+
+bridge_maxgroups_errmsg_check_ctl4()
+{
+ :
+}
+
+bridge_maxgroups_errmsg_check_ctl6()
+{
+ :
+}
+
+bridge_port_ngroups_get()
+{
+ local locus=$1; shift
+
+ bridge -j -d link show $locus |
+ jq '.[].mcast_n_groups'
+}
+
+bridge_port_maxgroups_get()
+{
+ local locus=$1; shift
+
+ bridge -j -d link show $locus |
+ jq '.[].mcast_max_groups'
+}
+
+bridge_port_maxgroups_set()
+{
+ local locus=$1; shift
+ local max=$1; shift
+
+ bridge link set dev $(locus_dev $locus) mcast_max_groups $max
+}
+
+bridge_port_vlan_ngroups_get()
+{
+ local locus=$1; shift
+
+ bridge -j -d vlan show $locus |
+ jq '.[].vlans[].mcast_n_groups'
+}
+
+bridge_port_vlan_maxgroups_get()
+{
+ local locus=$1; shift
+
+ bridge -j -d vlan show $locus |
+ jq '.[].vlans[].mcast_max_groups'
+}
+
+bridge_port_vlan_maxgroups_set()
+{
+ local locus=$1; shift
+ local max=$1; shift
+
+ bridge vlan set $locus mcast_max_groups $max
+}
+
+test_ngroups_reporting()
+{
+ local CFG=$1; shift
+ local context=$1; shift
+ local locus=$1; shift
+
+ RET=0
+
+ local n0=$(bridge_${context}_ngroups_get "$locus")
+ ${CFG}_entries_add "$locus" temp 5
+ check_err $? "Couldn't add MDB entries"
+ local n1=$(bridge_${context}_ngroups_get "$locus")
+
+ ((n1 == n0 + 5))
+ check_err $? "Number of groups was $n0, now is $n1, but $((n0 + 5)) expected"
+
+ ${CFG}_entries_del "$locus" temp 5
+ check_err $? "Couldn't delete MDB entries"
+ local n2=$(bridge_${context}_ngroups_get "$locus")
+
+ ((n2 == n0))
+ check_err $? "Number of groups was $n0, now is $n2, but should be back to $n0"
+
+ log_test "$CFG: $context: ngroups reporting"
+}
+
+test_8021d_ngroups_reporting_cfg4()
+{
+ test_ngroups_reporting cfg4 port "dev $swp1"
+}
+
+test_8021d_ngroups_reporting_ctl4()
+{
+ test_ngroups_reporting ctl4 port "dev $swp1"
+}
+
+test_8021d_ngroups_reporting_cfg6()
+{
+ test_ngroups_reporting cfg6 port "dev $swp1"
+}
+
+test_8021d_ngroups_reporting_ctl6()
+{
+ test_ngroups_reporting ctl6 port "dev $swp1"
+}
+
+test_8021q_ngroups_reporting_cfg4()
+{
+ test_ngroups_reporting cfg4 port "dev $swp1 vid 10"
+}
+
+test_8021q_ngroups_reporting_ctl4()
+{
+ test_ngroups_reporting ctl4 port "dev $swp1 vid 10"
+}
+
+test_8021q_ngroups_reporting_cfg6()
+{
+ test_ngroups_reporting cfg6 port "dev $swp1 vid 10"
+}
+
+test_8021q_ngroups_reporting_ctl6()
+{
+ test_ngroups_reporting ctl6 port "dev $swp1 vid 10"
+}
+
+test_8021qvs_ngroups_reporting_cfg4()
+{
+ test_ngroups_reporting cfg4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_ngroups_reporting_ctl4()
+{
+ test_ngroups_reporting ctl4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_ngroups_reporting_cfg6()
+{
+ test_ngroups_reporting cfg6 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_ngroups_reporting_ctl6()
+{
+ test_ngroups_reporting ctl6 port_vlan "dev $swp1 vid 10"
+}
+
+test_ngroups_cross_vlan()
+{
+ local CFG=$1; shift
+
+ local locus1="dev $swp1 vid 10"
+ local locus2="dev $swp1 vid 20"
+
+ RET=0
+
+ local n10=$(bridge_port_vlan_ngroups_get "$locus1")
+ local n20=$(bridge_port_vlan_ngroups_get "$locus2")
+ ${CFG}_entries_add "$locus1" temp 5 111
+ check_err $? "Couldn't add MDB entries to VLAN 10"
+ local n11=$(bridge_port_vlan_ngroups_get "$locus1")
+ local n21=$(bridge_port_vlan_ngroups_get "$locus2")
+
+ ((n11 == n10 + 5))
+ check_err $? "Number of groups at VLAN 10 was $n10, now is $n11, but 5 entries added on VLAN 10, $((n10 + 5)) expected"
+
+ ((n21 == n20))
+ check_err $? "Number of groups at VLAN 20 was $n20, now is $n21, but no change expected on VLAN 20"
+
+ ${CFG}_entries_add "$locus2" temp 5 112
+ check_err $? "Couldn't add MDB entries to VLAN 20"
+ local n12=$(bridge_port_vlan_ngroups_get "$locus1")
+ local n22=$(bridge_port_vlan_ngroups_get "$locus2")
+
+ ((n12 == n11))
+ check_err $? "Number of groups at VLAN 10 was $n11, now is $n12, but no change expected on VLAN 10"
+
+ ((n22 == n21 + 5))
+ check_err $? "Number of groups at VLAN 20 was $n21, now is $n22, but 5 entries added on VLAN 20, $((n21 + 5)) expected"
+
+ ${CFG}_entries_del "$locus1" temp 5 111
+ check_err $? "Couldn't delete MDB entries from VLAN 10"
+ ${CFG}_entries_del "$locus2" temp 5 112
+ check_err $? "Couldn't delete MDB entries from VLAN 20"
+ local n13=$(bridge_port_vlan_ngroups_get "$locus1")
+ local n23=$(bridge_port_vlan_ngroups_get "$locus2")
+
+ ((n13 == n10))
+ check_err $? "Number of groups at VLAN 10 was $n10, now is $n13, but should be back to $n10"
+
+ ((n23 == n20))
+ check_err $? "Number of groups at VLAN 20 was $n20, now is $n23, but should be back to $n20"
+
+ log_test "$CFG: port_vlan: isolation of port and per-VLAN ngroups"
+}
+
+test_8021qvs_ngroups_cross_vlan_cfg4()
+{
+ test_ngroups_cross_vlan cfg4
+}
+
+test_8021qvs_ngroups_cross_vlan_ctl4()
+{
+ test_ngroups_cross_vlan ctl4
+}
+
+test_8021qvs_ngroups_cross_vlan_cfg6()
+{
+ test_ngroups_cross_vlan cfg6
+}
+
+test_8021qvs_ngroups_cross_vlan_ctl6()
+{
+ test_ngroups_cross_vlan ctl6
+}
+
+test_maxgroups_zero()
+{
+ local CFG=$1; shift
+ local context=$1; shift
+ local locus=$1; shift
+
+ RET=0
+ local max
+
+ max=$(bridge_${context}_maxgroups_get "$locus")
+ ((max == 0))
+ check_err $? "Max groups on $locus should be 0, but $max reported"
+
+ bridge_${context}_maxgroups_set "$locus" 100
+ check_err $? "Failed to set max to 100"
+ max=$(bridge_${context}_maxgroups_get "$locus")
+ ((max == 100))
+ check_err $? "Max groups expected to be 100, but $max reported"
+
+ bridge_${context}_maxgroups_set "$locus" 0
+ check_err $? "Couldn't set maximum to 0"
+
+ # Test that setting 0 explicitly still serves as infinity.
+ ${CFG}_entries_add "$locus" temp 5
+ check_err $? "Adding 5 MDB entries failed but should have passed"
+ ${CFG}_entries_del "$locus" temp 5
+ check_err $? "Couldn't delete MDB entries"
+
+ log_test "$CFG: $context maxgroups: reporting and treatment of 0"
+}
+
+test_8021d_maxgroups_zero_cfg4()
+{
+ test_maxgroups_zero cfg4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_zero_ctl4()
+{
+ test_maxgroups_zero ctl4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_zero_cfg6()
+{
+ test_maxgroups_zero cfg6 port "dev $swp1"
+}
+
+test_8021d_maxgroups_zero_ctl6()
+{
+ test_maxgroups_zero ctl6 port "dev $swp1"
+}
+
+test_8021q_maxgroups_zero_cfg4()
+{
+ test_maxgroups_zero cfg4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_zero_ctl4()
+{
+ test_maxgroups_zero ctl4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_zero_cfg6()
+{
+ test_maxgroups_zero cfg6 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_zero_ctl6()
+{
+ test_maxgroups_zero ctl6 port "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_zero_cfg4()
+{
+ test_maxgroups_zero cfg4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_zero_ctl4()
+{
+ test_maxgroups_zero ctl4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_zero_cfg6()
+{
+ test_maxgroups_zero cfg6 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_zero_ctl6()
+{
+ test_maxgroups_zero ctl6 port_vlan "dev $swp1 vid 10"
+}
+
+test_maxgroups_zero_cross_vlan()
+{
+ local CFG=$1; shift
+
+ local locus0="dev $swp1"
+ local locus1="dev $swp1 vid 10"
+ local locus2="dev $swp1 vid 20"
+ local max
+
+ RET=0
+
+ bridge_port_vlan_maxgroups_set "$locus1" 100
+ check_err $? "$locus1: Failed to set max to 100"
+
+ max=$(bridge_port_maxgroups_get "$locus0")
+ ((max == 0))
+ check_err $? "$locus0: Max groups expected to be 0, but $max reported"
+
+ max=$(bridge_port_vlan_maxgroups_get "$locus2")
+ ((max == 0))
+ check_err $? "$locus2: Max groups expected to be 0, but $max reported"
+
+ bridge_port_vlan_maxgroups_set "$locus2" 100
+ check_err $? "$locus2: Failed to set max to 100"
+
+ max=$(bridge_port_maxgroups_get "$locus0")
+ ((max == 0))
+ check_err $? "$locus0: Max groups expected to be 0, but $max reported"
+
+ max=$(bridge_port_vlan_maxgroups_get "$locus2")
+ ((max == 100))
+ check_err $? "$locus2: Max groups expected to be 100, but $max reported"
+
+ bridge_port_maxgroups_set "$locus0" 100
+ check_err $? "$locus0: Failed to set max to 100"
+
+ max=$(bridge_port_maxgroups_get "$locus0")
+ ((max == 100))
+ check_err $? "$locus0: Max groups expected to be 100, but $max reported"
+
+ max=$(bridge_port_vlan_maxgroups_get "$locus2")
+ ((max == 100))
+ check_err $? "$locus2: Max groups expected to be 100, but $max reported"
+
+ bridge_port_vlan_maxgroups_set "$locus1" 0
+ check_err $? "$locus1: Failed to set max to 0"
+
+ max=$(bridge_port_maxgroups_get "$locus0")
+ ((max == 100))
+ check_err $? "$locus0: Max groups expected to be 100, but $max reported"
+
+ max=$(bridge_port_vlan_maxgroups_get "$locus2")
+ ((max == 100))
+ check_err $? "$locus2: Max groups expected to be 100, but $max reported"
+
+ bridge_port_vlan_maxgroups_set "$locus2" 0
+ check_err $? "$locus2: Failed to set max to 0"
+
+ max=$(bridge_port_maxgroups_get "$locus0")
+ ((max == 100))
+ check_err $? "$locus0: Max groups expected to be 100, but $max reported"
+
+ max=$(bridge_port_vlan_maxgroups_get "$locus2")
+ ((max == 0))
+ check_err $? "$locus2: Max groups expected to be 0 but $max reported"
+
+ bridge_port_maxgroups_set "$locus0" 0
+ check_err $? "$locus0: Failed to set max to 0"
+
+ max=$(bridge_port_maxgroups_get "$locus0")
+ ((max == 0))
+ check_err $? "$locus0: Max groups expected to be 0, but $max reported"
+
+ max=$(bridge_port_vlan_maxgroups_get "$locus2")
+ ((max == 0))
+ check_err $? "$locus2: Max groups expected to be 0, but $max reported"
+
+ log_test "$CFG: port_vlan maxgroups: isolation of port and per-VLAN maximums"
+}
+
+test_8021qvs_maxgroups_zero_cross_vlan_cfg4()
+{
+ test_maxgroups_zero_cross_vlan cfg4
+}
+
+test_8021qvs_maxgroups_zero_cross_vlan_ctl4()
+{
+ test_maxgroups_zero_cross_vlan ctl4
+}
+
+test_8021qvs_maxgroups_zero_cross_vlan_cfg6()
+{
+ test_maxgroups_zero_cross_vlan cfg6
+}
+
+test_8021qvs_maxgroups_zero_cross_vlan_ctl6()
+{
+ test_maxgroups_zero_cross_vlan ctl6
+}
+
+test_maxgroups_too_low()
+{
+ local CFG=$1; shift
+ local context=$1; shift
+ local locus=$1; shift
+
+ RET=0
+
+ local n=$(bridge_${context}_ngroups_get "$locus")
+ local msg
+
+ ${CFG}_entries_add "$locus" temp 5 111
+ check_err $? "$locus: Couldn't add MDB entries"
+
+ bridge_${context}_maxgroups_set "$locus" $((n+2))
+ check_err $? "$locus: Setting maxgroups to $((n+2)) failed"
+
+ msg=$(${CFG}_entries_add "$locus" temp 2 112 2>&1)
+ check_fail $? "$locus: Adding more entries passed when max<n"
+ bridge_maxgroups_errmsg_check_cfg "$msg"
+
+ ${CFG}_entries_del "$locus" temp 5 111
+ check_err $? "$locus: Couldn't delete MDB entries"
+
+ ${CFG}_entries_add "$locus" temp 2 112
+ check_err $? "$locus: Adding more entries failed"
+
+ ${CFG}_entries_del "$locus" temp 2 112
+ check_err $? "$locus: Deleting more entries failed"
+
+ bridge_${context}_maxgroups_set "$locus" 0
+ check_err $? "$locus: Couldn't set maximum to 0"
+
+ log_test "$CFG: $context maxgroups: configure below ngroups"
+}
+
+test_8021d_maxgroups_too_low_cfg4()
+{
+ test_maxgroups_too_low cfg4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_low_ctl4()
+{
+ test_maxgroups_too_low ctl4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_low_cfg6()
+{
+ test_maxgroups_too_low cfg6 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_low_ctl6()
+{
+ test_maxgroups_too_low ctl6 port "dev $swp1"
+}
+
+test_8021q_maxgroups_too_low_cfg4()
+{
+ test_maxgroups_too_low cfg4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_low_ctl4()
+{
+ test_maxgroups_too_low ctl4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_low_cfg6()
+{
+ test_maxgroups_too_low cfg6 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_low_ctl6()
+{
+ test_maxgroups_too_low ctl6 port "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_low_cfg4()
+{
+ test_maxgroups_too_low cfg4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_low_ctl4()
+{
+ test_maxgroups_too_low ctl4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_low_cfg6()
+{
+ test_maxgroups_too_low cfg6 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_low_ctl6()
+{
+ test_maxgroups_too_low ctl6 port_vlan "dev $swp1 vid 10"
+}
+
+test_maxgroups_too_many_entries()
+{
+ local CFG=$1; shift
+ local context=$1; shift
+ local locus=$1; shift
+
+ RET=0
+
+ local n=$(bridge_${context}_ngroups_get "$locus")
+ local msg
+
+ # Configure a low maximum
+ bridge_${context}_maxgroups_set "$locus" $((n+1))
+ check_err $? "$locus: Couldn't set maximum"
+
+ # Try to add more entries than the configured maximum
+ msg=$(${CFG}_entries_add "$locus" temp 5 2>&1)
+ check_fail $? "Adding 5 MDB entries passed, but should have failed"
+ bridge_maxgroups_errmsg_check_${CFG} "$msg"
+
+ # When adding entries through the control path, as many as possible
+ # get created. That's consistent with the mcast_hash_max behavior.
+ # So there, drop the entries explicitly.
+ if [[ ${CFG%[46]} == ctl ]]; then
+ ${CFG}_entries_del "$locus" temp 17 2>&1
+ fi
+
+ local n2=$(bridge_${context}_ngroups_get "$locus")
+ ((n2 == n))
+ check_err $? "Number of groups was $n, but after a failed attempt to add MDB entries it changed to $n2"
+
+ bridge_${context}_maxgroups_set "$locus" 0
+ check_err $? "$locus: Couldn't set maximum to 0"
+
+ log_test "$CFG: $context maxgroups: add too many MDB entries"
+}
+
+test_8021d_maxgroups_too_many_entries_cfg4()
+{
+ test_maxgroups_too_many_entries cfg4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_many_entries_ctl4()
+{
+ test_maxgroups_too_many_entries ctl4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_many_entries_cfg6()
+{
+ test_maxgroups_too_many_entries cfg6 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_many_entries_ctl6()
+{
+ test_maxgroups_too_many_entries ctl6 port "dev $swp1"
+}
+
+test_8021q_maxgroups_too_many_entries_cfg4()
+{
+ test_maxgroups_too_many_entries cfg4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_many_entries_ctl4()
+{
+ test_maxgroups_too_many_entries ctl4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_many_entries_cfg6()
+{
+ test_maxgroups_too_many_entries cfg6 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_many_entries_ctl6()
+{
+ test_maxgroups_too_many_entries ctl6 port "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_many_entries_cfg4()
+{
+ test_maxgroups_too_many_entries cfg4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_many_entries_ctl4()
+{
+ test_maxgroups_too_many_entries ctl4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_many_entries_cfg6()
+{
+ test_maxgroups_too_many_entries cfg6 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_many_entries_ctl6()
+{
+ test_maxgroups_too_many_entries ctl6 port_vlan "dev $swp1 vid 10"
+}
+
+test_maxgroups_too_many_cross_vlan()
+{
+ local CFG=$1; shift
+
+ RET=0
+
+ local locus0="dev $swp1"
+ local locus1="dev $swp1 vid 10"
+ local locus2="dev $swp1 vid 20"
+ local n1=$(bridge_port_vlan_ngroups_get "$locus1")
+ local n2=$(bridge_port_vlan_ngroups_get "$locus2")
+ local msg
+
+ if ((n1 > n2)); then
+ local tmp=$n1
+ n1=$n2
+ n2=$tmp
+
+ tmp="$locus1"
+ locus1="$locus2"
+ locus2="$tmp"
+ fi
+
+ # Now 0 <= n1 <= n2.
+ ${CFG}_entries_add "$locus2" temp 5 112
+ check_err $? "Couldn't add 5 entries"
+
+ n2=$(bridge_port_vlan_ngroups_get "$locus2")
+ # Now 0 <= n1 < n2-1.
+
+ # Setting locus1'maxgroups to n2-1 should pass. The number is
+ # smaller than both the absolute number of MDB entries, and in
+ # particular than number of locus2's number of entries, but it is
+ # large enough to cover locus1's entries. Thus we check that
+ # individual VLAN's ngroups are independent.
+ bridge_port_vlan_maxgroups_set "$locus1" $((n2-1))
+ check_err $? "Setting ${locus1}'s maxgroups to $((n2-1)) failed"
+
+ msg=$(${CFG}_entries_add "$locus1" temp $n2 111 2>&1)
+ check_fail $? "$locus1: Adding $n2 MDB entries passed, but should have failed"
+ bridge_maxgroups_errmsg_check_${CFG} "$msg"
+
+ bridge_port_maxgroups_set "$locus0" $((n1 + n2 + 2))
+ check_err $? "$locus0: Couldn't set maximum"
+
+ msg=$(${CFG}_entries_add "$locus1" temp 5 111 2>&1)
+ check_fail $? "$locus1: Adding 5 MDB entries passed, but should have failed"
+ bridge_maxgroups_errmsg_check_${CFG} "$msg"
+
+ # IGMP/MLD packets can cause several entries to be added, before
+ # the maximum is hit and the rest is then bounced. Remove what was
+ # committed, if anything.
+ ${CFG}_entries_del "$locus1" temp 5 111 2>/dev/null
+
+ ${CFG}_entries_add "$locus1" temp 2 111
+ check_err $? "$locus1: Adding 2 MDB entries failed, but should have passed"
+
+ ${CFG}_entries_del "$locus1" temp 2 111
+ check_err $? "Couldn't delete MDB entries"
+
+ ${CFG}_entries_del "$locus2" temp 5 112
+ check_err $? "Couldn't delete MDB entries"
+
+ bridge_port_vlan_maxgroups_set "$locus1" 0
+ check_err $? "$locus1: Couldn't set maximum to 0"
+
+ bridge_port_maxgroups_set "$locus0" 0
+ check_err $? "$locus0: Couldn't set maximum to 0"
+
+ log_test "$CFG: port_vlan maxgroups: isolation of port and per-VLAN ngroups"
+}
+
+test_8021qvs_maxgroups_too_many_cross_vlan_cfg4()
+{
+ test_maxgroups_too_many_cross_vlan cfg4
+}
+
+test_8021qvs_maxgroups_too_many_cross_vlan_ctl4()
+{
+ test_maxgroups_too_many_cross_vlan ctl4
+}
+
+test_8021qvs_maxgroups_too_many_cross_vlan_cfg6()
+{
+ test_maxgroups_too_many_cross_vlan cfg6
+}
+
+test_8021qvs_maxgroups_too_many_cross_vlan_ctl6()
+{
+ test_maxgroups_too_many_cross_vlan ctl6
+}
+
+test_vlan_attributes()
+{
+ local locus=$1; shift
+ local expect=$1; shift
+
+ RET=0
+
+ local max=$(bridge_port_vlan_maxgroups_get "$locus")
+ local n=$(bridge_port_vlan_ngroups_get "$locus")
+
+ eval "[[ $max $expect ]]"
+ check_err $? "$locus: maxgroups attribute expected to be $expect, but was $max"
+
+ eval "[[ $n $expect ]]"
+ check_err $? "$locus: ngroups attribute expected to be $expect, but was $n"
+
+ log_test "port_vlan: presence of ngroups and maxgroups attributes"
+}
+
+test_8021q_vlan_attributes()
+{
+ test_vlan_attributes "dev $swp1 vid 10" "== null"
+}
+
+test_8021qvs_vlan_attributes()
+{
+ test_vlan_attributes "dev $swp1 vid 10" "-ge 0"
+}
+
+test_toggle_vlan_snooping()
+{
+ local mode=$1; shift
+
+ RET=0
+
+ local CFG=cfg4
+ local context=port_vlan
+ local locus="dev $swp1 vid 10"
+
+ ${CFG}_entries_add "$locus" $mode 5
+ check_err $? "Couldn't add MDB entries"
+
+ bridge_${context}_maxgroups_set "$locus" 100
+ check_err $? "Failed to set max to 100"
+
+ ip link set dev br0 type bridge mcast_vlan_snooping 0
+ sleep 1
+ ip link set dev br0 type bridge mcast_vlan_snooping 1
+
+ local n=$(bridge_${context}_ngroups_get "$locus")
+ local nn=$(bridge mdb show dev br0 | grep $swp1 | wc -l)
+ ((nn == n))
+ check_err $? "mcast_n_groups expected to be $nn, but $n reported"
+
+ local max=$(bridge_${context}_maxgroups_get "$locus")
+ ((max == 100))
+ check_err $? "Max groups expected to be 100 but $max reported"
+
+ bridge_${context}_maxgroups_set "$locus" 0
+ check_err $? "Failed to set max to 0"
+
+ log_test "$CFG: $context: $mode: mcast_vlan_snooping toggle"
+}
+
+test_toggle_vlan_snooping_temp()
+{
+ test_toggle_vlan_snooping temp
+}
+
+test_toggle_vlan_snooping_permanent()
+{
+ test_toggle_vlan_snooping permanent
+}
+
+# ngroup test suites
+
+test_8021d_ngroups_cfg4()
+{
+ test_8021d_ngroups_reporting_cfg4
+}
+
+test_8021d_ngroups_ctl4()
+{
+ test_8021d_ngroups_reporting_ctl4
+}
+
+test_8021d_ngroups_cfg6()
+{
+ test_8021d_ngroups_reporting_cfg6
+}
+
+test_8021d_ngroups_ctl6()
+{
+ test_8021d_ngroups_reporting_ctl6
+}
+
+test_8021q_ngroups_cfg4()
+{
+ test_8021q_ngroups_reporting_cfg4
+}
+
+test_8021q_ngroups_ctl4()
+{
+ test_8021q_ngroups_reporting_ctl4
+}
+
+test_8021q_ngroups_cfg6()
+{
+ test_8021q_ngroups_reporting_cfg6
+}
+
+test_8021q_ngroups_ctl6()
+{
+ test_8021q_ngroups_reporting_ctl6
+}
+
+test_8021qvs_ngroups_cfg4()
+{
+ test_8021qvs_ngroups_reporting_cfg4
+ test_8021qvs_ngroups_cross_vlan_cfg4
+}
+
+test_8021qvs_ngroups_ctl4()
+{
+ test_8021qvs_ngroups_reporting_ctl4
+ test_8021qvs_ngroups_cross_vlan_ctl4
+}
+
+test_8021qvs_ngroups_cfg6()
+{
+ test_8021qvs_ngroups_reporting_cfg6
+ test_8021qvs_ngroups_cross_vlan_cfg6
+}
+
+test_8021qvs_ngroups_ctl6()
+{
+ test_8021qvs_ngroups_reporting_ctl6
+ test_8021qvs_ngroups_cross_vlan_ctl6
+}
+
+# maxgroups test suites
+
+test_8021d_maxgroups_cfg4()
+{
+ test_8021d_maxgroups_zero_cfg4
+ test_8021d_maxgroups_too_low_cfg4
+ test_8021d_maxgroups_too_many_entries_cfg4
+}
+
+test_8021d_maxgroups_ctl4()
+{
+ test_8021d_maxgroups_zero_ctl4
+ test_8021d_maxgroups_too_low_ctl4
+ test_8021d_maxgroups_too_many_entries_ctl4
+}
+
+test_8021d_maxgroups_cfg6()
+{
+ test_8021d_maxgroups_zero_cfg6
+ test_8021d_maxgroups_too_low_cfg6
+ test_8021d_maxgroups_too_many_entries_cfg6
+}
+
+test_8021d_maxgroups_ctl6()
+{
+ test_8021d_maxgroups_zero_ctl6
+ test_8021d_maxgroups_too_low_ctl6
+ test_8021d_maxgroups_too_many_entries_ctl6
+}
+
+test_8021q_maxgroups_cfg4()
+{
+ test_8021q_maxgroups_zero_cfg4
+ test_8021q_maxgroups_too_low_cfg4
+ test_8021q_maxgroups_too_many_entries_cfg4
+}
+
+test_8021q_maxgroups_ctl4()
+{
+ test_8021q_maxgroups_zero_ctl4
+ test_8021q_maxgroups_too_low_ctl4
+ test_8021q_maxgroups_too_many_entries_ctl4
+}
+
+test_8021q_maxgroups_cfg6()
+{
+ test_8021q_maxgroups_zero_cfg6
+ test_8021q_maxgroups_too_low_cfg6
+ test_8021q_maxgroups_too_many_entries_cfg6
+}
+
+test_8021q_maxgroups_ctl6()
+{
+ test_8021q_maxgroups_zero_ctl6
+ test_8021q_maxgroups_too_low_ctl6
+ test_8021q_maxgroups_too_many_entries_ctl6
+}
+
+test_8021qvs_maxgroups_cfg4()
+{
+ test_8021qvs_maxgroups_zero_cfg4
+ test_8021qvs_maxgroups_zero_cross_vlan_cfg4
+ test_8021qvs_maxgroups_too_low_cfg4
+ test_8021qvs_maxgroups_too_many_entries_cfg4
+ test_8021qvs_maxgroups_too_many_cross_vlan_cfg4
+}
+
+test_8021qvs_maxgroups_ctl4()
+{
+ test_8021qvs_maxgroups_zero_ctl4
+ test_8021qvs_maxgroups_zero_cross_vlan_ctl4
+ test_8021qvs_maxgroups_too_low_ctl4
+ test_8021qvs_maxgroups_too_many_entries_ctl4
+ test_8021qvs_maxgroups_too_many_cross_vlan_ctl4
+}
+
+test_8021qvs_maxgroups_cfg6()
+{
+ test_8021qvs_maxgroups_zero_cfg6
+ test_8021qvs_maxgroups_zero_cross_vlan_cfg6
+ test_8021qvs_maxgroups_too_low_cfg6
+ test_8021qvs_maxgroups_too_many_entries_cfg6
+ test_8021qvs_maxgroups_too_many_cross_vlan_cfg6
+}
+
+test_8021qvs_maxgroups_ctl6()
+{
+ test_8021qvs_maxgroups_zero_ctl6
+ test_8021qvs_maxgroups_zero_cross_vlan_ctl6
+ test_8021qvs_maxgroups_too_low_ctl6
+ test_8021qvs_maxgroups_too_many_entries_ctl6
+ test_8021qvs_maxgroups_too_many_cross_vlan_ctl6
+}
+
+# other test suites
+
+test_8021qvs_toggle_vlan_snooping()
+{
+ test_toggle_vlan_snooping_temp
+ test_toggle_vlan_snooping_permanent
+}
+
+# test groups
+
+test_8021d()
+{
+ # Tests for vlan_filtering 0 mcast_vlan_snooping 0.
+
+ switch_create_8021d
+ setup_wait
+
+ test_8021d_ngroups_cfg4
+ test_8021d_ngroups_ctl4
+ test_8021d_ngroups_cfg6
+ test_8021d_ngroups_ctl6
+ test_8021d_maxgroups_cfg4
+ test_8021d_maxgroups_ctl4
+ test_8021d_maxgroups_cfg6
+ test_8021d_maxgroups_ctl6
+
+ switch_destroy
+}
+
+test_8021q()
+{
+ # Tests for vlan_filtering 1 mcast_vlan_snooping 0.
+
+ switch_create_8021q
+ setup_wait
+
+ test_8021q_vlan_attributes
+ test_8021q_ngroups_cfg4
+ test_8021q_ngroups_ctl4
+ test_8021q_ngroups_cfg6
+ test_8021q_ngroups_ctl6
+ test_8021q_maxgroups_cfg4
+ test_8021q_maxgroups_ctl4
+ test_8021q_maxgroups_cfg6
+ test_8021q_maxgroups_ctl6
+
+ switch_destroy
+}
+
+test_8021qvs()
+{
+ # Tests for vlan_filtering 1 mcast_vlan_snooping 1.
+
+ switch_create_8021qvs
+ setup_wait
+
+ test_8021qvs_vlan_attributes
+ test_8021qvs_ngroups_cfg4
+ test_8021qvs_ngroups_ctl4
+ test_8021qvs_ngroups_cfg6
+ test_8021qvs_ngroups_ctl6
+ test_8021qvs_maxgroups_cfg4
+ test_8021qvs_maxgroups_ctl4
+ test_8021qvs_maxgroups_cfg6
+ test_8021qvs_maxgroups_ctl6
+ test_8021qvs_toggle_vlan_snooping
+
+ switch_destroy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 29cd4705c752..cc7c4f89a097 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1671,3 +1671,219 @@ hw_stats_monitor_test()
log_test "${type}_stats notifications"
}
+
+ipv4_to_bytes()
+{
+ local IP=$1; shift
+
+ printf '%02x:' ${IP//./ } |
+ sed 's/:$//'
+}
+
+# Convert a given IPv6 address, `IP' such that the :: token, if present, is
+# expanded, and each 16-bit group is padded with zeroes to be 4 hexadecimal
+# digits. An optional `BYTESEP' parameter can be given to further separate
+# individual bytes of each 16-bit group.
+expand_ipv6()
+{
+ local IP=$1; shift
+ local bytesep=$1; shift
+
+ local cvt_ip=${IP/::/_}
+ local colons=${cvt_ip//[^:]/}
+ local allcol=:::::::
+ # IP where :: -> the appropriate number of colons:
+ local allcol_ip=${cvt_ip/_/${allcol:${#colons}}}
+
+ echo $allcol_ip | tr : '\n' |
+ sed s/^/0000/ |
+ sed 's/.*\(..\)\(..\)/\1'"$bytesep"'\2/' |
+ tr '\n' : |
+ sed 's/:$//'
+}
+
+ipv6_to_bytes()
+{
+ local IP=$1; shift
+
+ expand_ipv6 "$IP" :
+}
+
+u16_to_bytes()
+{
+ local u16=$1; shift
+
+ printf "%04x" $u16 | sed 's/^/000/;s/^.*\(..\)\(..\)$/\1:\2/'
+}
+
+# Given a mausezahn-formatted payload (colon-separated bytes given as %02x),
+# possibly with a keyword CHECKSUM stashed where a 16-bit checksum should be,
+# calculate checksum as per RFC 1071, assuming the CHECKSUM field (if any)
+# stands for 00:00.
+payload_template_calc_checksum()
+{
+ local payload=$1; shift
+
+ (
+ # Set input radix.
+ echo "16i"
+ # Push zero for the initial checksum.
+ echo 0
+
+ # Pad the payload with a terminating 00: in case we get an odd
+ # number of bytes.
+ echo "${payload%:}:00:" |
+ sed 's/CHECKSUM/00:00/g' |
+ tr '[:lower:]' '[:upper:]' |
+ # Add the word to the checksum.
+ sed 's/\(..\):\(..\):/\1\2+\n/g' |
+ # Strip the extra odd byte we pushed if left unconverted.
+ sed 's/\(..\):$//'
+
+ echo "10000 ~ +" # Calculate and add carry.
+ echo "FFFF r - p" # Bit-flip and print.
+ ) |
+ dc |
+ tr '[:upper:]' '[:lower:]'
+}
+
+payload_template_expand_checksum()
+{
+ local payload=$1; shift
+ local checksum=$1; shift
+
+ local ckbytes=$(u16_to_bytes $checksum)
+
+ echo "$payload" | sed "s/CHECKSUM/$ckbytes/g"
+}
+
+payload_template_nbytes()
+{
+ local payload=$1; shift
+
+ payload_template_expand_checksum "${payload%:}" 0 |
+ sed 's/:/\n/g' | wc -l
+}
+
+igmpv3_is_in_get()
+{
+ local GRP=$1; shift
+ local sources=("$@")
+
+ local igmpv3
+ local nsources=$(u16_to_bytes ${#sources[@]})
+
+ # IS_IN ( $sources )
+ igmpv3=$(:
+ )"22:"$( : Type - Membership Report
+ )"00:"$( : Reserved
+ )"CHECKSUM:"$( : Checksum
+ )"00:00:"$( : Reserved
+ )"00:01:"$( : Number of Group Records
+ )"01:"$( : Record Type - IS_IN
+ )"00:"$( : Aux Data Len
+ )"${nsources}:"$( : Number of Sources
+ )"$(ipv4_to_bytes $GRP):"$( : Multicast Address
+ )"$(for src in "${sources[@]}"; do
+ ipv4_to_bytes $src
+ echo -n :
+ done)"$( : Source Addresses
+ )
+ local checksum=$(payload_template_calc_checksum "$igmpv3")
+
+ payload_template_expand_checksum "$igmpv3" $checksum
+}
+
+igmpv2_leave_get()
+{
+ local GRP=$1; shift
+
+ local payload=$(:
+ )"17:"$( : Type - Leave Group
+ )"00:"$( : Max Resp Time - not meaningful
+ )"CHECKSUM:"$( : Checksum
+ )"$(ipv4_to_bytes $GRP)"$( : Group Address
+ )
+ local checksum=$(payload_template_calc_checksum "$payload")
+
+ payload_template_expand_checksum "$payload" $checksum
+}
+
+mldv2_is_in_get()
+{
+ local SIP=$1; shift
+ local GRP=$1; shift
+ local sources=("$@")
+
+ local hbh
+ local icmpv6
+ local nsources=$(u16_to_bytes ${#sources[@]})
+
+ hbh=$(:
+ )"3a:"$( : Next Header - ICMPv6
+ )"00:"$( : Hdr Ext Len
+ )"00:00:00:00:00:00:"$( : Options and Padding
+ )
+
+ icmpv6=$(:
+ )"8f:"$( : Type - MLDv2 Report
+ )"00:"$( : Code
+ )"CHECKSUM:"$( : Checksum
+ )"00:00:"$( : Reserved
+ )"00:01:"$( : Number of Group Records
+ )"01:"$( : Record Type - IS_IN
+ )"00:"$( : Aux Data Len
+ )"${nsources}:"$( : Number of Sources
+ )"$(ipv6_to_bytes $GRP):"$( : Multicast address
+ )"$(for src in "${sources[@]}"; do
+ ipv6_to_bytes $src
+ echo -n :
+ done)"$( : Source Addresses
+ )
+
+ local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))
+ local sudohdr=$(:
+ )"$(ipv6_to_bytes $SIP):"$( : SIP
+ )"$(ipv6_to_bytes $GRP):"$( : DIP is multicast address
+ )"${len}:"$( : Upper-layer length
+ )"00:3a:"$( : Zero and next-header
+ )
+ local checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})
+
+ payload_template_expand_checksum "$hbh$icmpv6" $checksum
+}
+
+mldv1_done_get()
+{
+ local SIP=$1; shift
+ local GRP=$1; shift
+
+ local hbh
+ local icmpv6
+
+ hbh=$(:
+ )"3a:"$( : Next Header - ICMPv6
+ )"00:"$( : Hdr Ext Len
+ )"00:00:00:00:00:00:"$( : Options and Padding
+ )
+
+ icmpv6=$(:
+ )"84:"$( : Type - MLDv1 Done
+ )"00:"$( : Code
+ )"CHECKSUM:"$( : Checksum
+ )"00:00:"$( : Max Resp Delay - not meaningful
+ )"00:00:"$( : Reserved
+ )"$(ipv6_to_bytes $GRP):"$( : Multicast address
+ )
+
+ local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))
+ local sudohdr=$(:
+ )"$(ipv6_to_bytes $SIP):"$( : SIP
+ )"$(ipv6_to_bytes $GRP):"$( : DIP is multicast address
+ )"${len}:"$( : Upper-layer length
+ )"00:3a:"$( : Zero and next-header
+ )
+ local checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})
+
+ payload_template_expand_checksum "$hbh$icmpv6" $checksum
+}