aboutsummaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds2019-07-15 20:38:15 -0700
committerLinus Torvalds2019-07-15 20:38:15 -0700
commit2a3c389a0fde49b241430df806a34276568cfb29 (patch)
tree9cf35829317e8cc2aaffc4341fb824dad63fce02 /drivers
parent8de262531f5fbb7458463224a7587429800c24bf (diff)
parent0b043644c0ca601cb19943a81aa1f1455dbe9461 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: "A smaller cycle this time. Notably we see another new driver, 'Soft iWarp', and the deletion of an ancient unused driver for nes. - Revise and simplify the signature offload RDMA MR APIs - More progress on hoisting object allocation boiler plate code out of the drivers - Driver bug fixes and revisions for hns, hfi1, efa, cxgb4, qib, i40iw - Tree wide cleanups: struct_size, put_user_page, xarray, rst doc conversion - Removal of obsolete ib_ucm chardev and nes driver - netlink based discovery of chardevs and autoloading of the modules providing them - Move more of the rdamvt/hfi1 uapi to include/uapi/rdma - New driver 'siw' for software based iWarp running on top of netdev, much like rxe's software RoCE. - mlx5 feature to report events in their raw devx format to userspace - Expose per-object counters through rdma tool - Adaptive interrupt moderation for RDMA (DIM), sharing the DIM core from netdev" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (194 commits) RMDA/siw: Require a 64 bit arch RDMA/siw: Mark expected switch fall-throughs RDMA/core: Fix -Wunused-const-variable warnings rdma/siw: Remove set but not used variable 's' rdma/siw: Add missing dependencies on LIBCRC32C and DMA_VIRT_OPS RDMA/siw: Add missing rtnl_lock around access to ifa rdma/siw: Use proper enumerated type in map_cqe_status RDMA/siw: Remove unnecessary kthread create/destroy printouts IB/rdmavt: Fix variable shadowing issue in rvt_create_cq RDMA/core: Fix race when resolving IP address RDMA/core: Make rdma_counter.h compile stand alone IB/core: Work on the caller socket net namespace in nldev_newlink() RDMA/rxe: Fill in wc byte_len with IB_WC_RECV_RDMA_WITH_IMM RDMA/mlx5: Set RDMA DIM to be enabled by default RDMA/nldev: Added configuration of RDMA dynamic interrupt moderation to netlink RDMA/core: Provide RDMA DIM support for ULPs linux/dim: Implement RDMA adaptive moderation (DIM) IB/mlx5: Report correctly tag matching rendezvous capability docs: infiniband: add it to the driver-api bookset IB/mlx5: Implement VHCA tunnel mechanism in DEVX ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/infiniband/Kconfig14
-rw-r--r--drivers/infiniband/core/Makefile5
-rw-r--r--drivers/infiniband/core/addr.c2
-rw-r--r--drivers/infiniband/core/core_priv.h10
-rw-r--r--drivers/infiniband/core/counters.c634
-rw-r--r--drivers/infiniband/core/cq.c95
-rw-r--r--drivers/infiniband/core/device.c150
-rw-r--r--drivers/infiniband/core/mr_pool.c8
-rw-r--r--drivers/infiniband/core/nldev.c800
-rw-r--r--drivers/infiniband/core/restrack.c49
-rw-r--r--drivers/infiniband/core/restrack.h3
-rw-r--r--drivers/infiniband/core/rw.c201
-rw-r--r--drivers/infiniband/core/sysfs.c16
-rw-r--r--drivers/infiniband/core/ucm.c1350
-rw-r--r--drivers/infiniband/core/ucma.c114
-rw-r--r--drivers/infiniband/core/umem.c13
-rw-r--r--drivers/infiniband/core/umem_odp.c106
-rw-r--r--drivers/infiniband/core/user_mad.c53
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c28
-rw-r--r--drivers/infiniband/core/uverbs_main.c40
-rw-r--r--drivers/infiniband/core/uverbs_std_types_cq.c19
-rw-r--r--drivers/infiniband/core/uverbs_std_types_mr.c1
-rw-r--r--drivers/infiniband/core/uverbs_uapi.c4
-rw-r--r--drivers/infiniband/core/verbs.c165
-rw-r--r--drivers/infiniband/hw/Makefile1
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c66
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.h9
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c8
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c33
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.h3
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c2
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c160
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c21
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c55
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c9
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h11
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c8
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c9
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c95
-rw-r--r--drivers/infiniband/hw/cxgb4/resource.c16
-rw-r--r--drivers/infiniband/hw/efa/efa.h9
-rw-r--r--drivers/infiniband/hw/efa/efa_com.c118
-rw-r--r--drivers/infiniband/hw/efa/efa_com.h1
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.c8
-rw-r--r--drivers/infiniband/hw/efa/efa_main.c10
-rw-r--r--drivers/infiniband/hw/efa/efa_verbs.c248
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile1
-rw-r--r--drivers/infiniband/hw/hfi1/aspm.c270
-rw-r--r--drivers/infiniband/hw/hfi1/aspm.h262
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c5
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c9
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c6
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c3
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c8
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c29
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.c7
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ibhdrs.h2
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c3
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c36
-rw-r--r--drivers/infiniband/hw/hfi1/user_pages.c11
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c6
-rw-r--r--drivers/infiniband/hw/hns/Kconfig15
-rw-r--r--drivers/infiniband/hw/hns/Makefile15
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_alloc.c101
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.c6
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cq.c81
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_db.c12
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h108
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.c504
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.h16
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c79
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c280
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h23
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c31
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c166
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_pd.c4
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c220
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c40
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c4
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c56
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c43
-rw-r--r--drivers/infiniband/hw/mlx4/main.c21
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h9
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c16
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c11
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c9
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c56
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c1053
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c60
-rw-r--r--drivers/infiniband/hw/mlx5/main.c157
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c20
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h47
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c554
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c23
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c312
-rw-r--r--drivers/infiniband/hw/mthca/mthca_allocator.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c6
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c52
-rw-r--r--drivers/infiniband/hw/nes/Kconfig16
-rw-r--r--drivers/infiniband/hw/nes/Makefile4
-rw-r--r--drivers/infiniband/hw/nes/nes.c1211
-rw-r--r--drivers/infiniband/hw/nes/nes.h574
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c3992
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.h470
-rw-r--r--drivers/infiniband/hw/nes/nes_context.h193
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c3887
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.h1380
-rw-r--r--drivers/infiniband/hw/nes/nes_mgt.c1155
-rw-r--r--drivers/infiniband/hw/nes/nes_mgt.h97
-rw-r--r--drivers/infiniband/hw/nes/nes_nic.c1870
-rw-r--r--drivers/infiniband/hw/nes/nes_utils.c916
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c3759
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.h198
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c11
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.h2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c8
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c38
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.h7
-rw-r--r--drivers/infiniband/hw/qedr/main.c8
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c82
-rw-r--r--drivers/infiniband/hw/qedr/verbs.h7
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c29
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c3
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c28
-rw-r--r--drivers/infiniband/hw/qib/qib_user_pages.c11
-rw-r--r--drivers/infiniband/hw/qib/qib_user_sdma.c11
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c6
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib.h4
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c8
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c22
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.h7
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c7
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma.h2
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c46
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c8
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c3
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c16
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h7
-rw-r--r--drivers/infiniband/sw/Makefile1
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.c6
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c250
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.h7
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c6
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c402
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.h2
-rw-r--r--drivers/infiniband/sw/rdmavt/rc.c41
-rw-r--r--drivers/infiniband/sw/rdmavt/srq.c69
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_mr.h56
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c7
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.h9
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.c1
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c40
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h3
-rw-r--r--drivers/infiniband/sw/siw/Kconfig18
-rw-r--r--drivers/infiniband/sw/siw/Makefile11
-rw-r--r--drivers/infiniband/sw/siw/iwarp.h380
-rw-r--r--drivers/infiniband/sw/siw/siw.h745
-rw-r--r--drivers/infiniband/sw/siw/siw_cm.c2070
-rw-r--r--drivers/infiniband/sw/siw/siw_cm.h133
-rw-r--r--drivers/infiniband/sw/siw/siw_cq.c101
-rw-r--r--drivers/infiniband/sw/siw/siw_main.c685
-rw-r--r--drivers/infiniband/sw/siw/siw_mem.c460
-rw-r--r--drivers/infiniband/sw/siw/siw_mem.h74
-rw-r--r--drivers/infiniband/sw/siw/siw_qp.c1322
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_rx.c1458
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_tx.c1269
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c1760
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.h91
-rw-r--r--drivers/infiniband/ulp/ipoib/Kconfig2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c3
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c34
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c7
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c12
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h64
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c12
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c121
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c156
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c19
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c21
-rw-r--r--drivers/nvme/host/rdma.c2
185 files changed, 17580 insertions, 24118 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 8ba41cbf1869..85e103b147cc 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -7,6 +7,7 @@ menuconfig INFINIBAND
depends on m || IPV6 != m
depends on !ALPHA
select IRQ_POLL
+ select DIMLIB
---help---
Core support for InfiniBand (IB). Make sure to also select
any protocols you wish to use as well as drivers for your
@@ -36,17 +37,6 @@ config INFINIBAND_USER_ACCESS
libibverbs, libibcm and a hardware driver library from
rdma-core <https://github.com/linux-rdma/rdma-core>.
-config INFINIBAND_USER_ACCESS_UCM
- tristate "Userspace CM (UCM, DEPRECATED)"
- depends on BROKEN || COMPILE_TEST
- depends on INFINIBAND_USER_ACCESS
- help
- The UCM module has known security flaws, which no one is
- interested to fix. The user-space part of this code was
- dropped from the upstream a long time ago.
-
- This option is DEPRECATED and planned to be removed.
-
config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI
bool "Allow experimental legacy verbs in new ioctl uAPI (EXPERIMENTAL)"
depends on INFINIBAND_USER_ACCESS
@@ -98,7 +88,6 @@ source "drivers/infiniband/hw/efa/Kconfig"
source "drivers/infiniband/hw/i40iw/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
-source "drivers/infiniband/hw/nes/Kconfig"
source "drivers/infiniband/hw/ocrdma/Kconfig"
source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
source "drivers/infiniband/hw/usnic/Kconfig"
@@ -108,6 +97,7 @@ source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/qedr/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
source "drivers/infiniband/sw/rxe/Kconfig"
+source "drivers/infiniband/sw/siw/Kconfig"
endif
source "drivers/infiniband/ulp/ipoib/Kconfig"
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 313f2349b518..09881bd5f12d 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -6,13 +6,12 @@ obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \
$(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o $(user_access-y)
-obj-$(CONFIG_INFINIBAND_USER_ACCESS_UCM) += ib_ucm.o $(user_access-y)
ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
- nldev.o restrack.o
+ nldev.o restrack.o counters.o
ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
@@ -29,8 +28,6 @@ rdma_ucm-y := ucma.o
ib_umad-y := user_mad.o
-ib_ucm-y := ucm.o
-
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
uverbs_std_types_cq.o \
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 2f7d14159841..9b76a8fcdd24 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -337,7 +337,7 @@ static int dst_fetch_ha(const struct dst_entry *dst,
neigh_event_send(n, NULL);
ret = -ENODATA;
} else {
- memcpy(dev_addr->dst_dev_addr, n->ha, MAX_ADDR_LEN);
+ neigh_ha_snapshot(dev_addr->dst_dev_addr, n, dst->dev);
}
neigh_release(n);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index ff40a450b5d2..888d89ce81df 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -60,6 +60,7 @@ extern bool ib_devices_shared_netns;
int ib_device_register_sysfs(struct ib_device *device);
void ib_device_unregister_sysfs(struct ib_device *device);
int ib_device_rename(struct ib_device *ibdev, const char *name);
+int ib_device_set_dim(struct ib_device *ibdev, u8 use_dim);
typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
struct net_device *idev, void *cookie);
@@ -88,6 +89,15 @@ typedef int (*nldev_callback)(struct ib_device *device,
int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
struct netlink_callback *cb);
+struct ib_client_nl_info {
+ struct sk_buff *nl_msg;
+ struct device *cdev;
+ unsigned int port;
+ u64 abi;
+};
+int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
+ struct ib_client_nl_info *res);
+
enum ib_cache_gid_default_mode {
IB_CACHE_GID_DEFAULT_MODE_SET,
IB_CACHE_GID_DEFAULT_MODE_DELETE
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
new file mode 100644
index 000000000000..01faef7bc061
--- /dev/null
+++ b/drivers/infiniband/core/counters.c
@@ -0,0 +1,634 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
+ */
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_counter.h>
+
+#include "core_priv.h"
+#include "restrack.h"
+
+#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE)
+
+static int __counter_set_mode(struct rdma_counter_mode *curr,
+ enum rdma_nl_counter_mode new_mode,
+ enum rdma_nl_counter_mask new_mask)
+{
+ if ((new_mode == RDMA_COUNTER_MODE_AUTO) &&
+ ((new_mask & (~ALL_AUTO_MODE_MASKS)) ||
+ (curr->mode != RDMA_COUNTER_MODE_NONE)))
+ return -EINVAL;
+
+ curr->mode = new_mode;
+ curr->mask = new_mask;
+ return 0;
+}
+
+/**
+ * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
+ *
+ * When @on is true, the @mask must be set; When @on is false, it goes
+ * into manual mode if there's any counter, so that the user is able to
+ * manually access them.
+ */
+int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
+ bool on, enum rdma_nl_counter_mask mask)
+{
+ struct rdma_port_counter *port_counter;
+ int ret;
+
+ port_counter = &dev->port_data[port].port_counter;
+ mutex_lock(&port_counter->lock);
+ if (on) {
+ ret = __counter_set_mode(&port_counter->mode,
+ RDMA_COUNTER_MODE_AUTO, mask);
+ } else {
+ if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (port_counter->num_counters)
+ ret = __counter_set_mode(&port_counter->mode,
+ RDMA_COUNTER_MODE_MANUAL, 0);
+ else
+ ret = __counter_set_mode(&port_counter->mode,
+ RDMA_COUNTER_MODE_NONE, 0);
+ }
+
+out:
+ mutex_unlock(&port_counter->lock);
+ return ret;
+}
+
+static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
+ enum rdma_nl_counter_mode mode)
+{
+ struct rdma_port_counter *port_counter;
+ struct rdma_counter *counter;
+ int ret;
+
+ if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats)
+ return NULL;
+
+ counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+ if (!counter)
+ return NULL;
+
+ counter->device = dev;
+ counter->port = port;
+ counter->res.type = RDMA_RESTRACK_COUNTER;
+ counter->stats = dev->ops.counter_alloc_stats(counter);
+ if (!counter->stats)
+ goto err_stats;
+
+ port_counter = &dev->port_data[port].port_counter;
+ mutex_lock(&port_counter->lock);
+ if (mode == RDMA_COUNTER_MODE_MANUAL) {
+ ret = __counter_set_mode(&port_counter->mode,
+ RDMA_COUNTER_MODE_MANUAL, 0);
+ if (ret)
+ goto err_mode;
+ }
+
+ port_counter->num_counters++;
+ mutex_unlock(&port_counter->lock);
+
+ counter->mode.mode = mode;
+ kref_init(&counter->kref);
+ mutex_init(&counter->lock);
+
+ return counter;
+
+err_mode:
+ mutex_unlock(&port_counter->lock);
+ kfree(counter->stats);
+err_stats:
+ kfree(counter);
+ return NULL;
+}
+
+static void rdma_counter_free(struct rdma_counter *counter)
+{
+ struct rdma_port_counter *port_counter;
+
+ port_counter = &counter->device->port_data[counter->port].port_counter;
+ mutex_lock(&port_counter->lock);
+ port_counter->num_counters--;
+ if (!port_counter->num_counters &&
+ (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
+ __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE,
+ 0);
+
+ mutex_unlock(&port_counter->lock);
+
+ rdma_restrack_del(&counter->res);
+ kfree(counter->stats);
+ kfree(counter);
+}
+
+static void auto_mode_init_counter(struct rdma_counter *counter,
+ const struct ib_qp *qp,
+ enum rdma_nl_counter_mask new_mask)
+{
+ struct auto_mode_param *param = &counter->mode.param;
+
+ counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
+ counter->mode.mask = new_mask;
+
+ if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
+ param->qp_type = qp->qp_type;
+}
+
+static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
+ enum rdma_nl_counter_mask auto_mask)
+{
+ struct auto_mode_param *param = &counter->mode.param;
+ bool match = true;
+
+ if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res))
+ return false;
+
+ /* Ensure that counter belong to right PID */
+ if (!rdma_is_kernel_res(&counter->res) &&
+ !rdma_is_kernel_res(&qp->res) &&
+ (task_pid_vnr(counter->res.task) != current->pid))
+ return false;
+
+ if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
+ match &= (param->qp_type == qp->qp_type);
+
+ return match;
+}
+
+static int __rdma_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *qp)
+{
+ int ret;
+
+ if (qp->counter)
+ return -EINVAL;
+
+ if (!qp->device->ops.counter_bind_qp)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&counter->lock);
+ ret = qp->device->ops.counter_bind_qp(counter, qp);
+ mutex_unlock(&counter->lock);
+
+ return ret;
+}
+
+static int __rdma_counter_unbind_qp(struct ib_qp *qp)
+{
+ struct rdma_counter *counter = qp->counter;
+ int ret;
+
+ if (!qp->device->ops.counter_unbind_qp)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&counter->lock);
+ ret = qp->device->ops.counter_unbind_qp(qp);
+ mutex_unlock(&counter->lock);
+
+ return ret;
+}
+
+static void counter_history_stat_update(const struct rdma_counter *counter)
+{
+ struct ib_device *dev = counter->device;
+ struct rdma_port_counter *port_counter;
+ int i;
+
+ port_counter = &dev->port_data[counter->port].port_counter;
+ if (!port_counter->hstats)
+ return;
+
+ for (i = 0; i < counter->stats->num_counters; i++)
+ port_counter->hstats->value[i] += counter->stats->value[i];
+}
+
+/**
+ * rdma_get_counter_auto_mode - Find the counter that @qp should be bound
+ * with in auto mode
+ *
+ * Return: The counter (with ref-count increased) if found
+ */
+static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
+ u8 port)
+{
+ struct rdma_port_counter *port_counter;
+ struct rdma_counter *counter = NULL;
+ struct ib_device *dev = qp->device;
+ struct rdma_restrack_entry *res;
+ struct rdma_restrack_root *rt;
+ unsigned long id = 0;
+
+ port_counter = &dev->port_data[port].port_counter;
+ rt = &dev->res[RDMA_RESTRACK_COUNTER];
+ xa_lock(&rt->xa);
+ xa_for_each(&rt->xa, id, res) {
+ if (!rdma_is_visible_in_pid_ns(res))
+ continue;
+
+ counter = container_of(res, struct rdma_counter, res);
+ if ((counter->device != qp->device) || (counter->port != port))
+ goto next;
+
+ if (auto_mode_match(qp, counter, port_counter->mode.mask))
+ break;
+next:
+ counter = NULL;
+ }
+
+ if (counter && !kref_get_unless_zero(&counter->kref))
+ counter = NULL;
+
+ xa_unlock(&rt->xa);
+ return counter;
+}
+
+static void rdma_counter_res_add(struct rdma_counter *counter,
+ struct ib_qp *qp)
+{
+ if (rdma_is_kernel_res(&qp->res)) {
+ rdma_restrack_set_task(&counter->res, qp->res.kern_name);
+ rdma_restrack_kadd(&counter->res);
+ } else {
+ rdma_restrack_attach_task(&counter->res, qp->res.task);
+ rdma_restrack_uadd(&counter->res);
+ }
+}
+
+static void counter_release(struct kref *kref)
+{
+ struct rdma_counter *counter;
+
+ counter = container_of(kref, struct rdma_counter, kref);
+ counter_history_stat_update(counter);
+ counter->device->ops.counter_dealloc(counter);
+ rdma_counter_free(counter);
+}
+
+/**
+ * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on
+ * the auto-mode rule
+ */
+int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
+{
+ struct rdma_port_counter *port_counter;
+ struct ib_device *dev = qp->device;
+ struct rdma_counter *counter;
+ int ret;
+
+ if (!rdma_is_port_valid(dev, port))
+ return -EINVAL;
+
+ port_counter = &dev->port_data[port].port_counter;
+ if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO)
+ return 0;
+
+ counter = rdma_get_counter_auto_mode(qp, port);
+ if (counter) {
+ ret = __rdma_counter_bind_qp(counter, qp);
+ if (ret) {
+ kref_put(&counter->kref, counter_release);
+ return ret;
+ }
+ } else {
+ counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO);
+ if (!counter)
+ return -ENOMEM;
+
+ auto_mode_init_counter(counter, qp, port_counter->mode.mask);
+
+ ret = __rdma_counter_bind_qp(counter, qp);
+ if (ret) {
+ rdma_counter_free(counter);
+ return ret;
+ }
+
+ rdma_counter_res_add(counter, qp);
+ }
+
+ return 0;
+}
+
+/**
+ * rdma_counter_unbind_qp - Unbind a qp from a counter
+ * @force:
+ * true - Decrease the counter ref-count anyway (e.g., qp destroy)
+ */
+int rdma_counter_unbind_qp(struct ib_qp *qp, bool force)
+{
+ struct rdma_counter *counter = qp->counter;
+ int ret;
+
+ if (!counter)
+ return -EINVAL;
+
+ ret = __rdma_counter_unbind_qp(qp);
+ if (ret && !force)
+ return ret;
+
+ kref_put(&counter->kref, counter_release);
+ return 0;
+}
+
+int rdma_counter_query_stats(struct rdma_counter *counter)
+{
+ struct ib_device *dev = counter->device;
+ int ret;
+
+ if (!dev->ops.counter_update_stats)
+ return -EINVAL;
+
+ mutex_lock(&counter->lock);
+ ret = dev->ops.counter_update_stats(counter);
+ mutex_unlock(&counter->lock);
+
+ return ret;
+}
+
+static u64 get_running_counters_hwstat_sum(struct ib_device *dev,
+ u8 port, u32 index)
+{
+ struct rdma_restrack_entry *res;
+ struct rdma_restrack_root *rt;
+ struct rdma_counter *counter;
+ unsigned long id = 0;
+ u64 sum = 0;
+
+ rt = &dev->res[RDMA_RESTRACK_COUNTER];
+ xa_lock(&rt->xa);
+ xa_for_each(&rt->xa, id, res) {
+ if (!rdma_restrack_get(res))
+ continue;
+
+ xa_unlock(&rt->xa);
+
+ counter = container_of(res, struct rdma_counter, res);
+ if ((counter->device != dev) || (counter->port != port) ||
+ rdma_counter_query_stats(counter))
+ goto next;
+
+ sum += counter->stats->value[index];
+
+next:
+ xa_lock(&rt->xa);
+ rdma_restrack_put(res);
+ }
+
+ xa_unlock(&rt->xa);
+ return sum;
+}
+
+/**
+ * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a
+ * specific port, including the running ones and history data
+ */
+u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index)
+{
+ struct rdma_port_counter *port_counter;
+ u64 sum;
+
+ port_counter = &dev->port_data[port].port_counter;
+ sum = get_running_counters_hwstat_sum(dev, port, index);
+ sum += port_counter->hstats->value[index];
+
+ return sum;
+}
+
+static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
+{
+ struct rdma_restrack_entry *res = NULL;
+ struct ib_qp *qp = NULL;
+
+ res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num);
+ if (IS_ERR(res))
+ return NULL;
+
+ if (!rdma_is_visible_in_pid_ns(res))
+ goto err;
+
+ qp = container_of(res, struct ib_qp, res);
+ if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
+ goto err;
+
+ return qp;
+
+err:
+ rdma_restrack_put(&qp->res);
+ return NULL;
+}
+
+static int rdma_counter_bind_qp_manual(struct rdma_counter *counter,
+ struct ib_qp *qp)
+{
+ if ((counter->device != qp->device) || (counter->port != qp->port))
+ return -EINVAL;
+
+ return __rdma_counter_bind_qp(counter, qp);
+}
+
+static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
+ u32 counter_id)
+{
+ struct rdma_restrack_entry *res;
+ struct rdma_counter *counter;
+
+ res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id);
+ if (IS_ERR(res))
+ return NULL;
+
+ if (!rdma_is_visible_in_pid_ns(res)) {
+ rdma_restrack_put(res);
+ return NULL;
+ }
+
+ counter = container_of(res, struct rdma_counter, res);
+ kref_get(&counter->kref);
+ rdma_restrack_put(res);
+
+ return counter;
+}
+
+/**
+ * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id
+ */
+int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
+ u32 qp_num, u32 counter_id)
+{
+ struct rdma_counter *counter;
+ struct ib_qp *qp;
+ int ret;
+
+ qp = rdma_counter_get_qp(dev, qp_num);
+ if (!qp)
+ return -ENOENT;
+
+ counter = rdma_get_counter_by_id(dev, counter_id);
+ if (!counter) {
+ ret = -ENOENT;
+ goto err;
+ }
+
+ if (counter->res.task != qp->res.task) {
+ ret = -EINVAL;
+ goto err_task;
+ }
+
+ ret = rdma_counter_bind_qp_manual(counter, qp);
+ if (ret)
+ goto err_task;
+
+ rdma_restrack_put(&qp->res);
+ return 0;
+
+err_task:
+ kref_put(&counter->kref, counter_release);
+err:
+ rdma_restrack_put(&qp->res);
+ return ret;
+}
+
+/**
+ * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it
+ * The id of new counter is returned in @counter_id
+ */
+int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
+ u32 qp_num, u32 *counter_id)
+{
+ struct rdma_counter *counter;
+ struct ib_qp *qp;
+ int ret;
+
+ if (!rdma_is_port_valid(dev, port))
+ return -EINVAL;
+
+ qp = rdma_counter_get_qp(dev, qp_num);
+ if (!qp)
+ return -ENOENT;
+
+ if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL);
+ if (!counter) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = rdma_counter_bind_qp_manual(counter, qp);
+ if (ret)
+ goto err_bind;
+
+ if (counter_id)
+ *counter_id = counter->id;
+
+ rdma_counter_res_add(counter, qp);
+
+ rdma_restrack_put(&qp->res);
+ return ret;
+
+err_bind:
+ rdma_counter_free(counter);
+err:
+ rdma_restrack_put(&qp->res);
+ return ret;
+}
+
+/**
+ * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter
+ */
+int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port,
+ u32 qp_num, u32 counter_id)
+{
+ struct rdma_port_counter *port_counter;
+ struct ib_qp *qp;
+ int ret;
+
+ if (!rdma_is_port_valid(dev, port))
+ return -EINVAL;
+
+ qp = rdma_counter_get_qp(dev, qp_num);
+ if (!qp)
+ return -ENOENT;
+
+ if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ port_counter = &dev->port_data[port].port_counter;
+ if (!qp->counter || qp->counter->id != counter_id ||
+ port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = rdma_counter_unbind_qp(qp, false);
+
+out:
+ rdma_restrack_put(&qp->res);
+ return ret;
+}
+
+int rdma_counter_get_mode(struct ib_device *dev, u8 port,
+ enum rdma_nl_counter_mode *mode,
+ enum rdma_nl_counter_mask *mask)
+{
+ struct rdma_port_counter *port_counter;
+
+ port_counter = &dev->port_data[port].port_counter;
+ *mode = port_counter->mode.mode;
+ *mask = port_counter->mode.mask;
+
+ return 0;
+}
+
+void rdma_counter_init(struct ib_device *dev)
+{
+ struct rdma_port_counter *port_counter;
+ u32 port;
+
+ if (!dev->ops.alloc_hw_stats || !dev->port_data)
+ return;
+
+ rdma_for_each_port(dev, port) {
+ port_counter = &dev->port_data[port].port_counter;
+ port_counter->mode.mode = RDMA_COUNTER_MODE_NONE;
+ mutex_init(&port_counter->lock);
+
+ port_counter->hstats = dev->ops.alloc_hw_stats(dev, port);
+ if (!port_counter->hstats)
+ goto fail;
+ }
+
+ return;
+
+fail:
+ rdma_for_each_port(dev, port) {
+ port_counter = &dev->port_data[port].port_counter;
+ kfree(port_counter->hstats);
+ port_counter->hstats = NULL;
+ }
+
+ return;
+}
+
+void rdma_counter_release(struct ib_device *dev)
+{
+ struct rdma_port_counter *port_counter;
+ u32 port;
+
+ if (!dev->ops.alloc_hw_stats)
+ return;
+
+ rdma_for_each_port(dev, port) {
+ port_counter = &dev->port_data[port].port_counter;
+ kfree(port_counter->hstats);
+ }
+}
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index a24c900fbdf6..7c599878ccf7 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -18,6 +18,53 @@
#define IB_POLL_FLAGS \
(IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
+static const struct dim_cq_moder
+rdma_dim_prof[RDMA_DIM_PARAMS_NUM_PROFILES] = {
+ {1, 0, 1, 0},
+ {1, 0, 4, 0},
+ {2, 0, 4, 0},
+ {2, 0, 8, 0},
+ {4, 0, 8, 0},
+ {16, 0, 8, 0},
+ {16, 0, 16, 0},
+ {32, 0, 16, 0},
+ {32, 0, 32, 0},
+};
+
+static void ib_cq_rdma_dim_work(struct work_struct *w)
+{
+ struct dim *dim = container_of(w, struct dim, work);
+ struct ib_cq *cq = dim->priv;
+
+ u16 usec = rdma_dim_prof[dim->profile_ix].usec;
+ u16 comps = rdma_dim_prof[dim->profile_ix].comps;
+
+ dim->state = DIM_START_MEASURE;
+
+ cq->device->ops.modify_cq(cq, comps, usec);
+}
+
+static void rdma_dim_init(struct ib_cq *cq)
+{
+ struct dim *dim;
+
+ if (!cq->device->ops.modify_cq || !cq->device->use_cq_dim ||
+ cq->poll_ctx == IB_POLL_DIRECT)
+ return;
+
+ dim = kzalloc(sizeof(struct dim), GFP_KERNEL);
+ if (!dim)
+ return;
+
+ dim->state = DIM_START_MEASURE;
+ dim->tune_state = DIM_GOING_RIGHT;
+ dim->profile_ix = RDMA_DIM_START_PROFILE;
+ dim->priv = cq;
+ cq->dim = dim;
+
+ INIT_WORK(&dim->work, ib_cq_rdma_dim_work);
+}
+
static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
int batch)
{
@@ -78,6 +125,7 @@ static void ib_cq_completion_direct(struct ib_cq *cq, void *private)
static int ib_poll_handler(struct irq_poll *iop, int budget)
{
struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
+ struct dim *dim = cq->dim;
int completed;
completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH);
@@ -87,6 +135,9 @@ static int ib_poll_handler(struct irq_poll *iop, int budget)
irq_poll_sched(&cq->iop);
}
+ if (dim)
+ rdma_dim(dim, completed);
+
return completed;
}
@@ -105,6 +156,8 @@ static void ib_cq_poll_work(struct work_struct *work)
if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
queue_work(cq->comp_wq, &cq->work);
+ else if (cq->dim)
+ rdma_dim(cq->dim, completed);
}
static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
@@ -113,7 +166,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
}
/**
- * __ib_alloc_cq - allocate a completion queue
+ * __ib_alloc_cq_user - allocate a completion queue
* @dev: device to allocate the CQ for
* @private: driver private data, accessible from cq->cq_context
* @nr_cqe: number of CQEs to allocate
@@ -139,25 +192,30 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
struct ib_cq *cq;
int ret = -ENOMEM;
- cq = dev->ops.create_cq(dev, &cq_attr, NULL);
- if (IS_ERR(cq))
- return cq;
+ cq = rdma_zalloc_drv_obj(dev, ib_cq);
+ if (!cq)
+ return ERR_PTR(ret);
cq->device = dev;
- cq->uobject = NULL;
- cq->event_handler = NULL;
cq->cq_context = private;
cq->poll_ctx = poll_ctx;
atomic_set(&cq->usecnt, 0);
cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL);
if (!cq->wc)
- goto out_destroy_cq;
+ goto out_free_cq;
cq->res.type = RDMA_RESTRACK_CQ;
rdma_restrack_set_task(&cq->res, caller);
+
+ ret = dev->ops.create_cq(cq, &cq_attr, NULL);
+ if (ret)
+ goto out_free_wc;
+
rdma_restrack_kadd(&cq->res);
+ rdma_dim_init(cq);
+
switch (cq->poll_ctx) {
case IB_POLL_DIRECT:
cq->comp_handler = ib_cq_completion_direct;
@@ -178,29 +236,29 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
break;
default:
ret = -EINVAL;
- goto out_free_wc;
+ goto out_destroy_cq;
}
return cq;
-out_free_wc:
- kfree(cq->wc);
- rdma_restrack_del(&cq->res);
out_destroy_cq:
+ rdma_restrack_del(&cq->res);
cq->device->ops.destroy_cq(cq, udata);
+out_free_wc:
+ kfree(cq->wc);
+out_free_cq:
+ kfree(cq);
return ERR_PTR(ret);
}
EXPORT_SYMBOL(__ib_alloc_cq_user);
/**
- * ib_free_cq - free a completion queue
+ * ib_free_cq_user - free a completion queue
* @cq: completion queue to free.
* @udata: User data or NULL for kernel object
*/
void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
{
- int ret;
-
if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
return;
@@ -218,9 +276,12 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
WARN_ON_ONCE(1);
}
- kfree(cq->wc);
rdma_restrack_del(&cq->res);
- ret = cq->device->ops.destroy_cq(cq, udata);
- WARN_ON_ONCE(ret);
+ cq->device->ops.destroy_cq(cq, udata);
+ if (cq->dim)
+ cancel_work_sync(&cq->dim->work);
+ kfree(cq->dim);
+ kfree(cq->wc);
+ kfree(cq);
}
EXPORT_SYMBOL(ib_free_cq_user);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 3352a107b4a3..9773145dee09 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -46,6 +46,7 @@
#include <rdma/rdma_netlink.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
+#include <rdma/rdma_counter.h>
#include "core_priv.h"
#include "restrack.h"
@@ -270,7 +271,7 @@ struct ib_port_data_rcu {
struct ib_port_data pdata[];
};
-static int ib_device_check_mandatory(struct ib_device *device)
+static void ib_device_check_mandatory(struct ib_device *device)
{
#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x }
static const struct {
@@ -305,8 +306,6 @@ static int ib_device_check_mandatory(struct ib_device *device)
break;
}
}
-
- return 0;
}
/*
@@ -375,7 +374,7 @@ struct ib_device *ib_device_get_by_name(const char *name,
down_read(&devices_rwsem);
device = __ib_device_get_by_name(name);
if (device && driver_id != RDMA_DRIVER_UNKNOWN &&
- device->driver_id != driver_id)
+ device->ops.driver_id != driver_id)
device = NULL;
if (device) {
@@ -449,6 +448,15 @@ int ib_device_rename(struct ib_device *ibdev, const char *name)
return 0;
}
+int ib_device_set_dim(struct ib_device *ibdev, u8 use_dim)
+{
+ if (use_dim > 1)
+ return -EINVAL;
+ ibdev->use_cq_dim = use_dim;
+
+ return 0;
+}
+
static int alloc_name(struct ib_device *ibdev, const char *name)
{
struct ib_device *device;
@@ -494,10 +502,12 @@ static void ib_device_release(struct device *device)
if (dev->port_data) {
ib_cache_release_one(dev);
ib_security_release_port_pkey_list(dev);
+ rdma_counter_release(dev);
kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu,
pdata[0]),
rcu_head);
}
+
xa_destroy(&dev->compat_devs);
xa_destroy(&dev->client_data);
kfree_rcu(dev, rcu_head);
@@ -1193,10 +1203,7 @@ static int setup_device(struct ib_device *device)
int ret;
setup_dma_device(device);
-
- ret = ib_device_check_mandatory(device);
- if (ret)
- return ret;
+ ib_device_check_mandatory(device);
ret = setup_port_data(device);
if (ret) {
@@ -1321,6 +1328,8 @@ int ib_register_device(struct ib_device *device, const char *name)
ib_device_register_rdmacg(device);
+ rdma_counter_init(device);
+
/*
* Ensure that ADD uevent is not fired because it
* is too early amd device is not initialized yet.
@@ -1479,7 +1488,7 @@ void ib_unregister_driver(enum rdma_driver_id driver_id)
down_read(&devices_rwsem);
xa_for_each (&devices, index, ib_dev) {
- if (ib_dev->driver_id != driver_id)
+ if (ib_dev->ops.driver_id != driver_id)
continue;
get_device(&ib_dev->dev);
@@ -1749,6 +1758,104 @@ void ib_unregister_client(struct ib_client *client)
}
EXPORT_SYMBOL(ib_unregister_client);
+static int __ib_get_global_client_nl_info(const char *client_name,
+ struct ib_client_nl_info *res)
+{
+ struct ib_client *client;
+ unsigned long index;
+ int ret = -ENOENT;
+
+ down_read(&clients_rwsem);
+ xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
+ if (strcmp(client->name, client_name) != 0)
+ continue;
+ if (!client->get_global_nl_info) {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ ret = client->get_global_nl_info(res);
+ if (WARN_ON(ret == -ENOENT))
+ ret = -EINVAL;
+ if (!ret && res->cdev)
+ get_device(res->cdev);
+ break;
+ }
+ up_read(&clients_rwsem);
+ return ret;
+}
+
+static int __ib_get_client_nl_info(struct ib_device *ibdev,
+ const char *client_name,
+ struct ib_client_nl_info *res)
+{
+ unsigned long index;
+ void *client_data;
+ int ret = -ENOENT;
+
+ down_read(&ibdev->client_data_rwsem);
+ xan_for_each_marked (&ibdev->client_data, index, client_data,
+ CLIENT_DATA_REGISTERED) {
+ struct ib_client *client = xa_load(&clients, index);
+
+ if (!client || strcmp(client->name, client_name) != 0)
+ continue;
+ if (!client->get_nl_info) {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ ret = client->get_nl_info(ibdev, client_data, res);
+ if (WARN_ON(ret == -ENOENT))
+ ret = -EINVAL;
+
+ /*
+ * The cdev is guaranteed valid as long as we are inside the
+ * client_data_rwsem as remove_one can't be called. Keep it
+ * valid for the caller.
+ */
+ if (!ret && res->cdev)
+ get_device(res->cdev);
+ break;
+ }
+ up_read(&ibdev->client_data_rwsem);
+
+ return ret;
+}
+
+/**
+ * ib_get_client_nl_info - Fetch the nl_info from a client
+ * @device - IB device
+ * @client_name - Name of the client
+ * @res - Result of the query
+ */
+int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
+ struct ib_client_nl_info *res)
+{
+ int ret;
+
+ if (ibdev)
+ ret = __ib_get_client_nl_info(ibdev, client_name, res);
+ else
+ ret = __ib_get_global_client_nl_info(client_name, res);
+#ifdef CONFIG_MODULES
+ if (ret == -ENOENT) {
+ request_module("rdma-client-%s", client_name);
+ if (ibdev)
+ ret = __ib_get_client_nl_info(ibdev, client_name, res);
+ else
+ ret = __ib_get_global_client_nl_info(client_name, res);
+ }
+#endif
+ if (ret) {
+ if (ret == -ENOENT)
+ return -EOPNOTSUPP;
+ return ret;
+ }
+
+ if (WARN_ON(!res->cdev))
+ return -EINVAL;
+ return 0;
+}
+
/**
* ib_set_client_data - Set IB client context
* @device:Device to set context for
@@ -2039,7 +2146,7 @@ struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
(uintptr_t)ndev) {
if (rcu_access_pointer(cur->netdev) == ndev &&
(driver_id == RDMA_DRIVER_UNKNOWN ||
- cur->ib_dev->driver_id == driver_id) &&
+ cur->ib_dev->ops.driver_id == driver_id) &&
ib_device_try_get(cur->ib_dev)) {
res = cur->ib_dev;
break;
@@ -2344,12 +2451,28 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
#define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name)
+ if (ops->driver_id != RDMA_DRIVER_UNKNOWN) {
+ WARN_ON(dev_ops->driver_id != RDMA_DRIVER_UNKNOWN &&
+ dev_ops->driver_id != ops->driver_id);
+ dev_ops->driver_id = ops->driver_id;
+ }
+ if (ops->owner) {
+ WARN_ON(dev_ops->owner && dev_ops->owner != ops->owner);
+ dev_ops->owner = ops->owner;
+ }
+ if (ops->uverbs_abi_ver)
+ dev_ops->uverbs_abi_ver = ops->uverbs_abi_ver;
+
+ dev_ops->uverbs_no_driver_id_binding |=
+ ops->uverbs_no_driver_id_binding;
+
SET_DEVICE_OP(dev_ops, add_gid);
SET_DEVICE_OP(dev_ops, advise_mr);
SET_DEVICE_OP(dev_ops, alloc_dm);
SET_DEVICE_OP(dev_ops, alloc_fmr);
SET_DEVICE_OP(dev_ops, alloc_hw_stats);
SET_DEVICE_OP(dev_ops, alloc_mr);
+ SET_DEVICE_OP(dev_ops, alloc_mr_integrity);
SET_DEVICE_OP(dev_ops, alloc_mw);
SET_DEVICE_OP(dev_ops, alloc_pd);
SET_DEVICE_OP(dev_ops, alloc_rdma_netdev);
@@ -2357,6 +2480,11 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, alloc_xrcd);
SET_DEVICE_OP(dev_ops, attach_mcast);
SET_DEVICE_OP(dev_ops, check_mr_status);
+ SET_DEVICE_OP(dev_ops, counter_alloc_stats);
+ SET_DEVICE_OP(dev_ops, counter_bind_qp);
+ SET_DEVICE_OP(dev_ops, counter_dealloc);
+ SET_DEVICE_OP(dev_ops, counter_unbind_qp);
+ SET_DEVICE_OP(dev_ops, counter_update_stats);
SET_DEVICE_OP(dev_ops, create_ah);
SET_DEVICE_OP(dev_ops, create_counters);
SET_DEVICE_OP(dev_ops, create_cq);
@@ -2409,6 +2537,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, iw_reject);
SET_DEVICE_OP(dev_ops, iw_rem_ref);
SET_DEVICE_OP(dev_ops, map_mr_sg);
+ SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
SET_DEVICE_OP(dev_ops, map_phys_fmr);
SET_DEVICE_OP(dev_ops, mmap);
SET_DEVICE_OP(dev_ops, modify_ah);
@@ -2445,6 +2574,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, unmap_fmr);
SET_OBJ_SIZE(dev_ops, ib_ah);
+ SET_OBJ_SIZE(dev_ops, ib_cq);
SET_OBJ_SIZE(dev_ops, ib_pd);
SET_OBJ_SIZE(dev_ops, ib_srq);
SET_OBJ_SIZE(dev_ops, ib_ucontext);
diff --git a/drivers/infiniband/core/mr_pool.c b/drivers/infiniband/core/mr_pool.c
index d117f21ce9fd..c0e2df128b34 100644
--- a/drivers/infiniband/core/mr_pool.c
+++ b/drivers/infiniband/core/mr_pool.c
@@ -34,14 +34,18 @@ void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr)
EXPORT_SYMBOL(ib_mr_pool_put);
int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr,
- enum ib_mr_type type, u32 max_num_sg)
+ enum ib_mr_type type, u32 max_num_sg, u32 max_num_meta_sg)
{
struct ib_mr *mr;
unsigned long flags;
int ret, i;
for (i = 0; i < nr; i++) {
- mr = ib_alloc_mr(qp->pd, type, max_num_sg);
+ if (type == IB_MR_TYPE_INTEGRITY)
+ mr = ib_alloc_mr_integrity(qp->pd, max_num_sg,
+ max_num_meta_sg);
+ else
+ mr = ib_alloc_mr(qp->pd, type, max_num_sg);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto out;
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 69188cbbd99b..783e465e7c41 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -42,84 +42,105 @@
#include "cma_priv.h"
#include "restrack.h"
+/*
+ * Sort array elements by the netlink attribute name
+ */
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
- [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING,
- .len = IB_DEVICE_NAME_MAX - 1},
- [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING,
- .len = IB_FW_VERSION_NAME_MAX - 1},
- [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
- [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
- [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
- [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
- .len = 16 },
- [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
- [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
- .len = TASK_COMM_LEN },
+ [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_CHARDEV_TYPE] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
+ [RDMA_NLDEV_ATTR_DEV_DIM] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = IB_DEVICE_NAME_MAX },
+ [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 },
+ [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 },
+ [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING,
+ .len = IFNAMSIZ },
+ [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = IFNAMSIZ },
+ [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = {
- .len = sizeof(struct __kernel_sockaddr_storage) },
- [RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
- .len = sizeof(struct __kernel_sockaddr_storage) },
[RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
- [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
+ .len = sizeof(struct __kernel_sockaddr_storage) },
[RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
- .len = IFNAMSIZ },
- [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING,
- .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
- [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 },
- [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 },
- [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 },
[RDMA_NLDEV_ATTR_RES_PDN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING,
- .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
- [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING,
- .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
+ [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = {
+ .len = sizeof(struct __kernel_sockaddr_storage) },
+ [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_STAT_MODE] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_STAT_RES] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_STAT_COUNTER] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_STAT_COUNTER_ID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTERS] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 },
[RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 },
+ [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
};
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -232,6 +253,8 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
return -EMSGSIZE;
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
return -EMSGSIZE;
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
+ return -EMSGSIZE;
/*
* Link type is determined on first port and mlx4 device
@@ -532,6 +555,9 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
goto err;
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
+ goto err;
+
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
goto err;
if (!rdma_is_kernel_res(res) &&
@@ -623,6 +649,152 @@ static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
err: return -EMSGSIZE;
}
+static int fill_stat_counter_mode(struct sk_buff *msg,
+ struct rdma_counter *counter)
+{
+ struct rdma_counter_mode *m = &counter->mode;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
+ return -EMSGSIZE;
+
+ if (m->mode == RDMA_COUNTER_MODE_AUTO)
+ if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
+ nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
+{
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_stat_counter_qps(struct sk_buff *msg,
+ struct rdma_counter *counter)
+{
+ struct rdma_restrack_entry *res;
+ struct rdma_restrack_root *rt;
+ struct nlattr *table_attr;
+ struct ib_qp *qp = NULL;
+ unsigned long id = 0;
+ int ret = 0;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
+
+ rt = &counter->device->res[RDMA_RESTRACK_QP];
+ xa_lock(&rt->xa);
+ xa_for_each(&rt->xa, id, res) {
+ if (!rdma_is_visible_in_pid_ns(res))
+ continue;
+
+ qp = container_of(res, struct ib_qp, res);
+ if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
+ continue;
+
+ if (!qp->counter || (qp->counter->id != counter->id))
+ continue;
+
+ ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
+ if (ret)
+ goto err;
+ }
+
+ xa_unlock(&rt->xa);
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ xa_unlock(&rt->xa);
+ nla_nest_cancel(msg, table_attr);
+ return ret;
+}
+
+static int fill_stat_hwcounter_entry(struct sk_buff *msg,
+ const char *name, u64 value)
+{
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
+ name))
+ goto err;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
+ value, RDMA_NLDEV_ATTR_PAD))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_stat_counter_hwcounters(struct sk_buff *msg,
+ struct rdma_counter *counter)
+{
+ struct rdma_hw_stats *st = counter->stats;
+ struct nlattr *table_attr;
+ int i;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ for (i = 0; i < st->num_counters; i++)
+ if (fill_stat_hwcounter_entry(msg, st->names[i], st->value[i]))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res,
+ uint32_t port)
+{
+ struct rdma_counter *counter =
+ container_of(res, struct rdma_counter, res);
+
+ if (port && port != counter->port)
+ return 0;
+
+ /* Dump it even query failed */
+ rdma_counter_query_stats(counter);
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
+ fill_res_name_pid(msg, &counter->res) ||
+ fill_stat_counter_mode(msg, counter) ||
+ fill_stat_counter_qps(msg, counter) ||
+ fill_stat_counter_hwcounters(msg, counter))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -704,6 +876,14 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto put_done;
}
+ if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
+ u8 use_dim;
+
+ use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
+ err = ib_device_set_dim(device, use_dim);
+ goto done;
+ }
+
done:
ib_device_put(device);
put_done:
@@ -990,19 +1170,15 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_PDN,
},
+ [RDMA_RESTRACK_COUNTER] = {
+ .fill_res_func = fill_res_counter_entry,
+ .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET,
+ .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
+ .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
+ .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
+ },
};
-static bool is_visible_in_pid_ns(struct rdma_restrack_entry *res)
-{
- /*
- * 1. Kern resources should be visible in init name space only
- * 2. Present only resources visible in the current namespace
- */
- if (rdma_is_kernel_res(res))
- return task_active_pid_ns(current) == &init_pid_ns;
- return task_active_pid_ns(current) == task_active_pid_ns(res->task);
-}
-
static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack,
enum rdma_restrack_type res_type)
@@ -1047,7 +1223,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err;
}
- if (!is_visible_in_pid_ns(res)) {
+ if (!rdma_is_visible_in_pid_ns(res)) {
ret = -ENOENT;
goto err_get;
}
@@ -1159,7 +1335,7 @@ static int res_get_common_dumpit(struct sk_buff *skb,
* objects.
*/
xa_for_each(&rt->xa, id, res) {
- if (!is_visible_in_pid_ns(res))
+ if (!rdma_is_visible_in_pid_ns(res))
continue;
if (idx < start || !rdma_restrack_get(res))
@@ -1237,6 +1413,7 @@ RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
+RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
static LIST_HEAD(link_ops);
static DECLARE_RWSEM(link_ops_rwsem);
@@ -1299,7 +1476,7 @@ static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
sizeof(ndev_name));
- ndev = dev_get_by_name(&init_net, ndev_name);
+ ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
if (!ndev)
return -ENODEV;
@@ -1347,6 +1524,90 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
}
+static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
+ struct ib_client_nl_info data = {};
+ struct ib_device *ibdev = NULL;
+ struct sk_buff *msg;
+ u32 index;
+ int err;
+
+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
+ extack);
+ if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
+ return -EINVAL;
+
+ nla_strlcpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
+ sizeof(client_name));
+
+ if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!ibdev)
+ return -EINVAL;
+
+ if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+ data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(ibdev, data.port)) {
+ err = -EINVAL;
+ goto out_put;
+ }
+ } else {
+ data.port = -1;
+ }
+ } else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+ return -EINVAL;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ err = -ENOMEM;
+ goto out_put;
+ }
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_GET_CHARDEV),
+ 0, 0);
+
+ data.nl_msg = msg;
+ err = ib_get_client_nl_info(ibdev, client_name, &data);
+ if (err)
+ goto out_nlmsg;
+
+ err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
+ huge_encode_dev(data.cdev->devt),
+ RDMA_NLDEV_ATTR_PAD);
+ if (err)
+ goto out_data;
+ err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
+ RDMA_NLDEV_ATTR_PAD);
+ if (err)
+ goto out_data;
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
+ dev_name(data.cdev))) {
+ err = -EMSGSIZE;
+ goto out_data;
+ }
+
+ nlmsg_end(msg, nlh);
+ put_device(data.cdev);
+ if (ibdev)
+ ib_device_put(ibdev);
+ return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+
+out_data:
+ put_device(data.cdev);
+out_nlmsg:
+ nlmsg_free(msg);
+out_put:
+ if (ibdev)
+ ib_device_put(ibdev);
+ return err;
+}
+
static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -1399,11 +1660,375 @@ static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
return err;
}
+static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ u32 index, port, mode, mask = 0, qpn, cntn = 0;
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ struct sk_buff *msg;
+ int ret;
+
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ /* Currently only counter for QP is supported */
+ if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
+ !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE])
+ return -EINVAL;
+
+ if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_STAT_SET),
+ 0, 0);
+
+ mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
+ if (mode == RDMA_COUNTER_MODE_AUTO) {
+ if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
+ mask = nla_get_u32(
+ tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
+
+ ret = rdma_counter_set_auto_mode(device, port,
+ mask ? true : false, mask);
+ if (ret)
+ goto err_msg;
+ } else {
+ qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
+ if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
+ cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
+ ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
+ } else {
+ ret = rdma_counter_bind_qpn_alloc(device, port,
+ qpn, &cntn);
+ }
+ if (ret)
+ goto err_msg;
+
+ if (fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
+ ret = -EMSGSIZE;
+ goto err_fill;
+ }
+ }
+
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+
+err_fill:
+ rdma_counter_unbind_qpn(device, port, qpn, cntn);
+err_msg:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return ret;
+}
+
+static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 index, port, qpn, cntn;
+ int ret;
+
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
+ !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
+ !tb[RDMA_NLDEV_ATTR_RES_LQPN])
+ return -EINVAL;
+
+ if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_STAT_SET),
+ 0, 0);
+
+ cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
+ qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
+ ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
+ if (ret)
+ goto err_unbind;
+
+ if (fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
+ ret = -EMSGSIZE;
+ goto err_fill;
+ }
+
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+
+err_fill:
+ rdma_counter_bind_qpn(device, port, qpn, cntn);
+err_unbind:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return ret;
+}
+
+static int stat_get_doit_default_counter(struct sk_buff *skb,
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ struct nlattr *tb[])
+{
+ struct rdma_hw_stats *stats;
+ struct nlattr *table_attr;
+ struct ib_device *device;
+ int ret, num_cnts, i;
+ struct sk_buff *msg;
+ u32 index, port;
+ u64 v;
+
+ if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ if (!device->ops.alloc_hw_stats || !device->ops.get_hw_stats) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_STAT_GET),
+ 0, 0);
+
+ if (fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
+ ret = -EMSGSIZE;
+ goto err_msg;
+ }
+
+ stats = device->port_data ? device->port_data[port].hw_stats : NULL;
+ if (stats == NULL) {
+ ret = -EINVAL;
+ goto err_msg;
+ }
+ mutex_lock(&stats->lock);
+
+ num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
+ if (num_cnts < 0) {
+ ret = -EINVAL;
+ goto err_stats;
+ }
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
+ if (!table_attr) {
+ ret = -EMSGSIZE;
+ goto err_stats;
+ }
+ for (i = 0; i < num_cnts; i++) {
+ v = stats->value[i] +
+ rdma_counter_get_hwstat_value(device, port, i);
+ if (fill_stat_hwcounter_entry(msg, stats->names[i], v)) {
+ ret = -EMSGSIZE;
+ goto err_table;
+ }
+ }
+ nla_nest_end(msg, table_attr);
+
+ mutex_unlock(&stats->lock);
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+
+err_table:
+ nla_nest_cancel(msg, table_attr);
+err_stats:
+ mutex_unlock(&stats->lock);
+err_msg:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return ret;
+}
+
+static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack, struct nlattr *tb[])
+
+{
+ static enum rdma_nl_counter_mode mode;
+ static enum rdma_nl_counter_mask mask;
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 index, port;
+ int ret;
+
+ if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
+ return nldev_res_get_counter_doit(skb, nlh, extack);
+
+ if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
+ !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_STAT_GET),
+ 0, 0);
+
+ ret = rdma_counter_get_mode(device, port, &mode, &mask);
+ if (ret)
+ goto err_msg;
+
+ if (fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode))
+ goto err_msg;
+
+ if ((mode == RDMA_COUNTER_MODE_AUTO) &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask))
+ goto err_msg;
+
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+
+err_msg:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return ret;
+}
+
+static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ int ret;
+
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (ret)
+ return -EINVAL;
+
+ if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
+ return stat_get_doit_default_counter(skb, nlh, extack, tb);
+
+ switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
+ case RDMA_NLDEV_ATTR_RES_QP:
+ ret = stat_get_doit_qp(skb, nlh, extack, tb);
+ break;
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int nldev_stat_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ int ret;
+
+ ret = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NULL);
+ if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
+ return -EINVAL;
+
+ switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
+ case RDMA_NLDEV_ATTR_RES_QP:
+ ret = nldev_res_get_counter_dumpit(skb, cb);
+ break;
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
[RDMA_NLDEV_CMD_GET] = {
.doit = nldev_get_doit,
.dump = nldev_get_dumpit,
},
+ [RDMA_NLDEV_CMD_GET_CHARDEV] = {
+ .doit = nldev_get_chardev,
+ },
[RDMA_NLDEV_CMD_SET] = {
.doit = nldev_set_doit,
.flags = RDMA_NL_ADMIN_PERM,
@@ -1449,6 +2074,17 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
},
[RDMA_NLDEV_CMD_SYS_SET] = {
.doit = nldev_set_sys_set_doit,
+ },
+ [RDMA_NLDEV_CMD_STAT_SET] = {
+ .doit = nldev_stat_set_doit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_STAT_GET] = {
+ .doit = nldev_stat_get_doit,
+ .dump = nldev_stat_get_dumpit,
+ },
+ [RDMA_NLDEV_CMD_STAT_DEL] = {
+ .doit = nldev_stat_del_doit,
.flags = RDMA_NL_ADMIN_PERM,
},
};
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 3b5ff2f7b5f8..bddff426ee0f 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -6,6 +6,7 @@
#include <rdma/rdma_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/restrack.h>
+#include <rdma/rdma_counter.h>
#include <linux/mutex.h>
#include <linux/sched/task.h>
#include <linux/pid_namespace.h>
@@ -45,6 +46,7 @@ static const char *type2str(enum rdma_restrack_type type)
[RDMA_RESTRACK_CM_ID] = "CM_ID",
[RDMA_RESTRACK_MR] = "MR",
[RDMA_RESTRACK_CTX] = "CTX",
+ [RDMA_RESTRACK_COUNTER] = "COUNTER",
};
return names[type];
@@ -169,6 +171,8 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
return container_of(res, struct ib_mr, res)->device;
case RDMA_RESTRACK_CTX:
return container_of(res, struct ib_ucontext, res)->device;
+ case RDMA_RESTRACK_COUNTER:
+ return container_of(res, struct rdma_counter, res)->device;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
return NULL;
@@ -190,6 +194,20 @@ void rdma_restrack_set_task(struct rdma_restrack_entry *res,
}
EXPORT_SYMBOL(rdma_restrack_set_task);
+/**
+ * rdma_restrack_attach_task() - attach the task onto this resource
+ * @res: resource entry
+ * @task: the task to attach, the current task will be used if it is NULL.
+ */
+void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
+ struct task_struct *task)
+{
+ if (res->task)
+ put_task_struct(res->task);
+ get_task_struct(task);
+ res->task = task;
+}
+
static void rdma_restrack_add(struct rdma_restrack_entry *res)
{
struct ib_device *dev = res_to_dev(res);
@@ -203,15 +221,22 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res)
kref_init(&res->kref);
init_completion(&res->comp);
- if (res->type != RDMA_RESTRACK_QP)
- ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b,
- &rt->next_id, GFP_KERNEL);
- else {
+ if (res->type == RDMA_RESTRACK_QP) {
/* Special case to ensure that LQPN points to right QP */
struct ib_qp *qp = container_of(res, struct ib_qp, res);
ret = xa_insert(&rt->xa, qp->qp_num, res, GFP_KERNEL);
res->id = ret ? 0 : qp->qp_num;
+ } else if (res->type == RDMA_RESTRACK_COUNTER) {
+ /* Special case to ensure that cntn points to right counter */
+ struct rdma_counter *counter;
+
+ counter = container_of(res, struct rdma_counter, res);
+ ret = xa_insert(&rt->xa, counter->id, res, GFP_KERNEL);
+ res->id = ret ? 0 : counter->id;
+ } else {
+ ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b,
+ &rt->next_id, GFP_KERNEL);
}
if (!ret)
@@ -237,7 +262,8 @@ EXPORT_SYMBOL(rdma_restrack_kadd);
*/
void rdma_restrack_uadd(struct rdma_restrack_entry *res)
{
- if (res->type != RDMA_RESTRACK_CM_ID)
+ if ((res->type != RDMA_RESTRACK_CM_ID) &&
+ (res->type != RDMA_RESTRACK_COUNTER))
res->task = NULL;
if (!res->task)
@@ -323,3 +349,16 @@ out:
}
}
EXPORT_SYMBOL(rdma_restrack_del);
+
+bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res)
+{
+ /*
+ * 1. Kern resources should be visible in init
+ * namespace only
+ * 2. Present only resources visible in the current
+ * namespace
+ */
+ if (rdma_is_kernel_res(res))
+ return task_active_pid_ns(current) == &init_pid_ns;
+ return task_active_pid_ns(current) == task_active_pid_ns(res->task);
+}
diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h
index 09a1fbdf578e..7bd177cc0a61 100644
--- a/drivers/infiniband/core/restrack.h
+++ b/drivers/infiniband/core/restrack.h
@@ -25,4 +25,7 @@ struct rdma_restrack_root {
int rdma_restrack_init(struct ib_device *dev);
void rdma_restrack_clean(struct ib_device *dev);
+void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
+ struct task_struct *task);
+bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res);
#endif /* _RDMA_CORE_RESTRACK_H_ */
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 32ca8429eaae..dce06108c8c3 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -51,10 +51,34 @@ static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num,
return false;
}
-static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev)
+static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev,
+ bool pi_support)
{
+ u32 max_pages;
+
+ if (pi_support)
+ max_pages = dev->attrs.max_pi_fast_reg_page_list_len;
+ else
+ max_pages = dev->attrs.max_fast_reg_page_list_len;
+
/* arbitrary limit to avoid allocating gigantic resources */
- return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256);
+ return min_t(u32, max_pages, 256);
+}
+
+static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg)
+{
+ int count = 0;
+
+ if (reg->mr->need_inval) {
+ reg->inv_wr.opcode = IB_WR_LOCAL_INV;
+ reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
+ reg->inv_wr.next = &reg->reg_wr.wr;
+ count++;
+ } else {
+ reg->inv_wr.next = NULL;
+ }
+
+ return count;
}
/* Caller must have zero-initialized *reg. */
@@ -62,7 +86,8 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
u32 sg_cnt, u32 offset)
{
- u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
+ u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
+ qp->integrity_en);
u32 nents = min(sg_cnt, pages_per_mr);
int count = 0, ret;
@@ -70,14 +95,7 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
if (!reg->mr)
return -EAGAIN;
- if (reg->mr->need_inval) {
- reg->inv_wr.opcode = IB_WR_LOCAL_INV;
- reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
- reg->inv_wr.next = &reg->reg_wr.wr;
- count++;
- } else {
- reg->inv_wr.next = NULL;
- }
+ count += rdma_rw_inv_key(reg);
ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
if (ret < 0 || ret < nents) {
@@ -102,7 +120,8 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
struct rdma_rw_reg_ctx *prev = NULL;
- u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
+ u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
+ qp->integrity_en);
int i, j, ret = 0, count = 0;
ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr;
@@ -343,13 +362,14 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
struct ib_device *dev = qp->pd->device;
- u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
+ u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
+ qp->integrity_en);
struct ib_rdma_wr *rdma_wr;
- struct ib_send_wr *prev_wr = NULL;
int count = 0, ret;
if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) {
- pr_err("SG count too large\n");
+ pr_err("SG count too large: sg_cnt=%d, prot_sg_cnt=%d, pages_per_mr=%d\n",
+ sg_cnt, prot_sg_cnt, pages_per_mr);
return -EINVAL;
}
@@ -358,75 +378,58 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
return -ENOMEM;
sg_cnt = ret;
- ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir);
- if (!ret) {
- ret = -ENOMEM;
- goto out_unmap_sg;
+ if (prot_sg_cnt) {
+ ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir);
+ if (!ret) {
+ ret = -ENOMEM;
+ goto out_unmap_sg;
+ }
+ prot_sg_cnt = ret;
}
- prot_sg_cnt = ret;
ctx->type = RDMA_RW_SIG_MR;
ctx->nr_ops = 1;
- ctx->sig = kcalloc(1, sizeof(*ctx->sig), GFP_KERNEL);
- if (!ctx->sig) {
+ ctx->reg = kcalloc(1, sizeof(*ctx->reg), GFP_KERNEL);
+ if (!ctx->reg) {
ret = -ENOMEM;
goto out_unmap_prot_sg;
}
- ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->data, sg, sg_cnt, 0);
- if (ret < 0)
- goto out_free_ctx;
- count += ret;
- prev_wr = &ctx->sig->data.reg_wr.wr;
-
- ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot,
- prot_sg, prot_sg_cnt, 0);
- if (ret < 0)
- goto out_destroy_data_mr;
- count += ret;
-
- if (ctx->sig->prot.inv_wr.next)
- prev_wr->next = &ctx->sig->prot.inv_wr;
- else
- prev_wr->next = &ctx->sig->prot.reg_wr.wr;
- prev_wr = &ctx->sig->prot.reg_wr.wr;
-
- ctx->sig->sig_mr = ib_mr_pool_get(qp, &qp->sig_mrs);
- if (!ctx->sig->sig_mr) {
+ ctx->reg->mr = ib_mr_pool_get(qp, &qp->sig_mrs);
+ if (!ctx->reg->mr) {
ret = -EAGAIN;
- goto out_destroy_prot_mr;
+ goto out_free_ctx;
}
- if (ctx->sig->sig_mr->need_inval) {
- memset(&ctx->sig->sig_inv_wr, 0, sizeof(ctx->sig->sig_inv_wr));
+ count += rdma_rw_inv_key(ctx->reg);
- ctx->sig->sig_inv_wr.opcode = IB_WR_LOCAL_INV;
- ctx->sig->sig_inv_wr.ex.invalidate_rkey = ctx->sig->sig_mr->rkey;
+ memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs));
- prev_wr->next = &ctx->sig->sig_inv_wr;
- prev_wr = &ctx->sig->sig_inv_wr;
+ ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sg_cnt, NULL, prot_sg,
+ prot_sg_cnt, NULL, SZ_4K);
+ if (unlikely(ret)) {
+ pr_err("failed to map PI sg (%d)\n", sg_cnt + prot_sg_cnt);
+ goto out_destroy_sig_mr;
}
- ctx->sig->sig_wr.wr.opcode = IB_WR_REG_SIG_MR;
- ctx->sig->sig_wr.wr.wr_cqe = NULL;
- ctx->sig->sig_wr.wr.sg_list = &ctx->sig->data.sge;
- ctx->sig->sig_wr.wr.num_sge = 1;
- ctx->sig->sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE;
- ctx->sig->sig_wr.sig_attrs = sig_attrs;
- ctx->sig->sig_wr.sig_mr = ctx->sig->sig_mr;
- if (prot_sg_cnt)
- ctx->sig->sig_wr.prot = &ctx->sig->prot.sge;
- prev_wr->next = &ctx->sig->sig_wr.wr;
- prev_wr = &ctx->sig->sig_wr.wr;
+ ctx->reg->reg_wr.wr.opcode = IB_WR_REG_MR_INTEGRITY;
+ ctx->reg->reg_wr.wr.wr_cqe = NULL;
+ ctx->reg->reg_wr.wr.num_sge = 0;
+ ctx->reg->reg_wr.wr.send_flags = 0;
+ ctx->reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
+ if (rdma_protocol_iwarp(qp->device, port_num))
+ ctx->reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
+ ctx->reg->reg_wr.mr = ctx->reg->mr;
+ ctx->reg->reg_wr.key = ctx->reg->mr->lkey;
count++;
- ctx->sig->sig_sge.addr = 0;
- ctx->sig->sig_sge.length = ctx->sig->data.sge.length;
- if (sig_attrs->wire.sig_type != IB_SIG_TYPE_NONE)
- ctx->sig->sig_sge.length += ctx->sig->prot.sge.length;
+ ctx->reg->sge.addr = ctx->reg->mr->iova;
+ ctx->reg->sge.length = ctx->reg->mr->length;
+ if (sig_attrs->wire.sig_type == IB_SIG_TYPE_NONE)
+ ctx->reg->sge.length -= ctx->reg->mr->sig_attrs->meta_length;
- rdma_wr = &ctx->sig->data.wr;
- rdma_wr->wr.sg_list = &ctx->sig->sig_sge;
+ rdma_wr = &ctx->reg->wr;
+ rdma_wr->wr.sg_list = &ctx->reg->sge;
rdma_wr->wr.num_sge = 1;
rdma_wr->remote_addr = remote_addr;
rdma_wr->rkey = rkey;
@@ -434,21 +437,18 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
else
rdma_wr->wr.opcode = IB_WR_RDMA_READ;
- prev_wr->next = &rdma_wr->wr;
- prev_wr = &rdma_wr->wr;
+ ctx->reg->reg_wr.wr.next = &rdma_wr->wr;
count++;
return count;
-out_destroy_prot_mr:
- if (prot_sg_cnt)
- ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr);
-out_destroy_data_mr:
- ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr);
+out_destroy_sig_mr:
+ ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
out_free_ctx:
- kfree(ctx->sig);
+ kfree(ctx->reg);
out_unmap_prot_sg:
- ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
+ if (prot_sg_cnt)
+ ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
out_unmap_sg:
ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
return ret;
@@ -491,22 +491,8 @@ struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
switch (ctx->type) {
case RDMA_RW_SIG_MR:
- rdma_rw_update_lkey(&ctx->sig->data, true);
- if (ctx->sig->prot.mr)
- rdma_rw_update_lkey(&ctx->sig->prot, true);
-
- ctx->sig->sig_mr->need_inval = true;
- ib_update_fast_reg_key(ctx->sig->sig_mr,
- ib_inc_rkey(ctx->sig->sig_mr->lkey));
- ctx->sig->sig_sge.lkey = ctx->sig->sig_mr->lkey;
-
- if (ctx->sig->data.inv_wr.next)
- first_wr = &ctx->sig->data.inv_wr;
- else
- first_wr = &ctx->sig->data.reg_wr.wr;
- last_wr = &ctx->sig->data.wr.wr;
- break;
case RDMA_RW_MR:
+ /* fallthrough */
for (i = 0; i < ctx->nr_ops; i++) {
rdma_rw_update_lkey(&ctx->reg[i],
ctx->reg[i].wr.wr.opcode !=
@@ -605,7 +591,7 @@ EXPORT_SYMBOL(rdma_rw_ctx_destroy);
/**
* rdma_rw_ctx_destroy_signature - release all resources allocated by
- * rdma_rw_ctx_init_signature
+ * rdma_rw_ctx_signature_init
* @ctx: context to release
* @qp: queue pair to operate on
* @port_num: port num to which the connection is bound
@@ -623,16 +609,12 @@ void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR))
return;
- ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr);
- ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
+ ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
+ kfree(ctx->reg);
- if (ctx->sig->prot.mr) {
- ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr);
+ ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
+ if (prot_sg_cnt)
ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
- }
-
- ib_mr_pool_put(qp, &qp->sig_mrs, ctx->sig->sig_mr);
- kfree(ctx->sig);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
@@ -653,7 +635,7 @@ unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num,
unsigned int mr_pages;
if (rdma_rw_can_use_mr(device, port_num))
- mr_pages = rdma_rw_fr_page_list_len(device);
+ mr_pages = rdma_rw_fr_page_list_len(device, false);
else
mr_pages = device->attrs.max_sge_rd;
return DIV_ROUND_UP(maxpages, mr_pages);
@@ -679,9 +661,8 @@ void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
* we'll need two additional MRs for the registrations and the
* invalidation.
*/
- if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
- factor += 6; /* (inv + reg) * (data + prot + sig) */
- else if (rdma_rw_can_use_mr(dev, attr->port_num))
+ if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN ||
+ rdma_rw_can_use_mr(dev, attr->port_num))
factor += 2; /* inv + reg */
attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;
@@ -697,20 +678,22 @@ void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
{
struct ib_device *dev = qp->pd->device;
- u32 nr_mrs = 0, nr_sig_mrs = 0;
+ u32 nr_mrs = 0, nr_sig_mrs = 0, max_num_sg = 0;
int ret = 0;
- if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) {
+ if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) {
nr_sig_mrs = attr->cap.max_rdma_ctxs;
- nr_mrs = attr->cap.max_rdma_ctxs * 2;
+ nr_mrs = attr->cap.max_rdma_ctxs;
+ max_num_sg = rdma_rw_fr_page_list_len(dev, true);
} else if (rdma_rw_can_use_mr(dev, attr->port_num)) {
nr_mrs = attr->cap.max_rdma_ctxs;
+ max_num_sg = rdma_rw_fr_page_list_len(dev, false);
}
if (nr_mrs) {
ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs,
IB_MR_TYPE_MEM_REG,
- rdma_rw_fr_page_list_len(dev));
+ max_num_sg, 0);
if (ret) {
pr_err("%s: failed to allocated %d MRs\n",
__func__, nr_mrs);
@@ -720,10 +703,10 @@ int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
if (nr_sig_mrs) {
ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs,
- IB_MR_TYPE_SIGNATURE, 2);
+ IB_MR_TYPE_INTEGRITY, max_num_sg, max_num_sg);
if (ret) {
pr_err("%s: failed to allocated %d SIG MRs\n",
- __func__, nr_mrs);
+ __func__, nr_sig_mrs);
goto out_free_rdma_mrs;
}
}
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index c78d0c9646ae..b477295a96c2 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -43,6 +43,7 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_pma.h>
#include <rdma/ib_cache.h>
+#include <rdma/rdma_counter.h>
struct ib_port;
@@ -800,9 +801,12 @@ static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats,
return 0;
}
-static ssize_t print_hw_stat(struct rdma_hw_stats *stats, int index, char *buf)
+static ssize_t print_hw_stat(struct ib_device *dev, int port_num,
+ struct rdma_hw_stats *stats, int index, char *buf)
{
- return sprintf(buf, "%llu\n", stats->value[index]);
+ u64 v = rdma_counter_get_hwstat_value(dev, port_num, index);
+
+ return sprintf(buf, "%llu\n", stats->value[index] + v);
}
static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr,
@@ -828,7 +832,7 @@ static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr,
ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index);
if (ret)
goto unlock;
- ret = print_hw_stat(stats, hsa->index, buf);
+ ret = print_hw_stat(dev, hsa->port_num, stats, hsa->index, buf);
unlock:
mutex_unlock(&stats->lock);
@@ -999,6 +1003,8 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
goto err;
port->hw_stats_ag = hsag;
port->hw_stats = stats;
+ if (device->port_data)
+ device->port_data[port_num].hw_stats = stats;
} else {
struct kobject *kobj = &device->dev.kobj;
ret = sysfs_create_group(kobj, hsag);
@@ -1289,6 +1295,8 @@ const struct attribute_group ib_dev_attr_group = {
void ib_free_port_attrs(struct ib_core_device *coredev)
{
+ struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
+ bool is_full_dev = &device->coredev == coredev;
struct kobject *p, *t;
list_for_each_entry_safe(p, t, &coredev->port_list, entry) {
@@ -1298,6 +1306,8 @@ void ib_free_port_attrs(struct ib_core_device *coredev)
if (port->hw_stats_ag)
free_hsag(&port->kobj, port->hw_stats_ag);
kfree(port->hw_stats);
+ if (device->port_data && is_full_dev)
+ device->port_data[port->port_num].hw_stats = NULL;
if (port->pma_table)
sysfs_remove_group(p, port->pma_table);
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
deleted file mode 100644
index 8e7da2d41fd8..000000000000
--- a/drivers/infiniband/core/ucm.c
+++ /dev/null
@@ -1,1350 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/completion.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/poll.h>
-#include <linux/sched.h>
-#include <linux/file.h>
-#include <linux/mount.h>
-#include <linux/cdev.h>
-#include <linux/xarray.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-
-#include <linux/nospec.h>
-
-#include <linux/uaccess.h>
-
-#include <rdma/ib.h>
-#include <rdma/ib_cm.h>
-#include <rdma/ib_user_cm.h>
-#include <rdma/ib_marshall.h>
-
-#include "core_priv.h"
-
-MODULE_AUTHOR("Libor Michalek");
-MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
-MODULE_LICENSE("Dual BSD/GPL");
-
-struct ib_ucm_device {
- int devnum;
- struct cdev cdev;
- struct device dev;
- struct ib_device *ib_dev;
-};
-
-struct ib_ucm_file {
- struct mutex file_mutex;
- struct file *filp;
- struct ib_ucm_device *device;
-
- struct list_head ctxs;
- struct list_head events;
- wait_queue_head_t poll_wait;
-};
-
-struct ib_ucm_context {
- int id;
- struct completion comp;
- atomic_t ref;
- int events_reported;
-
- struct ib_ucm_file *file;
- struct ib_cm_id *cm_id;
- __u64 uid;
-
- struct list_head events; /* list of pending events. */
- struct list_head file_list; /* member in file ctx list */
-};
-
-struct ib_ucm_event {
- struct ib_ucm_context *ctx;
- struct list_head file_list; /* member in file event list */
- struct list_head ctx_list; /* member in ctx event list */
-
- struct ib_cm_id *cm_id;
- struct ib_ucm_event_resp resp;
- void *data;
- void *info;
- int data_len;
- int info_len;
-};
-
-enum {
- IB_UCM_MAJOR = 231,
- IB_UCM_BASE_MINOR = 224,
- IB_UCM_MAX_DEVICES = RDMA_MAX_PORTS,
- IB_UCM_NUM_FIXED_MINOR = 32,
- IB_UCM_NUM_DYNAMIC_MINOR = IB_UCM_MAX_DEVICES - IB_UCM_NUM_FIXED_MINOR,
-};
-
-#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
-static dev_t dynamic_ucm_dev;
-
-static void ib_ucm_add_one(struct ib_device *device);
-static void ib_ucm_remove_one(struct ib_device *device, void *client_data);
-
-static struct ib_client ucm_client = {
- .name = "ucm",
- .add = ib_ucm_add_one,
- .remove = ib_ucm_remove_one
-};
-
-static DEFINE_XARRAY_ALLOC(ctx_id_table);
-static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES);
-
-static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id)
-{
- struct ib_ucm_context *ctx;
-
- xa_lock(&ctx_id_table);
- ctx = xa_load(&ctx_id_table, id);
- if (!ctx)
- ctx = ERR_PTR(-ENOENT);
- else if (ctx->file != file)
- ctx = ERR_PTR(-EINVAL);
- else
- atomic_inc(&ctx->ref);
- xa_unlock(&ctx_id_table);
-
- return ctx;
-}
-
-static void ib_ucm_ctx_put(struct ib_ucm_context *ctx)
-{
- if (atomic_dec_and_test(&ctx->ref))
- complete(&ctx->comp);
-}
-
-static inline int ib_ucm_new_cm_id(int event)
-{
- return event == IB_CM_REQ_RECEIVED || event == IB_CM_SIDR_REQ_RECEIVED;
-}
-
-static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx)
-{
- struct ib_ucm_event *uevent;
-
- mutex_lock(&ctx->file->file_mutex);
- list_del(&ctx->file_list);
- while (!list_empty(&ctx->events)) {
-
- uevent = list_entry(ctx->events.next,
- struct ib_ucm_event, ctx_list);
- list_del(&uevent->file_list);
- list_del(&uevent->ctx_list);
- mutex_unlock(&ctx->file->file_mutex);
-
- /* clear incoming connections. */
- if (ib_ucm_new_cm_id(uevent->resp.event))
- ib_destroy_cm_id(uevent->cm_id);
-
- kfree(uevent);
- mutex_lock(&ctx->file->file_mutex);
- }
- mutex_unlock(&ctx->file->file_mutex);
-}
-
-static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
-{
- struct ib_ucm_context *ctx;
-
- ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
- if (!ctx)
- return NULL;
-
- atomic_set(&ctx->ref, 1);
- init_completion(&ctx->comp);
- ctx->file = file;
- INIT_LIST_HEAD(&ctx->events);
-
- if (xa_alloc(&ctx_id_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL))
- goto error;
-
- list_add_tail(&ctx->file_list, &file->ctxs);
- return ctx;
-
-error:
- kfree(ctx);
- return NULL;
-}
-
-static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
- const struct ib_cm_req_event_param *kreq)
-{
- ureq->remote_ca_guid = kreq->remote_ca_guid;
- ureq->remote_qkey = kreq->remote_qkey;
- ureq->remote_qpn = kreq->remote_qpn;
- ureq->qp_type = kreq->qp_type;
- ureq->starting_psn = kreq->starting_psn;
- ureq->responder_resources = kreq->responder_resources;
- ureq->initiator_depth = kreq->initiator_depth;
- ureq->local_cm_response_timeout = kreq->local_cm_response_timeout;
- ureq->flow_control = kreq->flow_control;
- ureq->remote_cm_response_timeout = kreq->remote_cm_response_timeout;
- ureq->retry_count = kreq->retry_count;
- ureq->rnr_retry_count = kreq->rnr_retry_count;
- ureq->srq = kreq->srq;
- ureq->port = kreq->port;
-
- ib_copy_path_rec_to_user(&ureq->primary_path, kreq->primary_path);
- if (kreq->alternate_path)
- ib_copy_path_rec_to_user(&ureq->alternate_path,
- kreq->alternate_path);
-}
-
-static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep,
- const struct ib_cm_rep_event_param *krep)
-{
- urep->remote_ca_guid = krep->remote_ca_guid;
- urep->remote_qkey = krep->remote_qkey;
- urep->remote_qpn = krep->remote_qpn;
- urep->starting_psn = krep->starting_psn;
- urep->responder_resources = krep->responder_resources;
- urep->initiator_depth = krep->initiator_depth;
- urep->target_ack_delay = krep->target_ack_delay;
- urep->failover_accepted = krep->failover_accepted;
- urep->flow_control = krep->flow_control;
- urep->rnr_retry_count = krep->rnr_retry_count;
- urep->srq = krep->srq;
-}
-
-static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep,
- const struct ib_cm_sidr_rep_event_param *krep)
-{
- urep->status = krep->status;
- urep->qkey = krep->qkey;
- urep->qpn = krep->qpn;
-};
-
-static int ib_ucm_event_process(const struct ib_cm_event *evt,
- struct ib_ucm_event *uvt)
-{
- void *info = NULL;
-
- switch (evt->event) {
- case IB_CM_REQ_RECEIVED:
- ib_ucm_event_req_get(&uvt->resp.u.req_resp,
- &evt->param.req_rcvd);
- uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE;
- uvt->resp.present = IB_UCM_PRES_PRIMARY;
- uvt->resp.present |= (evt->param.req_rcvd.alternate_path ?
- IB_UCM_PRES_ALTERNATE : 0);
- break;
- case IB_CM_REP_RECEIVED:
- ib_ucm_event_rep_get(&uvt->resp.u.rep_resp,
- &evt->param.rep_rcvd);
- uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
- break;
- case IB_CM_RTU_RECEIVED:
- uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE;
- uvt->resp.u.send_status = evt->param.send_status;
- break;
- case IB_CM_DREQ_RECEIVED:
- uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE;
- uvt->resp.u.send_status = evt->param.send_status;
- break;
- case IB_CM_DREP_RECEIVED:
- uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE;
- uvt->resp.u.send_status = evt->param.send_status;
- break;
- case IB_CM_MRA_RECEIVED:
- uvt->resp.u.mra_resp.timeout =
- evt->param.mra_rcvd.service_timeout;
- uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE;
- break;
- case IB_CM_REJ_RECEIVED:
- uvt->resp.u.rej_resp.reason = evt->param.rej_rcvd.reason;
- uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
- uvt->info_len = evt->param.rej_rcvd.ari_length;
- info = evt->param.rej_rcvd.ari;
- break;
- case IB_CM_LAP_RECEIVED:
- ib_copy_path_rec_to_user(&uvt->resp.u.lap_resp.path,
- evt->param.lap_rcvd.alternate_path);
- uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE;
- uvt->resp.present = IB_UCM_PRES_ALTERNATE;
- break;
- case IB_CM_APR_RECEIVED:
- uvt->resp.u.apr_resp.status = evt->param.apr_rcvd.ap_status;
- uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE;
- uvt->info_len = evt->param.apr_rcvd.info_len;
- info = evt->param.apr_rcvd.apr_info;
- break;
- case IB_CM_SIDR_REQ_RECEIVED:
- uvt->resp.u.sidr_req_resp.pkey =
- evt->param.sidr_req_rcvd.pkey;
- uvt->resp.u.sidr_req_resp.port =
- evt->param.sidr_req_rcvd.port;
- uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
- break;
- case IB_CM_SIDR_REP_RECEIVED:
- ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp,
- &evt->param.sidr_rep_rcvd);
- uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
- uvt->info_len = evt->param.sidr_rep_rcvd.info_len;
- info = evt->param.sidr_rep_rcvd.info;
- break;
- default:
- uvt->resp.u.send_status = evt->param.send_status;
- break;
- }
-
- if (uvt->data_len) {
- uvt->data = kmemdup(evt->private_data, uvt->data_len, GFP_KERNEL);
- if (!uvt->data)
- goto err1;
-
- uvt->resp.present |= IB_UCM_PRES_DATA;
- }
-
- if (uvt->info_len) {
- uvt->info = kmemdup(info, uvt->info_len, GFP_KERNEL);
- if (!uvt->info)
- goto err2;
-
- uvt->resp.present |= IB_UCM_PRES_INFO;
- }
- return 0;
-
-err2:
- kfree(uvt->data);
-err1:
- return -ENOMEM;
-}
-
-static int ib_ucm_event_handler(struct ib_cm_id *cm_id,
- const struct ib_cm_event *event)
-{
- struct ib_ucm_event *uevent;
- struct ib_ucm_context *ctx;
- int result = 0;
-
- ctx = cm_id->context;
-
- uevent = kzalloc(sizeof *uevent, GFP_KERNEL);
- if (!uevent)
- goto err1;
-
- uevent->ctx = ctx;
- uevent->cm_id = cm_id;
- uevent->resp.uid = ctx->uid;
- uevent->resp.id = ctx->id;
- uevent->resp.event = event->event;
-
- result = ib_ucm_event_process(event, uevent);
- if (result)
- goto err2;
-
- mutex_lock(&ctx->file->file_mutex);
- list_add_tail(&uevent->file_list, &ctx->file->events);
- list_add_tail(&uevent->ctx_list, &ctx->events);
- wake_up_interruptible(&ctx->file->poll_wait);
- mutex_unlock(&ctx->file->file_mutex);
- return 0;
-
-err2:
- kfree(uevent);
-err1:
- /* Destroy new cm_id's */
- return ib_ucm_new_cm_id(event->event);
-}
-
-static ssize_t ib_ucm_event(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_context *ctx;
- struct ib_ucm_event_get cmd;
- struct ib_ucm_event *uevent;
- int result = 0;
-
- if (out_len < sizeof(struct ib_ucm_event_resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- mutex_lock(&file->file_mutex);
- while (list_empty(&file->events)) {
- mutex_unlock(&file->file_mutex);
-
- if (file->filp->f_flags & O_NONBLOCK)
- return -EAGAIN;
-
- if (wait_event_interruptible(file->poll_wait,
- !list_empty(&file->events)))
- return -ERESTARTSYS;
-
- mutex_lock(&file->file_mutex);
- }
-
- uevent = list_entry(file->events.next, struct ib_ucm_event, file_list);
-
- if (ib_ucm_new_cm_id(uevent->resp.event)) {
- ctx = ib_ucm_ctx_alloc(file);
- if (!ctx) {
- result = -ENOMEM;
- goto done;
- }
-
- ctx->cm_id = uevent->cm_id;
- ctx->cm_id->context = ctx;
- uevent->resp.id = ctx->id;
- }
-
- if (copy_to_user(u64_to_user_ptr(cmd.response),
- &uevent->resp, sizeof(uevent->resp))) {
- result = -EFAULT;
- goto done;
- }
-
- if (uevent->data) {
- if (cmd.data_len < uevent->data_len) {
- result = -ENOMEM;
- goto done;
- }
- if (copy_to_user(u64_to_user_ptr(cmd.data),
- uevent->data, uevent->data_len)) {
- result = -EFAULT;
- goto done;
- }
- }
-
- if (uevent->info) {
- if (cmd.info_len < uevent->info_len) {
- result = -ENOMEM;
- goto done;
- }
- if (copy_to_user(u64_to_user_ptr(cmd.info),
- uevent->info, uevent->info_len)) {
- result = -EFAULT;
- goto done;
- }
- }
-
- list_del(&uevent->file_list);
- list_del(&uevent->ctx_list);
- uevent->ctx->events_reported++;
-
- kfree(uevent->data);
- kfree(uevent->info);
- kfree(uevent);
-done:
- mutex_unlock(&file->file_mutex);
- return result;
-}
-
-static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_create_id cmd;
- struct ib_ucm_create_id_resp resp;
- struct ib_ucm_context *ctx;
- int result;
-
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- mutex_lock(&file->file_mutex);
- ctx = ib_ucm_ctx_alloc(file);
- mutex_unlock(&file->file_mutex);
- if (!ctx)
- return -ENOMEM;
-
- ctx->uid = cmd.uid;
- ctx->cm_id = ib_create_cm_id(file->device->ib_dev,
- ib_ucm_event_handler, ctx);
- if (IS_ERR(ctx->cm_id)) {
- result = PTR_ERR(ctx->cm_id);
- goto err1;
- }
-
- resp.id = ctx->id;
- if (copy_to_user(u64_to_user_ptr(cmd.response),
- &resp, sizeof(resp))) {
- result = -EFAULT;
- goto err2;
- }
- return 0;
-
-err2:
- ib_destroy_cm_id(ctx->cm_id);
-err1:
- xa_erase(&ctx_id_table, ctx->id);
- kfree(ctx);
- return result;
-}
-
-static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_destroy_id cmd;
- struct ib_ucm_destroy_id_resp resp;
- struct ib_ucm_context *ctx;
- int result = 0;
-
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- xa_lock(&ctx_id_table);
- ctx = xa_load(&ctx_id_table, cmd.id);
- if (!ctx)
- ctx = ERR_PTR(-ENOENT);
- else if (ctx->file != file)
- ctx = ERR_PTR(-EINVAL);
- else
- __xa_erase(&ctx_id_table, ctx->id);
- xa_unlock(&ctx_id_table);
-
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- ib_ucm_ctx_put(ctx);
- wait_for_completion(&ctx->comp);
-
- /* No new events will be generated after destroying the cm_id. */
- ib_destroy_cm_id(ctx->cm_id);
- /* Cleanup events not yet reported to the user. */
- ib_ucm_cleanup_events(ctx);
-
- resp.events_reported = ctx->events_reported;
- if (copy_to_user(u64_to_user_ptr(cmd.response),
- &resp, sizeof(resp)))
- result = -EFAULT;
-
- kfree(ctx);
- return result;
-}
-
-static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_attr_id_resp resp;
- struct ib_ucm_attr_id cmd;
- struct ib_ucm_context *ctx;
- int result = 0;
-
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- resp.service_id = ctx->cm_id->service_id;
- resp.service_mask = ctx->cm_id->service_mask;
- resp.local_id = ctx->cm_id->local_id;
- resp.remote_id = ctx->cm_id->remote_id;
-
- if (copy_to_user(u64_to_user_ptr(cmd.response),
- &resp, sizeof(resp)))
- result = -EFAULT;
-
- ib_ucm_ctx_put(ctx);
- return result;
-}
-
-static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_uverbs_qp_attr resp;
- struct ib_ucm_init_qp_attr cmd;
- struct ib_ucm_context *ctx;
- struct ib_qp_attr qp_attr;
- int result = 0;
-
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- resp.qp_attr_mask = 0;
- memset(&qp_attr, 0, sizeof qp_attr);
- qp_attr.qp_state = cmd.qp_state;
- result = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
- if (result)
- goto out;
-
- ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
-
- if (copy_to_user(u64_to_user_ptr(cmd.response),
- &resp, sizeof(resp)))
- result = -EFAULT;
-
-out:
- ib_ucm_ctx_put(ctx);
- return result;
-}
-
-static int ucm_validate_listen(__be64 service_id, __be64 service_mask)
-{
- service_id &= service_mask;
-
- if (((service_id & IB_CMA_SERVICE_ID_MASK) == IB_CMA_SERVICE_ID) ||
- ((service_id & IB_SDP_SERVICE_ID_MASK) == IB_SDP_SERVICE_ID))
- return -EINVAL;
-
- return 0;
-}
-
-static ssize_t ib_ucm_listen(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_listen cmd;
- struct ib_ucm_context *ctx;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- result = ucm_validate_listen(cmd.service_id, cmd.service_mask);
- if (result)
- goto out;
-
- result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask);
-out:
- ib_ucm_ctx_put(ctx);
- return result;
-}
-
-static ssize_t ib_ucm_notify(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_notify cmd;
- struct ib_ucm_context *ctx;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- result = ib_cm_notify(ctx->cm_id, (enum ib_event_type) cmd.event);
- ib_ucm_ctx_put(ctx);
- return result;
-}
-
-static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len)
-{
- void *data;
-
- *dest = NULL;
-
- if (!len)
- return 0;
-
- data = memdup_user(u64_to_user_ptr(src), len);
- if (IS_ERR(data))
- return PTR_ERR(data);
-
- *dest = data;
- return 0;
-}
-
-static int ib_ucm_path_get(struct sa_path_rec **path, u64 src)
-{
- struct ib_user_path_rec upath;
- struct sa_path_rec *sa_path;
-
- *path = NULL;
-
- if (!src)
- return 0;
-
- sa_path = kmalloc(sizeof(*sa_path), GFP_KERNEL);
- if (!sa_path)
- return -ENOMEM;
-
- if (copy_from_user(&upath, u64_to_user_ptr(src),
- sizeof(upath))) {
-
- kfree(sa_path);
- return -EFAULT;
- }
-
- ib_copy_path_rec_from_user(sa_path, &upath);
- *path = sa_path;
- return 0;
-}
-
-static ssize_t ib_ucm_send_req(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_cm_req_param param;
- struct ib_ucm_context *ctx;
- struct ib_ucm_req cmd;
- int result;
-
- param.private_data = NULL;
- param.primary_path = NULL;
- param.alternate_path = NULL;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&param.private_data, cmd.data, cmd.len);
- if (result)
- goto done;
-
- result = ib_ucm_path_get(&param.primary_path, cmd.primary_path);
- if (result)
- goto done;
-
- result = ib_ucm_path_get(&param.alternate_path, cmd.alternate_path);
- if (result)
- goto done;
-
- param.private_data_len = cmd.len;
- param.service_id = cmd.sid;
- param.qp_num = cmd.qpn;
- param.qp_type = cmd.qp_type;
- param.starting_psn = cmd.psn;
- param.peer_to_peer = cmd.peer_to_peer;
- param.responder_resources = cmd.responder_resources;
- param.initiator_depth = cmd.initiator_depth;
- param.remote_cm_response_timeout = cmd.remote_cm_response_timeout;
- param.flow_control = cmd.flow_control;
- param.local_cm_response_timeout = cmd.local_cm_response_timeout;
- param.retry_count = cmd.retry_count;
- param.rnr_retry_count = cmd.rnr_retry_count;
- param.max_cm_retries = cmd.max_cm_retries;
- param.srq = cmd.srq;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = ib_send_cm_req(ctx->cm_id, &param);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
-done:
- kfree(param.private_data);
- kfree(param.primary_path);
- kfree(param.alternate_path);
- return result;
-}
-
-static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_cm_rep_param param;
- struct ib_ucm_context *ctx;
- struct ib_ucm_rep cmd;
- int result;
-
- param.private_data = NULL;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&param.private_data, cmd.data, cmd.len);
- if (result)
- return result;
-
- param.qp_num = cmd.qpn;
- param.starting_psn = cmd.psn;
- param.private_data_len = cmd.len;
- param.responder_resources = cmd.responder_resources;
- param.initiator_depth = cmd.initiator_depth;
- param.failover_accepted = cmd.failover_accepted;
- param.flow_control = cmd.flow_control;
- param.rnr_retry_count = cmd.rnr_retry_count;
- param.srq = cmd.srq;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- ctx->uid = cmd.uid;
- result = ib_send_cm_rep(ctx->cm_id, &param);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
- kfree(param.private_data);
- return result;
-}
-
-static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file,
- const char __user *inbuf, int in_len,
- int (*func)(struct ib_cm_id *cm_id,
- const void *private_data,
- u8 private_data_len))
-{
- struct ib_ucm_private_data cmd;
- struct ib_ucm_context *ctx;
- const void *private_data = NULL;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&private_data, cmd.data, cmd.len);
- if (result)
- return result;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = func(ctx->cm_id, private_data, cmd.len);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
- kfree(private_data);
- return result;
-}
-
-static ssize_t ib_ucm_send_rtu(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_rtu);
-}
-
-static ssize_t ib_ucm_send_dreq(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_dreq);
-}
-
-static ssize_t ib_ucm_send_drep(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_drep);
-}
-
-static ssize_t ib_ucm_send_info(struct ib_ucm_file *file,
- const char __user *inbuf, int in_len,
- int (*func)(struct ib_cm_id *cm_id,
- int status,
- const void *info,
- u8 info_len,
- const void *data,
- u8 data_len))
-{
- struct ib_ucm_context *ctx;
- struct ib_ucm_info cmd;
- const void *data = NULL;
- const void *info = NULL;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&data, cmd.data, cmd.data_len);
- if (result)
- goto done;
-
- result = ib_ucm_alloc_data(&info, cmd.info, cmd.info_len);
- if (result)
- goto done;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = func(ctx->cm_id, cmd.status, info, cmd.info_len,
- data, cmd.data_len);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
-done:
- kfree(data);
- kfree(info);
- return result;
-}
-
-static ssize_t ib_ucm_send_rej(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_rej);
-}
-
-static ssize_t ib_ucm_send_apr(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_apr);
-}
-
-static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_context *ctx;
- struct ib_ucm_mra cmd;
- const void *data = NULL;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&data, cmd.data, cmd.len);
- if (result)
- return result;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, data, cmd.len);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
- kfree(data);
- return result;
-}
-
-static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_context *ctx;
- struct sa_path_rec *path = NULL;
- struct ib_ucm_lap cmd;
- const void *data = NULL;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&data, cmd.data, cmd.len);
- if (result)
- goto done;
-
- result = ib_ucm_path_get(&path, cmd.path);
- if (result)
- goto done;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
-done:
- kfree(data);
- kfree(path);
- return result;
-}
-
-static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_cm_sidr_req_param param = {};
- struct ib_ucm_context *ctx;
- struct ib_ucm_sidr_req cmd;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&param.private_data, cmd.data, cmd.len);
- if (result)
- goto done;
-
- result = ib_ucm_path_get(&param.path, cmd.path);
- if (result)
- goto done;
-
- param.private_data_len = cmd.len;
- param.service_id = cmd.sid;
- param.timeout_ms = cmd.timeout;
- param.max_cm_retries = cmd.max_cm_retries;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = ib_send_cm_sidr_req(ctx->cm_id, &param);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
-done:
- kfree(param.private_data);
- kfree(param.path);
- return result;
-}
-
-static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_cm_sidr_rep_param param;
- struct ib_ucm_sidr_rep cmd;
- struct ib_ucm_context *ctx;
- int result;
-
- param.info = NULL;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&param.private_data,
- cmd.data, cmd.data_len);
- if (result)
- goto done;
-
- result = ib_ucm_alloc_data(&param.info, cmd.info, cmd.info_len);
- if (result)
- goto done;
-
- param.qp_num = cmd.qpn;
- param.qkey = cmd.qkey;
- param.status = cmd.status;
- param.info_length = cmd.info_len;
- param.private_data_len = cmd.data_len;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = ib_send_cm_sidr_rep(ctx->cm_id, &param);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
-done:
- kfree(param.private_data);
- kfree(param.info);
- return result;
-}
-
-static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len) = {
- [IB_USER_CM_CMD_CREATE_ID] = ib_ucm_create_id,
- [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id,
- [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id,
- [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen,
- [IB_USER_CM_CMD_NOTIFY] = ib_ucm_notify,
- [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req,
- [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep,
- [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu,
- [IB_USER_CM_CMD_SEND_DREQ] = ib_ucm_send_dreq,
- [IB_USER_CM_CMD_SEND_DREP] = ib_ucm_send_drep,
- [IB_USER_CM_CMD_SEND_REJ] = ib_ucm_send_rej,
- [IB_USER_CM_CMD_SEND_MRA] = ib_ucm_send_mra,
- [IB_USER_CM_CMD_SEND_LAP] = ib_ucm_send_lap,
- [IB_USER_CM_CMD_SEND_APR] = ib_ucm_send_apr,
- [IB_USER_CM_CMD_SEND_SIDR_REQ] = ib_ucm_send_sidr_req,
- [IB_USER_CM_CMD_SEND_SIDR_REP] = ib_ucm_send_sidr_rep,
- [IB_USER_CM_CMD_EVENT] = ib_ucm_event,
- [IB_USER_CM_CMD_INIT_QP_ATTR] = ib_ucm_init_qp_attr,
-};
-
-static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
- size_t len, loff_t *pos)
-{
- struct ib_ucm_file *file = filp->private_data;
- struct ib_ucm_cmd_hdr hdr;
- ssize_t result;
-
- if (!ib_safe_file_access(filp)) {
- pr_err_once("ucm_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
- task_tgid_vnr(current), current->comm);
- return -EACCES;
- }
-
- if (len < sizeof(hdr))
- return -EINVAL;
-
- if (copy_from_user(&hdr, buf, sizeof(hdr)))
- return -EFAULT;
-
- if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
- return -EINVAL;
- hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucm_cmd_table));
-
- if (hdr.in + sizeof(hdr) > len)
- return -EINVAL;
-
- result = ucm_cmd_table[hdr.cmd](file, buf + sizeof(hdr),
- hdr.in, hdr.out);
- if (!result)
- result = len;
-
- return result;
-}
-
-static __poll_t ib_ucm_poll(struct file *filp,
- struct poll_table_struct *wait)
-{
- struct ib_ucm_file *file = filp->private_data;
- __poll_t mask = 0;
-
- poll_wait(filp, &file->poll_wait, wait);
-
- if (!list_empty(&file->events))
- mask = EPOLLIN | EPOLLRDNORM;
-
- return mask;
-}
-
-/*
- * ib_ucm_open() does not need the BKL:
- *
- * - no global state is referred to;
- * - there is no ioctl method to race against;
- * - no further module initialization is required for open to work
- * after the device is registered.
- */
-static int ib_ucm_open(struct inode *inode, struct file *filp)
-{
- struct ib_ucm_file *file;
-
- file = kmalloc(sizeof(*file), GFP_KERNEL);
- if (!file)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&file->events);
- INIT_LIST_HEAD(&file->ctxs);
- init_waitqueue_head(&file->poll_wait);
-
- mutex_init(&file->file_mutex);
-
- filp->private_data = file;
- file->filp = filp;
- file->device = container_of(inode->i_cdev, struct ib_ucm_device, cdev);
-
- return stream_open(inode, filp);
-}
-
-static int ib_ucm_close(struct inode *inode, struct file *filp)
-{
- struct ib_ucm_file *file = filp->private_data;
- struct ib_ucm_context *ctx;
-
- mutex_lock(&file->file_mutex);
- while (!list_empty(&file->ctxs)) {
- ctx = list_entry(file->ctxs.next,
- struct ib_ucm_context, file_list);
- mutex_unlock(&file->file_mutex);
-
- xa_erase(&ctx_id_table, ctx->id);
- ib_destroy_cm_id(ctx->cm_id);
- ib_ucm_cleanup_events(ctx);
- kfree(ctx);
-
- mutex_lock(&file->file_mutex);
- }
- mutex_unlock(&file->file_mutex);
- kfree(file);
- return 0;
-}
-
-static void ib_ucm_release_dev(struct device *dev)
-{
- struct ib_ucm_device *ucm_dev;
-
- ucm_dev = container_of(dev, struct ib_ucm_device, dev);
- kfree(ucm_dev);
-}
-
-static void ib_ucm_free_dev(struct ib_ucm_device *ucm_dev)
-{
- clear_bit(ucm_dev->devnum, dev_map);
-}
-
-static const struct file_operations ucm_fops = {
- .owner = THIS_MODULE,
- .open = ib_ucm_open,
- .release = ib_ucm_close,
- .write = ib_ucm_write,
- .poll = ib_ucm_poll,
- .llseek = no_llseek,
-};
-
-static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct ib_ucm_device *ucm_dev;
-
- ucm_dev = container_of(dev, struct ib_ucm_device, dev);
- return sprintf(buf, "%s\n", ucm_dev->ib_dev->name);
-}
-static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
-
-static void ib_ucm_add_one(struct ib_device *device)
-{
- int devnum;
- dev_t base;
- struct ib_ucm_device *ucm_dev;
-
- if (!device->ops.alloc_ucontext || !rdma_cap_ib_cm(device, 1))
- return;
-
- ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL);
- if (!ucm_dev)
- return;
-
- device_initialize(&ucm_dev->dev);
- ucm_dev->ib_dev = device;
- ucm_dev->dev.release = ib_ucm_release_dev;
-
- devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
- if (devnum >= IB_UCM_MAX_DEVICES)
- goto err;
- ucm_dev->devnum = devnum;
- set_bit(devnum, dev_map);
- if (devnum >= IB_UCM_NUM_FIXED_MINOR)
- base = dynamic_ucm_dev + devnum - IB_UCM_NUM_FIXED_MINOR;
- else
- base = IB_UCM_BASE_DEV + devnum;
-
- cdev_init(&ucm_dev->cdev, &ucm_fops);
- ucm_dev->cdev.owner = THIS_MODULE;
- kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum);
-
- ucm_dev->dev.class = &cm_class;
- ucm_dev->dev.parent = device->dev.parent;
- ucm_dev->dev.devt = base;
-
- dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum);
- if (cdev_device_add(&ucm_dev->cdev, &ucm_dev->dev))
- goto err_devnum;
-
- if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev))
- goto err_dev;
-
- ib_set_client_data(device, &ucm_client, ucm_dev);
- return;
-
-err_dev:
- cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev);
-err_devnum:
- ib_ucm_free_dev(ucm_dev);
-err:
- put_device(&ucm_dev->dev);
- return;
-}
-
-static void ib_ucm_remove_one(struct ib_device *device, void *client_data)
-{
- struct ib_ucm_device *ucm_dev = client_data;
-
- if (!ucm_dev)
- return;
-
- cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev);
- ib_ucm_free_dev(ucm_dev);
- put_device(&ucm_dev->dev);
-}
-
-static CLASS_ATTR_STRING(abi_version, S_IRUGO,
- __stringify(IB_USER_CM_ABI_VERSION));
-
-static int __init ib_ucm_init(void)
-{
- int ret;
-
- ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR,
- "infiniband_cm");
- if (ret) {
- pr_err("ucm: couldn't register device number\n");
- goto error1;
- }
-
- ret = alloc_chrdev_region(&dynamic_ucm_dev, 0, IB_UCM_NUM_DYNAMIC_MINOR,
- "infiniband_cm");
- if (ret) {
- pr_err("ucm: couldn't register dynamic device number\n");
- goto err_alloc;
- }
-
- ret = class_create_file(&cm_class, &class_attr_abi_version.attr);
- if (ret) {
- pr_err("ucm: couldn't create abi_version attribute\n");
- goto error2;
- }
-
- ret = ib_register_client(&ucm_client);
- if (ret) {
- pr_err("ucm: couldn't register client\n");
- goto error3;
- }
- return 0;
-
-error3:
- class_remove_file(&cm_class, &class_attr_abi_version.attr);
-error2:
- unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR);
-err_alloc:
- unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR);
-error1:
- return ret;
-}
-
-static void __exit ib_ucm_cleanup(void)
-{
- ib_unregister_client(&ucm_client);
- class_remove_file(&cm_class, &class_attr_abi_version.attr);
- unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR);
- unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR);
- WARN_ON(!xa_empty(&ctx_id_table));
-}
-
-module_init(ib_ucm_init);
-module_exit(ib_ucm_cleanup);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 140a338a135f..0274e9b704be 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -52,6 +52,8 @@
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>
+#include <rdma/rdma_netlink.h>
+#include "core_priv.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@@ -81,7 +83,7 @@ struct ucma_file {
};
struct ucma_context {
- int id;
+ u32 id;
struct completion comp;
atomic_t ref;
int events_reported;
@@ -94,7 +96,7 @@ struct ucma_context {
struct list_head list;
struct list_head mc_list;
/* mark that device is in process of destroying the internal HW
- * resources, protected by the global mut
+ * resources, protected by the ctx_table lock
*/
int closing;
/* sync between removal event and id destroy, protected by file mut */
@@ -104,7 +106,7 @@ struct ucma_context {
struct ucma_multicast {
struct ucma_context *ctx;
- int id;
+ u32 id;
int events_reported;
u64 uid;
@@ -122,9 +124,8 @@ struct ucma_event {
struct work_struct close_work;
};
-static DEFINE_MUTEX(mut);
-static DEFINE_IDR(ctx_idr);
-static DEFINE_IDR(multicast_idr);
+static DEFINE_XARRAY_ALLOC(ctx_table);
+static DEFINE_XARRAY_ALLOC(multicast_table);
static const struct file_operations ucma_fops;
@@ -133,7 +134,7 @@ static inline struct ucma_context *_ucma_find_context(int id,
{
struct ucma_context *ctx;
- ctx = idr_find(&ctx_idr, id);
+ ctx = xa_load(&ctx_table, id);
if (!ctx)
ctx = ERR_PTR(-ENOENT);
else if (ctx->file != file || !ctx->cm_id)
@@ -145,7 +146,7 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
{
struct ucma_context *ctx;
- mutex_lock(&mut);
+ xa_lock(&ctx_table);
ctx = _ucma_find_context(id, file);
if (!IS_ERR(ctx)) {
if (ctx->closing)
@@ -153,7 +154,7 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
else
atomic_inc(&ctx->ref);
}
- mutex_unlock(&mut);
+ xa_unlock(&ctx_table);
return ctx;
}
@@ -216,10 +217,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
INIT_LIST_HEAD(&ctx->mc_list);
ctx->file = file;
- mutex_lock(&mut);
- ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL);
- mutex_unlock(&mut);
- if (ctx->id < 0)
+ if (xa_alloc(&ctx_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL))
goto error;
list_add_tail(&ctx->list, &file->ctx_list);
@@ -238,13 +236,10 @@ static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
if (!mc)
return NULL;
- mutex_lock(&mut);
- mc->id = idr_alloc(&multicast_idr, NULL, 0, 0, GFP_KERNEL);
- mutex_unlock(&mut);
- if (mc->id < 0)
+ mc->ctx = ctx;
+ if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL))
goto error;
- mc->ctx = ctx;
list_add_tail(&mc->list, &ctx->mc_list);
return mc;
@@ -319,9 +314,9 @@ static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
* handled separately below.
*/
if (ctx->cm_id == cm_id) {
- mutex_lock(&mut);
+ xa_lock(&ctx_table);
ctx->closing = 1;
- mutex_unlock(&mut);
+ xa_unlock(&ctx_table);
queue_work(ctx->file->close_wq, &ctx->close_work);
return;
}
@@ -523,9 +518,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
err2:
rdma_destroy_id(cm_id);
err1:
- mutex_lock(&mut);
- idr_remove(&ctx_idr, ctx->id);
- mutex_unlock(&mut);
+ xa_erase(&ctx_table, ctx->id);
mutex_lock(&file->mut);
list_del(&ctx->list);
mutex_unlock(&file->mut);
@@ -537,13 +530,13 @@ static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
struct ucma_multicast *mc, *tmp;
- mutex_lock(&mut);
+ mutex_lock(&ctx->file->mut);
list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
list_del(&mc->list);
- idr_remove(&multicast_idr, mc->id);
+ xa_erase(&multicast_table, mc->id);
kfree(mc);
}
- mutex_unlock(&mut);
+ mutex_unlock(&ctx->file->mut);
}
static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
@@ -614,11 +607,11 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- mutex_lock(&mut);
+ xa_lock(&ctx_table);
ctx = _ucma_find_context(cmd.id, file);
if (!IS_ERR(ctx))
- idr_remove(&ctx_idr, ctx->id);
- mutex_unlock(&mut);
+ __xa_erase(&ctx_table, ctx->id);
+ xa_unlock(&ctx_table);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -630,14 +623,14 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
flush_workqueue(ctx->file->close_wq);
/* At this point it's guaranteed that there is no inflight
* closing task */
- mutex_lock(&mut);
+ xa_lock(&ctx_table);
if (!ctx->closing) {
- mutex_unlock(&mut);
+ xa_unlock(&ctx_table);
ucma_put_ctx(ctx);
wait_for_completion(&ctx->comp);
rdma_destroy_id(ctx->cm_id);
} else {
- mutex_unlock(&mut);
+ xa_unlock(&ctx_table);
}
resp.events_reported = ucma_free_ctx(ctx);
@@ -951,8 +944,7 @@ static ssize_t ucma_query_path(struct ucma_context *ctx,
}
}
- if (copy_to_user(response, resp,
- sizeof(*resp) + (i * sizeof(struct ib_path_rec_data))))
+ if (copy_to_user(response, resp, struct_size(resp, path_data, i)))
ret = -EFAULT;
kfree(resp);
@@ -1432,9 +1424,7 @@ static ssize_t ucma_process_join(struct ucma_file *file,
goto err3;
}
- mutex_lock(&mut);
- idr_replace(&multicast_idr, mc, mc->id);
- mutex_unlock(&mut);
+ xa_store(&multicast_table, mc->id, mc, 0);
mutex_unlock(&file->mut);
ucma_put_ctx(ctx);
@@ -1444,9 +1434,7 @@ err3:
rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
ucma_cleanup_mc_events(mc);
err2:
- mutex_lock(&mut);
- idr_remove(&multicast_idr, mc->id);
- mutex_unlock(&mut);
+ xa_erase(&multicast_table, mc->id);
list_del(&mc->list);
kfree(mc);
err1:
@@ -1508,8 +1496,8 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- mutex_lock(&mut);
- mc = idr_find(&multicast_idr, cmd.id);
+ xa_lock(&multicast_table);
+ mc = xa_load(&multicast_table, cmd.id);
if (!mc)
mc = ERR_PTR(-ENOENT);
else if (mc->ctx->file != file)
@@ -1517,8 +1505,8 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
else if (!atomic_inc_not_zero(&mc->ctx->ref))
mc = ERR_PTR(-ENXIO);
else
- idr_remove(&multicast_idr, mc->id);
- mutex_unlock(&mut);
+ __xa_erase(&multicast_table, mc->id);
+ xa_unlock(&multicast_table);
if (IS_ERR(mc)) {
ret = PTR_ERR(mc);
@@ -1615,14 +1603,14 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
* events being added before existing events.
*/
ucma_lock_files(cur_file, new_file);
- mutex_lock(&mut);
+ xa_lock(&ctx_table);
list_move_tail(&ctx->list, &new_file->ctx_list);
ucma_move_events(ctx, new_file);
ctx->file = new_file;
resp.events_reported = ctx->events_reported;
- mutex_unlock(&mut);
+ xa_unlock(&ctx_table);
ucma_unlock_files(cur_file, new_file);
response:
@@ -1757,18 +1745,15 @@ static int ucma_close(struct inode *inode, struct file *filp)
ctx->destroying = 1;
mutex_unlock(&file->mut);
- mutex_lock(&mut);
- idr_remove(&ctx_idr, ctx->id);
- mutex_unlock(&mut);
-
+ xa_erase(&ctx_table, ctx->id);
flush_workqueue(file->close_wq);
/* At that step once ctx was marked as destroying and workqueue
* was flushed we are safe from any inflights handlers that
* might put other closing task.
*/
- mutex_lock(&mut);
+ xa_lock(&ctx_table);
if (!ctx->closing) {
- mutex_unlock(&mut);
+ xa_unlock(&ctx_table);
ucma_put_ctx(ctx);
wait_for_completion(&ctx->comp);
/* rdma_destroy_id ensures that no event handlers are
@@ -1776,7 +1761,7 @@ static int ucma_close(struct inode *inode, struct file *filp)
*/
rdma_destroy_id(ctx->cm_id);
} else {
- mutex_unlock(&mut);
+ xa_unlock(&ctx_table);
}
ucma_free_ctx(ctx);
@@ -1805,6 +1790,19 @@ static struct miscdevice ucma_misc = {
.fops = &ucma_fops,
};
+static int ucma_get_global_nl_info(struct ib_client_nl_info *res)
+{
+ res->abi = RDMA_USER_CM_ABI_VERSION;
+ res->cdev = ucma_misc.this_device;
+ return 0;
+}
+
+static struct ib_client rdma_cma_client = {
+ .name = "rdma_cm",
+ .get_global_nl_info = ucma_get_global_nl_info,
+};
+MODULE_ALIAS_RDMA_CLIENT("rdma_cm");
+
static ssize_t show_abi_version(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -1833,7 +1831,14 @@ static int __init ucma_init(void)
ret = -ENOMEM;
goto err2;
}
+
+ ret = ib_register_client(&rdma_cma_client);
+ if (ret)
+ goto err3;
+
return 0;
+err3:
+ unregister_net_sysctl_table(ucma_ctl_table_hdr);
err2:
device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
err1:
@@ -1843,11 +1848,10 @@ err1:
static void __exit ucma_cleanup(void)
{
+ ib_unregister_client(&rdma_cma_client);
unregister_net_sysctl_table(ucma_ctl_table_hdr);
device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
misc_deregister(&ucma_misc);
- idr_destroy(&ctx_idr);
- idr_destroy(&multicast_idr);
}
module_init(ucma_init);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index e7ea819fcb11..08da840ed7ee 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -54,9 +54,10 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
page = sg_page_iter_page(&sg_iter);
- if (!PageDirty(page) && umem->writable && dirty)
- set_page_dirty_lock(page);
- put_page(page);
+ if (umem->writable && dirty)
+ put_user_pages_dirty_lock(&page, 1);
+ else
+ put_user_page(page);
}
sg_free_table(&umem->sg_head);
@@ -244,7 +245,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
umem->context = context;
umem->length = size;
umem->address = addr;
- umem->page_shift = PAGE_SHIFT;
umem->writable = ib_access_writable(access);
umem->owning_mm = mm = current->mm;
mmgrab(mm);
@@ -361,6 +361,9 @@ static void __ib_umem_release_tail(struct ib_umem *umem)
*/
void ib_umem_release(struct ib_umem *umem)
{
+ if (!umem)
+ return;
+
if (umem->is_odp) {
ib_umem_odp_release(to_ib_umem_odp(umem));
__ib_umem_release_tail(umem);
@@ -385,7 +388,7 @@ int ib_umem_page_count(struct ib_umem *umem)
n = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
- n += sg_dma_len(sg) >> umem->page_shift;
+ n += sg_dma_len(sg) >> PAGE_SHIFT;
return n;
}
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index f962b5bbfa40..2a75c6f8d827 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -59,7 +59,7 @@ static u64 node_start(struct umem_odp_node *n)
struct ib_umem_odp *umem_odp =
container_of(n, struct ib_umem_odp, interval_tree);
- return ib_umem_start(&umem_odp->umem);
+ return ib_umem_start(umem_odp);
}
/* Note that the representation of the intervals in the interval tree
@@ -72,7 +72,7 @@ static u64 node_last(struct umem_odp_node *n)
struct ib_umem_odp *umem_odp =
container_of(n, struct ib_umem_odp, interval_tree);
- return ib_umem_end(&umem_odp->umem) - 1;
+ return ib_umem_end(umem_odp) - 1;
}
INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
@@ -107,8 +107,6 @@ static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp)
static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
u64 start, u64 end, void *cookie)
{
- struct ib_umem *umem = &umem_odp->umem;
-
/*
* Increase the number of notifiers running, to
* prevent any further fault handling on this MR.
@@ -119,8 +117,8 @@ static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
* all pending page faults. */
smp_wmb();
complete_all(&umem_odp->notifier_completion);
- umem->context->invalidate_range(umem_odp, ib_umem_start(umem),
- ib_umem_end(umem));
+ umem_odp->umem.context->invalidate_range(
+ umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp));
return 0;
}
@@ -151,6 +149,7 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
{
struct ib_ucontext_per_mm *per_mm =
container_of(mn, struct ib_ucontext_per_mm, mn);
+ int rc;
if (mmu_notifier_range_blockable(range))
down_read(&per_mm->umem_rwsem);
@@ -167,11 +166,14 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
return 0;
}
- return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start,
- range->end,
- invalidate_range_start_trampoline,
- mmu_notifier_range_blockable(range),
- NULL);
+ rc = rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start,
+ range->end,
+ invalidate_range_start_trampoline,
+ mmu_notifier_range_blockable(range),
+ NULL);
+ if (rc)
+ up_read(&per_mm->umem_rwsem);
+ return rc;
}
static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start,
@@ -205,10 +207,9 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
{
struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
- struct ib_umem *umem = &umem_odp->umem;
down_write(&per_mm->umem_rwsem);
- if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
+ if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
rbt_ib_umem_insert(&umem_odp->interval_tree,
&per_mm->umem_tree);
up_write(&per_mm->umem_rwsem);
@@ -217,10 +218,9 @@ static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
{
struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
- struct ib_umem *umem = &umem_odp->umem;
down_write(&per_mm->umem_rwsem);
- if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
+ if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
rbt_ib_umem_remove(&umem_odp->interval_tree,
&per_mm->umem_tree);
complete_all(&umem_odp->notifier_completion);
@@ -351,7 +351,7 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root,
umem->context = ctx;
umem->length = size;
umem->address = addr;
- umem->page_shift = PAGE_SHIFT;
+ odp_data->page_shift = PAGE_SHIFT;
umem->writable = root->umem.writable;
umem->is_odp = 1;
odp_data->per_mm = per_mm;
@@ -405,18 +405,19 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
struct mm_struct *mm = umem->owning_mm;
int ret_val;
+ umem_odp->page_shift = PAGE_SHIFT;
if (access & IB_ACCESS_HUGETLB) {
struct vm_area_struct *vma;
struct hstate *h;
down_read(&mm->mmap_sem);
- vma = find_vma(mm, ib_umem_start(umem));
+ vma = find_vma(mm, ib_umem_start(umem_odp));
if (!vma || !is_vm_hugetlb_page(vma)) {
up_read(&mm->mmap_sem);
return -EINVAL;
}
h = hstate_vma(vma);
- umem->page_shift = huge_page_shift(h);
+ umem_odp->page_shift = huge_page_shift(h);
up_read(&mm->mmap_sem);
}
@@ -424,16 +425,16 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
init_completion(&umem_odp->notifier_completion);
- if (ib_umem_num_pages(umem)) {
+ if (ib_umem_odp_num_pages(umem_odp)) {
umem_odp->page_list =
vzalloc(array_size(sizeof(*umem_odp->page_list),
- ib_umem_num_pages(umem)));
+ ib_umem_odp_num_pages(umem_odp)));
if (!umem_odp->page_list)
return -ENOMEM;
umem_odp->dma_list =
vzalloc(array_size(sizeof(*umem_odp->dma_list),
- ib_umem_num_pages(umem)));
+ ib_umem_odp_num_pages(umem_odp)));
if (!umem_odp->dma_list) {
ret_val = -ENOMEM;
goto out_page_list;
@@ -456,16 +457,14 @@ out_page_list:
void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
{
- struct ib_umem *umem = &umem_odp->umem;
-
/*
* Ensure that no more pages are mapped in the umem.
*
* It is the driver's responsibility to ensure, before calling us,
* that the hardware will not attempt to access the MR any more.
*/
- ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
- ib_umem_end(umem));
+ ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+ ib_umem_end(umem_odp));
remove_umem_from_per_mm(umem_odp);
put_per_mm(umem_odp);
@@ -487,7 +486,7 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
* The function returns -EFAULT if the DMA mapping operation fails. It returns
* -EAGAIN if a concurrent invalidation prevents us from updating the page.
*
- * The page is released via put_page even if the operation failed. For
+ * The page is released via put_user_page even if the operation failed. For
* on-demand pinning, the page is released whenever it isn't stored in the
* umem.
*/
@@ -498,8 +497,8 @@ static int ib_umem_odp_map_dma_single_page(
u64 access_mask,
unsigned long current_seq)
{
- struct ib_umem *umem = &umem_odp->umem;
- struct ib_device *dev = umem->context->device;
+ struct ib_ucontext *context = umem_odp->umem.context;
+ struct ib_device *dev = context->device;
dma_addr_t dma_addr;
int remove_existing_mapping = 0;
int ret = 0;
@@ -514,10 +513,9 @@ static int ib_umem_odp_map_dma_single_page(
goto out;
}
if (!(umem_odp->dma_list[page_index])) {
- dma_addr = ib_dma_map_page(dev,
- page,
- 0, BIT(umem->page_shift),
- DMA_BIDIRECTIONAL);
+ dma_addr =
+ ib_dma_map_page(dev, page, 0, BIT(umem_odp->page_shift),
+ DMA_BIDIRECTIONAL);
if (ib_dma_mapping_error(dev, dma_addr)) {
ret = -EFAULT;
goto out;
@@ -536,15 +534,16 @@ static int ib_umem_odp_map_dma_single_page(
}
out:
- put_page(page);
+ put_user_page(page);
if (remove_existing_mapping) {
ib_umem_notifier_start_account(umem_odp);
- umem->context->invalidate_range(
+ context->invalidate_range(
umem_odp,
- ib_umem_start(umem) + (page_index << umem->page_shift),
- ib_umem_start(umem) +
- ((page_index + 1) << umem->page_shift));
+ ib_umem_start(umem_odp) +
+ (page_index << umem_odp->page_shift),
+ ib_umem_start(umem_odp) +
+ ((page_index + 1) << umem_odp->page_shift));
ib_umem_notifier_end_account(umem_odp);
ret = -EAGAIN;
}
@@ -581,27 +580,26 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
u64 bcnt, u64 access_mask,
unsigned long current_seq)
{
- struct ib_umem *umem = &umem_odp->umem;
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
struct page **local_page_list = NULL;
u64 page_mask, off;
- int j, k, ret = 0, start_idx, npages = 0, page_shift;
- unsigned int flags = 0;
+ int j, k, ret = 0, start_idx, npages = 0;
+ unsigned int flags = 0, page_shift;
phys_addr_t p = 0;
if (access_mask == 0)
return -EINVAL;
- if (user_virt < ib_umem_start(umem) ||
- user_virt + bcnt > ib_umem_end(umem))
+ if (user_virt < ib_umem_start(umem_odp) ||
+ user_virt + bcnt > ib_umem_end(umem_odp))
return -EFAULT;
local_page_list = (struct page **)__get_free_page(GFP_KERNEL);
if (!local_page_list)
return -ENOMEM;
- page_shift = umem->page_shift;
+ page_shift = umem_odp->page_shift;
page_mask = ~(BIT(page_shift) - 1);
off = user_virt & (~page_mask);
user_virt = user_virt & page_mask;
@@ -621,7 +619,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
if (access_mask & ODP_WRITE_ALLOWED_BIT)
flags |= FOLL_WRITE;
- start_idx = (user_virt - ib_umem_start(umem)) >> page_shift;
+ start_idx = (user_virt - ib_umem_start(umem_odp)) >> page_shift;
k = start_idx;
while (bcnt > 0) {
@@ -659,7 +657,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
ret = -EFAULT;
break;
}
- put_page(local_page_list[j]);
+ put_user_page(local_page_list[j]);
continue;
}
@@ -686,8 +684,8 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
* ib_umem_odp_map_dma_single_page().
*/
if (npages - (j + 1) > 0)
- release_pages(&local_page_list[j+1],
- npages - (j + 1));
+ put_user_pages(&local_page_list[j+1],
+ npages - (j + 1));
break;
}
}
@@ -711,21 +709,20 @@ EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
u64 bound)
{
- struct ib_umem *umem = &umem_odp->umem;
int idx;
u64 addr;
- struct ib_device *dev = umem->context->device;
+ struct ib_device *dev = umem_odp->umem.context->device;
- virt = max_t(u64, virt, ib_umem_start(umem));
- bound = min_t(u64, bound, ib_umem_end(umem));
+ virt = max_t(u64, virt, ib_umem_start(umem_odp));
+ bound = min_t(u64, bound, ib_umem_end(umem_odp));
/* Note that during the run of this function, the
* notifiers_count of the MR is > 0, preventing any racing
* faults from completion. We might be racing with other
* invalidations, so we must make sure we free each page only
* once. */
mutex_lock(&umem_odp->umem_mutex);
- for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
- idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
+ for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
+ idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
if (umem_odp->page_list[idx]) {
struct page *page = umem_odp->page_list[idx];
dma_addr_t dma = umem_odp->dma_list[idx];
@@ -733,7 +730,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
WARN_ON(!dma_addr);
- ib_dma_unmap_page(dev, dma_addr, PAGE_SIZE,
+ ib_dma_unmap_page(dev, dma_addr,
+ BIT(umem_odp->page_shift),
DMA_BIDIRECTIONAL);
if (dma & ODP_WRITE_ALLOWED_BIT) {
struct page *head_page = compound_head(page);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 671f07ba1fad..9f8a48016b41 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -54,6 +54,7 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_user_mad.h>
+#include <rdma/rdma_netlink.h>
#include "core_priv.h"
@@ -744,7 +745,7 @@ found:
"process %s did not enable P_Key index support.\n",
current->comm);
dev_warn(&file->port->dev,
- " Documentation/infiniband/user_mad.txt has info on the new ABI.\n");
+ " Documentation/infiniband/user_mad.rst has info on the new ABI.\n");
}
}
@@ -1124,11 +1125,48 @@ static const struct file_operations umad_sm_fops = {
.llseek = no_llseek,
};
+static int ib_umad_get_nl_info(struct ib_device *ibdev, void *client_data,
+ struct ib_client_nl_info *res)
+{
+ struct ib_umad_device *umad_dev = client_data;
+
+ if (!rdma_is_port_valid(ibdev, res->port))
+ return -EINVAL;
+
+ res->abi = IB_USER_MAD_ABI_VERSION;
+ res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].dev;
+
+ return 0;
+}
+
static struct ib_client umad_client = {
.name = "umad",
.add = ib_umad_add_one,
- .remove = ib_umad_remove_one
+ .remove = ib_umad_remove_one,
+ .get_nl_info = ib_umad_get_nl_info,
};
+MODULE_ALIAS_RDMA_CLIENT("umad");
+
+static int ib_issm_get_nl_info(struct ib_device *ibdev, void *client_data,
+ struct ib_client_nl_info *res)
+{
+ struct ib_umad_device *umad_dev =
+ ib_get_client_data(ibdev, &umad_client);
+
+ if (!rdma_is_port_valid(ibdev, res->port))
+ return -EINVAL;
+
+ res->abi = IB_USER_MAD_ABI_VERSION;
+ res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].sm_dev;
+
+ return 0;
+}
+
+static struct ib_client issm_client = {
+ .name = "issm",
+ .get_nl_info = ib_issm_get_nl_info,
+};
+MODULE_ALIAS_RDMA_CLIENT("issm");
static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -1387,13 +1425,17 @@ static int __init ib_umad_init(void)
}
ret = ib_register_client(&umad_client);
- if (ret) {
- pr_err("couldn't register ib_umad client\n");
+ if (ret)
goto out_class;
- }
+
+ ret = ib_register_client(&issm_client);
+ if (ret)
+ goto out_client;
return 0;
+out_client:
+ ib_unregister_client(&umad_client);
out_class:
class_unregister(&umad_class);
@@ -1411,6 +1453,7 @@ out:
static void __exit ib_umad_cleanup(void)
{
+ ib_unregister_client(&issm_client);
ib_unregister_client(&umad_client);
class_unregister(&umad_class);
unregister_chrdev_region(base_umad_dev,
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 63fe14c7c68f..7ddd0e5bc6b3 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -756,7 +756,9 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
mr->device = pd->device;
mr->pd = pd;
+ mr->type = IB_MR_TYPE_USER;
mr->dm = NULL;
+ mr->sig_attrs = NULL;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
mr->res.type = RDMA_RESTRACK_MR;
@@ -1021,12 +1023,11 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
attr.comp_vector = cmd->comp_vector;
attr.flags = cmd->flags;
- cq = ib_dev->ops.create_cq(ib_dev, &attr, &attrs->driver_udata);
- if (IS_ERR(cq)) {
- ret = PTR_ERR(cq);
+ cq = rdma_zalloc_drv_obj(ib_dev, ib_cq);
+ if (!cq) {
+ ret = -ENOMEM;
goto err_file;
}
-
cq->device = ib_dev;
cq->uobject = &obj->uobject;
cq->comp_handler = ib_uverbs_comp_handler;
@@ -1034,6 +1035,10 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
atomic_set(&cq->usecnt, 0);
+ ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata);
+ if (ret)
+ goto err_free;
+
obj->uobject.object = cq;
memset(&resp, 0, sizeof resp);
resp.base.cq_handle = obj->uobject.id;
@@ -1054,7 +1059,9 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
err_cb:
ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs));
-
+ cq = NULL;
+err_free:
+ kfree(cq);
err_file:
if (ev_file)
ib_uverbs_release_ucq(attrs->ufile, ev_file, obj);
@@ -2541,7 +2548,7 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs)
struct ib_uqp_object *obj;
struct ib_qp *qp;
struct ib_uverbs_mcast_entry *mcast;
- int ret = -EINVAL;
+ int ret;
bool found = false;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
@@ -3715,9 +3722,6 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
* trailing driver_data flex array. In this case the size of the base struct
* cannot be changed.
*/
-#define offsetof_after(_struct, _member) \
- (offsetof(_struct, _member) + sizeof(((_struct *)NULL)->_member))
-
#define UAPI_DEF_WRITE_IO(req, resp) \
.write.has_resp = 1 + \
BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) + \
@@ -3748,11 +3752,11 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
*/
#define UAPI_DEF_WRITE_IO_EX(req, req_last_member, resp, resp_last_member) \
.write.has_resp = 1, \
- .write.req_size = offsetof_after(req, req_last_member), \
- .write.resp_size = offsetof_after(resp, resp_last_member)
+ .write.req_size = offsetofend(req, req_last_member), \
+ .write.resp_size = offsetofend(resp, resp_last_member)
#define UAPI_DEF_WRITE_I_EX(req, req_last_member) \
- .write.req_size = offsetof_after(req, req_last_member)
+ .write.req_size = offsetofend(req, req_last_member)
const struct uapi_definition uverbs_def_write_intf[] = {
DECLARE_UVERBS_OBJECT(
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 84a5e9a6d483..11c13c1381cf 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -51,6 +51,7 @@
#include <rdma/ib.h>
#include <rdma/uverbs_std_types.h>
+#include <rdma/rdma_netlink.h>
#include "uverbs.h"
#include "core_priv.h"
@@ -198,7 +199,7 @@ void ib_uverbs_release_file(struct kref *ref)
ib_dev = srcu_dereference(file->device->ib_dev,
&file->device->disassociate_srcu);
if (ib_dev && !ib_dev->ops.disassociate_ucontext)
- module_put(ib_dev->owner);
+ module_put(ib_dev->ops.owner);
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
if (atomic_dec_and_test(&file->device->refcount))
@@ -1065,7 +1066,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
module_dependent = !(ib_dev->ops.disassociate_ucontext);
if (module_dependent) {
- if (!try_module_get(ib_dev->owner)) {
+ if (!try_module_get(ib_dev->ops.owner)) {
ret = -ENODEV;
goto err;
}
@@ -1100,7 +1101,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
return stream_open(inode, filp);
err_module:
- module_put(ib_dev->owner);
+ module_put(ib_dev->ops.owner);
err:
mutex_unlock(&dev->lists_mutex);
@@ -1148,12 +1149,41 @@ static const struct file_operations uverbs_mmap_fops = {
.compat_ioctl = ib_uverbs_ioctl,
};
+static int ib_uverbs_get_nl_info(struct ib_device *ibdev, void *client_data,
+ struct ib_client_nl_info *res)
+{
+ struct ib_uverbs_device *uverbs_dev = client_data;
+ int ret;
+
+ if (res->port != -1)
+ return -EINVAL;
+
+ res->abi = ibdev->ops.uverbs_abi_ver;
+ res->cdev = &uverbs_dev->dev;
+
+ /*
+ * To support DRIVER_ID binding in userspace some of the driver need
+ * upgrading to expose their PCI dependent revision information
+ * through get_context instead of relying on modalias matching. When
+ * the drivers are fixed they can drop this flag.
+ */
+ if (!ibdev->ops.uverbs_no_driver_id_binding) {
+ ret = nla_put_u32(res->nl_msg, RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID,
+ ibdev->ops.driver_id);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
static struct ib_client uverbs_client = {
.name = "uverbs",
.no_kverbs_req = true,
.add = ib_uverbs_add_one,
- .remove = ib_uverbs_remove_one
+ .remove = ib_uverbs_remove_one,
+ .get_nl_info = ib_uverbs_get_nl_info,
};
+MODULE_ALIAS_RDMA_CLIENT("uverbs");
static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
char *buf)
@@ -1186,7 +1216,7 @@ static ssize_t abi_version_show(struct device *device,
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
- ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver);
+ ret = sprintf(buf, "%u\n", ib_dev->ops.uverbs_abi_ver);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
return ret;
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index 07ea4e3c4566..e39fe6a8aac4 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -111,9 +111,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
INIT_LIST_HEAD(&obj->comp_list);
INIT_LIST_HEAD(&obj->async_list);
- cq = ib_dev->ops.create_cq(ib_dev, &attr, &attrs->driver_udata);
- if (IS_ERR(cq)) {
- ret = PTR_ERR(cq);
+ cq = rdma_zalloc_drv_obj(ib_dev, ib_cq);
+ if (!cq) {
+ ret = -ENOMEM;
goto err_event_file;
}
@@ -122,10 +122,15 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
- obj->uobject.object = cq;
- obj->uobject.user_handle = user_handle;
atomic_set(&cq->usecnt, 0);
cq->res.type = RDMA_RESTRACK_CQ;
+
+ ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata);
+ if (ret)
+ goto err_free;
+
+ obj->uobject.object = cq;
+ obj->uobject.user_handle = user_handle;
rdma_restrack_uadd(&cq->res);
ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
@@ -136,7 +141,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
return 0;
err_cq:
ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs));
-
+ cq = NULL;
+err_free:
+ kfree(cq);
err_event_file:
if (ev_file)
uverbs_uobject_put(ev_file_uobj);
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index 997f7a3a558a..c1286a52dc84 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -128,6 +128,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
mr->device = pd->device;
mr->pd = pd;
+ mr->type = IB_MR_TYPE_DM;
mr->dm = dm;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index 7a987acf0c0b..00c547887132 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -22,6 +22,8 @@ static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size)
return ERR_PTR(-EOVERFLOW);
elm = kzalloc(alloc_size, GFP_KERNEL);
+ if (!elm)
+ return ERR_PTR(-ENOMEM);
rc = radix_tree_insert(&uapi->radix, key, elm);
if (rc) {
kfree(elm);
@@ -645,7 +647,7 @@ struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev)
return ERR_PTR(-ENOMEM);
INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL);
- uapi->driver_id = ibdev->driver_id;
+ uapi->driver_id = ibdev->ops.driver_id;
rc = uapi_merge_def(uapi, ibdev, uverbs_core_api, false);
if (rc)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index e666a1f7608d..92349bf37589 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -209,7 +209,7 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
EXPORT_SYMBOL(ib_rate_to_mbps);
__attribute_const__ enum rdma_transport_type
-rdma_node_get_transport(enum rdma_node_type node_type)
+rdma_node_get_transport(unsigned int node_type)
{
if (node_type == RDMA_NODE_USNIC)
@@ -299,6 +299,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
mr->device = pd->device;
mr->pd = pd;
+ mr->type = IB_MR_TYPE_DMA;
mr->uobject = NULL;
mr->need_inval = false;
@@ -316,7 +317,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
EXPORT_SYMBOL(__ib_alloc_pd);
/**
- * ib_dealloc_pd - Deallocates a protection domain.
+ * ib_dealloc_pd_user - Deallocates a protection domain.
* @pd: The protection domain to deallocate.
* @udata: Valid user data or NULL for kernel object
*
@@ -1157,6 +1158,10 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
qp_init_attr->cap.max_recv_sge))
return ERR_PTR(-EINVAL);
+ if ((qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) &&
+ !(device->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER))
+ return ERR_PTR(-EINVAL);
+
/*
* If the callers is using the RDMA API calculate the resources
* needed for the RDMA READ/WRITE operations.
@@ -1232,6 +1237,8 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
qp->max_write_sge = qp_init_attr->cap.max_send_sge;
qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge,
device->attrs.max_sge_rd);
+ if (qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN)
+ qp->integrity_en = true;
return qp;
@@ -1683,6 +1690,14 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
}
}
+ /*
+ * Bind this qp to a counter automatically based on the rdma counter
+ * rules. This only set in RST2INIT with port specified
+ */
+ if (!qp->counter && (attr_mask & IB_QP_PORT) &&
+ ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT))
+ rdma_counter_bind_qp_auto(qp, attr->port_num);
+
ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
if (ret)
goto out;
@@ -1878,6 +1893,7 @@ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
if (!qp->uobject)
rdma_rw_cleanup_mrs(qp);
+ rdma_counter_unbind_qp(qp, true);
rdma_restrack_del(&qp->res);
ret = qp->device->ops.destroy_qp(qp, udata);
if (!ret) {
@@ -1916,21 +1932,28 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
const char *caller)
{
struct ib_cq *cq;
+ int ret;
+
+ cq = rdma_zalloc_drv_obj(device, ib_cq);
+ if (!cq)
+ return ERR_PTR(-ENOMEM);
- cq = device->ops.create_cq(device, cq_attr, NULL);
-
- if (!IS_ERR(cq)) {
- cq->device = device;
- cq->uobject = NULL;
- cq->comp_handler = comp_handler;
- cq->event_handler = event_handler;
- cq->cq_context = cq_context;
- atomic_set(&cq->usecnt, 0);
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_set_task(&cq->res, caller);
- rdma_restrack_kadd(&cq->res);
+ cq->device = device;
+ cq->uobject = NULL;
+ cq->comp_handler = comp_handler;
+ cq->event_handler = event_handler;
+ cq->cq_context = cq_context;
+ atomic_set(&cq->usecnt, 0);
+ cq->res.type = RDMA_RESTRACK_CQ;
+ rdma_restrack_set_task(&cq->res, caller);
+
+ ret = device->ops.create_cq(cq, cq_attr, NULL);
+ if (ret) {
+ kfree(cq);
+ return ERR_PTR(ret);
}
+ rdma_restrack_kadd(&cq->res);
return cq;
}
EXPORT_SYMBOL(__ib_create_cq);
@@ -1949,7 +1972,9 @@ int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata)
return -EBUSY;
rdma_restrack_del(&cq->res);
- return cq->device->ops.destroy_cq(cq, udata);
+ cq->device->ops.destroy_cq(cq, udata);
+ kfree(cq);
+ return 0;
}
EXPORT_SYMBOL(ib_destroy_cq_user);
@@ -1966,6 +1991,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
{
struct ib_pd *pd = mr->pd;
struct ib_dm *dm = mr->dm;
+ struct ib_sig_attrs *sig_attrs = mr->sig_attrs;
int ret;
rdma_restrack_del(&mr->res);
@@ -1974,6 +2000,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
atomic_dec(&pd->usecnt);
if (dm)
atomic_dec(&dm->usecnt);
+ kfree(sig_attrs);
}
return ret;
@@ -1981,7 +2008,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
EXPORT_SYMBOL(ib_dereg_mr_user);
/**
- * ib_alloc_mr() - Allocates a memory region
+ * ib_alloc_mr_user() - Allocates a memory region
* @pd: protection domain associated with the region
* @mr_type: memory region type
* @max_num_sg: maximum sg entries available for registration.
@@ -2001,6 +2028,9 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
if (!pd->device->ops.alloc_mr)
return ERR_PTR(-EOPNOTSUPP);
+ if (WARN_ON_ONCE(mr_type == IB_MR_TYPE_INTEGRITY))
+ return ERR_PTR(-EINVAL);
+
mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata);
if (!IS_ERR(mr)) {
mr->device = pd->device;
@@ -2011,12 +2041,66 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
mr->need_inval = false;
mr->res.type = RDMA_RESTRACK_MR;
rdma_restrack_kadd(&mr->res);
+ mr->type = mr_type;
+ mr->sig_attrs = NULL;
}
return mr;
}
EXPORT_SYMBOL(ib_alloc_mr_user);
+/**
+ * ib_alloc_mr_integrity() - Allocates an integrity memory region
+ * @pd: protection domain associated with the region
+ * @max_num_data_sg: maximum data sg entries available for registration
+ * @max_num_meta_sg: maximum metadata sg entries available for
+ * registration
+ *
+ * Notes:
+ * Memory registration page/sg lists must not exceed max_num_sg,
+ * also the integrity page/sg lists must not exceed max_num_meta_sg.
+ *
+ */
+struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_data_sg,
+ u32 max_num_meta_sg)
+{
+ struct ib_mr *mr;
+ struct ib_sig_attrs *sig_attrs;
+
+ if (!pd->device->ops.alloc_mr_integrity ||
+ !pd->device->ops.map_mr_sg_pi)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!max_num_meta_sg)
+ return ERR_PTR(-EINVAL);
+
+ sig_attrs = kzalloc(sizeof(struct ib_sig_attrs), GFP_KERNEL);
+ if (!sig_attrs)
+ return ERR_PTR(-ENOMEM);
+
+ mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg,
+ max_num_meta_sg);
+ if (IS_ERR(mr)) {
+ kfree(sig_attrs);
+ return mr;
+ }
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->dm = NULL;
+ mr->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ mr->need_inval = false;
+ mr->res.type = RDMA_RESTRACK_MR;
+ rdma_restrack_kadd(&mr->res);
+ mr->type = IB_MR_TYPE_INTEGRITY;
+ mr->sig_attrs = sig_attrs;
+
+ return mr;
+}
+EXPORT_SYMBOL(ib_alloc_mr_integrity);
+
/* "Fast" memory regions */
struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
@@ -2226,19 +2310,17 @@ EXPORT_SYMBOL(ib_create_wq);
*/
int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
{
- int err;
struct ib_cq *cq = wq->cq;
struct ib_pd *pd = wq->pd;
if (atomic_read(&wq->usecnt))
return -EBUSY;
- err = wq->device->ops.destroy_wq(wq, udata);
- if (!err) {
- atomic_dec(&pd->usecnt);
- atomic_dec(&cq->usecnt);
- }
- return err;
+ wq->device->ops.destroy_wq(wq, udata);
+ atomic_dec(&pd->usecnt);
+ atomic_dec(&cq->usecnt);
+
+ return 0;
}
EXPORT_SYMBOL(ib_destroy_wq);
@@ -2376,6 +2458,43 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
EXPORT_SYMBOL(ib_set_vf_guid);
/**
+ * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection
+ * information) and set an appropriate memory region for registration.
+ * @mr: memory region
+ * @data_sg: dma mapped scatterlist for data
+ * @data_sg_nents: number of entries in data_sg
+ * @data_sg_offset: offset in bytes into data_sg
+ * @meta_sg: dma mapped scatterlist for metadata
+ * @meta_sg_nents: number of entries in meta_sg
+ * @meta_sg_offset: offset in bytes into meta_sg
+ * @page_size: page vector desired page size
+ *
+ * Constraints:
+ * - The MR must be allocated with type IB_MR_TYPE_INTEGRITY.
+ *
+ * Return: 0 on success.
+ *
+ * After this completes successfully, the memory region
+ * is ready for registration.
+ */
+int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset, unsigned int page_size)
+{
+ if (unlikely(!mr->device->ops.map_mr_sg_pi ||
+ WARN_ON_ONCE(mr->type != IB_MR_TYPE_INTEGRITY)))
+ return -EOPNOTSUPP;
+
+ mr->page_size = page_size;
+
+ return mr->device->ops.map_mr_sg_pi(mr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg,
+ meta_sg_nents, meta_sg_offset);
+}
+EXPORT_SYMBOL(ib_map_mr_sg_pi);
+
+/**
* ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
* and set it the memory region.
* @mr: memory region
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index 77094be1b262..433fca59febd 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -7,7 +7,6 @@ obj-$(CONFIG_INFINIBAND_EFA) += efa/
obj-$(CONFIG_INFINIBAND_I40IW) += i40iw/
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
-obj-$(CONFIG_INFINIBAND_NES) += nes/
obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/
obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma/
obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 2c3685faa57a..a91653aabf38 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -805,10 +805,8 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
rdev->sqp_ah = NULL;
}
- if (!IS_ERR_OR_NULL(qp->rumem))
- ib_umem_release(qp->rumem);
- if (!IS_ERR_OR_NULL(qp->sumem))
- ib_umem_release(qp->sumem);
+ ib_umem_release(qp->rumem);
+ ib_umem_release(qp->sumem);
mutex_lock(&rdev->qp_lock);
list_del(&qp->list);
@@ -1201,12 +1199,8 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
qp_destroy:
bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
free_umem:
- if (udata) {
- if (qp->rumem)
- ib_umem_release(qp->rumem);
- if (qp->sumem)
- ib_umem_release(qp->sumem);
- }
+ ib_umem_release(qp->rumem);
+ ib_umem_release(qp->sumem);
fail:
kfree(qp);
return ERR_PTR(rc);
@@ -1302,8 +1296,7 @@ void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
if (qplib_srq->cq)
nq = qplib_srq->cq->nq;
bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq);
- if (srq->umem)
- ib_umem_release(srq->umem);
+ ib_umem_release(srq->umem);
atomic_dec(&rdev->srq_count);
if (nq)
nq->budget--;
@@ -1412,8 +1405,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
return 0;
fail:
- if (srq->umem)
- ib_umem_release(srq->umem);
+ ib_umem_release(srq->umem);
exit:
return rc;
}
@@ -2517,9 +2509,8 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr,
}
/* Completion Queues */
-int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+void bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
- int rc;
struct bnxt_re_cq *cq;
struct bnxt_qplib_nq *nq;
struct bnxt_re_dev *rdev;
@@ -2528,29 +2519,20 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
rdev = cq->rdev;
nq = cq->qplib_cq.nq;
- rc = bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
- if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to destroy HW CQ");
- return rc;
- }
- if (!IS_ERR_OR_NULL(cq->umem))
- ib_umem_release(cq->umem);
+ bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
+ ib_umem_release(cq->umem);
atomic_dec(&rdev->cq_count);
nq->budget--;
kfree(cq->cql);
- kfree(cq);
-
- return 0;
}
-struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
- struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibcq->device, ibdev);
struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
- struct bnxt_re_cq *cq = NULL;
+ struct bnxt_re_cq *cq = container_of(ibcq, struct bnxt_re_cq, ib_cq);
int rc, entries;
int cqe = attr->cqe;
struct bnxt_qplib_nq *nq = NULL;
@@ -2559,11 +2541,8 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
/* Validate CQ fields */
if (cqe < 1 || cqe > dev_attr->max_cq_wqes) {
dev_err(rdev_to_dev(rdev), "Failed to create CQ -max exceeded");
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
cq->rdev = rdev;
cq->qplib_cq.cq_handle = (u64)(unsigned long)(&cq->qplib_cq);
@@ -2641,15 +2620,13 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
}
}
- return &cq->ib_cq;
+ return 0;
c2fail:
- if (udata)
- ib_umem_release(cq->umem);
+ ib_umem_release(cq->umem);
fail:
kfree(cq->cql);
- kfree(cq);
- return ERR_PTR(rc);
+ return rc;
}
static u8 __req_to_ib_wc_status(u8 qstatus)
@@ -3353,8 +3330,7 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
mr->npages = 0;
mr->pages = NULL;
}
- if (!IS_ERR_OR_NULL(mr->ib_umem))
- ib_umem_release(mr->ib_umem);
+ ib_umem_release(mr->ib_umem);
kfree(mr);
atomic_dec(&rdev->mr_count);
@@ -3630,10 +3606,10 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
u32 chip_met_rev_num = 0;
int rc;
- dev_dbg(rdev_to_dev(rdev), "ABI version requested %d",
- ibdev->uverbs_abi_ver);
+ dev_dbg(rdev_to_dev(rdev), "ABI version requested %u",
+ ibdev->ops.uverbs_abi_ver);
- if (ibdev->uverbs_abi_ver != BNXT_RE_ABI_VERSION) {
+ if (ibdev->ops.uverbs_abi_ver != BNXT_RE_ABI_VERSION) {
dev_dbg(rdev_to_dev(rdev), " is different from the device %d ",
BNXT_RE_ABI_VERSION);
return -EPERM;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 09a33049e42f..31662b1ee35a 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -94,11 +94,11 @@ struct bnxt_re_qp {
};
struct bnxt_re_cq {
+ struct ib_cq ib_cq;
struct bnxt_re_dev *rdev;
spinlock_t cq_lock; /* protect cq */
u16 cq_count;
u16 cq_period;
- struct ib_cq ib_cq;
struct bnxt_qplib_cq qplib_cq;
struct bnxt_qplib_cqe *cql;
#define MAX_CQL_PER_POLL 1024
@@ -190,10 +190,9 @@ int bnxt_re_post_send(struct ib_qp *qp, const struct ib_send_wr *send_wr,
const struct ib_send_wr **bad_send_wr);
int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
const struct ib_recv_wr **bad_recv_wr);
-struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
-int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+void bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 814f959c7db9..029babe713f3 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -596,6 +596,10 @@ static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
}
static const struct ib_device_ops bnxt_re_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_BNXT_RE,
+ .uverbs_abi_ver = BNXT_RE_ABI_VERSION,
+
.add_gid = bnxt_re_add_gid,
.alloc_hw_stats = bnxt_re_ib_alloc_hw_stats,
.alloc_mr = bnxt_re_alloc_mr,
@@ -637,6 +641,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.reg_user_mr = bnxt_re_reg_user_mr,
.req_notify_cq = bnxt_re_req_notify_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, bnxt_re_cq, ib_cq),
INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd),
INIT_RDMA_OBJ_SIZE(ib_srq, bnxt_re_srq, ib_srq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx),
@@ -648,7 +653,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
int ret;
/* ib device init */
- ibdev->owner = THIS_MODULE;
ibdev->node_type = RDMA_NODE_IB_CA;
strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
strlen(BNXT_RE_DESC) + 5);
@@ -661,7 +665,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
/* User space */
- ibdev->uverbs_abi_ver = BNXT_RE_ABI_VERSION;
ibdev->uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -691,7 +694,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group);
- ibdev->driver_id = RDMA_DRIVER_BNXT_RE;
ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1);
if (ret)
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 8ac72ac7cbac..95b22a651673 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -174,7 +174,6 @@ int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
return -ENOMEM;
}
dma_unmap_addr_set(cq, mapping, cq->dma_addr);
- memset(cq->queue, 0, size);
setup.id = cq->cqid;
setup.base_addr = (u64) (cq->dma_addr);
setup.size = 1UL << cq->size_log2;
@@ -187,20 +186,6 @@ int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
}
-#ifdef notyet
-int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
-{
- struct rdma_cq_setup setup;
- setup.id = cq->cqid;
- setup.base_addr = (u64) (cq->dma_addr);
- setup.size = 1UL << cq->size_log2;
- setup.credits = setup.size;
- setup.credit_thres = setup.size; /* TBD: overflow recovery */
- setup.ovfl_mode = 1;
- return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
-#endif
-
static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
{
struct cxio_qpid_list *entry;
@@ -219,7 +204,7 @@ static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
if (!qpid)
goto out;
for (i = qpid+1; i & rdev_p->qpmask; i++) {
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
break;
entry->qpid = i;
@@ -237,7 +222,7 @@ static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid,
{
struct cxio_qpid_list *entry;
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return;
pr_debug("%s qpid 0x%x\n", __func__, qpid);
@@ -317,17 +302,15 @@ err1:
return -ENOMEM;
}
-int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+void cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
{
- int err;
- err = cxio_hal_clear_cq_ctx(rdev_p, cq->cqid);
+ cxio_hal_clear_cq_ctx(rdev_p, cq->cqid);
kfree(cq->sw_queue);
dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
(1UL << (cq->size_log2))
* sizeof(struct t3_cqe) + 1, cq->queue,
dma_unmap_addr(cq, mapping));
cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
- return err;
}
int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq,
@@ -538,8 +521,6 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
dma_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
rdev_p->ctrl_qp.dma_addr);
rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
- memset(rdev_p->ctrl_qp.workq, 0,
- (1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr));
mutex_init(&rdev_p->ctrl_qp.lock);
init_waitqueue_head(&rdev_p->ctrl_qp.waitq);
@@ -565,9 +546,9 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
wqe->sge_cmd = cpu_to_be64(sge_cmd);
wqe->ctx1 = cpu_to_be64(ctx1);
wqe->ctx0 = cpu_to_be64(ctx0);
- pr_debug("CtrlQP dma_addr 0x%llx workq %p size %d\n",
- (unsigned long long)rdev_p->ctrl_qp.dma_addr,
- rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
+ pr_debug("CtrlQP dma_addr %pad workq %p size %d\n",
+ &rdev_p->ctrl_qp.dma_addr, rdev_p->ctrl_qp.workq,
+ 1 << T3_CTRL_QP_SIZE_LOG2);
skb->priority = CPL_PRIORITY_CONTROL;
return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
err:
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index c64e50b5a548..40c029ffa425 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -158,8 +158,7 @@ void cxio_rdev_close(struct cxio_rdev *rdev);
int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
enum t3_cq_opcode op, u32 credit);
int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel);
-int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
-int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+void cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 1c90c86fc8b8..0bca72cb4d9a 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -170,7 +170,7 @@ static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
{
struct cpl_tid_release *req;
- skb = get_skb(skb, sizeof *req, GFP_KERNEL);
+ skb = get_skb(skb, sizeof(*req), GFP_KERNEL);
if (!skb)
return;
req = skb_put(skb, sizeof(*req));
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 3a481dfb1607..e775c1a1a450 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -88,7 +88,7 @@ static int iwch_alloc_ucontext(struct ib_ucontext *ucontext,
return 0;
}
-static int iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+static void iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct iwch_cq *chp;
@@ -100,17 +100,16 @@ static int iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
wait_event(chp->wait, !atomic_read(&chp->refcnt));
cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
- kfree(chp);
- return 0;
}
-static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+static int iwch_create_cq(struct ib_cq *ibcq,
+ const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
- struct iwch_dev *rhp;
- struct iwch_cq *chp;
+ struct iwch_dev *rhp = to_iwch_dev(ibcq->device);
+ struct iwch_cq *chp = to_iwch_cq(ibcq);
struct iwch_create_cq_resp uresp;
struct iwch_create_cq_req ureq;
static int warned;
@@ -118,19 +117,13 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
if (attr->flags)
- return ERR_PTR(-EINVAL);
-
- rhp = to_iwch_dev(ibdev);
- chp = kzalloc(sizeof(*chp), GFP_KERNEL);
- if (!chp)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
if (udata) {
if (!t3a_device(rhp)) {
- if (ib_copy_from_udata(&ureq, udata, sizeof (ureq))) {
- kfree(chp);
- return ERR_PTR(-EFAULT);
- }
+ if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
+ return -EFAULT;
+
chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr;
}
}
@@ -151,10 +144,9 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
entries = roundup_pow_of_two(entries);
chp->cq.size_log2 = ilog2(entries);
- if (cxio_create_cq(&rhp->rdev, &chp->cq, !udata)) {
- kfree(chp);
- return ERR_PTR(-ENOMEM);
- }
+ if (cxio_create_cq(&rhp->rdev, &chp->cq, !udata))
+ return -ENOMEM;
+
chp->rhp = rhp;
chp->ibcq.cqe = 1 << chp->cq.size_log2;
spin_lock_init(&chp->lock);
@@ -163,8 +155,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
init_waitqueue_head(&chp->wait);
if (xa_store_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL)) {
cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
- kfree(chp);
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
}
if (udata) {
@@ -172,10 +163,10 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
struct iwch_ucontext *ucontext = rdma_udata_to_drv_context(
udata, struct iwch_ucontext, ibucontext);
- mm = kmalloc(sizeof *mm, GFP_KERNEL);
+ mm = kmalloc(sizeof(*mm), GFP_KERNEL);
if (!mm) {
iwch_destroy_cq(&chp->ibcq, udata);
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
}
uresp.cqid = chp->cq.cqid;
uresp.size_log2 = chp->cq.size_log2;
@@ -185,7 +176,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
spin_unlock(&ucontext->mmap_lock);
mm->key = uresp.key;
mm->addr = virt_to_phys(chp->cq.queue);
- if (udata->outlen < sizeof uresp) {
+ if (udata->outlen < sizeof(uresp)) {
if (!warned++)
pr_warn("Warning - downlevel libcxgb3 (non-fatal)\n");
mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
@@ -196,86 +187,19 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
sizeof(struct t3_cqe));
uresp.memsize = mm->len;
uresp.reserved = 0;
- resplen = sizeof uresp;
+ resplen = sizeof(uresp);
}
if (ib_copy_to_udata(udata, &uresp, resplen)) {
kfree(mm);
iwch_destroy_cq(&chp->ibcq, udata);
- return ERR_PTR(-EFAULT);
+ return -EFAULT;
}
insert_mmap(ucontext, mm);
}
- pr_debug("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n",
+ pr_debug("created cqid 0x%0x chp %p size 0x%0x, dma_addr %pad\n",
chp->cq.cqid, chp, (1 << chp->cq.size_log2),
- (unsigned long long)chp->cq.dma_addr);
- return &chp->ibcq;
-}
-
-static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
-{
-#ifdef notyet
- struct iwch_cq *chp = to_iwch_cq(cq);
- struct t3_cq oldcq, newcq;
- int ret;
-
- pr_debug("%s ib_cq %p cqe %d\n", __func__, cq, cqe);
-
- /* We don't downsize... */
- if (cqe <= cq->cqe)
- return 0;
-
- /* create new t3_cq with new size */
- cqe = roundup_pow_of_two(cqe+1);
- newcq.size_log2 = ilog2(cqe);
-
- /* Dont allow resize to less than the current wce count */
- if (cqe < Q_COUNT(chp->cq.rptr, chp->cq.wptr)) {
- return -ENOMEM;
- }
-
- /* Quiesce all QPs using this CQ */
- ret = iwch_quiesce_qps(chp);
- if (ret) {
- return ret;
- }
-
- ret = cxio_create_cq(&chp->rhp->rdev, &newcq);
- if (ret) {
- return ret;
- }
-
- /* copy CQEs */
- memcpy(newcq.queue, chp->cq.queue, (1 << chp->cq.size_log2) *
- sizeof(struct t3_cqe));
-
- /* old iwch_qp gets new t3_cq but keeps old cqid */
- oldcq = chp->cq;
- chp->cq = newcq;
- chp->cq.cqid = oldcq.cqid;
-
- /* resize new t3_cq to update the HW context */
- ret = cxio_resize_cq(&chp->rhp->rdev, &chp->cq);
- if (ret) {
- chp->cq = oldcq;
- return ret;
- }
- chp->ibcq.cqe = (1<<chp->cq.size_log2) - 1;
-
- /* destroy old t3_cq */
- oldcq.cqid = newcq.cqid;
- ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq);
- if (ret) {
- pr_err("%s - cxio_destroy_cq failed %d\n", __func__, ret);
- }
-
- /* add user hooks here */
-
- /* resume qps */
- ret = iwch_resume_qps(chp);
- return ret;
-#else
- return -ENOSYS;
-#endif
+ &chp->cq.dma_addr);
+ return 0;
}
static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
@@ -422,8 +346,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
xa_erase_irq(&rhp->mrs, mmid);
if (mhp->kva)
kfree((void *) (unsigned long) mhp->kva);
- if (mhp->umem)
- ib_umem_release(mhp->umem);
+ ib_umem_release(mhp->umem);
pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
kfree(mhp);
return 0;
@@ -553,7 +476,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) {
pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
- if (i == PAGE_SIZE / sizeof *pages) {
+ if (i == PAGE_SIZE / sizeof(*pages)) {
err = iwch_write_pbl(mhp, pages, i, n);
if (err)
goto pbl_done;
@@ -587,7 +510,7 @@ pbl_done:
pr_debug("%s user resp pbl_addr 0x%x\n", __func__,
uresp.pbl_addr);
- if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+ if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
iwch_dereg_mr(&mhp->ibmr, udata);
err = -EFAULT;
goto err;
@@ -880,13 +803,13 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
struct iwch_mm_entry *mm1, *mm2;
- mm1 = kmalloc(sizeof *mm1, GFP_KERNEL);
+ mm1 = kmalloc(sizeof(*mm1), GFP_KERNEL);
if (!mm1) {
iwch_destroy_qp(&qhp->ibqp, udata);
return ERR_PTR(-ENOMEM);
}
- mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
+ mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL);
if (!mm2) {
kfree(mm1);
iwch_destroy_qp(&qhp->ibqp, udata);
@@ -903,7 +826,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
uresp.db_key = ucontext->key;
ucontext->key += PAGE_SIZE;
spin_unlock(&ucontext->mmap_lock);
- if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+ if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
kfree(mm1);
kfree(mm2);
iwch_destroy_qp(&qhp->ibqp, udata);
@@ -911,7 +834,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
}
mm1->key = uresp.key;
mm1->addr = virt_to_phys(qhp->wq.queue);
- mm1->len = PAGE_ALIGN(wqsize * sizeof (union t3_wr));
+ mm1->len = PAGE_ALIGN(wqsize * sizeof(union t3_wr));
insert_mmap(ucontext, mm1);
mm2->key = uresp.db_key;
mm2->addr = qhp->wq.udb & PAGE_MASK;
@@ -919,10 +842,11 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
insert_mmap(ucontext, mm2);
}
qhp->ibqp.qp_num = qhp->wq.qpid;
- pr_debug("%s sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n",
- __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
- qhp->wq.qpid, qhp, (unsigned long long)qhp->wq.dma_addr,
- 1 << qhp->wq.size_log2, qhp->wq.rq_addr);
+ pr_debug(
+ "%s sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr %pad size %d rq_addr 0x%x\n",
+ __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
+ qhp->wq.qpid, qhp, &qhp->wq.dma_addr, 1 << qhp->wq.size_log2,
+ qhp->wq.rq_addr);
return &qhp->ibqp;
}
@@ -932,7 +856,7 @@ static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct iwch_dev *rhp;
struct iwch_qp *qhp;
enum iwch_qp_attr_mask mask = 0;
- struct iwch_qp_attributes attrs;
+ struct iwch_qp_attributes attrs = {};
pr_debug("%s ib_qp %p\n", __func__, ibqp);
@@ -944,7 +868,6 @@ static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (!attr_mask)
return 0;
- memset(&attrs, 0, sizeof attrs);
qhp = to_iwch_qp(ibqp);
rhp = qhp->rhp;
@@ -1040,7 +963,6 @@ static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
return -EINVAL;
dev = to_iwch_dev(ibdev);
- memset(props, 0, sizeof *props);
memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
props->hw_ver = dev->rdev.t3cdev_p->type;
props->fw_ver = fw_vers_string_to_u64(dev);
@@ -1304,6 +1226,11 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str)
}
static const struct ib_device_ops iwch_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_CXGB3,
+ .uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION,
+ .uverbs_no_driver_id_binding = 1,
+
.alloc_hw_stats = iwch_alloc_stats,
.alloc_mr = iwch_alloc_mr,
.alloc_mw = iwch_alloc_mw,
@@ -1341,8 +1268,8 @@ static const struct ib_device_ops iwch_dev_ops = {
.query_port = iwch_query_port,
.reg_user_mr = iwch_reg_user_mr,
.req_notify_cq = iwch_arm_cq,
- .resize_cq = iwch_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_cq, iwch_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext),
};
@@ -1351,7 +1278,6 @@ int iwch_register_device(struct iwch_dev *dev)
pr_debug("%s iwch_dev %p\n", __func__, dev);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
- dev->ibdev.owner = THIS_MODULE;
dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
IB_DEVICE_MEM_WINDOW |
IB_DEVICE_MEM_MGT_EXTENSIONS;
@@ -1383,12 +1309,10 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
dev->ibdev.num_comp_vectors = 1;
dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev;
- dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
memcpy(dev->ibdev.iw_ifname, dev->rdev.t3cdev_p->lldev->name,
sizeof(dev->ibdev.iw_ifname));
- dev->ibdev.driver_id = RDMA_DRIVER_CXGB3;
rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
ib_set_device_ops(&dev->ibdev, &iwch_dev_ops);
return ib_register_device(&dev->ibdev, "cxgb3_%d");
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 09fcfc9e052d..e87fc0408470 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -953,7 +953,7 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
mpalen = sizeof(*mpa) + ep->plen;
if (mpa_rev_to_use == 2)
mpalen += sizeof(struct mpa_v2_conn_params);
- wrlen = roundup(mpalen + sizeof *req, 16);
+ wrlen = roundup(mpalen + sizeof(*req), 16);
skb = get_skb(skb, wrlen, GFP_KERNEL);
if (!skb) {
connect_reply_upcall(ep, -ENOMEM);
@@ -997,8 +997,9 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
}
if (mpa_rev_to_use == 2) {
- mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
- sizeof (struct mpa_v2_conn_params));
+ mpa->private_data_size =
+ htons(ntohs(mpa->private_data_size) +
+ sizeof(struct mpa_v2_conn_params));
pr_debug("initiator ird %u ord %u\n", ep->ird,
ep->ord);
mpa_v2_params.ird = htons((u16)ep->ird);
@@ -1057,7 +1058,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
mpalen = sizeof(*mpa) + plen;
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
mpalen += sizeof(struct mpa_v2_conn_params);
- wrlen = roundup(mpalen + sizeof *req, 16);
+ wrlen = roundup(mpalen + sizeof(*req), 16);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
@@ -1088,8 +1089,9 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
mpa->flags |= MPA_ENHANCED_RDMA_CONN;
- mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
- sizeof (struct mpa_v2_conn_params));
+ mpa->private_data_size =
+ htons(ntohs(mpa->private_data_size) +
+ sizeof(struct mpa_v2_conn_params));
mpa_v2_params.ird = htons(((u16)ep->ird) |
(peer2peer ? MPA_V2_PEER2PEER_MODEL :
0));
@@ -1136,7 +1138,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
mpalen = sizeof(*mpa) + plen;
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
mpalen += sizeof(struct mpa_v2_conn_params);
- wrlen = roundup(mpalen + sizeof *req, 16);
+ wrlen = roundup(mpalen + sizeof(*req), 16);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
@@ -1171,8 +1173,9 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
mpa->flags |= MPA_ENHANCED_RDMA_CONN;
- mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
- sizeof (struct mpa_v2_conn_params));
+ mpa->private_data_size =
+ htons(ntohs(mpa->private_data_size) +
+ sizeof(struct mpa_v2_conn_params));
mpa_v2_params.ird = htons((u16)ep->ird);
mpa_v2_params.ord = htons((u16)ep->ord);
if (peer2peer && (ep->mpa_attr.p2p_type !=
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 52ce586621c6..b1bb61c65f4f 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -34,16 +34,15 @@
#include "iw_cxgb4.h"
-static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
- struct c4iw_dev_ucontext *uctx, struct sk_buff *skb,
- struct c4iw_wr_wait *wr_waitp)
+static void destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
+ struct c4iw_dev_ucontext *uctx, struct sk_buff *skb,
+ struct c4iw_wr_wait *wr_waitp)
{
struct fw_ri_res_wr *res_wr;
struct fw_ri_res *res;
int wr_len;
- int ret;
- wr_len = sizeof *res_wr + sizeof *res;
+ wr_len = sizeof(*res_wr) + sizeof(*res);
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
res_wr = __skb_put_zero(skb, wr_len);
@@ -59,14 +58,13 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
res->u.cq.iqid = cpu_to_be32(cq->cqid);
c4iw_init_wr_wait(wr_waitp);
- ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
+ c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
kfree(cq->sw_queue);
dma_free_coherent(&(rdev->lldi.pdev->dev),
cq->memsize, cq->queue,
dma_unmap_addr(cq, mapping));
c4iw_put_cqid(rdev, cq->cqid, uctx);
- return ret;
}
static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
@@ -104,7 +102,6 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
goto err3;
}
dma_unmap_addr_set(cq, mapping, cq->dma_addr);
- memset(cq->queue, 0, cq->memsize);
if (user && ucontext->is_32b_cqe) {
cq->qp_errp = &((struct t4_status_page *)
@@ -117,7 +114,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
}
/* build fw_ri_res_wr */
- wr_len = sizeof *res_wr + sizeof *res;
+ wr_len = sizeof(*res_wr) + sizeof(*res);
skb = alloc_skb(wr_len, GFP_KERNEL);
if (!skb) {
@@ -970,7 +967,7 @@ int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
return !err || err == -ENODATA ? npolled : err;
}
-int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct c4iw_cq *chp;
struct c4iw_ucontext *ucontext;
@@ -988,18 +985,16 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx,
chp->destroy_skb, chp->wr_waitp);
c4iw_put_wr_wait(chp->wr_waitp);
- kfree(chp);
- return 0;
}
-struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
- struct c4iw_dev *rhp;
- struct c4iw_cq *chp;
+ struct c4iw_dev *rhp = to_c4iw_dev(ibcq->device);
+ struct c4iw_cq *chp = to_c4iw_cq(ibcq);
struct c4iw_create_cq ucmd;
struct c4iw_create_cq_resp uresp;
int ret, wr_len;
@@ -1010,22 +1005,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
pr_debug("ib_dev %p entries %d\n", ibdev, entries);
if (attr->flags)
- return ERR_PTR(-EINVAL);
-
- rhp = to_c4iw_dev(ibdev);
+ return -EINVAL;
if (vector >= rhp->rdev.lldi.nciq)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (udata) {
if (udata->inlen < sizeof(ucmd))
ucontext->is_32b_cqe = 1;
}
- chp = kzalloc(sizeof(*chp), GFP_KERNEL);
- if (!chp)
- return ERR_PTR(-ENOMEM);
-
chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
if (!chp->wr_waitp) {
ret = -ENOMEM;
@@ -1095,10 +1084,10 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
if (ucontext) {
ret = -ENOMEM;
- mm = kmalloc(sizeof *mm, GFP_KERNEL);
+ mm = kmalloc(sizeof(*mm), GFP_KERNEL);
if (!mm)
goto err_remove_handle;
- mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
+ mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL);
if (!mm2)
goto err_free_mm;
@@ -1135,10 +1124,11 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
mm2->len = PAGE_SIZE;
insert_mmap(ucontext, mm2);
}
- pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
- chp->cq.cqid, chp, chp->cq.size,
- chp->cq.memsize, (unsigned long long)chp->cq.dma_addr);
- return &chp->ibcq;
+
+ pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr %pad\n",
+ chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize,
+ &chp->cq.dma_addr);
+ return 0;
err_free_mm2:
kfree(mm2);
err_free_mm:
@@ -1154,8 +1144,7 @@ err_free_skb:
err_free_wr_wait:
c4iw_put_wr_wait(chp->wr_waitp);
err_free_chp:
- kfree(chp);
- return ERR_PTR(ret);
+ return ret;
}
int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 4c0d925c5ff5..a8b9548bd1a2 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -327,7 +327,7 @@ static int qp_open(struct inode *inode, struct file *file)
unsigned long index;
int count = 1;
- qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
+ qpd = kmalloc(sizeof(*qpd), GFP_KERNEL);
if (!qpd)
return -ENOMEM;
@@ -421,7 +421,7 @@ static int stag_open(struct inode *inode, struct file *file)
int ret = 0;
int count = 1;
- stagd = kmalloc(sizeof *stagd, GFP_KERNEL);
+ stagd = kmalloc(sizeof(*stagd), GFP_KERNEL);
if (!stagd) {
ret = -ENOMEM;
goto out;
@@ -1075,7 +1075,7 @@ static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
pr_info("Chelsio T4/T5 RDMA Driver - version %s\n",
DRV_VERSION);
- ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx) {
ctx = ERR_PTR(-ENOMEM);
goto out;
@@ -1243,10 +1243,9 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
case CXGB4_STATE_START_RECOVERY:
pr_info("%s: Fatal Error\n", pci_name(ctx->lldi.pdev));
if (ctx->dev) {
- struct ib_event event;
+ struct ib_event event = {};
ctx->dev->rdev.flags |= T4_FATAL_ERROR;
- memset(&event, 0, sizeof event);
event.event = IB_EVENT_DEVICE_FATAL;
event.device = &ctx->dev->ibdev;
ib_dispatch_event(&event);
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 916ef982172e..7d06b0f8d49a 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -490,13 +490,13 @@ struct c4iw_qp {
struct t4_wq wq;
spinlock_t lock;
struct mutex mutex;
- struct kref kref;
wait_queue_head_t wait;
int sq_sig_all;
struct c4iw_srq *srq;
- struct work_struct free_work;
struct c4iw_ucontext *ucontext;
struct c4iw_wr_wait *wr_waitp;
+ struct completion qp_rel_comp;
+ refcount_t qp_refcnt;
};
static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
@@ -992,10 +992,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
struct ib_udata *udata);
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
-int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
-struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask srq_attr_mask,
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 811c0c8c5b16..aa772ee0706f 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -130,8 +130,9 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
copy_len = len > C4IW_MAX_INLINE_SIZE ? C4IW_MAX_INLINE_SIZE :
len;
- wr_len = roundup(sizeof *req + sizeof *sc +
- roundup(copy_len, T4_ULPTX_MIN_IO), 16);
+ wr_len = roundup(sizeof(*req) + sizeof(*sc) +
+ roundup(copy_len, T4_ULPTX_MIN_IO),
+ 16);
if (!skb) {
skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
@@ -807,8 +808,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
mhp->attr.pbl_size << 3);
if (mhp->kva)
kfree((void *) (unsigned long) mhp->kva);
- if (mhp->umem)
- ib_umem_release(mhp->umem);
+ ib_umem_release(mhp->umem);
pr_debug("mmid 0x%x ptr %p\n", mmid, mhp);
c4iw_put_wr_wait(mhp->wr_waitp);
kfree(mhp);
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 74b795642fca..5e59c5708729 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -271,7 +271,6 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
return -EINVAL;
dev = to_c4iw_dev(ibdev);
- memset(props, 0, sizeof *props);
memcpy(&props->sys_image_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type);
props->fw_ver = dev->rdev.lldi.fw_vers;
@@ -490,6 +489,10 @@ static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res)
}
static const struct ib_device_ops c4iw_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_CXGB4,
+ .uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION,
+
.alloc_hw_stats = c4iw_alloc_stats,
.alloc_mr = c4iw_alloc_mr,
.alloc_mw = c4iw_alloc_mw,
@@ -534,6 +537,7 @@ static const struct ib_device_ops c4iw_dev_ops = {
.reg_user_mr = c4iw_reg_user_mr,
.req_notify_cq = c4iw_arm_cq,
INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_cq, c4iw_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_srq, c4iw_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext),
};
@@ -561,7 +565,6 @@ void c4iw_register_device(struct work_struct *work)
pr_debug("c4iw_dev %p\n", dev);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
- dev->ibdev.owner = THIS_MODULE;
dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
if (fastreg_support)
dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
@@ -594,13 +597,11 @@ void c4iw_register_device(struct work_struct *work)
dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports;
dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq;
dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev;
- dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
memcpy(dev->ibdev.iw_ifname, dev->rdev.lldi.ports[0]->name,
sizeof(dev->ibdev.iw_ifname));
rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group);
- dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops);
ret = set_netdevs(&dev->ibdev, &dev->rdev);
if (ret)
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index e92b9544357a..eb9368be28c1 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -274,7 +274,6 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
(unsigned long long)virt_to_phys(wq->sq.queue),
wq->rq.queue,
(unsigned long long)virt_to_phys(wq->rq.queue));
- memset(wq->rq.queue, 0, wq->rq.memsize);
dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
}
@@ -303,7 +302,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
wq->rq.msn = 1;
/* build fw_ri_res_wr */
- wr_len = sizeof *res_wr + 2 * sizeof *res;
+ wr_len = sizeof(*res_wr) + 2 * sizeof(*res);
if (need_rq)
wr_len += sizeof(*res);
skb = alloc_skb(wr_len, GFP_KERNEL);
@@ -439,7 +438,7 @@ static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
rem -= len;
}
}
- len = roundup(plen + sizeof *immdp, 16) - (plen + sizeof *immdp);
+ len = roundup(plen + sizeof(*immdp), 16) - (plen + sizeof(*immdp));
if (len)
memset(dstp, 0, len);
immdp->op = FW_RI_DATA_IMMD;
@@ -528,7 +527,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
T4_MAX_SEND_INLINE, &plen);
if (ret)
return ret;
- size = sizeof wqe->send + sizeof(struct fw_ri_immd) +
+ size = sizeof(wqe->send) + sizeof(struct fw_ri_immd) +
plen;
} else {
ret = build_isgl((__be64 *)sq->queue,
@@ -537,7 +536,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
wr->sg_list, wr->num_sge, &plen);
if (ret)
return ret;
- size = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
+ size = sizeof(wqe->send) + sizeof(struct fw_ri_isgl) +
wr->num_sge * sizeof(struct fw_ri_sge);
}
} else {
@@ -545,7 +544,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
wqe->send.u.immd_src[0].r1 = 0;
wqe->send.u.immd_src[0].r2 = 0;
wqe->send.u.immd_src[0].immdlen = 0;
- size = sizeof wqe->send + sizeof(struct fw_ri_immd);
+ size = sizeof(wqe->send) + sizeof(struct fw_ri_immd);
plen = 0;
}
*len16 = DIV_ROUND_UP(size, 16);
@@ -579,7 +578,7 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
T4_MAX_WRITE_INLINE, &plen);
if (ret)
return ret;
- size = sizeof wqe->write + sizeof(struct fw_ri_immd) +
+ size = sizeof(wqe->write) + sizeof(struct fw_ri_immd) +
plen;
} else {
ret = build_isgl((__be64 *)sq->queue,
@@ -588,7 +587,7 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
wr->sg_list, wr->num_sge, &plen);
if (ret)
return ret;
- size = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
+ size = sizeof(wqe->write) + sizeof(struct fw_ri_isgl) +
wr->num_sge * sizeof(struct fw_ri_sge);
}
} else {
@@ -596,7 +595,7 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
wqe->write.u.immd_src[0].r1 = 0;
wqe->write.u.immd_src[0].r2 = 0;
wqe->write.u.immd_src[0].immdlen = 0;
- size = sizeof wqe->write + sizeof(struct fw_ri_immd);
+ size = sizeof(wqe->write) + sizeof(struct fw_ri_immd);
plen = 0;
}
*len16 = DIV_ROUND_UP(size, 16);
@@ -683,7 +682,7 @@ static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr,
}
wqe->read.r2 = 0;
wqe->read.r5 = 0;
- *len16 = DIV_ROUND_UP(sizeof wqe->read, 16);
+ *len16 = DIV_ROUND_UP(sizeof(wqe->read), 16);
return 0;
}
@@ -766,8 +765,8 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
&wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
if (ret)
return ret;
- *len16 = DIV_ROUND_UP(sizeof wqe->recv +
- wr->num_sge * sizeof(struct fw_ri_sge), 16);
+ *len16 = DIV_ROUND_UP(
+ sizeof(wqe->recv) + wr->num_sge * sizeof(struct fw_ri_sge), 16);
return 0;
}
@@ -886,47 +885,21 @@ static int build_inv_stag(union t4_wr *wqe, const struct ib_send_wr *wr,
{
wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
wqe->inv.r2 = 0;
- *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16);
+ *len16 = DIV_ROUND_UP(sizeof(wqe->inv), 16);
return 0;
}
-static void free_qp_work(struct work_struct *work)
-{
- struct c4iw_ucontext *ucontext;
- struct c4iw_qp *qhp;
- struct c4iw_dev *rhp;
-
- qhp = container_of(work, struct c4iw_qp, free_work);
- ucontext = qhp->ucontext;
- rhp = qhp->rhp;
-
- pr_debug("qhp %p ucontext %p\n", qhp, ucontext);
- destroy_qp(&rhp->rdev, &qhp->wq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
-
- c4iw_put_wr_wait(qhp->wr_waitp);
- kfree(qhp);
-}
-
-static void queue_qp_free(struct kref *kref)
-{
- struct c4iw_qp *qhp;
-
- qhp = container_of(kref, struct c4iw_qp, kref);
- pr_debug("qhp %p\n", qhp);
- queue_work(qhp->rhp->rdev.free_workq, &qhp->free_work);
-}
-
void c4iw_qp_add_ref(struct ib_qp *qp)
{
pr_debug("ib_qp %p\n", qp);
- kref_get(&to_c4iw_qp(qp)->kref);
+ refcount_inc(&to_c4iw_qp(qp)->qp_refcnt);
}
void c4iw_qp_rem_ref(struct ib_qp *qp)
{
pr_debug("ib_qp %p\n", qp);
- kref_put(&to_c4iw_qp(qp)->kref, queue_qp_free);
+ if (refcount_dec_and_test(&to_c4iw_qp(qp)->qp_refcnt))
+ complete(&to_c4iw_qp(qp)->qp_rel_comp);
}
static void add_to_fc_list(struct list_head *head, struct list_head *entry)
@@ -1606,7 +1579,7 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
wqe->u.terminate.type = FW_RI_TYPE_TERMINATE;
- wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term);
+ wqe->u.terminate.immdlen = cpu_to_be32(sizeof(*term));
term = (struct terminate_message *)wqe->u.terminate.termmsg;
if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) {
term->layer_etype = qhp->attr.layer_etype;
@@ -1751,16 +1724,15 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
{
pr_debug("p2p_type = %d\n", p2p_type);
- memset(&init->u, 0, sizeof init->u);
+ memset(&init->u, 0, sizeof(init->u));
switch (p2p_type) {
case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
init->u.write.opcode = FW_RI_RDMA_WRITE_WR;
init->u.write.stag_sink = cpu_to_be32(1);
init->u.write.to_sink = cpu_to_be64(1);
init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD;
- init->u.write.len16 = DIV_ROUND_UP(sizeof init->u.write +
- sizeof(struct fw_ri_immd),
- 16);
+ init->u.write.len16 = DIV_ROUND_UP(
+ sizeof(init->u.write) + sizeof(struct fw_ri_immd), 16);
break;
case FW_RI_INIT_P2PTYPE_READ_REQ:
init->u.write.opcode = FW_RI_RDMA_READ_WR;
@@ -1768,7 +1740,7 @@ static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
init->u.read.to_src_lo = cpu_to_be32(1);
init->u.read.stag_sink = cpu_to_be32(1);
init->u.read.to_sink_lo = cpu_to_be32(1);
- init->u.read.len16 = DIV_ROUND_UP(sizeof init->u.read, 16);
+ init->u.read.len16 = DIV_ROUND_UP(sizeof(init->u.read), 16);
break;
}
}
@@ -1782,7 +1754,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
pr_debug("qhp %p qid 0x%x tid %u ird %u ord %u\n", qhp,
qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord);
- skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
+ skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
if (!skb) {
ret = -ENOMEM;
goto out;
@@ -2099,10 +2071,12 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
{
struct c4iw_dev *rhp;
struct c4iw_qp *qhp;
+ struct c4iw_ucontext *ucontext;
struct c4iw_qp_attributes attrs;
qhp = to_c4iw_qp(ib_qp);
rhp = qhp->rhp;
+ ucontext = qhp->ucontext;
attrs.next_state = C4IW_QP_STATE_ERROR;
if (qhp->attr.state == C4IW_QP_STATE_TERMINATE)
@@ -2120,7 +2094,17 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
c4iw_qp_rem_ref(ib_qp);
+ wait_for_completion(&qhp->qp_rel_comp);
+
pr_debug("ib_qp %p qpid 0x%0x\n", ib_qp, qhp->wq.sq.qid);
+ pr_debug("qhp %p ucontext %p\n", qhp, ucontext);
+
+ destroy_qp(&rhp->rdev, &qhp->wq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
+
+ c4iw_put_wr_wait(qhp->wr_waitp);
+
+ kfree(qhp);
return 0;
}
@@ -2230,8 +2214,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
spin_lock_init(&qhp->lock);
mutex_init(&qhp->mutex);
init_waitqueue_head(&qhp->wait);
- kref_init(&qhp->kref);
- INIT_WORK(&qhp->free_work, free_qp_work);
+ init_completion(&qhp->qp_rel_comp);
+ refcount_set(&qhp->qp_refcnt, 1);
ret = xa_insert_irq(&rhp->qps, qhp->wq.sq.qid, qhp, GFP_KERNEL);
if (ret)
@@ -2302,7 +2286,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
ucontext->key += PAGE_SIZE;
}
spin_unlock(&ucontext->mmap_lock);
- ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+ ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (ret)
goto err_free_ma_sync_key;
sq_key_mm->key = uresp.sq_key;
@@ -2386,7 +2370,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct c4iw_dev *rhp;
struct c4iw_qp *qhp;
enum c4iw_qp_attr_mask mask = 0;
- struct c4iw_qp_attributes attrs;
+ struct c4iw_qp_attributes attrs = {};
pr_debug("ib_qp %p\n", ibqp);
@@ -2398,7 +2382,6 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (!attr_mask)
return 0;
- memset(&attrs, 0, sizeof attrs);
qhp = to_c4iw_qp(ibqp);
rhp = qhp->rhp;
@@ -2482,8 +2465,8 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
{
struct c4iw_qp *qhp = to_c4iw_qp(ibqp);
- memset(attr, 0, sizeof *attr);
- memset(init_attr, 0, sizeof *init_attr);
+ memset(attr, 0, sizeof(*attr));
+ memset(init_attr, 0, sizeof(*init_attr));
attr->qp_state = to_ib_qp_state(qhp->attr.state);
init_attr->cap.max_send_wr = qhp->attr.sq_num_entries;
init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries;
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
index 57ed26b3cc21..5c95c789f302 100644
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -126,7 +126,7 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
rdev->stats.qid.cur += rdev->qpmask + 1;
mutex_unlock(&rdev->stats.lock);
for (i = qid+1; i & rdev->qpmask; i++) {
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = i;
@@ -137,13 +137,13 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
* now put the same ids on the qp list since they all
* map to the same db/gts page.
*/
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = qid;
list_add_tail(&entry->entry, &uctx->qpids);
for (i = qid+1; i & rdev->qpmask; i++) {
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = i;
@@ -165,7 +165,7 @@ void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid,
{
struct c4iw_qid_list *entry;
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return;
pr_debug("qid 0x%x\n", qid);
@@ -200,7 +200,7 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
rdev->stats.qid.cur += rdev->qpmask + 1;
mutex_unlock(&rdev->stats.lock);
for (i = qid+1; i & rdev->qpmask; i++) {
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = i;
@@ -211,13 +211,13 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
* now put the same ids on the cq list since they all
* map to the same db/gts page.
*/
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = qid;
list_add_tail(&entry->entry, &uctx->cqids);
for (i = qid; i & rdev->qpmask; i++) {
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = i;
@@ -239,7 +239,7 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid,
{
struct c4iw_qid_list *entry;
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return;
pr_debug("qid 0x%x\n", qid);
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index 9e3cc3239c13..119f8efec564 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -7,10 +7,8 @@
#define _EFA_H_
#include <linux/bitops.h>
-#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
-#include <linux/sched.h>
#include <rdma/efa-abi.h>
#include <rdma/ib_verbs.h>
@@ -136,10 +134,9 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
-int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
-struct ib_cq *efa_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c
index a5c788741a04..2cb42484b0f8 100644
--- a/drivers/infiniband/hw/efa/efa_com.c
+++ b/drivers/infiniband/hw/efa/efa_com.c
@@ -39,8 +39,6 @@
enum efa_cmd_status {
EFA_CMD_SUBMITTED,
EFA_CMD_COMPLETED,
- /* Abort - canceled by the driver */
- EFA_CMD_ABORTED,
};
struct efa_comp_ctx {
@@ -280,36 +278,34 @@ static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq,
static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq,
struct efa_comp_ctx *comp_ctx)
{
- u16 comp_id = comp_ctx->user_cqe->acq_common_descriptor.command &
- EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
+ u16 cmd_id = comp_ctx->user_cqe->acq_common_descriptor.command &
+ EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
+ u16 ctx_id = cmd_id & (aq->depth - 1);
- ibdev_dbg(aq->efa_dev, "Putting completion command_id %d\n", comp_id);
+ ibdev_dbg(aq->efa_dev, "Put completion command_id %#x\n", cmd_id);
comp_ctx->occupied = 0;
- efa_com_dealloc_ctx_id(aq, comp_id);
+ efa_com_dealloc_ctx_id(aq, ctx_id);
}
static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq,
- u16 command_id, bool capture)
+ u16 cmd_id, bool capture)
{
- if (command_id >= aq->depth) {
- ibdev_err(aq->efa_dev,
- "command id is larger than the queue size. cmd_id: %u queue size %d\n",
- command_id, aq->depth);
- return NULL;
- }
+ u16 ctx_id = cmd_id & (aq->depth - 1);
- if (aq->comp_ctx[command_id].occupied && capture) {
- ibdev_err(aq->efa_dev, "Completion context is occupied\n");
+ if (aq->comp_ctx[ctx_id].occupied && capture) {
+ ibdev_err(aq->efa_dev,
+ "Completion context for command_id %#x is occupied\n",
+ cmd_id);
return NULL;
}
if (capture) {
- aq->comp_ctx[command_id].occupied = 1;
- ibdev_dbg(aq->efa_dev, "Taking completion ctxt command_id %d\n",
- command_id);
+ aq->comp_ctx[ctx_id].occupied = 1;
+ ibdev_dbg(aq->efa_dev,
+ "Take completion ctxt for command_id %#x\n", cmd_id);
}
- return &aq->comp_ctx[command_id];
+ return &aq->comp_ctx[ctx_id];
}
static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
@@ -320,6 +316,7 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu
{
struct efa_comp_ctx *comp_ctx;
u16 queue_size_mask;
+ u16 cmd_id;
u16 ctx_id;
u16 pi;
@@ -328,13 +325,16 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu
ctx_id = efa_com_alloc_ctx_id(aq);
+ /* cmd_id LSBs are the ctx_id and MSBs are entropy bits from pc */
+ cmd_id = ctx_id & queue_size_mask;
+ cmd_id |= aq->sq.pc & ~queue_size_mask;
+ cmd_id &= EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
+
+ cmd->aq_common_descriptor.command_id = cmd_id;
cmd->aq_common_descriptor.flags |= aq->sq.phase &
EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK;
- cmd->aq_common_descriptor.command_id |= ctx_id &
- EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
-
- comp_ctx = efa_com_get_comp_ctx(aq, ctx_id, true);
+ comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, true);
if (!comp_ctx) {
efa_com_dealloc_ctx_id(aq, ctx_id);
return ERR_PTR(-EINVAL);
@@ -532,16 +532,6 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c
msleep(aq->poll_interval);
}
- if (comp_ctx->status == EFA_CMD_ABORTED) {
- ibdev_err(aq->efa_dev, "Command was aborted\n");
- atomic64_inc(&aq->stats.aborted_cmd);
- err = -ENODEV;
- goto out;
- }
-
- WARN_ONCE(comp_ctx->status != EFA_CMD_COMPLETED,
- "Invalid completion status %d\n", comp_ctx->status);
-
err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
out:
efa_com_put_comp_ctx(aq, comp_ctx);
@@ -666,66 +656,6 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
}
/**
- * efa_com_abort_admin_commands - Abort all the outstanding admin commands.
- * @edev: EFA communication layer struct
- *
- * This method aborts all the outstanding admin commands.
- * The caller should then call efa_com_wait_for_abort_completion to make sure
- * all the commands were completed.
- */
-static void efa_com_abort_admin_commands(struct efa_com_dev *edev)
-{
- struct efa_com_admin_queue *aq = &edev->aq;
- struct efa_comp_ctx *comp_ctx;
- unsigned long flags;
- u16 i;
-
- spin_lock(&aq->sq.lock);
- spin_lock_irqsave(&aq->cq.lock, flags);
- for (i = 0; i < aq->depth; i++) {
- comp_ctx = efa_com_get_comp_ctx(aq, i, false);
- if (!comp_ctx)
- break;
-
- comp_ctx->status = EFA_CMD_ABORTED;
-
- complete(&comp_ctx->wait_event);
- }
- spin_unlock_irqrestore(&aq->cq.lock, flags);
- spin_unlock(&aq->sq.lock);
-}
-
-/**
- * efa_com_wait_for_abort_completion - Wait for admin commands abort.
- * @edev: EFA communication layer struct
- *
- * This method wait until all the outstanding admin commands will be completed.
- */
-static void efa_com_wait_for_abort_completion(struct efa_com_dev *edev)
-{
- struct efa_com_admin_queue *aq = &edev->aq;
- int i;
-
- /* all mine */
- for (i = 0; i < aq->depth; i++)
- down(&aq->avail_cmds);
-
- /* let it go */
- for (i = 0; i < aq->depth; i++)
- up(&aq->avail_cmds);
-}
-
-static void efa_com_admin_flush(struct efa_com_dev *edev)
-{
- struct efa_com_admin_queue *aq = &edev->aq;
-
- clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
-
- efa_com_abort_admin_commands(edev);
- efa_com_wait_for_abort_completion(edev);
-}
-
-/**
* efa_com_admin_destroy - Destroy the admin and the async events queues.
* @edev: EFA communication layer struct
*/
@@ -737,7 +667,7 @@ void efa_com_admin_destroy(struct efa_com_dev *edev)
struct efa_com_admin_sq *sq = &aq->sq;
u16 size;
- efa_com_admin_flush(edev);
+ clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
devm_kfree(edev->dmadev, aq->comp_ctx_pool);
devm_kfree(edev->dmadev, aq->comp_ctx);
diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h
index 84d96724a74b..c67dd8109d1c 100644
--- a/drivers/infiniband/hw/efa/efa_com.h
+++ b/drivers/infiniband/hw/efa/efa_com.h
@@ -45,7 +45,6 @@ struct efa_com_admin_sq {
/* Don't use anything other than atomic64 */
struct efa_com_stats_admin {
- atomic64_t aborted_cmd;
atomic64_t submitted_cmd;
atomic64_t completed_cmd;
atomic64_t no_completion;
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
index c0016648804c..62345d8abf3c 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.c
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -3,7 +3,6 @@
* Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
-#include "efa.h"
#include "efa_com.h"
#include "efa_com_cmd.h"
@@ -57,7 +56,7 @@ int efa_com_create_qp(struct efa_com_dev *edev,
res->send_sub_cq_idx = cmd_completion.send_sub_cq_idx;
res->recv_sub_cq_idx = cmd_completion.recv_sub_cq_idx;
- return err;
+ return 0;
}
int efa_com_modify_qp(struct efa_com_dev *edev,
@@ -181,7 +180,7 @@ int efa_com_create_cq(struct efa_com_dev *edev,
result->cq_idx = cmd_completion.cq_idx;
result->actual_depth = params->cq_depth;
- return err;
+ return 0;
}
int efa_com_destroy_cq(struct efa_com_dev *edev,
@@ -307,7 +306,8 @@ int efa_com_create_ah(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to create ah [%d]\n", err);
+ ibdev_err(edev->efa_dev, "Failed to create ah for %pI6 [%d]\n",
+ ah_cmd.dest_addr, err);
return err;
}
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index db974caf1eb1..dd1c6d49466f 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -100,7 +100,7 @@ static int efa_request_mgmnt_irq(struct efa_dev *dev)
nr_cpumask_bits, &irq->affinity_hint_mask, irq->vector);
irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
- return err;
+ return 0;
}
static void efa_setup_mgmnt_irq(struct efa_dev *dev)
@@ -197,6 +197,10 @@ static void efa_stats_init(struct efa_dev *dev)
}
static const struct ib_device_ops efa_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_EFA,
+ .uverbs_abi_ver = EFA_UVERBS_ABI_VERSION,
+
.alloc_pd = efa_alloc_pd,
.alloc_ucontext = efa_alloc_ucontext,
.create_ah = efa_create_ah,
@@ -220,6 +224,7 @@ static const struct ib_device_ops efa_dev_ops = {
.reg_user_mr = efa_reg_mr,
INIT_RDMA_OBJ_SIZE(ib_ah, efa_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, efa_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, efa_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, efa_ucontext, ibucontext),
};
@@ -259,12 +264,10 @@ static int efa_ib_device_add(struct efa_dev *dev)
if (err)
goto err_release_doorbell_bar;
- dev->ibdev.owner = THIS_MODULE;
dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED;
dev->ibdev.phys_port_cnt = 1;
dev->ibdev.num_comp_vectors = 1;
dev->ibdev.dev.parent = &pdev->dev;
- dev->ibdev.uverbs_abi_ver = EFA_UVERBS_ABI_VERSION;
dev->ibdev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -287,7 +290,6 @@ static int efa_ib_device_add(struct efa_dev *dev)
dev->ibdev.uverbs_ex_cmd_mask =
(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
- dev->ibdev.driver_id = RDMA_DRIVER_EFA;
ib_set_device_ops(&dev->ibdev, &efa_dev_ops);
err = ib_register_device(&dev->ibdev, "efa_%d");
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index fb6115244d4c..df77bc312a25 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -447,12 +447,6 @@ void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
struct efa_dev *dev = to_edev(ibpd->device);
struct efa_pd *pd = to_epd(ibpd);
- if (udata->inlen &&
- !ib_is_udata_cleared(udata, 0, udata->inlen)) {
- ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
- return;
- }
-
ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
efa_pd_dealloc(dev, pd->pdn);
}
@@ -470,12 +464,6 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
struct efa_qp *qp = to_eqp(ibqp);
int err;
- if (udata->inlen &&
- !ib_is_udata_cleared(udata, 0, udata->inlen)) {
- ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
- return -EINVAL;
- }
-
ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);
err = efa_destroy_qp_handle(dev, qp->qp_handle);
if (err)
@@ -870,31 +858,18 @@ static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
return efa_com_destroy_cq(&dev->edev, &params);
}
-int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct efa_dev *dev = to_edev(ibcq->device);
struct efa_cq *cq = to_ecq(ibcq);
- int err;
-
- if (udata->inlen &&
- !ib_is_udata_cleared(udata, 0, udata->inlen)) {
- ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
- return -EINVAL;
- }
ibdev_dbg(&dev->ibdev,
"Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);
- err = efa_destroy_cq_idx(dev, cq->cq_idx);
- if (err)
- return err;
-
+ efa_destroy_cq_idx(dev, cq->cq_idx);
dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
DMA_FROM_DEVICE);
-
- kfree(cq);
- return 0;
}
static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
@@ -910,17 +885,20 @@ static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
return 0;
}
-static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries,
- int vector, struct ib_ucontext *ibucontext,
- struct ib_udata *udata)
+int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct efa_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct efa_ucontext, ibucontext);
struct efa_ibv_create_cq_resp resp = {};
struct efa_com_create_cq_params params;
struct efa_com_create_cq_result result;
+ struct ib_device *ibdev = ibcq->device;
struct efa_dev *dev = to_edev(ibdev);
struct efa_ibv_create_cq cmd = {};
+ struct efa_cq *cq = to_ecq(ibcq);
bool cq_entry_inserted = false;
- struct efa_cq *cq;
+ int entries = attr->cqe;
int err;
ibdev_dbg(ibdev, "create_cq entries %d\n", entries);
@@ -978,19 +956,13 @@ static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries,
goto err_out;
}
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq) {
- err = -ENOMEM;
- goto err_out;
- }
-
- cq->ucontext = to_eucontext(ibucontext);
+ cq->ucontext = ucontext;
cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
DMA_FROM_DEVICE);
if (!cq->cpu_addr) {
err = -ENOMEM;
- goto err_free_cq;
+ goto err_out;
}
params.uarn = cq->ucontext->uarn;
@@ -1009,8 +981,8 @@ static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries,
err = cq_mmap_entries_setup(dev, cq, &resp);
if (err) {
- ibdev_dbg(ibdev,
- "Could not setup cq[%u] mmap entries\n", cq->cq_idx);
+ ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n",
+ cq->cq_idx);
goto err_destroy_cq;
}
@@ -1026,11 +998,10 @@ static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries,
}
}
- ibdev_dbg(ibdev,
- "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
+ ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr);
- return &cq->ibcq;
+ return 0;
err_destroy_cq:
efa_destroy_cq_idx(dev, cq->cq_idx);
@@ -1039,23 +1010,9 @@ err_free_mapped:
DMA_FROM_DEVICE);
if (!cq_entry_inserted)
free_pages_exact(cq->cpu_addr, cq->size);
-err_free_cq:
- kfree(cq);
err_out:
atomic64_inc(&dev->stats.sw_stats.create_cq_err);
- return ERR_PTR(err);
-}
-
-struct ib_cq *efa_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
-{
- struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata,
- struct efa_ucontext,
- ibucontext);
-
- return do_create_cq(ibdev, attr->cqe, attr->comp_vector,
- &ucontext->ibucontext, udata);
+ return err;
}
static int umem_to_page_list(struct efa_dev *dev,
@@ -1065,21 +1022,15 @@ static int umem_to_page_list(struct efa_dev *dev,
u8 hp_shift)
{
u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
- struct sg_dma_page_iter sg_iter;
- unsigned int page_idx = 0;
+ struct ib_block_iter biter;
unsigned int hp_idx = 0;
ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
hp_cnt, pages_in_hp);
- for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
- if (page_idx % pages_in_hp == 0) {
- page_list[hp_idx] = sg_page_iter_dma_address(&sg_iter);
- hp_idx++;
- }
-
- page_idx++;
- }
+ rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap,
+ BIT(hp_shift))
+ page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);
return 0;
}
@@ -1114,14 +1065,14 @@ err:
*/
static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
{
- unsigned int entry, payloads_in_sg, chunk_list_size, chunk_idx, payload_idx;
struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
+ unsigned int chunk_list_size, chunk_idx, payload_idx;
int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
struct efa_com_ctrl_buff_info *ctrl_buf;
u64 *cur_chunk_buf, *prev_chunk_buf;
- struct scatterlist *sg;
+ struct ib_block_iter biter;
dma_addr_t dma_addr;
int i;
@@ -1155,18 +1106,15 @@ static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
chunk_idx = 0;
payload_idx = 0;
cur_chunk_buf = chunk_list->chunks[0].buf;
- for_each_sg(pages_sgl, sg, sg_dma_cnt, entry) {
- payloads_in_sg = sg_dma_len(sg) >> EFA_CHUNK_PAYLOAD_SHIFT;
- for (i = 0; i < payloads_in_sg; i++) {
- cur_chunk_buf[payload_idx++] =
- (sg_dma_address(sg) & ~(EFA_CHUNK_PAYLOAD_SIZE - 1)) +
- (EFA_CHUNK_PAYLOAD_SIZE * i);
-
- if (payload_idx == EFA_PTRS_PER_CHUNK) {
- chunk_idx++;
- cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
- payload_idx = 0;
- }
+ rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt,
+ EFA_CHUNK_PAYLOAD_SIZE) {
+ cur_chunk_buf[payload_idx++] =
+ rdma_block_iter_dma_address(&biter);
+
+ if (payload_idx == EFA_PTRS_PER_CHUNK) {
+ chunk_idx++;
+ cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
+ payload_idx = 0;
}
}
@@ -1314,30 +1262,30 @@ static int pbl_create(struct efa_dev *dev,
int err;
pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
- pbl->pbl_buf = kzalloc(pbl->pbl_buf_size_in_bytes,
- GFP_KERNEL | __GFP_NOWARN);
- if (pbl->pbl_buf) {
- pbl->physically_continuous = 1;
+ pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL);
+ if (!pbl->pbl_buf)
+ return -ENOMEM;
+
+ if (is_vmalloc_addr(pbl->pbl_buf)) {
+ pbl->physically_continuous = 0;
err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
hp_shift);
if (err)
- goto err_continuous;
- err = pbl_continuous_initialize(dev, pbl);
+ goto err_free;
+
+ err = pbl_indirect_initialize(dev, pbl);
if (err)
- goto err_continuous;
+ goto err_free;
} else {
- pbl->physically_continuous = 0;
- pbl->pbl_buf = vzalloc(pbl->pbl_buf_size_in_bytes);
- if (!pbl->pbl_buf)
- return -ENOMEM;
-
+ pbl->physically_continuous = 1;
err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
hp_shift);
if (err)
- goto err_indirect;
- err = pbl_indirect_initialize(dev, pbl);
+ goto err_free;
+
+ err = pbl_continuous_initialize(dev, pbl);
if (err)
- goto err_indirect;
+ goto err_free;
}
ibdev_dbg(&dev->ibdev,
@@ -1346,24 +1294,20 @@ static int pbl_create(struct efa_dev *dev,
return 0;
-err_continuous:
- kfree(pbl->pbl_buf);
- return err;
-err_indirect:
- vfree(pbl->pbl_buf);
+err_free:
+ kvfree(pbl->pbl_buf);
return err;
}
static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl)
{
- if (pbl->physically_continuous) {
+ if (pbl->physically_continuous)
dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr,
pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
- kfree(pbl->pbl_buf);
- } else {
+ else
pbl_indirect_terminate(dev, pbl);
- vfree(pbl->pbl_buf);
- }
+
+ kvfree(pbl->pbl_buf);
}
static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
@@ -1417,56 +1361,6 @@ static int efa_create_pbl(struct efa_dev *dev,
return 0;
}
-static void efa_cont_pages(struct ib_umem *umem, u64 addr,
- unsigned long max_page_shift,
- int *count, u8 *shift, u32 *ncont)
-{
- struct scatterlist *sg;
- u64 base = ~0, p = 0;
- unsigned long tmp;
- unsigned long m;
- u64 len, pfn;
- int i = 0;
- int entry;
-
- addr = addr >> PAGE_SHIFT;
- tmp = (unsigned long)addr;
- m = find_first_bit(&tmp, BITS_PER_LONG);
- if (max_page_shift)
- m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m);
-
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = DIV_ROUND_UP(sg_dma_len(sg), PAGE_SIZE);
- pfn = sg_dma_address(sg) >> PAGE_SHIFT;
- if (base + p != pfn) {
- /*
- * If either the offset or the new
- * base are unaligned update m
- */
- tmp = (unsigned long)(pfn | p);
- if (!IS_ALIGNED(tmp, 1 << m))
- m = find_first_bit(&tmp, BITS_PER_LONG);
-
- base = pfn;
- p = 0;
- }
-
- p += len;
- i += len;
- }
-
- if (i) {
- m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
- *ncont = DIV_ROUND_UP(i, (1 << m));
- } else {
- m = 0;
- *ncont = 0;
- }
-
- *shift = PAGE_SHIFT + m;
- *count = i;
-}
-
struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata)
@@ -1474,11 +1368,10 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
struct efa_dev *dev = to_edev(ibpd->device);
struct efa_com_reg_mr_params params = {};
struct efa_com_reg_mr_result result = {};
- unsigned long max_page_shift;
struct pbl_context pbl;
+ unsigned int pg_sz;
struct efa_mr *mr;
int inline_size;
- int npages;
int err;
if (udata->inlen &&
@@ -1515,13 +1408,24 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
params.iova = virt_addr;
params.mr_length_in_bytes = length;
params.permissions = access_flags & 0x1;
- max_page_shift = fls64(dev->dev_attr.page_size_cap);
- efa_cont_pages(mr->umem, start, max_page_shift, &npages,
- &params.page_shift, &params.page_num);
+ pg_sz = ib_umem_find_best_pgsz(mr->umem,
+ dev->dev_attr.page_size_cap,
+ virt_addr);
+ if (!pg_sz) {
+ err = -EOPNOTSUPP;
+ ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n",
+ dev->dev_attr.page_size_cap);
+ goto err_unmap;
+ }
+
+ params.page_shift = __ffs(pg_sz);
+ params.page_num = DIV_ROUND_UP(length + (start & (pg_sz - 1)),
+ pg_sz);
+
ibdev_dbg(&dev->ibdev,
- "start %#llx length %#llx npages %d params.page_shift %u params.page_num %u\n",
- start, length, npages, params.page_shift, params.page_num);
+ "start %#llx length %#llx params.page_shift %u params.page_num %u\n",
+ start, length, params.page_shift, params.page_num);
inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
if (params.page_num <= inline_size) {
@@ -1567,12 +1471,6 @@ int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
struct efa_mr *mr = to_emr(ibmr);
int err;
- if (udata->inlen &&
- !ib_is_udata_cleared(udata, 0, udata->inlen)) {
- ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
- return -EINVAL;
- }
-
ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);
if (mr->umem) {
@@ -1580,8 +1478,8 @@ int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
err = efa_com_dereg_mr(&dev->edev, &params);
if (err)
return err;
- ib_umem_release(mr->umem);
}
+ ib_umem_release(mr->umem);
kfree(mr);
@@ -1707,13 +1605,15 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
err = -EINVAL;
}
- if (err)
+ if (err) {
ibdev_dbg(
&dev->ibdev,
"Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n",
entry->address, length, entry->mmap_flag, err);
+ return err;
+ }
- return err;
+ return 0;
}
int efa_mmap(struct ib_ucontext *ibucontext,
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 4044a8c8dbf4..0405d26d0833 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
hfi1-y := \
affinity.o \
+ aspm.o \
chip.o \
device.o \
driver.o \
diff --git a/drivers/infiniband/hw/hfi1/aspm.c b/drivers/infiniband/hw/hfi1/aspm.c
new file mode 100644
index 000000000000..a3c53be4072c
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/aspm.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2019 Intel Corporation.
+ *
+ */
+
+#include "aspm.h"
+
+/* Time after which the timer interrupt will re-enable ASPM */
+#define ASPM_TIMER_MS 1000
+/* Time for which interrupts are ignored after a timer has been scheduled */
+#define ASPM_RESCHED_TIMER_MS (ASPM_TIMER_MS / 2)
+/* Two interrupts within this time trigger ASPM disable */
+#define ASPM_TRIGGER_MS 1
+#define ASPM_TRIGGER_NS (ASPM_TRIGGER_MS * 1000 * 1000ull)
+#define ASPM_L1_SUPPORTED(reg) \
+ ((((reg) & PCI_EXP_LNKCAP_ASPMS) >> 10) & 0x2)
+
+uint aspm_mode = ASPM_MODE_DISABLED;
+module_param_named(aspm, aspm_mode, uint, 0444);
+MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
+
+static bool aspm_hw_l1_supported(struct hfi1_devdata *dd)
+{
+ struct pci_dev *parent = dd->pcidev->bus->self;
+ u32 up, dn;
+
+ /*
+ * If the driver does not have access to the upstream component,
+ * it cannot support ASPM L1 at all.
+ */
+ if (!parent)
+ return false;
+
+ pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &dn);
+ dn = ASPM_L1_SUPPORTED(dn);
+
+ pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &up);
+ up = ASPM_L1_SUPPORTED(up);
+
+ /* ASPM works on A-step but is reported as not supported */
+ return (!!dn || is_ax(dd)) && !!up;
+}
+
+/* Set L1 entrance latency for slower entry to L1 */
+static void aspm_hw_set_l1_ent_latency(struct hfi1_devdata *dd)
+{
+ u32 l1_ent_lat = 0x4u;
+ u32 reg32;
+
+ pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, &reg32);
+ reg32 &= ~PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK;
+ reg32 |= l1_ent_lat << PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT;
+ pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, reg32);
+}
+
+static void aspm_hw_enable_l1(struct hfi1_devdata *dd)
+{
+ struct pci_dev *parent = dd->pcidev->bus->self;
+
+ /*
+ * If the driver does not have access to the upstream component,
+ * it cannot support ASPM L1 at all.
+ */
+ if (!parent)
+ return;
+
+ /* Enable ASPM L1 first in upstream component and then downstream */
+ pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_ASPMC,
+ PCI_EXP_LNKCTL_ASPM_L1);
+ pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_ASPMC,
+ PCI_EXP_LNKCTL_ASPM_L1);
+}
+
+void aspm_hw_disable_l1(struct hfi1_devdata *dd)
+{
+ struct pci_dev *parent = dd->pcidev->bus->self;
+
+ /* Disable ASPM L1 first in downstream component and then upstream */
+ pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_ASPMC, 0x0);
+ if (parent)
+ pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_ASPMC, 0x0);
+}
+
+static void aspm_enable(struct hfi1_devdata *dd)
+{
+ if (dd->aspm_enabled || aspm_mode == ASPM_MODE_DISABLED ||
+ !dd->aspm_supported)
+ return;
+
+ aspm_hw_enable_l1(dd);
+ dd->aspm_enabled = true;
+}
+
+static void aspm_disable(struct hfi1_devdata *dd)
+{
+ if (!dd->aspm_enabled || aspm_mode == ASPM_MODE_ENABLED)
+ return;
+
+ aspm_hw_disable_l1(dd);
+ dd->aspm_enabled = false;
+}
+
+static void aspm_disable_inc(struct hfi1_devdata *dd)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->aspm_lock, flags);
+ aspm_disable(dd);
+ atomic_inc(&dd->aspm_disabled_cnt);
+ spin_unlock_irqrestore(&dd->aspm_lock, flags);
+}
+
+static void aspm_enable_dec(struct hfi1_devdata *dd)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->aspm_lock, flags);
+ if (atomic_dec_and_test(&dd->aspm_disabled_cnt))
+ aspm_enable(dd);
+ spin_unlock_irqrestore(&dd->aspm_lock, flags);
+}
+
+/* ASPM processing for each receive context interrupt */
+void __aspm_ctx_disable(struct hfi1_ctxtdata *rcd)
+{
+ bool restart_timer;
+ bool close_interrupts;
+ unsigned long flags;
+ ktime_t now, prev;
+
+ spin_lock_irqsave(&rcd->aspm_lock, flags);
+ /* PSM contexts are open */
+ if (!rcd->aspm_intr_enable)
+ goto unlock;
+
+ prev = rcd->aspm_ts_last_intr;
+ now = ktime_get();
+ rcd->aspm_ts_last_intr = now;
+
+ /* An interrupt pair close together in time */
+ close_interrupts = ktime_to_ns(ktime_sub(now, prev)) < ASPM_TRIGGER_NS;
+
+ /* Don't push out our timer till this much time has elapsed */
+ restart_timer = ktime_to_ns(ktime_sub(now, rcd->aspm_ts_timer_sched)) >
+ ASPM_RESCHED_TIMER_MS * NSEC_PER_MSEC;
+ restart_timer = restart_timer && close_interrupts;
+
+ /* Disable ASPM and schedule timer */
+ if (rcd->aspm_enabled && close_interrupts) {
+ aspm_disable_inc(rcd->dd);
+ rcd->aspm_enabled = false;
+ restart_timer = true;
+ }
+
+ if (restart_timer) {
+ mod_timer(&rcd->aspm_timer,
+ jiffies + msecs_to_jiffies(ASPM_TIMER_MS));
+ rcd->aspm_ts_timer_sched = now;
+ }
+unlock:
+ spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+}
+
+/* Timer function for re-enabling ASPM in the absence of interrupt activity */
+static void aspm_ctx_timer_function(struct timer_list *t)
+{
+ struct hfi1_ctxtdata *rcd = from_timer(rcd, t, aspm_timer);
+ unsigned long flags;
+
+ spin_lock_irqsave(&rcd->aspm_lock, flags);
+ aspm_enable_dec(rcd->dd);
+ rcd->aspm_enabled = true;
+ spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+}
+
+/*
+ * Disable interrupt processing for verbs contexts when PSM or VNIC contexts
+ * are open.
+ */
+void aspm_disable_all(struct hfi1_devdata *dd)
+{
+ struct hfi1_ctxtdata *rcd;
+ unsigned long flags;
+ u16 i;
+
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
+ if (rcd) {
+ del_timer_sync(&rcd->aspm_timer);
+ spin_lock_irqsave(&rcd->aspm_lock, flags);
+ rcd->aspm_intr_enable = false;
+ spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+ hfi1_rcd_put(rcd);
+ }
+ }
+
+ aspm_disable(dd);
+ atomic_set(&dd->aspm_disabled_cnt, 0);
+}
+
+/* Re-enable interrupt processing for verbs contexts */
+void aspm_enable_all(struct hfi1_devdata *dd)
+{
+ struct hfi1_ctxtdata *rcd;
+ unsigned long flags;
+ u16 i;
+
+ aspm_enable(dd);
+
+ if (aspm_mode != ASPM_MODE_DYNAMIC)
+ return;
+
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
+ if (rcd) {
+ spin_lock_irqsave(&rcd->aspm_lock, flags);
+ rcd->aspm_intr_enable = true;
+ rcd->aspm_enabled = true;
+ spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+ hfi1_rcd_put(rcd);
+ }
+ }
+}
+
+static void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
+{
+ spin_lock_init(&rcd->aspm_lock);
+ timer_setup(&rcd->aspm_timer, aspm_ctx_timer_function, 0);
+ rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
+ aspm_mode == ASPM_MODE_DYNAMIC &&
+ rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt;
+}
+
+void aspm_init(struct hfi1_devdata *dd)
+{
+ struct hfi1_ctxtdata *rcd;
+ u16 i;
+
+ spin_lock_init(&dd->aspm_lock);
+ dd->aspm_supported = aspm_hw_l1_supported(dd);
+
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
+ if (rcd)
+ aspm_ctx_init(rcd);
+ hfi1_rcd_put(rcd);
+ }
+
+ /* Start with ASPM disabled */
+ aspm_hw_set_l1_ent_latency(dd);
+ dd->aspm_enabled = false;
+ aspm_hw_disable_l1(dd);
+
+ /* Now turn on ASPM if configured */
+ aspm_enable_all(dd);
+}
+
+void aspm_exit(struct hfi1_devdata *dd)
+{
+ aspm_disable_all(dd);
+
+ /* Turn on ASPM on exit to conserve power */
+ aspm_enable(dd);
+}
+
diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h
index e8133870ee87..75d5d18da3da 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -57,266 +57,20 @@ enum aspm_mode {
ASPM_MODE_DYNAMIC = 2, /* ASPM enabled/disabled dynamically */
};
-/* Time after which the timer interrupt will re-enable ASPM */
-#define ASPM_TIMER_MS 1000
-/* Time for which interrupts are ignored after a timer has been scheduled */
-#define ASPM_RESCHED_TIMER_MS (ASPM_TIMER_MS / 2)
-/* Two interrupts within this time trigger ASPM disable */
-#define ASPM_TRIGGER_MS 1
-#define ASPM_TRIGGER_NS (ASPM_TRIGGER_MS * 1000 * 1000ull)
-#define ASPM_L1_SUPPORTED(reg) \
- (((reg & PCI_EXP_LNKCAP_ASPMS) >> 10) & 0x2)
+void aspm_init(struct hfi1_devdata *dd);
+void aspm_exit(struct hfi1_devdata *dd);
+void aspm_hw_disable_l1(struct hfi1_devdata *dd);
+void __aspm_ctx_disable(struct hfi1_ctxtdata *rcd);
+void aspm_disable_all(struct hfi1_devdata *dd);
+void aspm_enable_all(struct hfi1_devdata *dd);
-static inline bool aspm_hw_l1_supported(struct hfi1_devdata *dd)
-{
- struct pci_dev *parent = dd->pcidev->bus->self;
- u32 up, dn;
-
- /*
- * If the driver does not have access to the upstream component,
- * it cannot support ASPM L1 at all.
- */
- if (!parent)
- return false;
-
- pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &dn);
- dn = ASPM_L1_SUPPORTED(dn);
-
- pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &up);
- up = ASPM_L1_SUPPORTED(up);
-
- /* ASPM works on A-step but is reported as not supported */
- return (!!dn || is_ax(dd)) && !!up;
-}
-
-/* Set L1 entrance latency for slower entry to L1 */
-static inline void aspm_hw_set_l1_ent_latency(struct hfi1_devdata *dd)
-{
- u32 l1_ent_lat = 0x4u;
- u32 reg32;
-
- pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, &reg32);
- reg32 &= ~PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK;
- reg32 |= l1_ent_lat << PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT;
- pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, reg32);
-}
-
-static inline void aspm_hw_enable_l1(struct hfi1_devdata *dd)
-{
- struct pci_dev *parent = dd->pcidev->bus->self;
-
- /*
- * If the driver does not have access to the upstream component,
- * it cannot support ASPM L1 at all.
- */
- if (!parent)
- return;
-
- /* Enable ASPM L1 first in upstream component and then downstream */
- pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_ASPMC,
- PCI_EXP_LNKCTL_ASPM_L1);
- pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_ASPMC,
- PCI_EXP_LNKCTL_ASPM_L1);
-}
-
-static inline void aspm_hw_disable_l1(struct hfi1_devdata *dd)
-{
- struct pci_dev *parent = dd->pcidev->bus->self;
-
- /* Disable ASPM L1 first in downstream component and then upstream */
- pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_ASPMC, 0x0);
- if (parent)
- pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_ASPMC, 0x0);
-}
-
-static inline void aspm_enable(struct hfi1_devdata *dd)
-{
- if (dd->aspm_enabled || aspm_mode == ASPM_MODE_DISABLED ||
- !dd->aspm_supported)
- return;
-
- aspm_hw_enable_l1(dd);
- dd->aspm_enabled = true;
-}
-
-static inline void aspm_disable(struct hfi1_devdata *dd)
-{
- if (!dd->aspm_enabled || aspm_mode == ASPM_MODE_ENABLED)
- return;
-
- aspm_hw_disable_l1(dd);
- dd->aspm_enabled = false;
-}
-
-static inline void aspm_disable_inc(struct hfi1_devdata *dd)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&dd->aspm_lock, flags);
- aspm_disable(dd);
- atomic_inc(&dd->aspm_disabled_cnt);
- spin_unlock_irqrestore(&dd->aspm_lock, flags);
-}
-
-static inline void aspm_enable_dec(struct hfi1_devdata *dd)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&dd->aspm_lock, flags);
- if (atomic_dec_and_test(&dd->aspm_disabled_cnt))
- aspm_enable(dd);
- spin_unlock_irqrestore(&dd->aspm_lock, flags);
-}
-
-/* ASPM processing for each receive context interrupt */
static inline void aspm_ctx_disable(struct hfi1_ctxtdata *rcd)
{
- bool restart_timer;
- bool close_interrupts;
- unsigned long flags;
- ktime_t now, prev;
-
/* Quickest exit for minimum impact */
- if (!rcd->aspm_intr_supported)
- return;
-
- spin_lock_irqsave(&rcd->aspm_lock, flags);
- /* PSM contexts are open */
- if (!rcd->aspm_intr_enable)
- goto unlock;
-
- prev = rcd->aspm_ts_last_intr;
- now = ktime_get();
- rcd->aspm_ts_last_intr = now;
-
- /* An interrupt pair close together in time */
- close_interrupts = ktime_to_ns(ktime_sub(now, prev)) < ASPM_TRIGGER_NS;
-
- /* Don't push out our timer till this much time has elapsed */
- restart_timer = ktime_to_ns(ktime_sub(now, rcd->aspm_ts_timer_sched)) >
- ASPM_RESCHED_TIMER_MS * NSEC_PER_MSEC;
- restart_timer = restart_timer && close_interrupts;
-
- /* Disable ASPM and schedule timer */
- if (rcd->aspm_enabled && close_interrupts) {
- aspm_disable_inc(rcd->dd);
- rcd->aspm_enabled = false;
- restart_timer = true;
- }
-
- if (restart_timer) {
- mod_timer(&rcd->aspm_timer,
- jiffies + msecs_to_jiffies(ASPM_TIMER_MS));
- rcd->aspm_ts_timer_sched = now;
- }
-unlock:
- spin_unlock_irqrestore(&rcd->aspm_lock, flags);
-}
-
-/* Timer function for re-enabling ASPM in the absence of interrupt activity */
-static inline void aspm_ctx_timer_function(struct timer_list *t)
-{
- struct hfi1_ctxtdata *rcd = from_timer(rcd, t, aspm_timer);
- unsigned long flags;
-
- spin_lock_irqsave(&rcd->aspm_lock, flags);
- aspm_enable_dec(rcd->dd);
- rcd->aspm_enabled = true;
- spin_unlock_irqrestore(&rcd->aspm_lock, flags);
-}
-
-/*
- * Disable interrupt processing for verbs contexts when PSM or VNIC contexts
- * are open.
- */
-static inline void aspm_disable_all(struct hfi1_devdata *dd)
-{
- struct hfi1_ctxtdata *rcd;
- unsigned long flags;
- u16 i;
-
- for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
- rcd = hfi1_rcd_get_by_index(dd, i);
- if (rcd) {
- del_timer_sync(&rcd->aspm_timer);
- spin_lock_irqsave(&rcd->aspm_lock, flags);
- rcd->aspm_intr_enable = false;
- spin_unlock_irqrestore(&rcd->aspm_lock, flags);
- hfi1_rcd_put(rcd);
- }
- }
-
- aspm_disable(dd);
- atomic_set(&dd->aspm_disabled_cnt, 0);
-}
-
-/* Re-enable interrupt processing for verbs contexts */
-static inline void aspm_enable_all(struct hfi1_devdata *dd)
-{
- struct hfi1_ctxtdata *rcd;
- unsigned long flags;
- u16 i;
-
- aspm_enable(dd);
-
- if (aspm_mode != ASPM_MODE_DYNAMIC)
+ if (likely(!rcd->aspm_intr_supported))
return;
- for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
- rcd = hfi1_rcd_get_by_index(dd, i);
- if (rcd) {
- spin_lock_irqsave(&rcd->aspm_lock, flags);
- rcd->aspm_intr_enable = true;
- rcd->aspm_enabled = true;
- spin_unlock_irqrestore(&rcd->aspm_lock, flags);
- hfi1_rcd_put(rcd);
- }
- }
-}
-
-static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
-{
- spin_lock_init(&rcd->aspm_lock);
- timer_setup(&rcd->aspm_timer, aspm_ctx_timer_function, 0);
- rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
- aspm_mode == ASPM_MODE_DYNAMIC &&
- rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt;
-}
-
-static inline void aspm_init(struct hfi1_devdata *dd)
-{
- struct hfi1_ctxtdata *rcd;
- u16 i;
-
- spin_lock_init(&dd->aspm_lock);
- dd->aspm_supported = aspm_hw_l1_supported(dd);
-
- for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
- rcd = hfi1_rcd_get_by_index(dd, i);
- if (rcd)
- aspm_ctx_init(rcd);
- hfi1_rcd_put(rcd);
- }
-
- /* Start with ASPM disabled */
- aspm_hw_set_l1_ent_latency(dd);
- dd->aspm_enabled = false;
- aspm_hw_disable_l1(dd);
-
- /* Now turn on ASPM if configured */
- aspm_enable_all(dd);
-}
-
-static inline void aspm_exit(struct hfi1_devdata *dd)
-{
- aspm_disable_all(dd);
-
- /* Turn on ASPM on exit to conserve power */
- aspm_enable(dd);
+ __aspm_ctx_disable(rcd);
}
#endif /* _ASPM_H */
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 15efb4a380b2..d268bf9c42ee 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -987,9 +987,6 @@ static int __i2c_debugfs_open(struct inode *in, struct file *fp, u32 target)
struct hfi1_pportdata *ppd;
int ret;
- if (!try_module_get(THIS_MODULE))
- return -ENODEV;
-
ppd = private2ppd(fp);
ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0);
@@ -1155,6 +1152,7 @@ static int exprom_wp_debugfs_release(struct inode *in, struct file *fp)
{ \
.name = nm, \
.ops = { \
+ .owner = THIS_MODULE, \
.read = readroutine, \
.write = writeroutine, \
.llseek = generic_file_llseek, \
@@ -1165,6 +1163,7 @@ static int exprom_wp_debugfs_release(struct inode *in, struct file *fp)
{ \
.name = nm, \
.ops = { \
+ .owner = THIS_MODULE, \
.read = readf, \
.write = writef, \
.llseek = generic_file_llseek, \
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 4228393e6c4c..184dba3c2828 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -2744,8 +2744,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
u16 link_width;
u16 link_speed;
- response_data_size = sizeof(struct opa_port_status_rsp) +
- num_vls * sizeof(struct _vls_pctrs);
+ response_data_size = struct_size(rsp, vls, num_vls);
if (response_data_size > sizeof(pmp->data)) {
pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE;
return reply((struct ib_mad_hdr *)pmp);
@@ -3014,8 +3013,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
}
/* Sanity check */
- response_data_size = sizeof(struct opa_port_data_counters_msg) +
- num_vls * sizeof(struct _vls_dctrs);
+ response_data_size = struct_size(req, port[0].vls, num_vls);
if (response_data_size > sizeof(pmp->data)) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3232,8 +3230,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
- response_data_size = sizeof(struct opa_port_error_counters64_msg) +
- num_vls * sizeof(struct _vls_ectrs);
+ response_data_size = struct_size(req, port[0].vls, num_vls);
if (response_data_size > sizeof(pmp->data)) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index c96d193bb236..61aa5504d7c3 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2019 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -450,10 +450,6 @@ static int hfi1_pcie_caps;
module_param_named(pcie_caps, hfi1_pcie_caps, int, 0444);
MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
-uint aspm_mode = ASPM_MODE_DISABLED;
-module_param_named(aspm, aspm_mode, uint, 0444);
-MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
-
/**
* tune_pcie_caps() - Code to adjust PCIe capabilities.
* @dd: Valid device data structure
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 4e5c2d1b8cfa..79126b2b14ab 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1594,9 +1594,8 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint)
else
sc_del_credit_return_intr(sc);
trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl);
- if (needint) {
+ if (needint)
sc_return_credits(sc);
- }
}
/**
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 4e0e9fc0a777..f8e733aa3bb8 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2019 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -348,7 +348,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
break;
case IB_QPT_GSI:
case IB_QPT_UD:
- ah = ibah_to_rvtah(wqe->ud_wr.ah);
+ ah = rvt_get_swqe_ah(wqe);
if (wqe->length > (1 << ah->log_pmtu))
return -EINVAL;
if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf)
@@ -702,8 +702,8 @@ void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
sde ? sde->this_idx : 0,
send_context,
send_context ? send_context->sw_index : 0,
- ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head,
- ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail,
+ ib_cq_head(qp->ibqp.send_cq),
+ ib_cq_tail(qp->ibqp.send_cq),
qp->pid,
qp->s_state,
qp->s_ack_state,
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 7c8cfb149da0..0477c14633ab 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -1830,23 +1830,14 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
}
while (qp->s_last != qp->s_acked) {
- u32 s_last;
-
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
break;
trdma_clean_swqe(qp, wqe);
rvt_qp_wqe_unreserve(qp, wqe);
- s_last = qp->s_last;
- trace_hfi1_qp_send_completion(qp, wqe, s_last);
- if (++s_last >= qp->s_size)
- s_last = 0;
- qp->s_last = s_last;
- /* see post_send() */
- barrier();
- rvt_put_qp_swqe(qp, wqe);
- rvt_qp_swqe_complete(qp,
+ trace_hfi1_qp_send_completion(qp, wqe, qp->s_last);
+ rvt_qp_complete_swqe(qp,
wqe,
ib_hfi1_wc_opcode[wqe->wr.opcode],
IB_WC_SUCCESS);
@@ -1890,19 +1881,10 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
trace_hfi1_rc_completion(qp, wqe->lpsn);
if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
- u32 s_last;
-
trdma_clean_swqe(qp, wqe);
- rvt_put_qp_swqe(qp, wqe);
rvt_qp_wqe_unreserve(qp, wqe);
- s_last = qp->s_last;
- trace_hfi1_qp_send_completion(qp, wqe, s_last);
- if (++s_last >= qp->s_size)
- s_last = 0;
- qp->s_last = s_last;
- /* see post_send() */
- barrier();
- rvt_qp_swqe_complete(qp,
+ trace_hfi1_qp_send_completion(qp, wqe, qp->s_last);
+ rvt_qp_complete_swqe(qp,
wqe,
ib_hfi1_wc_opcode[wqe->wr.opcode],
IB_WC_SUCCESS);
@@ -3026,8 +3008,7 @@ send_last:
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- ib_bth_is_solicited(ohdr));
+ rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
break;
case OP(RDMA_WRITE_ONLY):
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index aa9c8d3ef87b..92acccaaaa86 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -475,7 +475,7 @@ static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd,
* Must hold the qp s_lock and the exp_lock.
*
* Return:
- * false if either of the conditions below are statisfied:
+ * false if either of the conditions below are satisfied:
* 1. The list is empty or
* 2. The indicated qp is at the head of the list and the
* HFI1_S_WAIT_TID_SPACE bit is set in qp->s_flags.
@@ -2024,7 +2024,6 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn, e->lpsn, req);
if (e->opcode == TID_OP(READ_REQ)) {
struct ib_reth *reth;
- u32 offset;
u32 len;
u32 rkey;
u64 vaddr;
@@ -2036,7 +2035,6 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
* The requester always restarts from the start of the original
* request.
*/
- offset = delta_psn(psn, e->psn) * qp->pmtu;
len = be32_to_cpu(reth->length);
if (psn != e->psn || len != req->total_len)
goto unlock;
@@ -4550,7 +4548,7 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
struct rvt_swqe *wqe;
struct tid_rdma_request *req;
struct tid_rdma_flow *flow;
- u32 aeth, psn, req_psn, ack_psn, fspsn, resync_psn, ack_kpsn;
+ u32 aeth, psn, req_psn, ack_psn, resync_psn, ack_kpsn;
unsigned long flags;
u16 fidx;
@@ -4754,7 +4752,6 @@ done:
IB_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
flow = &req->flows[req->acked_tail];
- fspsn = full_flow_psn(flow, flow->flow_state.spsn);
trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail,
flow);
req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index d1372cc66de6..2f84290a88ca 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -79,6 +79,8 @@ __print_symbolic(opcode, \
ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
ib_opcode_name(RC_COMPARE_SWAP), \
ib_opcode_name(RC_FETCH_ADD), \
+ ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE), \
+ ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE), \
ib_opcode_name(TID_RDMA_WRITE_REQ), \
ib_opcode_name(TID_RDMA_WRITE_RESP), \
ib_opcode_name(TID_RDMA_WRITE_DATA), \
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 4ed4fcfabd6c..0c77f18120ed 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -476,8 +476,7 @@ last_imm:
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- ib_bth_is_solicited(ohdr));
+ rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
break;
case OP(RDMA_WRITE_FIRST):
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 4cb0fce5c096..e804af71b629 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2019 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -87,7 +87,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
rcu_read_lock();
qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
- swqe->ud_wr.remote_qpn);
+ rvt_get_swqe_remote_qpn(swqe));
if (!qp) {
ibp->rvp.n_pkt_drops++;
rcu_read_unlock();
@@ -105,7 +105,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
goto drop;
}
- ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr;
+ ah_attr = rvt_get_swqe_ah_attr(swqe);
ppd = ppd_from_ibp(ibp);
if (qp->ibqp.qp_num > 1) {
@@ -135,8 +135,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (qp->ibqp.qp_num) {
u32 qkey;
- qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
- sqp->qkey : swqe->ud_wr.remote_qkey;
+ qkey = (int)rvt_get_swqe_remote_qkey(swqe) < 0 ?
+ sqp->qkey : rvt_get_swqe_remote_qkey(swqe);
if (unlikely(qkey != qp->qkey))
goto drop; /* silently drop per IBTA spec */
}
@@ -240,7 +240,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) {
if (sqp->ibqp.qp_type == IB_QPT_GSI ||
sqp->ibqp.qp_type == IB_QPT_SMI)
- wc.pkey_index = swqe->ud_wr.pkey_index;
+ wc.pkey_index = rvt_get_swqe_pkey_index(swqe);
else
wc.pkey_index = sqp->s_pkey_index;
} else {
@@ -255,8 +255,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- swqe->wr.send_flags & IB_SEND_SOLICITED);
+ rvt_recv_cq(qp, &wc, swqe->wr.send_flags & IB_SEND_SOLICITED);
ibp->rvp.n_loop_pkts++;
bail_unlock:
spin_unlock_irqrestore(&qp->r_lock, flags);
@@ -283,20 +282,21 @@ static void hfi1_make_bth_deth(struct rvt_qp *qp, struct rvt_swqe *wqe,
bth0 |= IB_BTH_SOLICITED;
bth0 |= extra_bytes << 20;
if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI)
- *pkey = hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index);
+ *pkey = hfi1_get_pkey(ibp, rvt_get_swqe_pkey_index(wqe));
else
*pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
if (!bypass)
bth0 |= *pkey;
ohdr->bth[0] = cpu_to_be32(bth0);
- ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn);
+ ohdr->bth[1] = cpu_to_be32(rvt_get_swqe_remote_qpn(wqe));
ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn));
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
*/
- ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
- qp->qkey : wqe->ud_wr.remote_qkey);
+ ohdr->u.ud.deth[0] =
+ cpu_to_be32((int)rvt_get_swqe_remote_qkey(wqe) < 0 ? qp->qkey :
+ rvt_get_swqe_remote_qkey(wqe));
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
}
@@ -316,7 +316,7 @@ void hfi1_make_ud_req_9B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
- ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
+ ah_attr = rvt_get_swqe_ah_attr(wqe);
extra_bytes = -wqe->length & 3;
nwords = ((wqe->length + extra_bytes) >> 2) + SIZE_OF_CRC;
@@ -380,7 +380,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp;
u32 dlid, slid, nwords, extra_bytes;
- u32 dest_qp = wqe->ud_wr.remote_qpn;
+ u32 dest_qp = rvt_get_swqe_remote_qpn(wqe);
u32 src_qp = qp->ibqp.qp_num;
u16 len, pkey;
u8 l4, sc5;
@@ -388,7 +388,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
- ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
+ ah_attr = rvt_get_swqe_ah_attr(wqe);
/*
* Build 16B Management Packet if either the destination
@@ -450,7 +450,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (is_mgmt) {
l4 = OPA_16B_L4_FM;
- pkey = hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index);
+ pkey = hfi1_get_pkey(ibp, rvt_get_swqe_pkey_index(wqe));
hfi1_16B_set_qpn(&ps->s_txreq->phdr.hdr.opah.u.mgmt,
dest_qp, src_qp);
} else {
@@ -515,7 +515,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
/* Construct the header. */
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
- ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
+ ah_attr = rvt_get_swqe_ah_attr(wqe);
priv->hdr_type = hfi1_get_hdr_type(ppd->lid, ah_attr);
if ((!hfi1_check_mcast(rdma_ah_get_dlid(ah_attr))) ||
(rdma_ah_get_dlid(ah_attr) == be32_to_cpu(OPA_LID_PERMISSIVE))) {
@@ -1061,7 +1061,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, solicited);
+ rvt_recv_cq(qp, &wc, solicited);
return;
drop:
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 02eee8eff1db..b89a9b9aef7a 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -118,13 +118,10 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
size_t npages, bool dirty)
{
- size_t i;
-
- for (i = 0; i < npages; i++) {
- if (dirty)
- set_page_dirty_lock(p[i]);
- put_page(p[i]);
- }
+ if (dirty)
+ put_user_pages_dirty_lock(p, npages);
+ else
+ put_user_pages(p, npages);
if (mm) { /* during close after signal, mm can be NULL */
atomic64_sub(npages, &mm->pinned_vm);
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index bad3229bad37..c4b243f50c76 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1779,6 +1779,9 @@ static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
}
static const struct ib_device_ops hfi1_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_HFI1,
+
.alloc_hw_stats = alloc_hw_stats,
.alloc_rdma_netdev = hfi1_vnic_alloc_rn,
.get_dev_fw_str = hfi1_get_dev_fw_str,
@@ -1829,7 +1832,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
*/
if (!ib_hfi1_sys_image_guid)
ib_hfi1_sys_image_guid = ibdev->node_guid;
- ibdev->owner = THIS_MODULE;
ibdev->phys_port_cnt = dd->num_pports;
ibdev->dev.parent = &dd->pcidev->dev;
@@ -1923,7 +1925,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev,
&ib_hfi1_attr_group);
- ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1);
+ ret = rvt_register_device(&dd->verbs_dev.rdi);
if (ret)
goto err_verbs_txreq;
diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig
index 61cda7d00627..8bf847bcd8d3 100644
--- a/drivers/infiniband/hw/hns/Kconfig
+++ b/drivers/infiniband/hw/hns/Kconfig
@@ -8,25 +8,24 @@ config INFINIBAND_HNS
is used in Hisilicon Hip06 and more further ICT SoC based on
platform device.
- To compile this driver as a module, choose M here: the module
- will be called hns-roce.
+ To compile HIP06 or HIP08 driver as module, choose M here.
config INFINIBAND_HNS_HIP06
- tristate "Hisilicon Hip06 Family RoCE support"
+ bool "Hisilicon Hip06 Family RoCE support"
depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET
---help---
RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and
Hip07 SoC. These RoCE engines are platform devices.
- To compile this driver as a module, choose M here: the module
- will be called hns-roce-hw-v1.
+ To compile this driver, choose Y here: if INFINIBAND_HNS is m, this
+ module will be called hns-roce-hw-v1
config INFINIBAND_HNS_HIP08
- tristate "Hisilicon Hip08 Family RoCE support"
+ bool "Hisilicon Hip08 Family RoCE support"
depends on INFINIBAND_HNS && PCI && HNS3
---help---
RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC.
The RoCE engine is a PCI device.
- To compile this driver as a module, choose M here: the module
- will be called hns-roce-hw-v2.
+ To compile this driver, choose Y here: if INFINIBAND_HNS is m, this
+ module will be called hns-roce-hw-v2.
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index f22d9922cfee..e105945b94a1 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -5,11 +5,16 @@
ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
-obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o
hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o
-obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o
-hns-roce-hw-v1-objs := hns_roce_hw_v1.o
-obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o
-hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o
+
+ifdef CONFIG_INFINIBAND_HNS_HIP06
+hns-roce-hw-v1-objs := hns_roce_hw_v1.o $(hns-roce-objs)
+obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v1.o
+endif
+
+ifdef CONFIG_INFINIBAND_HNS_HIP08
+hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o $(hns-roce-objs)
+obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o
+endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index dac058d3df53..8c063c598d2a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -34,6 +34,7 @@
#include <linux/platform_device.h>
#include <linux/vmalloc.h>
#include "hns_roce_device.h"
+#include <rdma/ib_umem.h>
int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj)
{
@@ -67,7 +68,6 @@ void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
{
hns_roce_bitmap_free_range(bitmap, obj, 1, rr);
}
-EXPORT_SYMBOL_GPL(hns_roce_bitmap_free);
int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt,
int align, unsigned long *obj)
@@ -174,7 +174,6 @@ void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
kfree(buf->page_list);
}
}
-EXPORT_SYMBOL_GPL(hns_roce_buf_free);
int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
struct hns_roce_buf *buf, u32 page_shift)
@@ -238,6 +237,104 @@ err_free:
return -ENOMEM;
}
+int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
+ int buf_cnt, int start, struct hns_roce_buf *buf)
+{
+ int i, end;
+ int total;
+
+ end = start + buf_cnt;
+ if (end > buf->npages) {
+ dev_err(hr_dev->dev,
+ "invalid kmem region,offset %d,buf_cnt %d,total %d!\n",
+ start, buf_cnt, buf->npages);
+ return -EINVAL;
+ }
+
+ total = 0;
+ for (i = start; i < end; i++)
+ if (buf->nbufs == 1)
+ bufs[total++] = buf->direct.map +
+ ((dma_addr_t)i << buf->page_shift);
+ else
+ bufs[total++] = buf->page_list[i].map;
+
+ return total;
+}
+
+int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
+ int buf_cnt, int start, struct ib_umem *umem,
+ int page_shift)
+{
+ struct ib_block_iter biter;
+ int total = 0;
+ int idx = 0;
+ u64 addr;
+
+ if (page_shift < PAGE_SHIFT) {
+ dev_err(hr_dev->dev, "invalid page shift %d!\n", page_shift);
+ return -EINVAL;
+ }
+
+ /* convert system page cnt to hw page cnt */
+ rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap,
+ 1 << page_shift) {
+ addr = rdma_block_iter_dma_address(&biter);
+ if (idx >= start) {
+ bufs[total++] = addr;
+ if (total >= buf_cnt)
+ goto done;
+ }
+ idx++;
+ }
+
+done:
+ return total;
+}
+
+void hns_roce_init_buf_region(struct hns_roce_buf_region *region, int hopnum,
+ int offset, int buf_cnt)
+{
+ if (hopnum == HNS_ROCE_HOP_NUM_0)
+ region->hopnum = 0;
+ else
+ region->hopnum = hopnum;
+
+ region->offset = offset;
+ region->count = buf_cnt;
+}
+
+void hns_roce_free_buf_list(dma_addr_t **bufs, int region_cnt)
+{
+ int i;
+
+ for (i = 0; i < region_cnt; i++) {
+ kfree(bufs[i]);
+ bufs[i] = NULL;
+ }
+}
+
+int hns_roce_alloc_buf_list(struct hns_roce_buf_region *regions,
+ dma_addr_t **bufs, int region_cnt)
+{
+ struct hns_roce_buf_region *r;
+ int i;
+
+ for (i = 0; i < region_cnt; i++) {
+ r = &regions[i];
+ bufs[i] = kcalloc(r->count, sizeof(dma_addr_t), GFP_KERNEL);
+ if (!bufs[i])
+ goto err_alloc;
+ }
+
+ return 0;
+
+err_alloc:
+ hns_roce_free_buf_list(bufs, i);
+
+ return -ENOMEM;
+}
+
void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
{
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 2acf946d02e5..0cd09bf4d7ea 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -103,7 +103,6 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
context->out_param = out_param;
complete(&context->done);
}
-EXPORT_SYMBOL_GPL(hns_roce_cmd_event);
/* this should be called with "use_events" */
static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param,
@@ -162,7 +161,7 @@ static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param,
u64 out_param, unsigned long in_modifier,
u8 op_modifier, u16 op, unsigned long timeout)
{
- int ret = 0;
+ int ret;
down(&hr_dev->cmd.event_sem);
ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param,
@@ -204,7 +203,6 @@ int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
return ret;
}
-EXPORT_SYMBOL_GPL(hns_roce_cmd_mbox);
int hns_roce_cmd_init(struct hns_roce_dev *hr_dev)
{
@@ -291,7 +289,6 @@ struct hns_roce_cmd_mailbox
return mailbox;
}
-EXPORT_SYMBOL_GPL(hns_roce_alloc_cmd_mailbox);
void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev,
struct hns_roce_cmd_mailbox *mailbox)
@@ -302,4 +299,3 @@ void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev,
dma_pool_free(hr_dev->cmd.pool, mailbox->buf, mailbox->dma);
kfree(mailbox);
}
-EXPORT_SYMBOL_GPL(hns_roce_free_cmd_mailbox);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 9caf35061721..4e50c22a2da4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -205,7 +205,6 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
}
-EXPORT_SYMBOL_GPL(hns_roce_free_cq);
static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
struct ib_udata *udata,
@@ -235,8 +234,7 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
&buf->hr_mtt);
} else {
ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem),
- (*umem)->page_shift,
- &buf->hr_mtt);
+ PAGE_SHIFT, &buf->hr_mtt);
}
if (ret)
goto err_buf;
@@ -300,15 +298,15 @@ static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev,
&buf->hr_buf);
}
-struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int hns_roce_ib_create_cq(struct ib_cq *ib_cq,
+ const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct device *dev = hr_dev->dev;
struct hns_roce_ib_create_cq ucmd;
struct hns_roce_ib_create_cq_resp resp = {};
- struct hns_roce_cq *hr_cq = NULL;
+ struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
struct hns_roce_uar *uar = NULL;
int vector = attr->comp_vector;
int cq_entries = attr->cqe;
@@ -319,13 +317,9 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) {
dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n",
cq_entries, hr_dev->caps.max_cqes);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
- hr_cq = kzalloc(sizeof(*hr_cq), GFP_KERNEL);
- if (!hr_cq)
- return ERR_PTR(-ENOMEM);
-
if (hr_dev->caps.min_cqes)
cq_entries = max(cq_entries, hr_dev->caps.min_cqes);
@@ -416,7 +410,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
goto err_cqc;
}
- return &hr_cq->ib_cq;
+ return 0;
err_cqc:
hns_roce_free_cq(hr_dev, hr_cq);
@@ -428,9 +422,8 @@ err_dbmap:
err_mtt:
hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
- if (udata)
- ib_umem_release(hr_cq->umem);
- else
+ ib_umem_release(hr_cq->umem);
+ if (!udata)
hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf,
hr_cq->ib_cq.cqe);
@@ -439,47 +432,37 @@ err_db:
hns_roce_free_db(hr_dev, &hr_cq->db);
err_cq:
- kfree(hr_cq);
- return ERR_PTR(ret);
+ return ret;
}
-EXPORT_SYMBOL_GPL(hns_roce_ib_create_cq);
-int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
- int ret = 0;
if (hr_dev->hw->destroy_cq) {
- ret = hr_dev->hw->destroy_cq(ib_cq, udata);
- } else {
- hns_roce_free_cq(hr_dev, hr_cq);
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
-
- if (udata) {
- ib_umem_release(hr_cq->umem);
-
- if (hr_cq->db_en == 1)
- hns_roce_db_unmap_user(
- rdma_udata_to_drv_context(
- udata,
- struct hns_roce_ucontext,
- ibucontext),
- &hr_cq->db);
- } else {
- /* Free the buff of stored cq */
- hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf,
- ib_cq->cqe);
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)
- hns_roce_free_db(hr_dev, &hr_cq->db);
- }
-
- kfree(hr_cq);
+ hr_dev->hw->destroy_cq(ib_cq, udata);
+ return;
}
- return ret;
+ hns_roce_free_cq(hr_dev, hr_cq);
+ hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+
+ ib_umem_release(hr_cq->umem);
+ if (udata) {
+ if (hr_cq->db_en == 1)
+ hns_roce_db_unmap_user(rdma_udata_to_drv_context(
+ udata,
+ struct hns_roce_ucontext,
+ ibucontext),
+ &hr_cq->db);
+ } else {
+ /* Free the buff of stored cq */
+ hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)
+ hns_roce_free_db(hr_dev, &hr_cq->db);
+ }
}
-EXPORT_SYMBOL_GPL(hns_roce_ib_destroy_cq);
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
{
@@ -495,7 +478,6 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
++cq->arm_sn;
cq->comp(cq);
}
-EXPORT_SYMBOL_GPL(hns_roce_cq_completion);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
{
@@ -517,7 +499,6 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
if (atomic_dec_and_test(&cq->refcount))
complete(&cq->free);
}
-EXPORT_SYMBOL_GPL(hns_roce_cq_event);
int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
{
diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c
index 0c6c1fe87705..627aa46ef683 100644
--- a/drivers/infiniband/hw/hns/hns_roce_db.c
+++ b/drivers/infiniband/hw/hns/hns_roce_db.c
@@ -51,7 +51,6 @@ out:
return ret;
}
-EXPORT_SYMBOL(hns_roce_db_map_user);
void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
struct hns_roce_db *db)
@@ -67,7 +66,6 @@ void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
mutex_unlock(&context->page_mutex);
}
-EXPORT_SYMBOL(hns_roce_db_unmap_user);
static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir(
struct device *dma_device)
@@ -78,7 +76,8 @@ static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir(
if (!pgdir)
return NULL;
- bitmap_fill(pgdir->order1, HNS_ROCE_DB_PER_PAGE / 2);
+ bitmap_fill(pgdir->order1,
+ HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT);
pgdir->bits[0] = pgdir->order0;
pgdir->bits[1] = pgdir->order1;
pgdir->page = dma_alloc_coherent(dma_device, PAGE_SIZE,
@@ -116,7 +115,7 @@ found:
db->u.pgdir = pgdir;
db->index = i;
db->db_record = pgdir->page + db->index;
- db->dma = pgdir->db_dma + db->index * 4;
+ db->dma = pgdir->db_dma + db->index * HNS_ROCE_DB_UNIT_SIZE;
db->order = order;
return 0;
@@ -150,7 +149,6 @@ out:
return ret;
}
-EXPORT_SYMBOL_GPL(hns_roce_alloc_db);
void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db)
{
@@ -170,7 +168,8 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db)
i >>= o;
set_bit(i, db->u.pgdir->bits[o]);
- if (bitmap_full(db->u.pgdir->order1, HNS_ROCE_DB_PER_PAGE / 2)) {
+ if (bitmap_full(db->u.pgdir->order1,
+ HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT)) {
dma_free_coherent(hr_dev->dev, PAGE_SIZE, db->u.pgdir->page,
db->u.pgdir->db_dma);
list_del(&db->u.pgdir->list);
@@ -179,4 +178,3 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db)
mutex_unlock(&hr_dev->pgdir_mutex);
}
-EXPORT_SYMBOL_GPL(hns_roce_free_db);
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 563cf39df6d5..a548b28aab63 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -37,9 +37,12 @@
#define DRV_NAME "hns_roce"
+/* hip08 is a pci device, it includes two version according pci version id */
+#define PCI_REVISION_ID_HIP08_A 0x20
+#define PCI_REVISION_ID_HIP08_B 0x21
+
#define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6')
-#define MAC_ADDR_OCTET_NUM 6
#define HNS_ROCE_MAX_MSG_LEN 0x80000000
#define HNS_ROCE_ALOGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b))
@@ -48,6 +51,10 @@
#define HNS_ROCE_BA_SIZE (32 * 4096)
+#define BA_BYTE_LEN 8
+
+#define BITS_PER_BYTE 8
+
/* Hardware specification only for v1 engine */
#define HNS_ROCE_MIN_CQE_NUM 0x40
#define HNS_ROCE_MIN_WQE_NUM 0x20
@@ -55,6 +62,7 @@
/* Hardware specification only for v1 engine */
#define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7
#define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000
+#define HNS_ROCE_MAX_SGE_NUM 2
#define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS 20
#define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT \
@@ -64,6 +72,9 @@
#define HNS_ROCE_MAX_IRQ_NUM 128
+#define HNS_ROCE_SGE_IN_WQE 2
+#define HNS_ROCE_SGE_SHIFT 4
+
#define EQ_ENABLE 1
#define EQ_DISABLE 0
@@ -81,6 +92,7 @@
#define HNS_ROCE_MAX_PORTS 6
#define HNS_ROCE_MAX_GID_NUM 16
#define HNS_ROCE_GID_SIZE 16
+#define HNS_ROCE_SGE_SIZE 16
#define HNS_ROCE_HOP_NUM_0 0xff
@@ -111,6 +123,8 @@
#define PAGES_SHIFT_24 24
#define PAGES_SHIFT_32 32
+#define HNS_ROCE_PCI_BAR_NUM 2
+
#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4
#define SRQ_DB_REG 0x230
@@ -213,6 +227,9 @@ enum hns_roce_mtt_type {
MTT_TYPE_IDX
};
+#define HNS_ROCE_DB_TYPE_COUNT 2
+#define HNS_ROCE_DB_UNIT_SIZE 4
+
enum {
HNS_ROCE_DB_PER_PAGE = PAGE_SIZE / 4
};
@@ -324,6 +341,29 @@ struct hns_roce_mtt {
enum hns_roce_mtt_type mtt_type;
};
+struct hns_roce_buf_region {
+ int offset; /* page offset */
+ u32 count; /* page count*/
+ int hopnum; /* addressing hop num */
+};
+
+#define HNS_ROCE_MAX_BT_REGION 3
+#define HNS_ROCE_MAX_BT_LEVEL 3
+struct hns_roce_hem_list {
+ struct list_head root_bt;
+ /* link all bt dma mem by hop config */
+ struct list_head mid_bt[HNS_ROCE_MAX_BT_REGION][HNS_ROCE_MAX_BT_LEVEL];
+ struct list_head btm_bt; /* link all bottom bt in @mid_bt */
+ dma_addr_t root_ba; /* pointer to the root ba table */
+ int bt_pg_shift;
+};
+
+/* memory translate region */
+struct hns_roce_mtr {
+ struct hns_roce_hem_list hem_list;
+ int buf_pg_shift;
+};
+
struct hns_roce_mw {
struct ib_mw ibmw;
u32 pdn;
@@ -413,8 +453,8 @@ struct hns_roce_buf {
struct hns_roce_db_pgdir {
struct list_head list;
DECLARE_BITMAP(order0, HNS_ROCE_DB_PER_PAGE);
- DECLARE_BITMAP(order1, HNS_ROCE_DB_PER_PAGE / 2);
- unsigned long *bits[2];
+ DECLARE_BITMAP(order1, HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT);
+ unsigned long *bits[HNS_ROCE_DB_TYPE_COUNT];
u32 *page;
dma_addr_t db_dma;
};
@@ -472,7 +512,7 @@ struct hns_roce_idx_que {
u32 buf_size;
struct ib_umem *umem;
struct hns_roce_mtt mtt;
- u64 *bitmap;
+ unsigned long *bitmap;
};
struct hns_roce_srq {
@@ -535,7 +575,7 @@ struct hns_roce_av {
u8 hop_limit;
__le32 sl_tclass_flowlabel;
u8 dgid[HNS_ROCE_GID_SIZE];
- u8 mac[6];
+ u8 mac[ETH_ALEN];
__le16 vlan;
bool vlan_en;
};
@@ -620,6 +660,14 @@ struct hns_roce_qp {
struct ib_umem *umem;
struct hns_roce_mtt mtt;
+ struct hns_roce_mtr mtr;
+
+ /* this define must less than HNS_ROCE_MAX_BT_REGION */
+#define HNS_ROCE_WQE_REGION_MAX 3
+ struct hns_roce_buf_region regions[HNS_ROCE_WQE_REGION_MAX];
+ int region_cnt;
+ int wqe_bt_pg_shift;
+
u32 buff_size;
struct mutex mutex;
u8 port;
@@ -830,6 +878,9 @@ struct hns_roce_caps {
u32 mtt_ba_pg_sz;
u32 mtt_buf_pg_sz;
u32 mtt_hop_num;
+ u32 wqe_sq_hop_num;
+ u32 wqe_sge_hop_num;
+ u32 wqe_rq_hop_num;
u32 sccc_ba_pg_sz;
u32 sccc_buf_pg_sz;
u32 sccc_hop_num;
@@ -921,7 +972,7 @@ struct hns_roce_hw {
int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
struct ib_udata *udata);
- int (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata);
+ void (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata);
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int (*init_eq)(struct hns_roce_dev *hr_dev);
void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
@@ -940,6 +991,16 @@ struct hns_roce_hw {
const struct ib_device_ops *hns_roce_dev_srq_ops;
};
+enum hns_phy_state {
+ HNS_ROCE_PHY_SLEEP = 1,
+ HNS_ROCE_PHY_POLLING = 2,
+ HNS_ROCE_PHY_DISABLED = 3,
+ HNS_ROCE_PHY_TRAINING = 4,
+ HNS_ROCE_PHY_LINKUP = 5,
+ HNS_ROCE_PHY_LINKERR = 6,
+ HNS_ROCE_PHY_TEST = 7
+};
+
struct hns_roce_dev {
struct ib_device ib_dev;
struct platform_device *pdev;
@@ -962,7 +1023,7 @@ struct hns_roce_dev {
struct hns_roce_caps caps;
struct xarray qp_table_xa;
- unsigned char dev_addr[HNS_ROCE_MAX_PORTS][MAC_ADDR_OCTET_NUM];
+ unsigned char dev_addr[HNS_ROCE_MAX_PORTS][ETH_ALEN];
u64 sys_image_guid;
u32 vendor_id;
u32 vendor_part_id;
@@ -1084,6 +1145,19 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev,
int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
struct hns_roce_mtt *mtt, struct hns_roce_buf *buf);
+void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift,
+ int buf_pg_shift);
+int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ dma_addr_t **bufs, struct hns_roce_buf_region *regions,
+ int region_cnt);
+void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr);
+
+/* hns roce hw need current block and next block addr from mtt */
+#define MTT_MIN_COUNT 2
+int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr);
+
int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev);
@@ -1148,6 +1222,18 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
struct hns_roce_mtt *mtt, struct ib_umem *umem);
+void hns_roce_init_buf_region(struct hns_roce_buf_region *region, int hopnum,
+ int offset, int buf_cnt);
+int hns_roce_alloc_buf_list(struct hns_roce_buf_region *regions,
+ dma_addr_t **bufs, int count);
+void hns_roce_free_buf_list(dma_addr_t **bufs, int count);
+
+int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
+ int buf_cnt, int start, struct hns_roce_buf *buf);
+int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
+ int buf_cnt, int start, struct ib_umem *umem,
+ int page_shift);
+
int hns_roce_create_srq(struct ib_srq *srq,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
@@ -1178,11 +1264,11 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
__be32 send_ieth(const struct ib_send_wr *wr);
int to_hr_qp_type(int qp_type);
-struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+int hns_roce_ib_create_cq(struct ib_cq *ib_cq,
+ const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
-int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq);
int hns_roce_db_map_user(struct hns_roce_ucontext *context,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 8e29dbb5b5fb..f4da5bd2884f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -56,7 +56,6 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
return false;
}
-EXPORT_SYMBOL_GPL(hns_roce_check_whether_mhop);
static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 start_idx,
u32 bt_chunk_num)
@@ -165,7 +164,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
+ PAGE_SHIFT);
mhop->bt_chunk_size = 1 << (hr_dev->caps.mtt_ba_pg_sz
+ PAGE_SHIFT);
- mhop->ba_l0_num = mhop->bt_chunk_size / 8;
+ mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN;
mhop->hop_num = hr_dev->caps.mtt_hop_num;
break;
case HEM_TYPE_CQE:
@@ -173,7 +172,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
+ PAGE_SHIFT);
mhop->bt_chunk_size = 1 << (hr_dev->caps.cqe_ba_pg_sz
+ PAGE_SHIFT);
- mhop->ba_l0_num = mhop->bt_chunk_size / 8;
+ mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN;
mhop->hop_num = hr_dev->caps.cqe_hop_num;
break;
case HEM_TYPE_SRQWQE:
@@ -181,7 +180,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
+ PAGE_SHIFT);
mhop->bt_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
+ PAGE_SHIFT);
- mhop->ba_l0_num = mhop->bt_chunk_size / 8;
+ mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN;
mhop->hop_num = hr_dev->caps.srqwqe_hop_num;
break;
case HEM_TYPE_IDX:
@@ -189,7 +188,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
+ PAGE_SHIFT);
mhop->bt_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
+ PAGE_SHIFT);
- mhop->ba_l0_num = mhop->bt_chunk_size / 8;
+ mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN;
mhop->hop_num = hr_dev->caps.idx_hop_num;
break;
default:
@@ -206,7 +205,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
* MTT/CQE alloc hem for bt pages.
*/
bt_num = hns_roce_get_bt_num(table->type, mhop->hop_num);
- chunk_ba_num = mhop->bt_chunk_size / 8;
+ chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
chunk_size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size :
mhop->bt_chunk_size;
table_idx = (*obj & (table->num_obj - 1)) /
@@ -234,7 +233,6 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
return 0;
}
-EXPORT_SYMBOL_GPL(hns_roce_calc_hem_mhop);
static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev,
int npages,
@@ -376,18 +374,19 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
- end = msecs_to_jiffies(HW_SYNC_TIMEOUT_MSECS) + jiffies;
- while (1) {
- if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) {
- if (!(time_before(jiffies, end))) {
- dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n");
- spin_unlock_irqrestore(lock, flags);
- return -EBUSY;
- }
- } else {
+ end = HW_SYNC_TIMEOUT_MSECS;
+ while (end) {
+ if (!(readl(bt_cmd) >> BT_CMD_SYNC_SHIFT))
break;
- }
+
mdelay(HW_SYNC_SLEEP_TIME_INTERVAL);
+ end -= HW_SYNC_SLEEP_TIME_INTERVAL;
+ }
+
+ if (end <= 0) {
+ dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n");
+ spin_unlock_irqrestore(lock, flags);
+ return -EBUSY;
}
bt_cmd_l = (u32)bt_ba;
@@ -435,7 +434,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
buf_chunk_size = mhop.buf_chunk_size;
bt_chunk_size = mhop.bt_chunk_size;
hop_num = mhop.hop_num;
- chunk_ba_num = bt_chunk_size / 8;
+ chunk_ba_num = bt_chunk_size / BA_BYTE_LEN;
bt_num = hns_roce_get_bt_num(table->type, hop_num);
switch (bt_num) {
@@ -620,7 +619,6 @@ out:
mutex_unlock(&table->mutex);
return ret;
}
-EXPORT_SYMBOL_GPL(hns_roce_table_get);
static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table,
@@ -645,7 +643,7 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
bt_chunk_size = mhop.bt_chunk_size;
hop_num = mhop.hop_num;
- chunk_ba_num = bt_chunk_size / 8;
+ chunk_ba_num = bt_chunk_size / BA_BYTE_LEN;
bt_num = hns_roce_get_bt_num(table->type, hop_num);
switch (bt_num) {
@@ -763,7 +761,6 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
mutex_unlock(&table->mutex);
}
-EXPORT_SYMBOL_GPL(hns_roce_table_put);
void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table,
@@ -799,7 +796,7 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
i = mhop.l0_idx;
j = mhop.l1_idx;
if (mhop.hop_num == 2)
- hem_idx = i * (mhop.bt_chunk_size / 8) + j;
+ hem_idx = i * (mhop.bt_chunk_size / BA_BYTE_LEN) + j;
else if (mhop.hop_num == 1 ||
mhop.hop_num == HNS_ROCE_HOP_NUM_0)
hem_idx = i;
@@ -836,7 +833,6 @@ out:
mutex_unlock(&table->mutex);
return addr;
}
-EXPORT_SYMBOL_GPL(hns_roce_table_find);
int hns_roce_table_get_range(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table,
@@ -999,7 +995,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
}
obj_per_chunk = buf_chunk_size / obj_size;
num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk;
- bt_chunk_num = bt_chunk_size / 8;
+ bt_chunk_num = bt_chunk_size / BA_BYTE_LEN;
if (type >= HEM_TYPE_MTT)
num_bt_l0 = bt_chunk_num;
@@ -1156,3 +1152,463 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
&hr_dev->mr_table.mtt_cqe_table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table);
}
+
+struct roce_hem_item {
+ struct list_head list; /* link all hems in the same bt level */
+ struct list_head sibling; /* link all hems in last hop for mtt */
+ void *addr;
+ dma_addr_t dma_addr;
+ size_t count; /* max ba numbers */
+ int start; /* start buf offset in this hem */
+ int end; /* end buf offset in this hem */
+};
+
+static struct roce_hem_item *hem_list_alloc_item(struct hns_roce_dev *hr_dev,
+ int start, int end,
+ int count, bool exist_bt,
+ int bt_level)
+{
+ struct roce_hem_item *hem;
+
+ hem = kzalloc(sizeof(*hem), GFP_KERNEL);
+ if (!hem)
+ return NULL;
+
+ if (exist_bt) {
+ hem->addr = dma_alloc_coherent(hr_dev->dev,
+ count * BA_BYTE_LEN,
+ &hem->dma_addr, GFP_KERNEL);
+ if (!hem->addr) {
+ kfree(hem);
+ return NULL;
+ }
+ }
+
+ hem->count = count;
+ hem->start = start;
+ hem->end = end;
+ INIT_LIST_HEAD(&hem->list);
+ INIT_LIST_HEAD(&hem->sibling);
+
+ return hem;
+}
+
+static void hem_list_free_item(struct hns_roce_dev *hr_dev,
+ struct roce_hem_item *hem, bool exist_bt)
+{
+ if (exist_bt)
+ dma_free_coherent(hr_dev->dev, hem->count * BA_BYTE_LEN,
+ hem->addr, hem->dma_addr);
+ kfree(hem);
+}
+
+static void hem_list_free_all(struct hns_roce_dev *hr_dev,
+ struct list_head *head, bool exist_bt)
+{
+ struct roce_hem_item *hem, *temp_hem;
+
+ list_for_each_entry_safe(hem, temp_hem, head, list) {
+ list_del(&hem->list);
+ hem_list_free_item(hr_dev, hem, exist_bt);
+ }
+}
+
+static void hem_list_link_bt(struct hns_roce_dev *hr_dev, void *base_addr,
+ u64 table_addr)
+{
+ *(u64 *)(base_addr) = table_addr;
+}
+
+/* assign L0 table address to hem from root bt */
+static void hem_list_assign_bt(struct hns_roce_dev *hr_dev,
+ struct roce_hem_item *hem, void *cpu_addr,
+ u64 phy_addr)
+{
+ hem->addr = cpu_addr;
+ hem->dma_addr = (dma_addr_t)phy_addr;
+}
+
+static inline bool hem_list_page_is_in_range(struct roce_hem_item *hem,
+ int offset)
+{
+ return (hem->start <= offset && offset <= hem->end);
+}
+
+static struct roce_hem_item *hem_list_search_item(struct list_head *ba_list,
+ int page_offset)
+{
+ struct roce_hem_item *hem, *temp_hem;
+ struct roce_hem_item *found = NULL;
+
+ list_for_each_entry_safe(hem, temp_hem, ba_list, list) {
+ if (hem_list_page_is_in_range(hem, page_offset)) {
+ found = hem;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static bool hem_list_is_bottom_bt(int hopnum, int bt_level)
+{
+ /*
+ * hopnum base address table levels
+ * 0 L0(buf)
+ * 1 L0 -> buf
+ * 2 L0 -> L1 -> buf
+ * 3 L0 -> L1 -> L2 -> buf
+ */
+ return bt_level >= (hopnum ? hopnum - 1 : hopnum);
+}
+
+/**
+ * calc base address entries num
+ * @hopnum: num of mutihop addressing
+ * @bt_level: base address table level
+ * @unit: ba entries per bt page
+ */
+static u32 hem_list_calc_ba_range(int hopnum, int bt_level, int unit)
+{
+ u32 step;
+ int max;
+ int i;
+
+ if (hopnum <= bt_level)
+ return 0;
+ /*
+ * hopnum bt_level range
+ * 1 0 unit
+ * ------------
+ * 2 0 unit * unit
+ * 2 1 unit
+ * ------------
+ * 3 0 unit * unit * unit
+ * 3 1 unit * unit
+ * 3 2 unit
+ */
+ step = 1;
+ max = hopnum - bt_level;
+ for (i = 0; i < max; i++)
+ step = step * unit;
+
+ return step;
+}
+
+/**
+ * calc the root ba entries which could cover all regions
+ * @regions: buf region array
+ * @region_cnt: array size of @regions
+ * @unit: ba entries per bt page
+ */
+int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions,
+ int region_cnt, int unit)
+{
+ struct hns_roce_buf_region *r;
+ int total = 0;
+ int step;
+ int i;
+
+ for (i = 0; i < region_cnt; i++) {
+ r = (struct hns_roce_buf_region *)&regions[i];
+ if (r->hopnum > 1) {
+ step = hem_list_calc_ba_range(r->hopnum, 1, unit);
+ if (step > 0)
+ total += (r->count + step - 1) / step;
+ } else {
+ total += r->count;
+ }
+ }
+
+ return total;
+}
+
+static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
+ const struct hns_roce_buf_region *r, int unit,
+ int offset, struct list_head *mid_bt,
+ struct list_head *btm_bt)
+{
+ struct roce_hem_item *hem_ptrs[HNS_ROCE_MAX_BT_LEVEL] = { NULL };
+ struct list_head temp_list[HNS_ROCE_MAX_BT_LEVEL];
+ struct roce_hem_item *cur, *pre;
+ const int hopnum = r->hopnum;
+ int start_aligned;
+ int distance;
+ int ret = 0;
+ int max_ofs;
+ int level;
+ u32 step;
+ int end;
+
+ if (hopnum <= 1)
+ return 0;
+
+ if (hopnum > HNS_ROCE_MAX_BT_LEVEL) {
+ dev_err(hr_dev->dev, "invalid hopnum %d!\n", hopnum);
+ return -EINVAL;
+ }
+
+ if (offset < r->offset) {
+ dev_err(hr_dev->dev, "invalid offset %d,min %d!\n",
+ offset, r->offset);
+ return -EINVAL;
+ }
+
+ distance = offset - r->offset;
+ max_ofs = r->offset + r->count - 1;
+ for (level = 0; level < hopnum; level++)
+ INIT_LIST_HEAD(&temp_list[level]);
+
+ /* config L1 bt to last bt and link them to corresponding parent */
+ for (level = 1; level < hopnum; level++) {
+ cur = hem_list_search_item(&mid_bt[level], offset);
+ if (cur) {
+ hem_ptrs[level] = cur;
+ continue;
+ }
+
+ step = hem_list_calc_ba_range(hopnum, level, unit);
+ if (step < 1) {
+ ret = -EINVAL;
+ goto err_exit;
+ }
+
+ start_aligned = (distance / step) * step + r->offset;
+ end = min_t(int, start_aligned + step - 1, max_ofs);
+ cur = hem_list_alloc_item(hr_dev, start_aligned, end, unit,
+ true, level);
+ if (!cur) {
+ ret = -ENOMEM;
+ goto err_exit;
+ }
+ hem_ptrs[level] = cur;
+ list_add(&cur->list, &temp_list[level]);
+ if (hem_list_is_bottom_bt(hopnum, level))
+ list_add(&cur->sibling, &temp_list[0]);
+
+ /* link bt to parent bt */
+ if (level > 1) {
+ pre = hem_ptrs[level - 1];
+ step = (cur->start - pre->start) / step * BA_BYTE_LEN;
+ hem_list_link_bt(hr_dev, pre->addr + step,
+ cur->dma_addr);
+ }
+ }
+
+ list_splice(&temp_list[0], btm_bt);
+ for (level = 1; level < hopnum; level++)
+ list_splice(&temp_list[level], &mid_bt[level]);
+
+ return 0;
+
+err_exit:
+ for (level = 1; level < hopnum; level++)
+ hem_list_free_all(hr_dev, &temp_list[level], true);
+
+ return ret;
+}
+
+static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list, int unit,
+ const struct hns_roce_buf_region *regions,
+ int region_cnt)
+{
+ struct roce_hem_item *hem, *temp_hem, *root_hem;
+ struct list_head temp_list[HNS_ROCE_MAX_BT_REGION];
+ const struct hns_roce_buf_region *r;
+ struct list_head temp_root;
+ struct list_head temp_btm;
+ void *cpu_base;
+ u64 phy_base;
+ int ret = 0;
+ int offset;
+ int total;
+ int step;
+ int i;
+
+ r = &regions[0];
+ root_hem = hem_list_search_item(&hem_list->root_bt, r->offset);
+ if (root_hem)
+ return 0;
+
+ INIT_LIST_HEAD(&temp_root);
+ total = r->offset;
+ /* indicate to last region */
+ r = &regions[region_cnt - 1];
+ root_hem = hem_list_alloc_item(hr_dev, total, r->offset + r->count - 1,
+ unit, true, 0);
+ if (!root_hem)
+ return -ENOMEM;
+ list_add(&root_hem->list, &temp_root);
+
+ hem_list->root_ba = root_hem->dma_addr;
+
+ INIT_LIST_HEAD(&temp_btm);
+ for (i = 0; i < region_cnt; i++)
+ INIT_LIST_HEAD(&temp_list[i]);
+
+ total = 0;
+ for (i = 0; i < region_cnt && total < unit; i++) {
+ r = &regions[i];
+ if (!r->count)
+ continue;
+
+ /* all regions's mid[x][0] shared the root_bt's trunk */
+ cpu_base = root_hem->addr + total * BA_BYTE_LEN;
+ phy_base = root_hem->dma_addr + total * BA_BYTE_LEN;
+
+ /* if hopnum is 0 or 1, cut a new fake hem from the root bt
+ * which's address share to all regions.
+ */
+ if (hem_list_is_bottom_bt(r->hopnum, 0)) {
+ hem = hem_list_alloc_item(hr_dev, r->offset,
+ r->offset + r->count - 1,
+ r->count, false, 0);
+ if (!hem) {
+ ret = -ENOMEM;
+ goto err_exit;
+ }
+ hem_list_assign_bt(hr_dev, hem, cpu_base, phy_base);
+ list_add(&hem->list, &temp_list[i]);
+ list_add(&hem->sibling, &temp_btm);
+ total += r->count;
+ } else {
+ step = hem_list_calc_ba_range(r->hopnum, 1, unit);
+ if (step < 1) {
+ ret = -EINVAL;
+ goto err_exit;
+ }
+ /* if exist mid bt, link L1 to L0 */
+ list_for_each_entry_safe(hem, temp_hem,
+ &hem_list->mid_bt[i][1], list) {
+ offset = hem->start / step * BA_BYTE_LEN;
+ hem_list_link_bt(hr_dev, cpu_base + offset,
+ hem->dma_addr);
+ total++;
+ }
+ }
+ }
+
+ list_splice(&temp_btm, &hem_list->btm_bt);
+ list_splice(&temp_root, &hem_list->root_bt);
+ for (i = 0; i < region_cnt; i++)
+ list_splice(&temp_list[i], &hem_list->mid_bt[i][0]);
+
+ return 0;
+
+err_exit:
+ for (i = 0; i < region_cnt; i++)
+ hem_list_free_all(hr_dev, &temp_list[i], false);
+
+ hem_list_free_all(hr_dev, &temp_root, true);
+
+ return ret;
+}
+
+/* construct the base address table and link them by address hop config */
+int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list,
+ const struct hns_roce_buf_region *regions,
+ int region_cnt)
+{
+ const struct hns_roce_buf_region *r;
+ int ofs, end;
+ int ret = 0;
+ int unit;
+ int i;
+
+ if (region_cnt > HNS_ROCE_MAX_BT_REGION) {
+ dev_err(hr_dev->dev, "invalid region region_cnt %d!\n",
+ region_cnt);
+ return -EINVAL;
+ }
+
+ unit = (1 << hem_list->bt_pg_shift) / BA_BYTE_LEN;
+ for (i = 0; i < region_cnt; i++) {
+ r = &regions[i];
+ if (!r->count)
+ continue;
+
+ end = r->offset + r->count;
+ for (ofs = r->offset; ofs < end; ofs += unit) {
+ ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs,
+ hem_list->mid_bt[i],
+ &hem_list->btm_bt);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "alloc hem trunk fail ret=%d!\n", ret);
+ goto err_alloc;
+ }
+ }
+ }
+
+ ret = hem_list_alloc_root_bt(hr_dev, hem_list, unit, regions,
+ region_cnt);
+ if (ret)
+ dev_err(hr_dev->dev, "alloc hem root fail ret=%d!\n", ret);
+ else
+ return 0;
+
+err_alloc:
+ hns_roce_hem_list_release(hr_dev, hem_list);
+
+ return ret;
+}
+
+void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list)
+{
+ int i, j;
+
+ for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++)
+ for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++)
+ hem_list_free_all(hr_dev, &hem_list->mid_bt[i][j],
+ j != 0);
+
+ hem_list_free_all(hr_dev, &hem_list->root_bt, true);
+ INIT_LIST_HEAD(&hem_list->btm_bt);
+ hem_list->root_ba = 0;
+}
+
+void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list,
+ int bt_page_order)
+{
+ int i, j;
+
+ INIT_LIST_HEAD(&hem_list->root_bt);
+ INIT_LIST_HEAD(&hem_list->btm_bt);
+ for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++)
+ for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++)
+ INIT_LIST_HEAD(&hem_list->mid_bt[i][j]);
+
+ hem_list->bt_pg_shift = bt_page_order;
+}
+
+void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list,
+ int offset, int *mtt_cnt, u64 *phy_addr)
+{
+ struct list_head *head = &hem_list->btm_bt;
+ struct roce_hem_item *hem, *temp_hem;
+ void *cpu_base = NULL;
+ u64 phy_base = 0;
+ int nr = 0;
+
+ list_for_each_entry_safe(hem, temp_hem, head, sibling) {
+ if (hem_list_page_is_in_range(hem, offset)) {
+ nr = offset - hem->start;
+ cpu_base = hem->addr + nr * BA_BYTE_LEN;
+ phy_base = hem->dma_addr + nr * BA_BYTE_LEN;
+ nr = hem->end + 1 - offset;
+ break;
+ }
+ }
+
+ if (mtt_cnt)
+ *mtt_cnt = nr;
+
+ if (phy_addr)
+ *phy_addr = phy_base;
+
+ return cpu_base;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index d9d668992e49..f1ccb8f35fe5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -34,8 +34,8 @@
#ifndef _HNS_ROCE_HEM_H
#define _HNS_ROCE_HEM_H
-#define HW_SYNC_TIMEOUT_MSECS 500
#define HW_SYNC_SLEEP_TIME_INTERVAL 20
+#define HW_SYNC_TIMEOUT_MSECS (25 * HW_SYNC_SLEEP_TIME_INTERVAL)
#define BT_CMD_SYNC_SHIFT 31
enum {
@@ -133,6 +133,20 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_mhop *mhop);
bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type);
+void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list,
+ int bt_page_order);
+int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions,
+ int region_cnt, int unit);
+int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list,
+ const struct hns_roce_buf_region *regions,
+ int region_cnt);
+void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list);
+void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list,
+ int offset, int *mtt_cnt, u64 *phy_addr);
+
static inline void hns_roce_hem_first(struct hns_roce_hem *hem,
struct hns_roce_hem_iter *iter)
{
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 3afd3e9330e7..81e6dedb1e02 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -717,7 +717,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
union ib_gid dgid;
u64 subnet_prefix;
int attr_mask = 0;
- int ret = -ENOMEM;
+ int ret;
int i, j;
u8 queue_en[HNS_ROCE_V1_RESV_QP] = { 0 };
u8 phy_port;
@@ -730,10 +730,16 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
/* Reserved cq for loop qp */
cq_init_attr.cqe = HNS_ROCE_MIN_WQE_NUM * 2;
cq_init_attr.comp_vector = 0;
- cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL);
- if (IS_ERR(cq)) {
- dev_err(dev, "Create cq for reserved loop qp failed!");
+
+ ibdev = &hr_dev->ib_dev;
+ cq = rdma_zalloc_drv_obj(ibdev, ib_cq);
+ if (!cq)
return -ENOMEM;
+
+ ret = hns_roce_ib_create_cq(cq, &cq_init_attr, NULL);
+ if (ret) {
+ dev_err(dev, "Create cq for reserved loop qp failed!");
+ goto alloc_cq_failed;
}
free_mr->mr_free_cq = to_hr_cq(cq);
free_mr->mr_free_cq->ib_cq.device = &hr_dev->ib_dev;
@@ -743,7 +749,6 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
free_mr->mr_free_cq->ib_cq.cq_context = NULL;
atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0);
- ibdev = &hr_dev->ib_dev;
pd = rdma_zalloc_drv_obj(ibdev, ib_pd);
if (!pd)
goto alloc_mem_failed;
@@ -818,7 +823,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
attr.dest_qp_num = hr_qp->qpn;
memcpy(rdma_ah_retrieve_dmac(&attr.ah_attr),
hr_dev->dev_addr[port],
- MAC_ADDR_OCTET_NUM);
+ ETH_ALEN);
memcpy(&dgid.raw, &subnet_prefix, sizeof(u64));
memcpy(&dgid.raw[8], hr_dev->dev_addr[port], 3);
@@ -865,9 +870,9 @@ alloc_pd_failed:
kfree(pd);
alloc_mem_failed:
- if (hns_roce_ib_destroy_cq(cq, NULL))
- dev_err(dev, "Destroy cq for create_lp_qp failed!\n");
-
+ hns_roce_ib_destroy_cq(cq, NULL);
+alloc_cq_failed:
+ kfree(cq);
return ret;
}
@@ -894,10 +899,8 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev)
i, ret);
}
- ret = hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL);
- if (ret)
- dev_err(dev, "Destroy cq for mr_free failed(%d)!\n", ret);
-
+ hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL);
+ kfree(&free_mr->mr_free_cq->ib_cq);
hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd, NULL);
kfree(&free_mr->mr_free_pd->ibpd);
}
@@ -966,8 +969,7 @@ static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev)
struct hns_roce_free_mr *free_mr;
struct hns_roce_v1_priv *priv;
struct completion comp;
- unsigned long end =
- msecs_to_jiffies(HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS) + jiffies;
+ unsigned long end = HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS;
priv = (struct hns_roce_v1_priv *)hr_dev->priv;
free_mr = &priv->free_mr;
@@ -987,10 +989,11 @@ static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev)
queue_work(free_mr->free_mr_wq, &(lp_qp_work->work));
- while (time_before_eq(jiffies, end)) {
+ while (end) {
if (try_wait_for_completion(&comp))
return 0;
msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE);
+ end -= HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE;
}
lp_qp_work->comp_flag = 0;
@@ -1104,8 +1107,7 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev,
struct hns_roce_free_mr *free_mr;
struct hns_roce_v1_priv *priv;
struct completion comp;
- unsigned long end =
- msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies;
+ unsigned long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS;
unsigned long start = jiffies;
int npages;
int ret = 0;
@@ -1135,10 +1137,11 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev,
queue_work(free_mr->free_mr_wq, &(mr_work->work));
- while (time_before_eq(jiffies, end)) {
+ while (end) {
if (try_wait_for_completion(&comp))
goto free_mr;
msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE);
+ end -= HNS_ROCE_V1_FREE_MR_WAIT_VALUE;
}
mr_work->comp_flag = 0;
@@ -1161,8 +1164,7 @@ free_mr:
hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
key_to_hw_index(mr->key), 0);
- if (mr->umem)
- ib_umem_release(mr->umem);
+ ib_umem_release(mr->umem);
kfree(mr);
@@ -1557,6 +1559,7 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
caps->reserved_mrws = 1;
caps->reserved_uars = 0;
caps->reserved_cqs = 0;
+ caps->reserved_qps = 12; /* 2 SQP per port, six ports total 12 */
caps->chunk_sz = HNS_ROCE_V1_TABLE_CHUNK_SIZE;
for (i = 0; i < caps->num_ports; i++)
@@ -1742,11 +1745,14 @@ static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port,
int gid_index, const union ib_gid *gid,
const struct ib_gid_attr *attr)
{
+ unsigned long flags;
u32 *p = NULL;
u8 gid_idx = 0;
gid_idx = hns_get_gid_index(hr_dev, port, gid_index);
+ spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+
p = (u32 *)&gid->raw[0];
roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_L_0_REG +
(HNS_ROCE_V1_GID_NUM * gid_idx));
@@ -1763,6 +1769,8 @@ static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port,
roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_H_0_REG +
(HNS_ROCE_V1_GID_NUM * gid_idx));
+ spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+
return 0;
}
@@ -2458,10 +2466,10 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
- end = msecs_to_jiffies(HW_SYNC_TIMEOUT_MSECS) + jiffies;
+ end = HW_SYNC_TIMEOUT_MSECS;
while (1) {
if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) {
- if (!(time_before(jiffies, end))) {
+ if (!end) {
dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n");
spin_unlock_irqrestore(&hr_dev->bt_cmd_lock,
flags);
@@ -2470,7 +2478,8 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
} else {
break;
}
- msleep(HW_SYNC_SLEEP_TIME_INTERVAL);
+ mdelay(HW_SYNC_SLEEP_TIME_INTERVAL);
+ end -= HW_SYNC_SLEEP_TIME_INTERVAL;
}
bt_cmd_val[0] = (__le32)bt_ba;
@@ -3633,9 +3642,8 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
- if (udata)
- ib_umem_release(hr_qp->umem);
- else {
+ ib_umem_release(hr_qp->umem);
+ if (!udata) {
kfree(hr_qp->sq.wrid);
kfree(hr_qp->rq.wrid);
@@ -3649,7 +3657,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
return 0;
}
-static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
@@ -3658,7 +3666,6 @@ static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
u32 cqe_cnt_cur;
u32 cq_buf_size;
int wait_time = 0;
- int ret = 0;
hns_roce_free_cq(hr_dev, hr_cq);
@@ -3680,7 +3687,6 @@ static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
if (wait_time > HNS_ROCE_MAX_FREE_CQ_WAIT_CNT) {
dev_warn(dev, "Destroy cq 0x%lx timeout!\n",
hr_cq->cqn);
- ret = -ETIMEDOUT;
break;
}
wait_time++;
@@ -3688,17 +3694,12 @@ static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
- if (ibcq->uobject)
- ib_umem_release(hr_cq->umem);
- else {
+ ib_umem_release(hr_cq->umem);
+ if (!udata) {
/* Free the buff of stored cq */
cq_buf_size = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz;
hns_roce_buf_free(hr_dev, cq_buf_size, &hr_cq->hr_buf.hr_buf);
}
-
- kfree(hr_cq);
-
- return ret;
}
static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not)
@@ -3902,7 +3903,8 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev,
*/
dma_rmb();
- dev_dbg(dev, "aeqe = %p, aeqe->asyn.event_type = 0x%lx\n", aeqe,
+ dev_dbg(dev, "aeqe = %pK, aeqe->asyn.event_type = 0x%lx\n",
+ aeqe,
roce_get_field(aeqe->asyn,
HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S));
@@ -4265,7 +4267,6 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
}
eq->buf_list[i].map = tmp_dma_addr;
- memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE);
}
eq->cons_index = 0;
roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index b5392cb5b20f..b76e3beeafb8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1098,7 +1098,7 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
if (ret == CMD_RST_PRC_SUCCESS)
return 0;
if (ret == CMD_RST_PRC_EBUSY)
- return ret;
+ return -EBUSY;
ret = __hns_roce_cmq_send(hr_dev, desc, num);
if (ret) {
@@ -1106,7 +1106,7 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
if (retval == CMD_RST_PRC_SUCCESS)
return 0;
else if (retval == CMD_RST_PRC_EBUSY)
- return retval;
+ return -EBUSY;
}
return ret;
@@ -1130,6 +1130,45 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev)
return 0;
}
+static void hns_roce_function_clear(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_func_clear *resp;
+ struct hns_roce_cmq_desc desc;
+ unsigned long end;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_FUNC_CLEAR, false);
+ resp = (struct hns_roce_func_clear *)desc.data;
+
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ dev_err(hr_dev->dev, "Func clear write failed, ret = %d.\n",
+ ret);
+ return;
+ }
+
+ msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL);
+ end = HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS;
+ while (end) {
+ msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT);
+ end -= HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_FUNC_CLEAR,
+ true);
+
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ continue;
+
+ if (roce_get_bit(resp->func_done, FUNC_CLEAR_RST_FUN_DONE_S)) {
+ hr_dev->is_reset = true;
+ return;
+ }
+ }
+
+ dev_err(hr_dev->dev, "Func clear fail.\n");
+}
+
static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev)
{
struct hns_roce_query_fw_info *resp;
@@ -1574,7 +1613,10 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->mtt_ba_pg_sz = 0;
caps->mtt_buf_pg_sz = 0;
caps->mtt_hop_num = HNS_ROCE_MTT_HOP_NUM;
- caps->cqe_ba_pg_sz = 0;
+ caps->wqe_sq_hop_num = 2;
+ caps->wqe_sge_hop_num = 1;
+ caps->wqe_rq_hop_num = 2;
+ caps->cqe_ba_pg_sz = 6;
caps->cqe_buf_pg_sz = 0;
caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM;
caps->srqwqe_ba_pg_sz = 0;
@@ -1774,7 +1816,6 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev,
goto err_alloc_buf_failed;
link_tbl->pg_list[i].map = t;
- memset(link_tbl->pg_list[i].buf, 0, buf_chk_sz);
entry[i].blk_ba0 = (t >> 12) & 0xffffffff;
roce_set_field(entry[i].blk_ba1_nxt_ptr,
@@ -1891,6 +1932,9 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
+ if (hr_dev->pci_dev->revision == 0x21)
+ hns_roce_function_clear(hr_dev);
+
hns_roce_free_link_table(hr_dev, &priv->tpq);
hns_roce_free_link_table(hr_dev, &priv->tsq);
}
@@ -1974,7 +2018,7 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev,
unsigned long timeout)
{
struct device *dev = hr_dev->dev;
- unsigned long end = 0;
+ unsigned long end;
u32 status;
end = msecs_to_jiffies(timeout) + jiffies;
@@ -2340,15 +2384,10 @@ static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
{
- u32 bitmap_num;
- int bit_num;
-
/* always called with interrupts disabled. */
spin_lock(&srq->lock);
- bitmap_num = wqe_index / (sizeof(u64) * 8);
- bit_num = wqe_index % (sizeof(u64) * 8);
- srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num);
+ bitmap_clear(srq->idx_que.bitmap, wqe_index, 1);
srq->tail++;
spin_unlock(&srq->lock);
@@ -2977,7 +3016,7 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev,
{
struct device *dev = hr_dev->dev;
struct hns_roce_cmd_mailbox *mailbox;
- int ret = 0;
+ int ret;
u16 op = 0xff;
if (!hns_roce_check_whether_mhop(hr_dev, table->type))
@@ -3026,7 +3065,6 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev,
}
static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt,
enum ib_qp_state cur_state,
enum ib_qp_state new_state,
struct hns_roce_v2_qp_context *context,
@@ -3426,7 +3464,9 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
else
roce_set_field(context->byte_4_sqpn_tst,
V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S, hr_qp->sq.max_gs > 2 ?
+ V2_QPC_BYTE_4_SGE_SHIFT_S,
+ hr_qp->sq.max_gs >
+ HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ?
ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
@@ -3520,6 +3560,31 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
}
}
+static bool check_wqe_rq_mtt_count(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp, int mtt_cnt,
+ u32 page_size)
+{
+ struct device *dev = hr_dev->dev;
+
+ if (hr_qp->rq.wqe_cnt < 1)
+ return true;
+
+ if (mtt_cnt < 1) {
+ dev_err(dev, "qp(0x%lx) rqwqe buf ba find failed\n",
+ hr_qp->qpn);
+ return false;
+ }
+
+ if (mtt_cnt < MTT_MIN_COUNT &&
+ (hr_qp->rq.offset + page_size) < hr_qp->buff_size) {
+ dev_err(dev, "qp(0x%lx) next rqwqe buf ba find failed\n",
+ hr_qp->qpn);
+ return false;
+ }
+
+ return true;
+}
+
static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
struct hns_roce_v2_qp_context *context,
@@ -3529,25 +3594,27 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct device *dev = hr_dev->dev;
+ u64 mtts[MTT_MIN_COUNT] = { 0 };
dma_addr_t dma_handle_3;
dma_addr_t dma_handle_2;
- dma_addr_t dma_handle;
+ u64 wqe_sge_ba;
u32 page_size;
u8 port_num;
u64 *mtts_3;
u64 *mtts_2;
- u64 *mtts;
+ int count;
u8 *dmac;
u8 *smac;
int port;
/* Search qp buf's mtts */
- mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table,
- hr_qp->mtt.first_seg, &dma_handle);
- if (!mtts) {
- dev_err(dev, "qp buf pa find failed\n");
- return -EINVAL;
- }
+ page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
+ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
+ hr_qp->rq.offset / page_size, mtts,
+ MTT_MIN_COUNT, &wqe_sge_ba);
+ if (!ibqp->srq)
+ if (!check_wqe_rq_mtt_count(hr_dev, hr_qp, count, page_size))
+ return -EINVAL;
/* Search IRRL's mtts */
mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table,
@@ -3571,7 +3638,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
}
dmac = (u8 *)attr->ah_attr.roce.dmac;
- context->wqe_sge_ba = (u32)(dma_handle >> 3);
+ context->wqe_sge_ba = (u32)(wqe_sge_ba >> 3);
qpc_mask->wqe_sge_ba = 0;
/*
@@ -3581,22 +3648,23 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
* 0 at the same time, else set them to 0x1.
*/
roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_WQE_SGE_BA_M,
- V2_QPC_BYTE_12_WQE_SGE_BA_S, dma_handle >> (32 + 3));
+ V2_QPC_BYTE_12_WQE_SGE_BA_S, wqe_sge_ba >> (32 + 3));
roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_WQE_SGE_BA_M,
V2_QPC_BYTE_12_WQE_SGE_BA_S, 0);
roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M,
V2_QPC_BYTE_12_SQ_HOP_NUM_S,
- hr_dev->caps.mtt_hop_num == HNS_ROCE_HOP_NUM_0 ?
- 0 : hr_dev->caps.mtt_hop_num);
+ hr_dev->caps.wqe_sq_hop_num == HNS_ROCE_HOP_NUM_0 ?
+ 0 : hr_dev->caps.wqe_sq_hop_num);
roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M,
V2_QPC_BYTE_12_SQ_HOP_NUM_S, 0);
roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_SGE_HOP_NUM_M,
V2_QPC_BYTE_20_SGE_HOP_NUM_S,
- ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > 2) ?
- hr_dev->caps.mtt_hop_num : 0);
+ ((ibqp->qp_type == IB_QPT_GSI) ||
+ hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ?
+ hr_dev->caps.wqe_sge_hop_num : 0);
roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_SGE_HOP_NUM_M,
V2_QPC_BYTE_20_SGE_HOP_NUM_S, 0);
@@ -3604,8 +3672,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_HOP_NUM_M,
V2_QPC_BYTE_20_RQ_HOP_NUM_S,
- hr_dev->caps.mtt_hop_num == HNS_ROCE_HOP_NUM_0 ?
- 0 : hr_dev->caps.mtt_hop_num);
+ hr_dev->caps.wqe_rq_hop_num == HNS_ROCE_HOP_NUM_0 ?
+ 0 : hr_dev->caps.wqe_rq_hop_num);
roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_HOP_NUM_M,
V2_QPC_BYTE_20_RQ_HOP_NUM_S, 0);
@@ -3613,7 +3681,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(context->byte_16_buf_ba_pg_sz,
V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M,
V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S,
- hr_dev->caps.mtt_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_qp->wqe_bt_pg_shift + PG_SHIFT_OFFSET);
roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz,
V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M,
V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, 0);
@@ -3626,29 +3694,24 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M,
V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0);
- page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
- context->rq_cur_blk_addr = (u32)(mtts[hr_qp->rq.offset / page_size]
- >> PAGE_ADDR_SHIFT);
+ context->rq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
qpc_mask->rq_cur_blk_addr = 0;
roce_set_field(context->byte_92_srq_info,
V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M,
V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S,
- mtts[hr_qp->rq.offset / page_size]
- >> (32 + PAGE_ADDR_SHIFT));
+ mtts[0] >> (32 + PAGE_ADDR_SHIFT));
roce_set_field(qpc_mask->byte_92_srq_info,
V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M,
V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S, 0);
- context->rq_nxt_blk_addr = (u32)(mtts[hr_qp->rq.offset / page_size + 1]
- >> PAGE_ADDR_SHIFT);
+ context->rq_nxt_blk_addr = (u32)(mtts[1] >> PAGE_ADDR_SHIFT);
qpc_mask->rq_nxt_blk_addr = 0;
roce_set_field(context->byte_104_rq_sge,
V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M,
V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S,
- mtts[hr_qp->rq.offset / page_size + 1]
- >> (32 + PAGE_ADDR_SHIFT));
+ mtts[1] >> (32 + PAGE_ADDR_SHIFT));
roce_set_field(qpc_mask->byte_104_rq_sge,
V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M,
V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S, 0);
@@ -3708,13 +3771,14 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_SGID_IDX_M,
V2_QPC_BYTE_20_SGID_IDX_S, 0);
- memcpy(&(context->dmac), dmac, 4);
+ memcpy(&(context->dmac), dmac, sizeof(u32));
roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M,
V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4])));
qpc_mask->dmac = 0;
roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M,
V2_QPC_BYTE_52_DMAC_S, 0);
+ /* mtu*(2^LP_PKTN_INI) should not bigger than 1 message length 64kb */
roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
V2_QPC_BYTE_56_LP_PKTN_INI_S, 4);
roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
@@ -3756,6 +3820,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M,
V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0);
+ /* rocee send 2^lp_sgen_ini segs every time */
roce_set_field(context->byte_168_irrl_idx,
V2_QPC_BYTE_168_LP_SGEN_INI_M,
V2_QPC_BYTE_168_LP_SGEN_INI_S, 3);
@@ -3774,18 +3839,30 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct device *dev = hr_dev->dev;
- dma_addr_t dma_handle;
+ u64 sge_cur_blk = 0;
+ u64 sq_cur_blk = 0;
u32 page_size;
- u64 *mtts;
+ int count;
/* Search qp buf's mtts */
- mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table,
- hr_qp->mtt.first_seg, &dma_handle);
- if (!mtts) {
- dev_err(dev, "qp buf pa find failed\n");
+ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, &sq_cur_blk, 1, NULL);
+ if (count < 1) {
+ dev_err(dev, "qp(0x%lx) buf pa find failed\n", hr_qp->qpn);
return -EINVAL;
}
+ if (hr_qp->sge.offset) {
+ page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
+ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
+ hr_qp->sge.offset / page_size,
+ &sge_cur_blk, 1, NULL);
+ if (count < 1) {
+ dev_err(dev, "qp(0x%lx) sge pa find failed\n",
+ hr_qp->qpn);
+ return -EINVAL;
+ }
+ }
+
/* Not support alternate path and path migration */
if ((attr_mask & IB_QP_ALT_PATH) ||
(attr_mask & IB_QP_PATH_MIG_STATE)) {
@@ -3799,37 +3876,37 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
* we should set all bits of the relevant fields in context mask to
* 0 at the same time, else set them to 0x1.
*/
- context->sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
+ context->sq_cur_blk_addr = (u32)(sq_cur_blk >> PAGE_ADDR_SHIFT);
roce_set_field(context->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S,
- mtts[0] >> (32 + PAGE_ADDR_SHIFT));
+ sq_cur_blk >> (32 + PAGE_ADDR_SHIFT));
qpc_mask->sq_cur_blk_addr = 0;
roce_set_field(qpc_mask->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, 0);
- page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
- context->sq_cur_sge_blk_addr =
- ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > 2) ?
- ((u32)(mtts[hr_qp->sge.offset / page_size]
- >> PAGE_ADDR_SHIFT)) : 0;
+ context->sq_cur_sge_blk_addr = ((ibqp->qp_type == IB_QPT_GSI) ||
+ hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ?
+ ((u32)(sge_cur_blk >>
+ PAGE_ADDR_SHIFT)) : 0;
roce_set_field(context->byte_184_irrl_idx,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S,
- ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > 2) ?
- (mtts[hr_qp->sge.offset / page_size] >>
+ ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs >
+ HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ?
+ (sge_cur_blk >>
(32 + PAGE_ADDR_SHIFT)) : 0);
qpc_mask->sq_cur_sge_blk_addr = 0;
roce_set_field(qpc_mask->byte_184_irrl_idx,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, 0);
- context->rx_sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
+ context->rx_sq_cur_blk_addr = (u32)(sq_cur_blk >> PAGE_ADDR_SHIFT);
roce_set_field(context->byte_232_irrl_sge,
V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M,
V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_S,
- mtts[0] >> (32 + PAGE_ADDR_SHIFT));
+ sq_cur_blk >> (32 + PAGE_ADDR_SHIFT));
qpc_mask->rx_sq_cur_blk_addr = 0;
roce_set_field(qpc_mask->byte_232_irrl_sge,
V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M,
@@ -4144,7 +4221,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
roce_set_field(context->byte_224_retry_msg,
V2_QPC_BYTE_224_RETRY_MSG_PSN_M,
V2_QPC_BYTE_224_RETRY_MSG_PSN_S,
- attr->sq_psn >> 16);
+ attr->sq_psn >> V2_QPC_BYTE_220_RETRY_MSG_PSN_S);
roce_set_field(qpc_mask->byte_224_retry_msg,
V2_QPC_BYTE_224_RETRY_MSG_PSN_M,
V2_QPC_BYTE_224_RETRY_MSG_PSN_S, 0);
@@ -4230,7 +4307,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
V2_QPC_BYTE_60_QP_ST_S, 0);
/* SW pass context to HW */
- ret = hns_roce_v2_qp_modify(hr_dev, &hr_qp->mtt, cur_state, new_state,
+ ret = hns_roce_v2_qp_modify(hr_dev, cur_state, new_state,
context, hr_qp);
if (ret) {
dev_err(dev, "hns_roce_qp_modify failed(%d)\n", ret);
@@ -4374,11 +4451,12 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
V2_QPC_BYTE_56_DQPN_M,
V2_QPC_BYTE_56_DQPN_S);
qp_attr->qp_access_flags = ((roce_get_bit(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_RRE_S)) << 2) |
- ((roce_get_bit(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_RWE_S)) << 1) |
- ((roce_get_bit(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_ATE_S)) << 3);
+ V2_QPC_BYTE_76_RRE_S)) << V2_QP_RWE_S) |
+ ((roce_get_bit(context->byte_76_srqn_op_en,
+ V2_QPC_BYTE_76_RWE_S)) << V2_QP_RRE_S) |
+ ((roce_get_bit(context->byte_76_srqn_op_en,
+ V2_QPC_BYTE_76_ATE_S)) << V2_QP_ATE_S);
+
if (hr_qp->ibqp.qp_type == IB_QPT_RC ||
hr_qp->ibqp.qp_type == IB_QPT_UC) {
struct ib_global_route *grh =
@@ -4487,7 +4565,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
(hr_qp->ibqp.qp_type == IB_QPT_UD))
hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
- hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
+ hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr);
if (udata) {
struct hns_roce_ucontext *context =
@@ -4501,7 +4579,6 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1))
hns_roce_db_unmap_user(context, &hr_qp->rdb);
- ib_umem_release(hr_qp->umem);
} else {
kfree(hr_qp->sq.wrid);
kfree(hr_qp->rq.wrid);
@@ -4509,6 +4586,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
if (hr_qp->rq.wqe_cnt)
hns_roce_free_db(hr_dev, &hr_qp->rdb);
}
+ ib_umem_release(hr_qp->umem);
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
hr_qp->rq.wqe_cnt) {
@@ -4682,7 +4760,6 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
dev_warn(dev, "Path migration failed.\n");
break;
case HNS_ROCE_EVENT_TYPE_COMM_EST:
- dev_info(dev, "Communication established.\n");
break;
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
dev_warn(dev, "Send queue drained.\n");
@@ -5151,8 +5228,8 @@ static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
eq->l1_dma[i]);
- for (j = 0; j < bt_chk_sz / 8; j++) {
- idx = i * (bt_chk_sz / 8) + j;
+ for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
+ idx = i * (bt_chk_sz / BA_BYTE_LEN) + j;
if ((i == eq->l0_last_num - 1)
&& j == eq->l1_last_num - 1) {
eqe_alloc = (buf_chk_sz / eq->eqe_size)
@@ -5368,9 +5445,9 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
- ba_num = (PAGE_ALIGN(eq->entries * eq->eqe_size) + buf_chk_sz - 1)
- / buf_chk_sz;
- bt_num = (ba_num + bt_chk_sz / 8 - 1) / (bt_chk_sz / 8);
+ ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size),
+ buf_chk_sz);
+ bt_num = DIV_ROUND_UP(ba_num, bt_chk_sz / BA_BYTE_LEN);
/* hop_num = 0 */
if (mhop_num == HNS_ROCE_HOP_NUM_0) {
@@ -5387,8 +5464,6 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
eq->cur_eqe_ba = eq->l0_dma;
eq->nxt_eqe_ba = 0;
- memset(eq->bt_l0, 0, eq->entries * eq->eqe_size);
-
return 0;
}
@@ -5415,12 +5490,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
goto err_dma_alloc_l0;
if (mhop_num == 1) {
- if (ba_num > (bt_chk_sz / 8))
+ if (ba_num > (bt_chk_sz / BA_BYTE_LEN))
dev_err(dev, "ba_num %d is too large for 1 hop\n",
ba_num);
/* alloc buf */
- for (i = 0; i < bt_chk_sz / 8; i++) {
+ for (i = 0; i < bt_chk_sz / BA_BYTE_LEN; i++) {
if (eq_buf_cnt + 1 < ba_num) {
size = buf_chk_sz;
} else {
@@ -5444,7 +5519,7 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
} else if (mhop_num == 2) {
/* alloc L1 BT and buf */
- for (i = 0; i < bt_chk_sz / 8; i++) {
+ for (i = 0; i < bt_chk_sz / BA_BYTE_LEN; i++) {
eq->bt_l1[i] = dma_alloc_coherent(dev, bt_chk_sz,
&(eq->l1_dma[i]),
GFP_KERNEL);
@@ -5452,8 +5527,8 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
goto err_dma_alloc_l1;
*(eq->bt_l0 + i) = eq->l1_dma[i];
- for (j = 0; j < bt_chk_sz / 8; j++) {
- idx = i * bt_chk_sz / 8 + j;
+ for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
+ idx = i * bt_chk_sz / BA_BYTE_LEN + j;
if (eq_buf_cnt + 1 < ba_num) {
size = buf_chk_sz;
} else {
@@ -5498,8 +5573,8 @@ err_dma_alloc_l1:
dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
eq->l1_dma[i]);
- for (j = 0; j < bt_chk_sz / 8; j++) {
- idx = i * bt_chk_sz / 8 + j;
+ for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
+ idx = i * bt_chk_sz / BA_BYTE_LEN + j;
dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
eq->buf_dma[idx]);
}
@@ -5522,11 +5597,11 @@ err_dma_alloc_buf:
dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
eq->l1_dma[i]);
- for (j = 0; j < bt_chk_sz / 8; j++) {
+ for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) {
if (i == record_i && j >= record_j)
break;
- idx = i * bt_chk_sz / 8 + j;
+ idx = i * bt_chk_sz / BA_BYTE_LEN + j;
dma_free_coherent(dev, buf_chk_sz,
eq->buf[idx],
eq->buf_dma[idx]);
@@ -5972,18 +6047,19 @@ out:
return ret;
}
-static int find_empty_entry(struct hns_roce_idx_que *idx_que)
+static int find_empty_entry(struct hns_roce_idx_que *idx_que,
+ unsigned long size)
{
- int bit_num;
- int i;
+ int wqe_idx;
- /* bitmap[i] is set zero if all bits are allocated */
- for (i = 0; idx_que->bitmap[i] == 0; ++i)
- ;
- bit_num = ffs(idx_que->bitmap[i]);
- idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1));
+ if (unlikely(bitmap_full(idx_que->bitmap, size)))
+ return -ENOSPC;
+
+ wqe_idx = find_first_zero_bit(idx_que->bitmap, size);
- return i * sizeof(u64) * 8 + (bit_num - 1);
+ bitmap_set(idx_que->bitmap, wqe_idx, 1);
+
+ return wqe_idx;
}
static void fill_idx_queue(struct hns_roce_idx_que *idx_que,
@@ -6029,7 +6105,13 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
break;
}
- wqe_idx = find_empty_entry(&srq->idx_que);
+ wqe_idx = find_empty_entry(&srq->idx_que, srq->max);
+ if (wqe_idx < 0) {
+ ret = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
fill_idx_queue(&srq->idx_que, ind, wqe_idx);
wqe = get_srq_wqe(srq, wqe_idx);
dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
@@ -6041,9 +6123,9 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
}
if (i < srq->max_gs) {
- dseg->len = 0;
- dseg->lkey = cpu_to_le32(0x100);
- dseg->addr = 0;
+ dseg[i].len = 0;
+ dseg[i].lkey = cpu_to_le32(0x100);
+ dseg[i].addr = 0;
}
srq->wrid[wqe_idx] = wr->wr_id;
@@ -6059,7 +6141,8 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
*/
wmb();
- srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << 24 | srq->srqn;
+ srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S |
+ (srq->srqn & V2_DB_BYTE_4_TAG_M);
srq_db.parameter = srq->head;
hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l);
@@ -6301,6 +6384,7 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
if (!hr_dev)
return 0;
+ hr_dev->is_reset = true;
hr_dev->active = false;
hr_dev->dis_db = true;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index edfdbe2ce0db..478f5a5b7aa1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -54,7 +54,7 @@
#define HNS_ROCE_V2_MAX_CQ_NUM 0x100000
#define HNS_ROCE_V2_MAX_CQC_TIMER_NUM 0x100
#define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000
-#define HNS_ROCE_V2_MAX_CQE_NUM 0x10000
+#define HNS_ROCE_V2_MAX_CQE_NUM 0x400000
#define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000
#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100
#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff
@@ -241,6 +241,7 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_POST_MB = 0x8504,
HNS_ROCE_OPC_QUERY_MB_ST = 0x8505,
HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506,
+ HNS_ROCE_OPC_FUNC_CLEAR = 0x8508,
HNS_ROCE_OPC_CLR_SCCC = 0x8509,
HNS_ROCE_OPC_QUERY_SCCC = 0x850a,
HNS_ROCE_OPC_RESET_SCCC = 0x850b,
@@ -886,6 +887,10 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_256_SQ_FLUSH_IDX_S 16
#define V2_QPC_BYTE_256_SQ_FLUSH_IDX_M GENMASK(31, 16)
+#define V2_QP_RWE_S 1 /* rdma write enable */
+#define V2_QP_RRE_S 2 /* rdma read enable */
+#define V2_QP_ATE_S 3 /* rdma atomic enable */
+
struct hns_roce_v2_cqe {
__le32 byte_4;
union {
@@ -1226,6 +1231,22 @@ struct hns_roce_query_fw_info {
__le32 rsv[5];
};
+struct hns_roce_func_clear {
+ __le32 rst_funcid_en;
+ __le32 func_done;
+ __le32 rsv[4];
+};
+
+#define FUNC_CLEAR_RST_FUN_DONE_S 0
+/* Each physical function manages up to 248 virtual functionsï¼›
+ * it takes up to 100ms for each function to execute clearï¼›
+ * if an abnormal reset occurs, it is executed twice at most;
+ * so it takes up to 249 * 2 * 100ms.
+ */
+#define HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS (249 * 2 * 100)
+#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL 40
+#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT 20
+
struct hns_roce_cfg_llm_a {
__le32 base_addr_l;
__le32 base_addr_h;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 8da5f18bf820..1e4ba48f5613 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -57,17 +57,16 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index)
{
return gid_index * hr_dev->caps.num_ports + port;
}
-EXPORT_SYMBOL_GPL(hns_get_gid_index);
static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
{
u8 phy_port;
u32 i = 0;
- if (!memcmp(hr_dev->dev_addr[port], addr, MAC_ADDR_OCTET_NUM))
+ if (!memcmp(hr_dev->dev_addr[port], addr, ETH_ALEN))
return 0;
- for (i = 0; i < MAC_ADDR_OCTET_NUM; i++)
+ for (i = 0; i < ETH_ALEN; i++)
hr_dev->dev_addr[port][i] = addr[i];
phy_port = hr_dev->iboe.phy_port[port];
@@ -78,18 +77,13 @@ static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context)
{
struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
u8 port = attr->port_num - 1;
- unsigned long flags;
int ret;
if (port >= hr_dev->caps.num_ports)
return -EINVAL;
- spin_lock_irqsave(&hr_dev->iboe.lock, flags);
-
ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &attr->gid, attr);
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
-
return ret;
}
@@ -98,18 +92,13 @@ static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context)
struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
struct ib_gid_attr zattr = { };
u8 port = attr->port_num - 1;
- unsigned long flags;
int ret;
if (port >= hr_dev->caps.num_ports)
return -EINVAL;
- spin_lock_irqsave(&hr_dev->iboe.lock, flags);
-
ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &zgid, &zattr);
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
-
return ret;
}
@@ -272,7 +261,8 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
props->active_mtu = mtu ? min(props->max_mtu, mtu) : IB_MTU_256;
props->state = (netif_running(net_dev) && netif_carrier_ok(net_dev)) ?
IB_PORT_ACTIVE : IB_PORT_DOWN;
- props->phys_state = (props->state == IB_PORT_ACTIVE) ? 5 : 3;
+ props->phys_state = (props->state == IB_PORT_ACTIVE) ?
+ HNS_ROCE_PHY_LINKUP : HNS_ROCE_PHY_DISABLED;
spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
@@ -319,7 +309,7 @@ static int hns_roce_modify_port(struct ib_device *ib_dev, u8 port_num, int mask,
static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
- int ret = 0;
+ int ret;
struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
struct hns_roce_ib_alloc_ucontext_resp resp = {};
struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
@@ -423,6 +413,11 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
}
static const struct ib_device_ops hns_roce_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_HNS,
+ .uverbs_abi_ver = 1,
+ .uverbs_no_driver_id_binding = 1,
+
.add_gid = hns_roce_add_gid,
.alloc_pd = hns_roce_alloc_pd,
.alloc_ucontext = hns_roce_alloc_ucontext,
@@ -451,6 +446,7 @@ static const struct ib_device_ops hns_roce_dev_ops = {
.reg_user_mr = hns_roce_reg_user_mr,
INIT_RDMA_OBJ_SIZE(ib_ah, hns_roce_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, hns_roce_cq, ib_cq),
INIT_RDMA_OBJ_SIZE(ib_pd, hns_roce_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, hns_roce_ucontext, ibucontext),
};
@@ -489,14 +485,12 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev = &hr_dev->ib_dev;
- ib_dev->owner = THIS_MODULE;
ib_dev->node_type = RDMA_NODE_IB_CA;
ib_dev->dev.parent = dev;
ib_dev->phys_port_cnt = hr_dev->caps.num_ports;
ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey;
ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors;
- ib_dev->uverbs_abi_ver = 1;
ib_dev->uverbs_cmd_mask =
(1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -545,7 +539,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops);
}
- ib_dev->driver_id = RDMA_DRIVER_HNS;
ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
for (i = 0; i < hr_dev->caps.num_ports; i++) {
@@ -980,7 +973,6 @@ error_failed_cmq_init:
return ret;
}
-EXPORT_SYMBOL_GPL(hns_roce_init);
void hns_roce_exit(struct hns_roce_dev *hr_dev)
{
@@ -1001,7 +993,6 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev)
if (hr_dev->hw->reset)
hr_dev->hw->reset(hr_dev, false);
}
-EXPORT_SYMBOL_GPL(hns_roce_exit);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Wei Hu <xavier.huwei@huawei.com>");
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 6110ec408626..549e1a38dfe0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -47,7 +47,6 @@ unsigned long key_to_hw_index(u32 key)
{
return (key << 24) | (key >> 8);
}
-EXPORT_SYMBOL_GPL(key_to_hw_index);
static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev,
struct hns_roce_cmd_mailbox *mailbox,
@@ -66,7 +65,6 @@ int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT,
HNS_ROCE_CMD_TIMEOUT_MSECS);
}
-EXPORT_SYMBOL_GPL(hns_roce_hw2sw_mpt);
static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
unsigned long *seg)
@@ -293,7 +291,6 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
break;
}
}
-EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup);
static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
struct hns_roce_mr *mr, int err_loop_index,
@@ -314,11 +311,11 @@ static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
mr->pbl_l1_dma_addr[i]);
- for (j = 0; j < pbl_bt_sz / 8; j++) {
+ for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
if (i == loop_i && j >= loop_j)
break;
- bt_idx = i * pbl_bt_sz / 8 + j;
+ bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j;
dma_free_coherent(dev, pbl_bt_sz,
mr->pbl_bt_l2[bt_idx],
mr->pbl_l2_dma_addr[bt_idx]);
@@ -329,8 +326,8 @@ static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
mr->pbl_l1_dma_addr[i]);
- for (j = 0; j < pbl_bt_sz / 8; j++) {
- bt_idx = i * pbl_bt_sz / 8 + j;
+ for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
+ bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j;
dma_free_coherent(dev, pbl_bt_sz,
mr->pbl_bt_l2[bt_idx],
mr->pbl_l2_dma_addr[bt_idx]);
@@ -533,7 +530,7 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
{
struct device *dev = hr_dev->dev;
unsigned long index = 0;
- int ret = 0;
+ int ret;
/* Allocate a key for mr from mr_table */
ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
@@ -559,7 +556,8 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
mr->pbl_l0_dma_addr = 0;
} else {
if (!hr_dev->caps.pbl_hop_num) {
- mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
+ mr->pbl_buf = dma_alloc_coherent(dev,
+ npages * BA_BYTE_LEN,
&(mr->pbl_dma_addr),
GFP_KERNEL);
if (!mr->pbl_buf)
@@ -590,9 +588,8 @@ static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
if (mhop_num == HNS_ROCE_HOP_NUM_0)
return;
- /* hop_num = 1 */
if (mhop_num == 1) {
- dma_free_coherent(dev, (unsigned int)(npages * 8),
+ dma_free_coherent(dev, (unsigned int)(npages * BA_BYTE_LEN),
mr->pbl_buf, mr->pbl_dma_addr);
return;
}
@@ -603,12 +600,13 @@ static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
if (mhop_num == 2) {
for (i = 0; i < mr->l0_chunk_last_num; i++) {
if (i == mr->l0_chunk_last_num - 1) {
- npages_allocated = i * (pbl_bt_sz / 8);
+ npages_allocated =
+ i * (pbl_bt_sz / BA_BYTE_LEN);
dma_free_coherent(dev,
- (npages - npages_allocated) * 8,
- mr->pbl_bt_l1[i],
- mr->pbl_l1_dma_addr[i]);
+ (npages - npages_allocated) * BA_BYTE_LEN,
+ mr->pbl_bt_l1[i],
+ mr->pbl_l1_dma_addr[i]);
break;
}
@@ -621,16 +619,17 @@ static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
mr->pbl_l1_dma_addr[i]);
- for (j = 0; j < pbl_bt_sz / 8; j++) {
- bt_idx = i * (pbl_bt_sz / 8) + j;
+ for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
+ bt_idx = i * (pbl_bt_sz / BA_BYTE_LEN) + j;
if ((i == mr->l0_chunk_last_num - 1)
&& j == mr->l1_chunk_last_num - 1) {
npages_allocated = bt_idx *
- (pbl_bt_sz / 8);
+ (pbl_bt_sz / BA_BYTE_LEN);
dma_free_coherent(dev,
- (npages - npages_allocated) * 8,
+ (npages - npages_allocated) *
+ BA_BYTE_LEN,
mr->pbl_bt_l2[bt_idx],
mr->pbl_l2_dma_addr[bt_idx]);
@@ -675,7 +674,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
npages = ib_umem_page_count(mr->umem);
if (!hr_dev->caps.pbl_hop_num)
- dma_free_coherent(dev, (unsigned int)(npages * 8),
+ dma_free_coherent(dev,
+ (unsigned int)(npages * BA_BYTE_LEN),
mr->pbl_buf, mr->pbl_dma_addr);
else
hns_roce_mhop_free(hr_dev, mr);
@@ -1059,6 +1059,7 @@ static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev,
for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
page_addr = sg_page_iter_dma_address(&sg_iter);
if (!hr_dev->caps.pbl_hop_num) {
+ /* for hip06, page addr is aligned to 4K */
mr->pbl_buf[i++] = page_addr >> 12;
} else if (hr_dev->caps.pbl_hop_num == 1) {
mr->pbl_buf[i++] = page_addr;
@@ -1069,7 +1070,7 @@ static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev,
mr->pbl_bt_l2[i][j] = page_addr;
j++;
- if (j >= (pbl_bt_sz / 8)) {
+ if (j >= (pbl_bt_sz / BA_BYTE_LEN)) {
i++;
j = 0;
}
@@ -1117,7 +1118,8 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
} else {
u64 pbl_size = 1;
- bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8;
+ bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) /
+ BA_BYTE_LEN;
for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
pbl_size *= bt_size;
if (n > pbl_size) {
@@ -1293,9 +1295,7 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
} else {
hns_roce_mr_free(hr_dev, mr);
- if (mr->umem)
- ib_umem_release(mr->umem);
-
+ ib_umem_release(mr->umem);
kfree(mr);
}
@@ -1491,3 +1491,119 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw)
return 0;
}
+
+void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift,
+ int buf_pg_shift)
+{
+ hns_roce_hem_list_init(&mtr->hem_list, bt_pg_shift);
+ mtr->buf_pg_shift = buf_pg_shift;
+}
+
+void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr)
+{
+ hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
+}
+
+static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr, dma_addr_t *bufs,
+ struct hns_roce_buf_region *r)
+{
+ int offset;
+ int count;
+ int npage;
+ u64 *mtts;
+ int end;
+ int i;
+
+ offset = r->offset;
+ end = offset + r->count;
+ npage = 0;
+ while (offset < end) {
+ mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
+ offset, &count, NULL);
+ if (!mtts)
+ return -ENOBUFS;
+
+ /* Save page addr, low 12 bits : 0 */
+ for (i = 0; i < count; i++) {
+ if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
+ mtts[i] = cpu_to_le64(bufs[npage] >>
+ PAGE_ADDR_SHIFT);
+ else
+ mtts[i] = cpu_to_le64(bufs[npage]);
+
+ npage++;
+ }
+ offset += count;
+ }
+
+ return 0;
+}
+
+int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ dma_addr_t **bufs, struct hns_roce_buf_region *regions,
+ int region_cnt)
+{
+ struct hns_roce_buf_region *r;
+ int ret;
+ int i;
+
+ ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, regions,
+ region_cnt);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < region_cnt; i++) {
+ r = &regions[i];
+ ret = hns_roce_write_mtr(hr_dev, mtr, bufs[i], r);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "write mtr[%d/%d] err %d,offset=%d.\n",
+ i, region_cnt, ret, r->offset);
+ goto err_write;
+ }
+ }
+
+ return 0;
+
+err_write:
+ hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
+
+ return ret;
+}
+
+int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
+{
+ u64 *mtts = mtt_buf;
+ int mtt_count;
+ int total = 0;
+ u64 *addr;
+ int npage;
+ int left;
+
+ if (mtts == NULL || mtt_max < 1)
+ goto done;
+
+ left = mtt_max;
+ while (left > 0) {
+ mtt_count = 0;
+ addr = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
+ offset + total,
+ &mtt_count, NULL);
+ if (!addr || !mtt_count)
+ goto done;
+
+ npage = min(mtt_count, left);
+ memcpy(&mtts[total], addr, BA_BYTE_LEN * npage);
+ left -= npage;
+ total += npage;
+ }
+
+done:
+ if (base_addr)
+ *base_addr = mtr->hem_list.root_ba;
+
+ return total;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index 813401384d78..912b89b4da34 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -83,18 +83,16 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
return 0;
}
-EXPORT_SYMBOL_GPL(hns_roce_alloc_pd);
void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn);
}
-EXPORT_SYMBOL_GPL(hns_roce_dealloc_pd);
int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
{
struct resource *res;
- int ret = 0;
+ int ret;
/* Using bitmap to manager UAR index */
ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->logic_idx);
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 8db2817a249e..e0424029b058 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -64,7 +64,6 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
if (atomic_dec_and_test(&qp->refcount))
complete(&qp->free);
}
-EXPORT_SYMBOL_GPL(hns_roce_qp_event);
static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
enum hns_roce_event type)
@@ -139,7 +138,6 @@ enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state)
return HNS_ROCE_QP_NUM_STATE;
}
}
-EXPORT_SYMBOL_GPL(to_hns_roce_state);
static int hns_roce_gsi_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn,
struct hns_roce_qp *hr_qp)
@@ -242,7 +240,6 @@ void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
__xa_erase(xa, hr_qp->qpn & (hr_dev->caps.num_qps - 1));
xa_unlock_irqrestore(xa, flags);
}
-EXPORT_SYMBOL_GPL(hns_roce_qp_remove);
void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
@@ -257,22 +254,19 @@ void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
hns_roce_table_put(hr_dev, &qp_table->trrl_table,
hr_qp->qpn);
hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
- hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn);
}
}
-EXPORT_SYMBOL_GPL(hns_roce_qp_free);
void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
int cnt)
{
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
- if (base_qpn < SQP_NUM)
+ if (base_qpn < hr_dev->caps.reserved_qps)
return;
hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, BITMAP_RR);
}
-EXPORT_SYMBOL_GPL(hns_roce_release_range_qp);
static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
struct ib_qp_cap *cap, bool is_user, int has_rq,
@@ -392,8 +386,8 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sq.wqe_shift), PAGE_SIZE);
} else {
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
- hr_qp->sge.sge_cnt =
- max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num);
+ hr_qp->sge.sge_cnt = ex_sge_num ?
+ max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0;
hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
hr_qp->rq.wqe_shift), page_size) +
HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
@@ -422,6 +416,91 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
return 0;
}
+static int split_wqe_buf_region(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_buf_region *regions,
+ int region_max, int page_shift)
+{
+ int page_size = 1 << page_shift;
+ bool is_extend_sge;
+ int region_cnt = 0;
+ int buf_size;
+ int buf_cnt;
+
+ if (hr_qp->buff_size < 1 || region_max < 1)
+ return region_cnt;
+
+ if (hr_qp->sge.sge_cnt > 0)
+ is_extend_sge = true;
+ else
+ is_extend_sge = false;
+
+ /* sq region */
+ if (is_extend_sge)
+ buf_size = hr_qp->sge.offset - hr_qp->sq.offset;
+ else
+ buf_size = hr_qp->rq.offset - hr_qp->sq.offset;
+
+ if (buf_size > 0 && region_cnt < region_max) {
+ buf_cnt = DIV_ROUND_UP(buf_size, page_size);
+ hns_roce_init_buf_region(&regions[region_cnt],
+ hr_dev->caps.wqe_sq_hop_num,
+ hr_qp->sq.offset / page_size,
+ buf_cnt);
+ region_cnt++;
+ }
+
+ /* sge region */
+ if (is_extend_sge) {
+ buf_size = hr_qp->rq.offset - hr_qp->sge.offset;
+ if (buf_size > 0 && region_cnt < region_max) {
+ buf_cnt = DIV_ROUND_UP(buf_size, page_size);
+ hns_roce_init_buf_region(&regions[region_cnt],
+ hr_dev->caps.wqe_sge_hop_num,
+ hr_qp->sge.offset / page_size,
+ buf_cnt);
+ region_cnt++;
+ }
+ }
+
+ /* rq region */
+ buf_size = hr_qp->buff_size - hr_qp->rq.offset;
+ if (buf_size > 0) {
+ buf_cnt = DIV_ROUND_UP(buf_size, page_size);
+ hns_roce_init_buf_region(&regions[region_cnt],
+ hr_dev->caps.wqe_rq_hop_num,
+ hr_qp->rq.offset / page_size,
+ buf_cnt);
+ region_cnt++;
+ }
+
+ return region_cnt;
+}
+
+static int calc_wqe_bt_page_shift(struct hns_roce_dev *hr_dev,
+ struct hns_roce_buf_region *regions,
+ int region_cnt)
+{
+ int bt_pg_shift;
+ int ba_num;
+ int ret;
+
+ bt_pg_shift = PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz;
+
+ /* all root ba entries must in one bt page */
+ do {
+ ba_num = (1 << bt_pg_shift) / BA_BYTE_LEN;
+ ret = hns_roce_hem_list_calc_root_ba(regions, region_cnt,
+ ba_num);
+ if (ret <= ba_num)
+ break;
+
+ bt_pg_shift++;
+ } while (ret > ba_num);
+
+ return bt_pg_shift - PAGE_SHIFT;
+}
+
static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
struct ib_qp_cap *cap,
struct hns_roce_qp *hr_qp)
@@ -534,15 +613,17 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
struct ib_udata *udata, unsigned long sqpn,
struct hns_roce_qp *hr_qp)
{
+ dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = { 0 };
struct device *dev = hr_dev->dev;
struct hns_roce_ib_create_qp ucmd;
struct hns_roce_ib_create_qp_resp resp = {};
struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(
udata, struct hns_roce_ucontext, ibucontext);
+ struct hns_roce_buf_region *r;
unsigned long qpn = 0;
- int ret = 0;
u32 page_shift;
- u32 npages;
+ int buf_count;
+ int ret;
int i;
mutex_init(&hr_qp->mutex);
@@ -596,6 +677,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
init_attr->cap.max_recv_sge];
}
+ page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
if (udata) {
if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
dev_err(dev, "ib_copy_from_udata error for create qp\n");
@@ -617,32 +699,28 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
ret = PTR_ERR(hr_qp->umem);
goto err_rq_sge_list;
}
-
- hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
- page_shift = PAGE_SHIFT;
- if (hr_dev->caps.mtt_buf_pg_sz) {
- npages = (ib_umem_page_count(hr_qp->umem) +
- (1 << hr_dev->caps.mtt_buf_pg_sz) - 1) /
- (1 << hr_dev->caps.mtt_buf_pg_sz);
- page_shift += hr_dev->caps.mtt_buf_pg_sz;
- ret = hns_roce_mtt_init(hr_dev, npages,
- page_shift,
- &hr_qp->mtt);
- } else {
- ret = hns_roce_mtt_init(hr_dev,
- ib_umem_page_count(hr_qp->umem),
- page_shift, &hr_qp->mtt);
- }
+ hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp,
+ hr_qp->regions, ARRAY_SIZE(hr_qp->regions),
+ page_shift);
+ ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list,
+ hr_qp->region_cnt);
if (ret) {
- dev_err(dev, "hns_roce_mtt_init error for create qp\n");
- goto err_buf;
+ dev_err(dev, "alloc buf_list error for create qp\n");
+ goto err_alloc_list;
}
- ret = hns_roce_ib_umem_write_mtt(hr_dev, &hr_qp->mtt,
- hr_qp->umem);
- if (ret) {
- dev_err(dev, "hns_roce_ib_umem_write_mtt error for create qp\n");
- goto err_mtt;
+ for (i = 0; i < hr_qp->region_cnt; i++) {
+ r = &hr_qp->regions[i];
+ buf_count = hns_roce_get_umem_bufs(hr_dev,
+ buf_list[i], r->count, r->offset,
+ hr_qp->umem, page_shift);
+ if (buf_count != r->count) {
+ dev_err(dev,
+ "get umem buf err, expect %d,ret %d.\n",
+ r->count, buf_count);
+ ret = -ENOBUFS;
+ goto err_get_bufs;
+ }
}
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
@@ -653,7 +731,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
&hr_qp->sdb);
if (ret) {
dev_err(dev, "sq record doorbell map failed!\n");
- goto err_mtt;
+ goto err_get_bufs;
}
/* indicate kernel supports sq record db */
@@ -715,7 +793,6 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
}
/* Allocate QP buf */
- page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size,
(1 << page_shift) * 2,
&hr_qp->hr_buf, page_shift)) {
@@ -723,21 +800,28 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
ret = -ENOMEM;
goto err_db;
}
-
- hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
- /* Write MTT */
- ret = hns_roce_mtt_init(hr_dev, hr_qp->hr_buf.npages,
- hr_qp->hr_buf.page_shift, &hr_qp->mtt);
+ hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp,
+ hr_qp->regions, ARRAY_SIZE(hr_qp->regions),
+ page_shift);
+ ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list,
+ hr_qp->region_cnt);
if (ret) {
- dev_err(dev, "hns_roce_mtt_init error for kernel create qp\n");
- goto err_buf;
+ dev_err(dev, "alloc buf_list error for create qp!\n");
+ goto err_alloc_list;
}
- ret = hns_roce_buf_write_mtt(hr_dev, &hr_qp->mtt,
- &hr_qp->hr_buf);
- if (ret) {
- dev_err(dev, "hns_roce_buf_write_mtt error for kernel create qp\n");
- goto err_mtt;
+ for (i = 0; i < hr_qp->region_cnt; i++) {
+ r = &hr_qp->regions[i];
+ buf_count = hns_roce_get_kmem_bufs(hr_dev,
+ buf_list[i], r->count, r->offset,
+ &hr_qp->hr_buf);
+ if (buf_count != r->count) {
+ dev_err(dev,
+ "get kmem buf err, expect %d,ret %d.\n",
+ r->count, buf_count);
+ ret = -ENOBUFS;
+ goto err_get_bufs;
+ }
}
hr_qp->sq.wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64),
@@ -761,6 +845,17 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
}
}
+ hr_qp->wqe_bt_pg_shift = calc_wqe_bt_page_shift(hr_dev, hr_qp->regions,
+ hr_qp->region_cnt);
+ hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift,
+ page_shift);
+ ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list,
+ hr_qp->regions, hr_qp->region_cnt);
+ if (ret) {
+ dev_err(dev, "mtr attach error for create qp\n");
+ goto err_mtr;
+ }
+
if (init_attr->qp_type == IB_QPT_GSI &&
hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
/* In v1 engine, GSI QP context in RoCE engine's register */
@@ -796,6 +891,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
}
hr_qp->event = hns_roce_ib_qp_event;
+ hns_roce_free_buf_list(buf_list, hr_qp->region_cnt);
return 0;
@@ -810,6 +906,9 @@ err_qpn:
if (!sqpn)
hns_roce_release_range_qp(hr_dev, qpn, 1);
+err_mtr:
+ hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr);
+
err_wrid:
if (udata) {
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
@@ -829,14 +928,13 @@ err_sq_dbmap:
hns_roce_qp_has_sq(init_attr))
hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
-err_mtt:
- hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
+err_get_bufs:
+ hns_roce_free_buf_list(buf_list, hr_qp->region_cnt);
-err_buf:
- if (hr_qp->umem)
- ib_umem_release(hr_qp->umem);
- else
+err_alloc_list:
+ if (!hr_qp->umem)
hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
+ ib_umem_release(hr_qp->umem);
err_db:
if (!udata && hns_roce_qp_has_rq(init_attr) &&
@@ -923,7 +1021,6 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
return &hr_qp->ibqp;
}
-EXPORT_SYMBOL_GPL(hns_roce_create_qp);
int to_hr_qp_type(int qp_type)
{
@@ -942,7 +1039,6 @@ int to_hr_qp_type(int qp_type)
return transport_type;
}
-EXPORT_SYMBOL_GPL(to_hr_qp_type);
int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
@@ -1062,7 +1158,6 @@ void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
}
}
-EXPORT_SYMBOL_GPL(hns_roce_lock_cqs);
void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
struct hns_roce_cq *recv_cq) __releases(&send_cq->lock)
@@ -1079,7 +1174,6 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
spin_unlock_irq(&recv_cq->lock);
}
}
-EXPORT_SYMBOL_GPL(hns_roce_unlock_cqs);
static void *get_wqe(struct hns_roce_qp *hr_qp, int offset)
{
@@ -1091,20 +1185,17 @@ void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n)
{
return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift));
}
-EXPORT_SYMBOL_GPL(get_recv_wqe);
void *get_send_wqe(struct hns_roce_qp *hr_qp, int n)
{
return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift));
}
-EXPORT_SYMBOL_GPL(get_send_wqe);
void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n)
{
return hns_roce_buf_offset(&hr_qp->hr_buf, hr_qp->sge.offset +
(n << hr_qp->sge.sge_shift));
}
-EXPORT_SYMBOL_GPL(get_send_extend_sge);
bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
struct ib_cq *ib_cq)
@@ -1123,7 +1214,6 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
return cur + nreq >= hr_wq->max_post;
}
-EXPORT_SYMBOL_GPL(hns_roce_wq_overflow);
int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
{
@@ -1135,11 +1225,7 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
mutex_init(&qp_table->scc_mutex);
xa_init(&hr_dev->qp_table_xa);
- /* In hw v1, a port include two SQP, six ports total 12 */
- if (hr_dev->caps.max_sq_sg <= 2)
- reserved_from_bot = SQP_NUM;
- else
- reserved_from_bot = hr_dev->caps.reserved_qps;
+ reserved_from_bot = hr_dev->caps.reserved_qps;
ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps,
hr_dev->caps.num_qps - 1, reserved_from_bot,
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index b3421b1f21e0..38bb548eaa6d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -30,7 +30,6 @@ void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
if (atomic_dec_and_test(&srq->refcount))
complete(&srq->free);
}
-EXPORT_SYMBOL_GPL(hns_roce_srq_event);
static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
enum hns_roce_event event_type)
@@ -181,28 +180,19 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
{
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct hns_roce_idx_que *idx_que = &srq->idx_que;
- u32 bitmap_num;
- int i;
- bitmap_num = HNS_ROCE_ALOGN_UP(srq->max, 8 * sizeof(u64));
-
- idx_que->bitmap = kcalloc(1, bitmap_num / 8, GFP_KERNEL);
+ idx_que->bitmap = bitmap_zalloc(srq->max, GFP_KERNEL);
if (!idx_que->bitmap)
return -ENOMEM;
- bitmap_num = bitmap_num / (8 * sizeof(u64));
-
idx_que->buf_size = srq->idx_que.buf_size;
if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2,
&idx_que->idx_buf, page_shift)) {
- kfree(idx_que->bitmap);
+ bitmap_free(idx_que->bitmap);
return -ENOMEM;
}
- for (i = 0; i < bitmap_num; i++)
- idx_que->bitmap[i] = ~(0UL);
-
return 0;
}
@@ -264,8 +254,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
} else
ret = hns_roce_mtt_init(hr_dev,
ib_umem_page_count(srq->umem),
- srq->umem->page_shift,
- &srq->mtt);
+ PAGE_SHIFT, &srq->mtt);
if (ret)
goto err_buf;
@@ -291,10 +280,9 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
ret = hns_roce_mtt_init(hr_dev, npages,
page_shift, &srq->idx_que.mtt);
} else {
- ret = hns_roce_mtt_init(hr_dev,
- ib_umem_page_count(srq->idx_que.umem),
- srq->idx_que.umem->page_shift,
- &srq->idx_que.mtt);
+ ret = hns_roce_mtt_init(
+ hr_dev, ib_umem_page_count(srq->idx_que.umem),
+ PAGE_SHIFT, &srq->idx_que.mtt);
}
if (ret) {
@@ -391,21 +379,19 @@ err_idx_buf:
hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
err_idx_mtt:
- if (udata)
- ib_umem_release(srq->idx_que.umem);
+ ib_umem_release(srq->idx_que.umem);
err_create_idx:
hns_roce_buf_free(hr_dev, srq->idx_que.buf_size,
&srq->idx_que.idx_buf);
- kfree(srq->idx_que.bitmap);
+ bitmap_free(srq->idx_que.bitmap);
err_srq_mtt:
hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
err_buf:
- if (udata)
- ib_umem_release(srq->umem);
- else
+ ib_umem_release(srq->umem);
+ if (!udata)
hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
return ret;
@@ -419,15 +405,15 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
hns_roce_srq_free(hr_dev, srq);
hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
- if (ibsrq->uobject) {
+ if (udata) {
hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
- ib_umem_release(srq->idx_que.umem);
- ib_umem_release(srq->umem);
} else {
kvfree(srq->wrid);
hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift,
&srq->buf);
}
+ ib_umem_release(srq->idx_que.umem);
+ ib_umem_release(srq->umem);
}
int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev)
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 700a5d06b60c..2d6a378e8560 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -4279,11 +4279,11 @@ static void i40iw_qhash_ctrl(struct i40iw_device *iwdev,
/* if not found then add a child listener if interface is going up */
if (!ifup)
return;
- child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
+ child_listen_node = kmemdup(parent_listen_node,
+ sizeof(*child_listen_node), GFP_ATOMIC);
if (!child_listen_node)
return;
node_allocated = true;
- memcpy(child_listen_node, parent_listen_node, sizeof(*child_listen_node));
memcpy(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 5689d742bafb..d169a8031375 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -772,6 +772,8 @@ static int i40iw_query_qp(struct ib_qp *ibqp,
struct i40iw_qp *iwqp = to_iwqp(ibqp);
struct i40iw_sc_qp *qp = &iwqp->sc_qp;
+ attr->qp_state = iwqp->ibqp_state;
+ attr->cur_qp_state = attr->qp_state;
attr->qp_access_flags = 0;
attr->cap.max_send_wr = qp->qp_uk.sq_size;
attr->cap.max_recv_wr = qp->qp_uk.rq_size;
@@ -1064,44 +1066,38 @@ void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq)
* @ib_cq: cq pointer
* @udata: user data or NULL for kernel object
*/
-static int i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+static void i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct i40iw_cq *iwcq;
struct i40iw_device *iwdev;
struct i40iw_sc_cq *cq;
- if (!ib_cq) {
- i40iw_pr_err("ib_cq == NULL\n");
- return 0;
- }
-
iwcq = to_iwcq(ib_cq);
iwdev = to_iwdev(ib_cq->device);
cq = &iwcq->sc_cq;
i40iw_cq_wq_destroy(iwdev, cq);
cq_free_resources(iwdev, iwcq);
- kfree(iwcq);
i40iw_rem_devusecount(iwdev);
- return 0;
}
/**
* i40iw_create_cq - create cq
- * @ibdev: device pointer from stack
+ * @ibcq: CQ allocated
* @attr: attributes for cq
* @udata: user data
*/
-static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+static int i40iw_create_cq(struct ib_cq *ibcq,
+ const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
struct i40iw_device *iwdev = to_iwdev(ibdev);
- struct i40iw_cq *iwcq;
+ struct i40iw_cq *iwcq = to_iwcq(ibcq);
struct i40iw_pbl *iwpbl;
u32 cq_num = 0;
struct i40iw_sc_cq *cq;
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_cq_init_info info;
+ struct i40iw_cq_init_info info = {};
enum i40iw_status_code status;
struct i40iw_cqp_request *cqp_request;
struct cqp_commands_info *cqp_info;
@@ -1111,22 +1107,16 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev,
int entries = attr->cqe;
if (iwdev->closing)
- return ERR_PTR(-ENODEV);
+ return -ENODEV;
if (entries > iwdev->max_cqe)
- return ERR_PTR(-EINVAL);
-
- iwcq = kzalloc(sizeof(*iwcq), GFP_KERNEL);
- if (!iwcq)
- return ERR_PTR(-ENOMEM);
-
- memset(&info, 0, sizeof(info));
+ return -EINVAL;
err_code = i40iw_alloc_resource(iwdev, iwdev->allocated_cqs,
iwdev->max_cq, &cq_num,
&iwdev->next_cq);
if (err_code)
- goto error;
+ return err_code;
cq = &iwcq->sc_cq;
cq->back_cq = (void *)iwcq;
@@ -1233,15 +1223,13 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev,
}
i40iw_add_devusecount(iwdev);
- return (struct ib_cq *)iwcq;
+ return 0;
cq_destroy:
i40iw_cq_wq_destroy(iwdev, cq);
cq_free_resources:
cq_free_resources(iwdev, iwcq);
-error:
- kfree(iwcq);
- return ERR_PTR(err_code);
+ return err_code;
}
/**
@@ -2018,8 +2006,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
struct cqp_commands_info *cqp_info;
u32 stag_idx;
- if (iwmr->region)
- ib_umem_release(iwmr->region);
+ ib_umem_release(iwmr->region);
if (iwmr->type != IW_MEMREG_TYPE_MEM) {
/* region is released. only test for userness. */
@@ -2655,6 +2642,11 @@ static int i40iw_query_pkey(struct ib_device *ibdev,
}
static const struct ib_device_ops i40iw_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_I40IW,
+ /* NOTE: Older kernels wrongly use 0 for the uverbs_abi_ver */
+ .uverbs_abi_ver = I40IW_ABI_VER,
+
.alloc_hw_stats = i40iw_alloc_hw_stats,
.alloc_mr = i40iw_alloc_mr,
.alloc_pd = i40iw_alloc_pd,
@@ -2694,6 +2686,7 @@ static const struct ib_device_ops i40iw_dev_ops = {
.reg_user_mr = i40iw_reg_user_mr,
.req_notify_cq = i40iw_req_notify_cq,
INIT_RDMA_OBJ_SIZE(ib_pd, i40iw_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_cq, i40iw_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, i40iw_ucontext, ibucontext),
};
@@ -2712,7 +2705,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
i40iw_pr_err("iwdev == NULL\n");
return NULL;
}
- iwibdev->ibdev.owner = THIS_MODULE;
iwdev->iwibdev = iwibdev;
iwibdev->iwdev = iwdev;
@@ -2771,9 +2763,6 @@ void i40iw_port_ibevent(struct i40iw_device *iwdev)
*/
void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
{
- if (!iwibdev)
- return;
-
ib_unregister_device(&iwibdev->ibdev);
wait_event_timeout(iwibdev->iwdev->close_wq,
!atomic64_read(&iwibdev->iwdev->use_count),
@@ -2795,7 +2784,6 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev)
return -ENOMEM;
iwibdev = iwdev->iwibdev;
rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
- iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW;
ret = ib_register_device(&iwibdev->ibdev, "i40iw%d");
if (ret)
goto error;
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 022a0b4ea452..a7d238d312f0 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -172,14 +172,14 @@ err_buf:
}
#define CQ_CREATE_FLAGS_SUPPORTED IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION
-struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
- struct mlx4_ib_cq *cq;
+ struct mlx4_ib_cq *cq = to_mcq(ibcq);
struct mlx4_uar *uar;
void *buf_addr;
int err;
@@ -187,14 +187,10 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
udata, struct mlx4_ib_ucontext, ibucontext);
if (entries < 1 || entries > dev->dev->caps.max_cqes)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED)
- return ERR_PTR(-EINVAL);
-
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
entries = roundup_pow_of_two(entries + 1);
cq->ibcq.cqe = entries - 1;
@@ -269,7 +265,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
goto err_cq_free;
}
- return &cq->ibcq;
+ return 0;
err_cq_free:
mlx4_cq_free(dev->dev, &cq->mcq);
@@ -281,19 +277,15 @@ err_dbmap:
err_mtt:
mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);
- if (udata)
- ib_umem_release(cq->umem);
- else
+ ib_umem_release(cq->umem);
+ if (!udata)
mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
err_db:
if (!udata)
mlx4_db_free(dev->dev, &cq->db);
-
err_cq:
- kfree(cq);
-
- return ERR_PTR(err);
+ return err;
}
static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
@@ -475,18 +467,15 @@ err_buf:
kfree(cq->resize_buf);
cq->resize_buf = NULL;
- if (cq->resize_umem) {
- ib_umem_release(cq->resize_umem);
- cq->resize_umem = NULL;
- }
-
+ ib_umem_release(cq->resize_umem);
+ cq->resize_umem = NULL;
out:
mutex_unlock(&cq->resize_mutex);
return err;
}
-int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(cq->device);
struct mlx4_ib_cq *mcq = to_mcq(cq);
@@ -501,15 +490,11 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
struct mlx4_ib_ucontext,
ibucontext),
&mcq->db);
- ib_umem_release(mcq->umem);
} else {
mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe);
mlx4_db_free(dev->dev, &mcq->db);
}
-
- kfree(mcq);
-
- return 0;
+ ib_umem_release(mcq->umem);
}
static void dump_cqe(void *cqe)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 25d09d53b51c..8790101facb7 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1089,7 +1089,8 @@ static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx,
if (!dev->ib_active)
return -EAGAIN;
- if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
+ if (ibdev->ops.uverbs_abi_ver ==
+ MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
resp_v3.qp_tab_size = dev->dev->caps.num_qps;
resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size;
resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
@@ -1111,7 +1112,7 @@ static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx,
INIT_LIST_HEAD(&context->wqn_ranges_list);
mutex_init(&context->wqn_ranges_mutex);
- if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
+ if (ibdev->ops.uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
else
err = ib_copy_to_udata(udata, &resp, sizeof(resp));
@@ -2509,6 +2510,10 @@ static void get_fw_ver_str(struct ib_device *device, char *str)
}
static const struct ib_device_ops mlx4_ib_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_MLX4,
+ .uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION,
+
.add_gid = mlx4_ib_add_gid,
.alloc_mr = mlx4_ib_alloc_mr,
.alloc_pd = mlx4_ib_alloc_pd,
@@ -2560,6 +2565,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = {
.resize_cq = mlx4_ib_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, mlx4_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_srq, mlx4_ib_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx4_ib_ucontext, ibucontext),
@@ -2642,7 +2648,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->dev = dev;
ibdev->bond_next_port = 0;
- ibdev->ib_dev.owner = THIS_MODULE;
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
ibdev->num_ports = num_ports;
@@ -2651,11 +2656,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev;
- if (dev->caps.userspace_caps)
- ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
- else
- ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
-
ibdev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -2729,6 +2729,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops);
}
+ if (!dev->caps.userspace_caps)
+ ibdev->ib_dev.ops.uverbs_abi_ver =
+ MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
+
mlx4_ib_alloc_eqs(dev, ibdev);
spin_lock_init(&iboe->lock);
@@ -2839,7 +2843,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_steer_free_bitmap;
rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group);
- ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4;
if (ib_register_device(&ibdev->ib_dev, "mlx4_%d"))
goto err_diag_counters;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 26897102057d..eb53bb4c0c91 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -743,10 +743,9 @@ int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
-struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
-int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
@@ -907,7 +906,7 @@ void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port);
struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
-int mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
+void mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
u32 wq_attr_mask, struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 355205a28544..753479285ce9 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -258,7 +258,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
int *num_of_mtts)
{
u64 block_shift = MLX4_MAX_MTT_SHIFT;
- u64 min_shift = umem->page_shift;
+ u64 min_shift = PAGE_SHIFT;
u64 last_block_aligned_end = 0;
u64 current_block_start = 0;
u64 first_block_start = 0;
@@ -295,8 +295,8 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
* in access to the wrong data.
*/
misalignment_bits =
- (start_va & (~(((u64)(BIT(umem->page_shift))) - 1ULL)))
- ^ current_block_start;
+ (start_va & (~(((u64)(PAGE_SIZE)) - 1ULL))) ^
+ current_block_start;
block_shift = min(alignment_of(misalignment_bits),
block_shift);
}
@@ -368,8 +368,7 @@ end:
}
static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start,
- u64 length, u64 virt_addr,
- int access_flags)
+ u64 length, int access_flags)
{
/*
* Force registering the memory as writable if the underlying pages
@@ -415,8 +414,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mr)
return ERR_PTR(-ENOMEM);
- mr->umem =
- mlx4_get_umem_mr(udata, start, length, virt_addr, access_flags);
+ mr->umem = mlx4_get_umem_mr(udata, start, length, access_flags);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
@@ -505,7 +503,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
ib_umem_release(mmr->umem);
- mmr->umem = mlx4_get_umem_mr(udata, start, length, virt_addr,
+ mmr->umem = mlx4_get_umem_mr(udata, start, length,
mr_access_flags);
if (IS_ERR(mmr->umem)) {
err = PTR_ERR(mmr->umem);
@@ -514,7 +512,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
goto release_mpt_entry;
}
n = ib_umem_page_count(mmr->umem);
- shift = mmr->umem->page_shift;
+ shift = PAGE_SHIFT;
err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
virt_addr, length, n, shift,
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 5221c0794d1d..82aff2f2fdc2 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1207,10 +1207,9 @@ err_mtt:
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
err_buf:
- if (qp->umem)
- ib_umem_release(qp->umem);
- else
+ if (!qp->umem)
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
+ ib_umem_release(qp->umem);
err_db:
if (!udata && qp_has_rq(init_attr))
@@ -1421,7 +1420,6 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
mlx4_ib_db_unmap_user(mcontext, &qp->db);
}
- ib_umem_release(qp->umem);
} else {
kvfree(qp->sq.wrid);
kvfree(qp->rq.wrid);
@@ -1432,6 +1430,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
if (qp->rq.wqe_cnt)
mlx4_db_free(dev->dev, &qp->db);
}
+ ib_umem_release(qp->umem);
del_gid_entries(qp);
}
@@ -4248,7 +4247,7 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr,
return err;
}
-int mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
+void mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibwq->device);
struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq);
@@ -4259,8 +4258,6 @@ int mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, udata);
kfree(qp);
-
- return 0;
}
struct ib_rwq_ind_table
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 4bf2946b9759..848db7264cc9 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -115,7 +115,7 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq,
return PTR_ERR(srq->umem);
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
- srq->umem->page_shift, &srq->mtt);
+ PAGE_SHIFT, &srq->mtt);
if (err)
goto err_buf;
@@ -204,10 +204,9 @@ err_mtt:
mlx4_mtt_cleanup(dev->dev, &srq->mtt);
err_buf:
- if (srq->umem)
- ib_umem_release(srq->umem);
- else
+ if (!srq->umem)
mlx4_buf_free(dev->dev, buf_size, &srq->buf);
+ ib_umem_release(srq->umem);
err_db:
if (!udata)
@@ -275,13 +274,13 @@ void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
struct mlx4_ib_ucontext,
ibucontext),
&msrq->db);
- ib_umem_release(msrq->umem);
} else {
kvfree(msrq->wrid);
mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,
&msrq->buf);
mlx4_db_free(dev->dev, &msrq->db);
}
+ ib_umem_release(msrq->umem);
}
void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 4efbbd2fce0c..45f48cde6b9d 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -884,15 +884,15 @@ static void notify_soft_wc_handler(struct work_struct *work)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
-struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
u32 out[MLX5_ST_SZ_DW(create_cq_out)];
- struct mlx5_ib_cq *cq;
int uninitialized_var(index);
int uninitialized_var(inlen);
u32 *cqb = NULL;
@@ -904,18 +904,14 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
if (entries < 0 ||
(entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (check_cq_create_flags(attr->flags))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
entries = roundup_pow_of_two(entries + 1);
if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
- return ERR_PTR(-EINVAL);
-
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
cq->ibcq.cqe = entries - 1;
mutex_init(&cq->resize_mutex);
@@ -930,13 +926,13 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size,
&index, &inlen);
if (err)
- goto err_create;
+ return err;
} else {
cqe_size = cache_line_size() == 128 ? 128 : 64;
err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
&index, &inlen);
if (err)
- goto err_create;
+ return err;
INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
}
@@ -981,7 +977,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
kvfree(cqb);
- return &cq->ibcq;
+ return 0;
err_cmd:
mlx5_core_destroy_cq(dev->mdev, &cq->mcq);
@@ -992,14 +988,10 @@ err_cqb:
destroy_cq_user(cq, udata);
else
destroy_cq_kernel(dev, cq);
-
-err_create:
- kfree(cq);
-
- return ERR_PTR(err);
+ return err;
}
-int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(cq->device);
struct mlx5_ib_cq *mcq = to_mcq(cq);
@@ -1009,10 +1001,6 @@ int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
destroy_cq_user(mcq, udata);
else
destroy_cq_kernel(dev, mcq);
-
- kfree(mcq);
-
- return 0;
}
static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
@@ -1138,11 +1126,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
return 0;
}
-static void un_resize_user(struct mlx5_ib_cq *cq)
-{
- ib_umem_release(cq->resize_umem);
-}
-
static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
int entries, int cqe_size)
{
@@ -1165,12 +1148,6 @@ ex:
return err;
}
-static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
-{
- free_cq_buf(dev, cq->resize_buf);
- cq->resize_buf = NULL;
-}
-
static int copy_resize_cqes(struct mlx5_ib_cq *cq)
{
struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
@@ -1351,10 +1328,11 @@ ex_alloc:
kvfree(in);
ex_resize:
- if (udata)
- un_resize_user(cq);
- else
- un_resize_kernel(dev, cq);
+ ib_umem_release(cq->resize_umem);
+ if (!udata) {
+ free_cq_buf(dev, cq->resize_buf);
+ cq->resize_buf = NULL;
+ }
ex:
mutex_unlock(&cq->resize_mutex);
return err;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 931f587dfb8f..ec4370f99381 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -14,13 +14,17 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
+#include <linux/xarray.h>
#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>
+static void dispatch_event_fd(struct list_head *fd_list, const void *data);
+
enum devx_obj_flags {
DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0,
DEVX_OBJ_FLAGS_DCT = 1 << 1,
+ DEVX_OBJ_FLAGS_CQ = 1 << 2,
};
struct devx_async_data {
@@ -33,9 +37,61 @@ struct devx_async_data {
struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
};
+struct devx_async_event_data {
+ struct list_head list; /* headed in ev_file->event_list */
+ struct mlx5_ib_uapi_devx_async_event_hdr hdr;
+};
+
+/* first level XA value data structure */
+struct devx_event {
+ struct xarray object_ids; /* second XA level, Key = object id */
+ struct list_head unaffiliated_list;
+};
+
+/* second level XA value data structure */
+struct devx_obj_event {
+ struct rcu_head rcu;
+ struct list_head obj_sub_list;
+};
+
+struct devx_event_subscription {
+ struct list_head file_list; /* headed in ev_file->
+ * subscribed_events_list
+ */
+ struct list_head xa_list; /* headed in devx_event->unaffiliated_list or
+ * devx_obj_event->obj_sub_list
+ */
+ struct list_head obj_list; /* headed in devx_object */
+ struct list_head event_list; /* headed in ev_file->event_list or in
+ * temp list via subscription
+ */
+
+ u8 is_cleaned:1;
+ u32 xa_key_level1;
+ u32 xa_key_level2;
+ struct rcu_head rcu;
+ u64 cookie;
+ struct devx_async_event_file *ev_file;
+ struct file *filp; /* Upon hot unplug we need a direct access to */
+ struct eventfd_ctx *eventfd;
+};
+
+struct devx_async_event_file {
+ struct ib_uobject uobj;
+ /* Head of events that are subscribed to this FD */
+ struct list_head subscribed_events_list;
+ spinlock_t lock;
+ wait_queue_head_t poll_wait;
+ struct list_head event_list;
+ struct mlx5_ib_dev *dev;
+ u8 omit_data:1;
+ u8 is_overflow_err:1;
+ u8 is_destroyed:1;
+};
+
#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
struct devx_obj {
- struct mlx5_core_dev *mdev;
+ struct mlx5_ib_dev *ib_dev;
u64 obj_id;
u32 dinlen; /* destroy inbox length */
u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
@@ -43,7 +99,9 @@ struct devx_obj {
union {
struct mlx5_ib_devx_mr devx_mr;
struct mlx5_core_dct core_dct;
+ struct mlx5_core_cq core_cq;
};
+ struct list_head event_sub; /* holds devx_event_subscription entries */
};
struct devx_umem {
@@ -149,6 +207,127 @@ bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id)
return false;
}
+static bool is_legacy_unaffiliated_event_num(u16 event_num)
+{
+ switch (event_num) {
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool is_legacy_obj_event_num(u16 event_num)
+{
+ switch (event_num) {
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ case MLX5_EVENT_TYPE_COMP:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static u16 get_legacy_obj_type(u16 opcode)
+{
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_RQ:
+ return MLX5_EVENT_QUEUE_TYPE_RQ;
+ case MLX5_CMD_OP_CREATE_QP:
+ return MLX5_EVENT_QUEUE_TYPE_QP;
+ case MLX5_CMD_OP_CREATE_SQ:
+ return MLX5_EVENT_QUEUE_TYPE_SQ;
+ case MLX5_CMD_OP_CREATE_DCT:
+ return MLX5_EVENT_QUEUE_TYPE_DCT;
+ default:
+ return 0;
+ }
+}
+
+static u16 get_dec_obj_type(struct devx_obj *obj, u16 event_num)
+{
+ u16 opcode;
+
+ opcode = (obj->obj_id >> 32) & 0xffff;
+
+ if (is_legacy_obj_event_num(event_num))
+ return get_legacy_obj_type(opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ return (obj->obj_id >> 48);
+ case MLX5_CMD_OP_CREATE_RQ:
+ return MLX5_OBJ_TYPE_RQ;
+ case MLX5_CMD_OP_CREATE_QP:
+ return MLX5_OBJ_TYPE_QP;
+ case MLX5_CMD_OP_CREATE_SQ:
+ return MLX5_OBJ_TYPE_SQ;
+ case MLX5_CMD_OP_CREATE_DCT:
+ return MLX5_OBJ_TYPE_DCT;
+ case MLX5_CMD_OP_CREATE_TIR:
+ return MLX5_OBJ_TYPE_TIR;
+ case MLX5_CMD_OP_CREATE_TIS:
+ return MLX5_OBJ_TYPE_TIS;
+ case MLX5_CMD_OP_CREATE_PSV:
+ return MLX5_OBJ_TYPE_PSV;
+ case MLX5_OBJ_TYPE_MKEY:
+ return MLX5_OBJ_TYPE_MKEY;
+ case MLX5_CMD_OP_CREATE_RMP:
+ return MLX5_OBJ_TYPE_RMP;
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ return MLX5_OBJ_TYPE_XRC_SRQ;
+ case MLX5_CMD_OP_CREATE_XRQ:
+ return MLX5_OBJ_TYPE_XRQ;
+ case MLX5_CMD_OP_CREATE_RQT:
+ return MLX5_OBJ_TYPE_RQT;
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ return MLX5_OBJ_TYPE_FLOW_COUNTER;
+ case MLX5_CMD_OP_CREATE_CQ:
+ return MLX5_OBJ_TYPE_CQ;
+ default:
+ return 0;
+ }
+}
+
+static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe)
+{
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ return eqe->data.qp_srq.type;
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ return 0;
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ return MLX5_EVENT_QUEUE_TYPE_DCT;
+ default:
+ return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
+ }
+}
+
+static u32 get_dec_obj_id(u64 obj_id)
+{
+ return (obj_id & 0xffffffff);
+}
+
/*
* As the obj_id in the firmware is not globally unique the object type
* must be considered upon checking for a valid object id.
@@ -715,12 +894,16 @@ static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
return c->devx_uid;
}
-static bool devx_is_general_cmd(void *in)
+
+static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
- if (opcode >= MLX5_CMD_OP_GENERAL_START &&
- opcode < MLX5_CMD_OP_GENERAL_END)
+ /* Pass all cmds for vhca_tunnel as general, tracking is done in FW */
+ if ((MLX5_CAP_GEN_64(dev->mdev, vhca_tunnel_commands) &&
+ MLX5_GET(general_obj_in_cmd_hdr, in, vhca_tunnel_id)) ||
+ (opcode >= MLX5_CMD_OP_GENERAL_START &&
+ opcode < MLX5_CMD_OP_GENERAL_END))
return true;
switch (opcode) {
@@ -846,7 +1029,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
return uid;
/* Only white list of some general HCA commands are allowed for this method. */
- if (!devx_is_general_cmd(cmd_in))
+ if (!devx_is_general_cmd(cmd_in, dev))
return -EINVAL;
cmd_out = uverbs_zalloc(attrs, cmd_out_len);
@@ -1111,33 +1294,72 @@ static void devx_free_indirect_mkey(struct rcu_head *rcu)
*/
static void devx_cleanup_mkey(struct devx_obj *obj)
{
- xa_erase(&obj->mdev->priv.mkey_table,
+ xa_erase(&obj->ib_dev->mdev->priv.mkey_table,
mlx5_base_mkey(obj->devx_mr.mmkey.key));
}
+static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
+ struct devx_event_subscription *sub)
+{
+ struct devx_event *event;
+ struct devx_obj_event *xa_val_level2;
+
+ if (sub->is_cleaned)
+ return;
+
+ sub->is_cleaned = 1;
+ list_del_rcu(&sub->xa_list);
+
+ if (list_empty(&sub->obj_list))
+ return;
+
+ list_del_rcu(&sub->obj_list);
+ /* check whether key level 1 for this obj_sub_list is empty */
+ event = xa_load(&dev->devx_event_table.event_xa,
+ sub->xa_key_level1);
+ WARN_ON(!event);
+
+ xa_val_level2 = xa_load(&event->object_ids, sub->xa_key_level2);
+ if (list_empty(&xa_val_level2->obj_sub_list)) {
+ xa_erase(&event->object_ids,
+ sub->xa_key_level2);
+ kfree_rcu(xa_val_level2, rcu);
+ }
+}
+
static int devx_obj_cleanup(struct ib_uobject *uobject,
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs)
{
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ struct mlx5_devx_event_table *devx_event_table;
struct devx_obj *obj = uobject->object;
+ struct devx_event_subscription *sub_entry, *tmp;
+ struct mlx5_ib_dev *dev;
int ret;
+ dev = mlx5_udata_to_mdev(&attrs->driver_udata);
if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
devx_cleanup_mkey(obj);
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
- ret = mlx5_core_destroy_dct(obj->mdev, &obj->core_dct);
+ ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
+ else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
+ ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
else
- ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out,
- sizeof(out));
+ ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox,
+ obj->dinlen, out, sizeof(out));
if (ib_is_destroy_retryable(ret, why, uobject))
return ret;
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
- struct mlx5_ib_dev *dev =
- mlx5_udata_to_mdev(&attrs->driver_udata);
+ devx_event_table = &dev->devx_event_table;
+
+ mutex_lock(&devx_event_table->event_xa_lock);
+ list_for_each_entry_safe(sub_entry, tmp, &obj->event_sub, obj_list)
+ devx_cleanup_subscription(dev, sub_entry);
+ mutex_unlock(&devx_event_table->event_xa_lock);
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu,
devx_free_indirect_mkey);
return ret;
@@ -1147,6 +1369,29 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
return ret;
}
+static void devx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
+{
+ struct devx_obj *obj = container_of(mcq, struct devx_obj, core_cq);
+ struct mlx5_devx_event_table *table;
+ struct devx_event *event;
+ struct devx_obj_event *obj_event;
+ u32 obj_id = mcq->cqn;
+
+ table = &obj->ib_dev->devx_event_table;
+ rcu_read_lock();
+ event = xa_load(&table->event_xa, MLX5_EVENT_TYPE_COMP);
+ if (!event)
+ goto out;
+
+ obj_event = xa_load(&event->object_ids, obj_id);
+ if (!obj_event)
+ goto out;
+
+ dispatch_event_fd(&obj_event->obj_sub_list, eqe);
+out:
+ rcu_read_unlock();
+}
+
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
struct uverbs_attr_bundle *attrs)
{
@@ -1169,6 +1414,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
u32 obj_id;
u16 opcode;
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
return uid;
@@ -1198,6 +1446,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
err = mlx5_core_create_dct(dev->mdev, &obj->core_dct,
cmd_in, cmd_in_len,
cmd_out, cmd_out_len);
+ } else if (opcode == MLX5_CMD_OP_CREATE_CQ) {
+ obj->flags |= DEVX_OBJ_FLAGS_CQ;
+ obj->core_cq.comp = devx_cq_comp;
+ err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
+ cmd_in, cmd_in_len, cmd_out,
+ cmd_out_len);
} else {
err = mlx5_cmd_exec(dev->mdev, cmd_in,
cmd_in_len,
@@ -1208,7 +1462,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
goto obj_free;
uobj->object = obj;
- obj->mdev = dev->mdev;
+ INIT_LIST_HEAD(&obj->event_sub);
+ obj->ib_dev = dev;
devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen,
&obj_id);
WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
@@ -1235,9 +1490,11 @@ err_copy:
devx_cleanup_mkey(obj);
obj_destroy:
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
- mlx5_core_destroy_dct(obj->mdev, &obj->core_dct);
+ mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
+ else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
+ mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
else
- mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out,
+ mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out,
sizeof(out));
obj_free:
kfree(obj);
@@ -1259,6 +1516,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
int err;
int uid;
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
return uid;
@@ -1301,6 +1561,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
int uid;
struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
return uid;
@@ -1365,6 +1628,38 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
return 0;
}
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE);
+ struct devx_async_event_file *ev_file;
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ u32 flags;
+ int err;
+
+ err = uverbs_get_flags32(&flags, attrs,
+ MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
+ MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA);
+
+ if (err)
+ return err;
+
+ ev_file = container_of(uobj, struct devx_async_event_file,
+ uobj);
+ spin_lock_init(&ev_file->lock);
+ INIT_LIST_HEAD(&ev_file->event_list);
+ init_waitqueue_head(&ev_file->poll_wait);
+ if (flags & MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA)
+ ev_file->omit_data = 1;
+ INIT_LIST_HEAD(&ev_file->subscribed_events_list);
+ ev_file->dev = dev;
+ get_device(&dev->ib_dev.dev);
+ return 0;
+}
+
static void devx_query_callback(int status, struct mlx5_async_work *context)
{
struct devx_async_data *async_data =
@@ -1406,6 +1701,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
struct devx_async_cmd_event_file *ev_file;
struct devx_async_data *async_data;
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
return uid;
@@ -1474,6 +1772,331 @@ sub_bytes:
return err;
}
+static void
+subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table,
+ u32 key_level1,
+ bool is_level2,
+ u32 key_level2)
+{
+ struct devx_event *event;
+ struct devx_obj_event *xa_val_level2;
+
+ /* Level 1 is valid for future use, no need to free */
+ if (!is_level2)
+ return;
+
+ event = xa_load(&devx_event_table->event_xa, key_level1);
+ WARN_ON(!event);
+
+ xa_val_level2 = xa_load(&event->object_ids,
+ key_level2);
+ if (list_empty(&xa_val_level2->obj_sub_list)) {
+ xa_erase(&event->object_ids,
+ key_level2);
+ kfree_rcu(xa_val_level2, rcu);
+ }
+}
+
+static int
+subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
+ u32 key_level1,
+ bool is_level2,
+ u32 key_level2)
+{
+ struct devx_obj_event *obj_event;
+ struct devx_event *event;
+ int err;
+
+ event = xa_load(&devx_event_table->event_xa, key_level1);
+ if (!event) {
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (!event)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&event->unaffiliated_list);
+ xa_init(&event->object_ids);
+
+ err = xa_insert(&devx_event_table->event_xa,
+ key_level1,
+ event,
+ GFP_KERNEL);
+ if (err) {
+ kfree(event);
+ return err;
+ }
+ }
+
+ if (!is_level2)
+ return 0;
+
+ obj_event = xa_load(&event->object_ids, key_level2);
+ if (!obj_event) {
+ obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
+ if (!obj_event)
+ /* Level1 is valid for future use, no need to free */
+ return -ENOMEM;
+
+ err = xa_insert(&event->object_ids,
+ key_level2,
+ obj_event,
+ GFP_KERNEL);
+ if (err)
+ return err;
+ INIT_LIST_HEAD(&obj_event->obj_sub_list);
+ }
+
+ return 0;
+}
+
+static bool is_valid_events_legacy(int num_events, u16 *event_type_num_list,
+ struct devx_obj *obj)
+{
+ int i;
+
+ for (i = 0; i < num_events; i++) {
+ if (obj) {
+ if (!is_legacy_obj_event_num(event_type_num_list[i]))
+ return false;
+ } else if (!is_legacy_unaffiliated_event_num(
+ event_type_num_list[i])) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+#define MAX_SUPP_EVENT_NUM 255
+static bool is_valid_events(struct mlx5_core_dev *dev,
+ int num_events, u16 *event_type_num_list,
+ struct devx_obj *obj)
+{
+ __be64 *aff_events;
+ __be64 *unaff_events;
+ int mask_entry;
+ int mask_bit;
+ int i;
+
+ if (MLX5_CAP_GEN(dev, event_cap)) {
+ aff_events = MLX5_CAP_DEV_EVENT(dev,
+ user_affiliated_events);
+ unaff_events = MLX5_CAP_DEV_EVENT(dev,
+ user_unaffiliated_events);
+ } else {
+ return is_valid_events_legacy(num_events, event_type_num_list,
+ obj);
+ }
+
+ for (i = 0; i < num_events; i++) {
+ if (event_type_num_list[i] > MAX_SUPP_EVENT_NUM)
+ return false;
+
+ mask_entry = event_type_num_list[i] / 64;
+ mask_bit = event_type_num_list[i] % 64;
+
+ if (obj) {
+ /* CQ completion */
+ if (event_type_num_list[i] == 0)
+ continue;
+
+ if (!(be64_to_cpu(aff_events[mask_entry]) &
+ (1ull << mask_bit)))
+ return false;
+
+ continue;
+ }
+
+ if (!(be64_to_cpu(unaff_events[mask_entry]) &
+ (1ull << mask_bit)))
+ return false;
+ }
+
+ return true;
+}
+
+#define MAX_NUM_EVENTS 16
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *devx_uobj = uverbs_attr_get_uobject(
+ attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE);
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ struct ib_uobject *fd_uobj;
+ struct devx_obj *obj = NULL;
+ struct devx_async_event_file *ev_file;
+ struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table;
+ u16 *event_type_num_list;
+ struct devx_event_subscription *event_sub, *tmp_sub;
+ struct list_head sub_list;
+ int redirect_fd;
+ bool use_eventfd = false;
+ int num_events;
+ int num_alloc_xa_entries = 0;
+ u16 obj_type = 0;
+ u64 cookie = 0;
+ u32 obj_id = 0;
+ int err;
+ int i;
+
+ if (!c->devx_uid)
+ return -EINVAL;
+
+ if (!IS_ERR(devx_uobj)) {
+ obj = (struct devx_obj *)devx_uobj->object;
+ if (obj)
+ obj_id = get_dec_obj_id(obj->obj_id);
+ }
+
+ fd_uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE);
+ if (IS_ERR(fd_uobj))
+ return PTR_ERR(fd_uobj);
+
+ ev_file = container_of(fd_uobj, struct devx_async_event_file,
+ uobj);
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) {
+ err = uverbs_copy_from(&redirect_fd, attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM);
+ if (err)
+ return err;
+
+ use_eventfd = true;
+ }
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) {
+ if (use_eventfd)
+ return -EINVAL;
+
+ err = uverbs_copy_from(&cookie, attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE);
+ if (err)
+ return err;
+ }
+
+ num_events = uverbs_attr_ptr_get_array_size(
+ attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
+ sizeof(u16));
+
+ if (num_events < 0)
+ return num_events;
+
+ if (num_events > MAX_NUM_EVENTS)
+ return -EINVAL;
+
+ event_type_num_list = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST);
+
+ if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj))
+ return -EINVAL;
+
+ INIT_LIST_HEAD(&sub_list);
+
+ /* Protect from concurrent subscriptions to same XA entries to allow
+ * both to succeed
+ */
+ mutex_lock(&devx_event_table->event_xa_lock);
+ for (i = 0; i < num_events; i++) {
+ u32 key_level1;
+
+ if (obj)
+ obj_type = get_dec_obj_type(obj,
+ event_type_num_list[i]);
+ key_level1 = event_type_num_list[i] | obj_type << 16;
+
+ err = subscribe_event_xa_alloc(devx_event_table,
+ key_level1,
+ obj,
+ obj_id);
+ if (err)
+ goto err;
+
+ num_alloc_xa_entries++;
+ event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
+ if (!event_sub)
+ goto err;
+
+ list_add_tail(&event_sub->event_list, &sub_list);
+ if (use_eventfd) {
+ event_sub->eventfd =
+ eventfd_ctx_fdget(redirect_fd);
+
+ if (IS_ERR(event_sub)) {
+ err = PTR_ERR(event_sub->eventfd);
+ event_sub->eventfd = NULL;
+ goto err;
+ }
+ }
+
+ event_sub->cookie = cookie;
+ event_sub->ev_file = ev_file;
+ event_sub->filp = fd_uobj->object;
+ /* May be needed upon cleanup the devx object/subscription */
+ event_sub->xa_key_level1 = key_level1;
+ event_sub->xa_key_level2 = obj_id;
+ INIT_LIST_HEAD(&event_sub->obj_list);
+ }
+
+ /* Once all the allocations and the XA data insertions were done we
+ * can go ahead and add all the subscriptions to the relevant lists
+ * without concern of a failure.
+ */
+ list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
+ struct devx_event *event;
+ struct devx_obj_event *obj_event;
+
+ list_del_init(&event_sub->event_list);
+
+ spin_lock_irq(&ev_file->lock);
+ list_add_tail_rcu(&event_sub->file_list,
+ &ev_file->subscribed_events_list);
+ spin_unlock_irq(&ev_file->lock);
+
+ event = xa_load(&devx_event_table->event_xa,
+ event_sub->xa_key_level1);
+ WARN_ON(!event);
+
+ if (!obj) {
+ list_add_tail_rcu(&event_sub->xa_list,
+ &event->unaffiliated_list);
+ continue;
+ }
+
+ obj_event = xa_load(&event->object_ids, obj_id);
+ WARN_ON(!obj_event);
+ list_add_tail_rcu(&event_sub->xa_list,
+ &obj_event->obj_sub_list);
+ list_add_tail_rcu(&event_sub->obj_list,
+ &obj->event_sub);
+ }
+
+ mutex_unlock(&devx_event_table->event_xa_lock);
+ return 0;
+
+err:
+ list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
+ list_del(&event_sub->event_list);
+
+ subscribe_event_xa_dealloc(devx_event_table,
+ event_sub->xa_key_level1,
+ obj,
+ obj_id);
+
+ if (event_sub->eventfd)
+ eventfd_ctx_put(event_sub->eventfd);
+
+ kfree(event_sub);
+ }
+
+ mutex_unlock(&devx_event_table->event_xa_lock);
+ return err;
+}
+
static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
struct uverbs_attr_bundle *attrs,
struct devx_umem *obj)
@@ -1621,6 +2244,203 @@ static int devx_umem_cleanup(struct ib_uobject *uobject,
return 0;
}
+static bool is_unaffiliated_event(struct mlx5_core_dev *dev,
+ unsigned long event_type)
+{
+ __be64 *unaff_events;
+ int mask_entry;
+ int mask_bit;
+
+ if (!MLX5_CAP_GEN(dev, event_cap))
+ return is_legacy_unaffiliated_event_num(event_type);
+
+ unaff_events = MLX5_CAP_DEV_EVENT(dev,
+ user_unaffiliated_events);
+ WARN_ON(event_type > MAX_SUPP_EVENT_NUM);
+
+ mask_entry = event_type / 64;
+ mask_bit = event_type % 64;
+
+ if (!(be64_to_cpu(unaff_events[mask_entry]) & (1ull << mask_bit)))
+ return false;
+
+ return true;
+}
+
+static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data)
+{
+ struct mlx5_eqe *eqe = data;
+ u32 obj_id = 0;
+
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ break;
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+ break;
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ obj_id = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+ break;
+ default:
+ obj_id = MLX5_GET(affiliated_event_header, &eqe->data, obj_id);
+ break;
+ }
+
+ return obj_id;
+}
+
+static int deliver_event(struct devx_event_subscription *event_sub,
+ const void *data)
+{
+ struct devx_async_event_file *ev_file;
+ struct devx_async_event_data *event_data;
+ unsigned long flags;
+
+ ev_file = event_sub->ev_file;
+
+ if (ev_file->omit_data) {
+ spin_lock_irqsave(&ev_file->lock, flags);
+ if (!list_empty(&event_sub->event_list)) {
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ return 0;
+ }
+
+ list_add_tail(&event_sub->event_list, &ev_file->event_list);
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ wake_up_interruptible(&ev_file->poll_wait);
+ return 0;
+ }
+
+ event_data = kzalloc(sizeof(*event_data) + sizeof(struct mlx5_eqe),
+ GFP_ATOMIC);
+ if (!event_data) {
+ spin_lock_irqsave(&ev_file->lock, flags);
+ ev_file->is_overflow_err = 1;
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ return -ENOMEM;
+ }
+
+ event_data->hdr.cookie = event_sub->cookie;
+ memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe));
+
+ spin_lock_irqsave(&ev_file->lock, flags);
+ list_add_tail(&event_data->list, &ev_file->event_list);
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ wake_up_interruptible(&ev_file->poll_wait);
+
+ return 0;
+}
+
+static void dispatch_event_fd(struct list_head *fd_list,
+ const void *data)
+{
+ struct devx_event_subscription *item;
+
+ list_for_each_entry_rcu(item, fd_list, xa_list) {
+ if (!get_file_rcu(item->filp))
+ continue;
+
+ if (item->eventfd) {
+ eventfd_signal(item->eventfd, 1);
+ fput(item->filp);
+ continue;
+ }
+
+ deliver_event(item, data);
+ fput(item->filp);
+ }
+}
+
+static int devx_event_notifier(struct notifier_block *nb,
+ unsigned long event_type, void *data)
+{
+ struct mlx5_devx_event_table *table;
+ struct mlx5_ib_dev *dev;
+ struct devx_event *event;
+ struct devx_obj_event *obj_event;
+ u16 obj_type = 0;
+ bool is_unaffiliated;
+ u32 obj_id;
+
+ /* Explicit filtering to kernel events which may occur frequently */
+ if (event_type == MLX5_EVENT_TYPE_CMD ||
+ event_type == MLX5_EVENT_TYPE_PAGE_REQUEST)
+ return NOTIFY_OK;
+
+ table = container_of(nb, struct mlx5_devx_event_table, devx_nb.nb);
+ dev = container_of(table, struct mlx5_ib_dev, devx_event_table);
+ is_unaffiliated = is_unaffiliated_event(dev->mdev, event_type);
+
+ if (!is_unaffiliated)
+ obj_type = get_event_obj_type(event_type, data);
+
+ rcu_read_lock();
+ event = xa_load(&table->event_xa, event_type | (obj_type << 16));
+ if (!event) {
+ rcu_read_unlock();
+ return NOTIFY_DONE;
+ }
+
+ if (is_unaffiliated) {
+ dispatch_event_fd(&event->unaffiliated_list, data);
+ rcu_read_unlock();
+ return NOTIFY_OK;
+ }
+
+ obj_id = devx_get_obj_id_from_event(event_type, data);
+ obj_event = xa_load(&event->object_ids, obj_id);
+ if (!obj_event) {
+ rcu_read_unlock();
+ return NOTIFY_DONE;
+ }
+
+ dispatch_event_fd(&obj_event->obj_sub_list, data);
+
+ rcu_read_unlock();
+ return NOTIFY_OK;
+}
+
+void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_devx_event_table *table = &dev->devx_event_table;
+
+ xa_init(&table->event_xa);
+ mutex_init(&table->event_xa_lock);
+ MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY);
+ mlx5_eq_notifier_register(dev->mdev, &table->devx_nb);
+}
+
+void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_devx_event_table *table = &dev->devx_event_table;
+ struct devx_event_subscription *sub, *tmp;
+ struct devx_event *event;
+ void *entry;
+ unsigned long id;
+
+ mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb);
+ mutex_lock(&dev->devx_event_table.event_xa_lock);
+ xa_for_each(&table->event_xa, id, entry) {
+ event = entry;
+ list_for_each_entry_safe(sub, tmp, &event->unaffiliated_list,
+ xa_list)
+ devx_cleanup_subscription(dev, sub);
+ kfree(entry);
+ }
+ mutex_unlock(&dev->devx_event_table.event_xa_lock);
+ xa_destroy(&table->event_xa);
+}
+
static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
@@ -1719,6 +2539,149 @@ static const struct file_operations devx_async_cmd_event_fops = {
.llseek = no_llseek,
};
+static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct devx_async_event_file *ev_file = filp->private_data;
+ struct devx_event_subscription *event_sub;
+ struct devx_async_event_data *uninitialized_var(event);
+ int ret = 0;
+ size_t eventsz;
+ bool omit_data;
+ void *event_data;
+
+ omit_data = ev_file->omit_data;
+
+ spin_lock_irq(&ev_file->lock);
+
+ if (ev_file->is_overflow_err) {
+ ev_file->is_overflow_err = 0;
+ spin_unlock_irq(&ev_file->lock);
+ return -EOVERFLOW;
+ }
+
+ if (ev_file->is_destroyed) {
+ spin_unlock_irq(&ev_file->lock);
+ return -EIO;
+ }
+
+ while (list_empty(&ev_file->event_list)) {
+ spin_unlock_irq(&ev_file->lock);
+
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(ev_file->poll_wait,
+ (!list_empty(&ev_file->event_list) ||
+ ev_file->is_destroyed))) {
+ return -ERESTARTSYS;
+ }
+
+ spin_lock_irq(&ev_file->lock);
+ if (ev_file->is_destroyed) {
+ spin_unlock_irq(&ev_file->lock);
+ return -EIO;
+ }
+ }
+
+ if (omit_data) {
+ event_sub = list_first_entry(&ev_file->event_list,
+ struct devx_event_subscription,
+ event_list);
+ eventsz = sizeof(event_sub->cookie);
+ event_data = &event_sub->cookie;
+ } else {
+ event = list_first_entry(&ev_file->event_list,
+ struct devx_async_event_data, list);
+ eventsz = sizeof(struct mlx5_eqe) +
+ sizeof(struct mlx5_ib_uapi_devx_async_event_hdr);
+ event_data = &event->hdr;
+ }
+
+ if (eventsz > count) {
+ spin_unlock_irq(&ev_file->lock);
+ return -EINVAL;
+ }
+
+ if (omit_data)
+ list_del_init(&event_sub->event_list);
+ else
+ list_del(&event->list);
+
+ spin_unlock_irq(&ev_file->lock);
+
+ if (copy_to_user(buf, event_data, eventsz))
+ /* This points to an application issue, not a kernel concern */
+ ret = -EFAULT;
+ else
+ ret = eventsz;
+
+ if (!omit_data)
+ kfree(event);
+ return ret;
+}
+
+static __poll_t devx_async_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ struct devx_async_event_file *ev_file = filp->private_data;
+ __poll_t pollflags = 0;
+
+ poll_wait(filp, &ev_file->poll_wait, wait);
+
+ spin_lock_irq(&ev_file->lock);
+ if (ev_file->is_destroyed)
+ pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
+ else if (!list_empty(&ev_file->event_list))
+ pollflags = EPOLLIN | EPOLLRDNORM;
+ spin_unlock_irq(&ev_file->lock);
+
+ return pollflags;
+}
+
+static int devx_async_event_close(struct inode *inode, struct file *filp)
+{
+ struct devx_async_event_file *ev_file = filp->private_data;
+ struct devx_event_subscription *event_sub, *event_sub_tmp;
+ struct devx_async_event_data *entry, *tmp;
+
+ mutex_lock(&ev_file->dev->devx_event_table.event_xa_lock);
+ /* delete the subscriptions which are related to this FD */
+ list_for_each_entry_safe(event_sub, event_sub_tmp,
+ &ev_file->subscribed_events_list, file_list) {
+ devx_cleanup_subscription(ev_file->dev, event_sub);
+ if (event_sub->eventfd)
+ eventfd_ctx_put(event_sub->eventfd);
+
+ list_del_rcu(&event_sub->file_list);
+ /* subscription may not be used by the read API any more */
+ kfree_rcu(event_sub, rcu);
+ }
+
+ mutex_unlock(&ev_file->dev->devx_event_table.event_xa_lock);
+
+ /* free the pending events allocation */
+ if (!ev_file->omit_data) {
+ spin_lock_irq(&ev_file->lock);
+ list_for_each_entry_safe(entry, tmp,
+ &ev_file->event_list, list)
+ kfree(entry); /* read can't come any more */
+ spin_unlock_irq(&ev_file->lock);
+ }
+
+ uverbs_close_fd(filp);
+ put_device(&ev_file->dev->ib_dev.dev);
+ return 0;
+}
+
+static const struct file_operations devx_async_event_fops = {
+ .owner = THIS_MODULE,
+ .read = devx_async_event_read,
+ .poll = devx_async_event_poll,
+ .release = devx_async_event_close,
+ .llseek = no_llseek,
+};
+
static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
@@ -1738,6 +2701,21 @@ static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj,
return 0;
};
+static int devx_hot_unplug_async_event_file(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ struct devx_async_event_file *ev_file =
+ container_of(uobj, struct devx_async_event_file,
+ uobj);
+
+ spin_lock_irq(&ev_file->lock);
+ ev_file->is_destroyed = 1;
+ spin_unlock_irq(&ev_file->lock);
+
+ wake_up_interruptible(&ev_file->poll_wait);
+ return 0;
+};
+
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_UMEM_REG,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
@@ -1869,10 +2847,32 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_TYPE(u64),
UA_MANDATORY));
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT,
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE,
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u16)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
+
DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
- &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN));
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT));
DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
@@ -1903,6 +2903,24 @@ DECLARE_UVERBS_NAMED_OBJECT(
O_RDONLY),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC));
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC,
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE,
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
+ enum mlx5_ib_uapi_devx_create_event_channel_flags,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file),
+ devx_hot_unplug_async_event_file,
+ &devx_async_event_fops, "[devx_async_event]",
+ O_RDONLY),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC));
+
static bool devx_is_supported(struct ib_device *device)
{
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -1923,5 +2941,8 @@ const struct uapi_definition mlx5_ib_devx_defs[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
{},
};
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 6c529e6f3a01..348c1df69cdc 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -200,19 +200,33 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
vl_15_dropped);
}
-static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num,
+static int process_pma_cmd(struct mlx5_ib_dev *dev, u8 port_num,
const struct ib_mad *in_mad, struct ib_mad *out_mad)
{
- int err;
+ struct mlx5_core_dev *mdev;
+ bool native_port = true;
+ u8 mdev_port_num;
void *out_cnt;
+ int err;
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
+ if (!mdev) {
+ /* Fail to get the native port, likely due to 2nd port is still
+ * unaffiliated. In such case default to 1st port and attached
+ * PF device.
+ */
+ native_port = false;
+ mdev = dev->mdev;
+ mdev_port_num = 1;
+ }
/* Declaring support of extended counters */
if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) {
struct ib_class_port_info cpi = {};
cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
memcpy((out_mad->data + 40), &cpi, sizeof(cpi));
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ goto done;
}
if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) {
@@ -221,11 +235,13 @@ static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num,
int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
out_cnt = kvzalloc(sz, GFP_KERNEL);
- if (!out_cnt)
- return IB_MAD_RESULT_FAILURE;
+ if (!out_cnt) {
+ err = IB_MAD_RESULT_FAILURE;
+ goto done;
+ }
err = mlx5_core_query_vport_counter(mdev, 0, 0,
- port_num, out_cnt, sz);
+ mdev_port_num, out_cnt, sz);
if (!err)
pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
} else {
@@ -234,20 +250,23 @@ static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num,
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
out_cnt = kvzalloc(sz, GFP_KERNEL);
- if (!out_cnt)
- return IB_MAD_RESULT_FAILURE;
+ if (!out_cnt) {
+ err = IB_MAD_RESULT_FAILURE;
+ goto done;
+ }
- err = mlx5_core_query_ib_ppcnt(mdev, port_num,
+ err = mlx5_core_query_ib_ppcnt(mdev, mdev_port_num,
out_cnt, sz);
if (!err)
pma_cnt_assign(pma_cnt, out_cnt);
- }
-
+ }
kvfree(out_cnt);
- if (err)
- return IB_MAD_RESULT_FAILURE;
-
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ err = err ? IB_MAD_RESULT_FAILURE :
+ IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+done:
+ if (native_port)
+ mlx5_ib_put_native_port_mdev(dev, port_num);
+ return err;
}
int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -259,8 +278,6 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct ib_mad *in_mad = (const struct ib_mad *)in;
struct ib_mad *out_mad = (struct ib_mad *)out;
- struct mlx5_core_dev *mdev;
- u8 mdev_port_num;
int ret;
if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
@@ -269,19 +286,14 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
memset(out_mad->data, 0, sizeof(out_mad->data));
- mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
- if (!mdev)
- return IB_MAD_RESULT_FAILURE;
-
- if (MLX5_CAP_GEN(mdev, vport_counters) &&
+ if (MLX5_CAP_GEN(dev->mdev, vport_counters) &&
in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) {
- ret = process_pma_cmd(mdev, mdev_port_num, in_mad, out_mad);
+ ret = process_pma_cmd(dev, port_num, in_mad, out_mad);
} else {
ret = process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
in_mad, out_mad);
}
- mlx5_ib_put_native_port_mdev(dev, port_num);
return ret;
}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index ba312bf59c7a..c2a5780cb394 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -52,6 +52,7 @@
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
+#include <linux/mlx5/eswitch.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
@@ -888,7 +889,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (MLX5_CAP_GEN(mdev, sho)) {
- props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
+ props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER;
/* At this stage no support for signature handover */
props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
IB_PROT_T10DIF_TYPE_2 |
@@ -1008,6 +1009,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len =
1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
+ props->max_pi_fast_reg_page_list_len =
+ props->max_fast_reg_page_list_len / 2;
get_atomic_caps_qp(dev, props);
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
@@ -1043,15 +1046,19 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
if (MLX5_CAP_GEN(mdev, tag_matching)) {
- props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
props->tm_caps.max_num_tags =
(1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
- props->tm_caps.flags = IB_TM_CAP_RC;
props->tm_caps.max_ops =
1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
props->tm_caps.max_sge = MLX5_TM_MAX_SGE;
}
+ if (MLX5_CAP_GEN(mdev, tag_matching) &&
+ MLX5_CAP_GEN(mdev, rndv_offload_rc)) {
+ props->tm_caps.flags = IB_TM_CAP_RNDV_RC;
+ props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
+ }
+
if (MLX5_CAP_GEN(dev->mdev, cq_moderation)) {
props->cq_caps.max_cq_moderation_count =
MLX5_MAX_CQ_COUNT;
@@ -3257,11 +3264,14 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
int max_table_size;
int num_entries;
int num_groups;
+ bool esw_encap;
u32 flags = 0;
int priority;
max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
log_max_ft_size));
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
enum mlx5_flow_namespace_type fn_type;
@@ -3274,10 +3284,10 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
if (ft_type == MLX5_IB_FT_RX) {
fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
prio = &dev->flow_db->prios[priority];
- if (!dev->is_rep &&
+ if (!dev->is_rep && !esw_encap &&
MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (!dev->is_rep &&
+ if (!dev->is_rep && !esw_encap &&
MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
reformat_l3_tunnel_to_l2))
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
@@ -3287,7 +3297,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
log_max_ft_size));
fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
prio = &dev->flow_db->egress_prios[priority];
- if (!dev->is_rep &&
+ if (!dev->is_rep && !esw_encap &&
MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
}
@@ -3923,6 +3933,7 @@ _get_flow_table(struct mlx5_ib_dev *dev,
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio = NULL;
int max_table_size = 0;
+ bool esw_encap;
u32 flags = 0;
int priority;
@@ -3931,22 +3942,30 @@ _get_flow_table(struct mlx5_ib_dev *dev,
else
priority = ib_prio_to_core_prio(fs_matcher->priority, false);
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- reformat_l3_tunnel_to_l2))
+ reformat_l3_tunnel_to_l2) &&
+ !esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
} else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
max_table_size = BIT(
MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap)
flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
} else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
max_table_size = BIT(
MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) &&
+ esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
priority = FDB_BYPASS_PATH;
}
@@ -4711,7 +4730,7 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port)
int err = -ENOMEM;
struct ib_udata uhw = {.inlen = 0, .outlen = 0};
- pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
+ pprops = kzalloc(sizeof(*pprops), GFP_KERNEL);
if (!pprops)
goto out;
@@ -4725,7 +4744,6 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port)
goto out;
}
- memset(pprops, 0, sizeof(*pprops));
err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
if (err) {
mlx5_ib_warn(dev, "query_port %d failed %d\n",
@@ -4926,18 +4944,19 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
if (ret)
goto error0;
- devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL);
- if (IS_ERR(devr->c0)) {
- ret = PTR_ERR(devr->c0);
+ devr->c0 = rdma_zalloc_drv_obj(ibdev, ib_cq);
+ if (!devr->c0) {
+ ret = -ENOMEM;
goto error1;
}
- devr->c0->device = &dev->ib_dev;
- devr->c0->uobject = NULL;
- devr->c0->comp_handler = NULL;
- devr->c0->event_handler = NULL;
- devr->c0->cq_context = NULL;
+
+ devr->c0->device = &dev->ib_dev;
atomic_set(&devr->c0->usecnt, 0);
+ ret = mlx5_ib_create_cq(devr->c0, &cq_attr, NULL);
+ if (ret)
+ goto err_create_cq;
+
devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
if (IS_ERR(devr->x0)) {
ret = PTR_ERR(devr->x0);
@@ -5029,6 +5048,8 @@ error3:
mlx5_ib_dealloc_xrcd(devr->x0, NULL);
error2:
mlx5_ib_destroy_cq(devr->c0, NULL);
+err_create_cq:
+ kfree(devr->c0);
error1:
mlx5_ib_dealloc_pd(devr->p0, NULL);
error0:
@@ -5047,6 +5068,7 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr)
mlx5_ib_dealloc_xrcd(devr->x0, NULL);
mlx5_ib_dealloc_xrcd(devr->x1, NULL);
mlx5_ib_destroy_cq(devr->c0, NULL);
+ kfree(devr->c0);
mlx5_ib_dealloc_pd(devr->p0, NULL);
kfree(devr->p0);
@@ -5459,7 +5481,8 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
struct mlx5_ib_port *port,
- struct rdma_hw_stats *stats)
+ struct rdma_hw_stats *stats,
+ u16 set_id)
{
int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
void *out;
@@ -5470,9 +5493,7 @@ static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
if (!out)
return -ENOMEM;
- ret = mlx5_core_query_q_counter(mdev,
- port->cnts.set_id, 0,
- out, outlen);
+ ret = mlx5_core_query_q_counter(mdev, set_id, 0, out, outlen);
if (ret)
goto free;
@@ -5532,7 +5553,8 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
port->cnts.num_ext_ppcnt_counters;
/* q_counters are per IB device, query the master mdev */
- ret = mlx5_ib_query_q_counters(dev->mdev, port, stats);
+ ret = mlx5_ib_query_q_counters(dev->mdev, port, stats,
+ port->cnts.set_id);
if (ret)
return ret;
@@ -5568,6 +5590,68 @@ done:
return num_counters;
}
+static struct rdma_hw_stats *
+mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ struct mlx5_ib_port *port = &dev->port[counter->port - 1];
+
+ /* Q counters are in the beginning of all counters */
+ return rdma_alloc_hw_stats_struct(port->cnts.names,
+ port->cnts.num_q_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ struct mlx5_ib_port *port = &dev->port[counter->port - 1];
+
+ return mlx5_ib_query_q_counters(dev->mdev, port,
+ counter->stats, counter->id);
+}
+
+static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *qp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ u16 cnt_set_id = 0;
+ int err;
+
+ if (!counter->id) {
+ err = mlx5_cmd_alloc_q_counter(dev->mdev,
+ &cnt_set_id,
+ MLX5_SHARED_RESOURCE_UID);
+ if (err)
+ return err;
+ counter->id = cnt_set_id;
+ }
+
+ err = mlx5_ib_qp_set_counter(qp, counter);
+ if (err)
+ goto fail_set_counter;
+
+ return 0;
+
+fail_set_counter:
+ mlx5_core_dealloc_q_counter(dev->mdev, cnt_set_id);
+ counter->id = 0;
+
+ return err;
+}
+
+static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
+{
+ return mlx5_ib_qp_set_counter(qp, NULL);
+}
+
+static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+
+ return mlx5_core_dealloc_q_counter(dev->mdev, counter->id);
+}
+
static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
enum rdma_netdev_t type,
struct rdma_netdev_alloc_params *params)
@@ -6079,7 +6163,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
- dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
dev->ib_dev.phys_port_cnt = dev->num_ports;
@@ -6159,8 +6242,13 @@ static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
}
static const struct ib_device_ops mlx5_ib_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_MLX5,
+ .uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION,
+
.add_gid = mlx5_ib_add_gid,
.alloc_mr = mlx5_ib_alloc_mr,
+ .alloc_mr_integrity = mlx5_ib_alloc_mr_integrity,
.alloc_pd = mlx5_ib_alloc_pd,
.alloc_ucontext = mlx5_ib_alloc_ucontext,
.attach_mcast = mlx5_ib_mcg_attach,
@@ -6190,6 +6278,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.get_dma_mr = mlx5_ib_get_dma_mr,
.get_link_layer = mlx5_ib_port_link_layer,
.map_mr_sg = mlx5_ib_map_mr_sg,
+ .map_mr_sg_pi = mlx5_ib_map_mr_sg_pi,
.mmap = mlx5_ib_mmap,
.modify_cq = mlx5_ib_modify_cq,
.modify_device = mlx5_ib_modify_device,
@@ -6214,6 +6303,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.resize_cq = mlx5_ib_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext),
@@ -6256,7 +6346,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
struct mlx5_core_dev *mdev = dev->mdev;
int err;
- dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
dev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -6325,7 +6414,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
if (mlx5_accel_ipsec_device_caps(dev->mdev) &
MLX5_ACCEL_IPSEC_CAP_DEVICE)
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops);
- dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops);
if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
@@ -6340,6 +6428,8 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
mutex_init(&dev->lb.mutex);
+ dev->ib_dev.use_cq_dim = true;
+
return 0;
}
@@ -6487,6 +6577,11 @@ static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = {
.alloc_hw_stats = mlx5_ib_alloc_hw_stats,
.get_hw_stats = mlx5_ib_get_hw_stats,
+ .counter_bind_qp = mlx5_ib_counter_bind_qp,
+ .counter_unbind_qp = mlx5_ib_counter_unbind_qp,
+ .counter_dealloc = mlx5_ib_counter_dealloc,
+ .counter_alloc_stats = mlx5_ib_counter_alloc_stats,
+ .counter_update_stats = mlx5_ib_counter_update_stats,
};
static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
@@ -6607,15 +6702,19 @@ static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev)
int uid;
uid = mlx5_ib_devx_create(dev, false);
- if (uid > 0)
+ if (uid > 0) {
dev->devx_whitelist_uid = uid;
+ mlx5_ib_devx_init_event_table(dev);
+ }
return 0;
}
static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
{
- if (dev->devx_whitelist_uid)
+ if (dev->devx_whitelist_uid) {
+ mlx5_ib_devx_cleanup_event_table(dev);
mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
+ }
}
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 9f90be296ee0..fe1a76d8531c 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -55,9 +55,10 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
int i = 0;
struct scatterlist *sg;
int entry;
- unsigned long page_shift = umem->page_shift;
if (umem->is_odp) {
+ unsigned int page_shift = to_ib_umem_odp(umem)->page_shift;
+
*ncont = ib_umem_page_count(umem);
*count = *ncont << (page_shift - PAGE_SHIFT);
*shift = page_shift;
@@ -67,15 +68,15 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
return;
}
- addr = addr >> page_shift;
+ addr = addr >> PAGE_SHIFT;
tmp = (unsigned long)addr;
m = find_first_bit(&tmp, BITS_PER_LONG);
if (max_page_shift)
- m = min_t(unsigned long, max_page_shift - page_shift, m);
+ m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m);
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> page_shift;
- pfn = sg_dma_address(sg) >> page_shift;
+ len = sg_dma_len(sg) >> PAGE_SHIFT;
+ pfn = sg_dma_address(sg) >> PAGE_SHIFT;
if (base + p != pfn) {
/* If either the offset or the new
* base are unaligned update m
@@ -107,7 +108,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
*ncont = 0;
}
- *shift = page_shift + m;
+ *shift = PAGE_SHIFT + m;
*count = i;
}
@@ -140,8 +141,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
int page_shift, size_t offset, size_t num_pages,
__be64 *pas, int access_flags)
{
- unsigned long umem_page_shift = umem->page_shift;
- int shift = page_shift - umem_page_shift;
+ int shift = page_shift - PAGE_SHIFT;
int mask = (1 << shift) - 1;
int i, k, idx;
u64 cur = 0;
@@ -165,7 +165,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
i = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> umem_page_shift;
+ len = sg_dma_len(sg) >> PAGE_SHIFT;
base = sg_dma_address(sg);
/* Skip elements below offset */
@@ -184,7 +184,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
for (; k < len; k++) {
if (!(i & mask)) {
- cur = base + (k << umem_page_shift);
+ cur = base + (k << PAGE_SHIFT);
cur |= access_flags;
idx = (i >> shift) - offset;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index ee73dc122d28..c482f19958b3 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -431,9 +431,6 @@ struct mlx5_ib_qp {
int create_type;
- /* Store signature errors */
- bool signature_en;
-
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
@@ -442,6 +439,10 @@ struct mlx5_ib_qp {
u32 flags_en;
/* storage for qp sub type when core qp type is IB_QPT_DRIVER */
enum ib_qp_type qp_sub_type;
+ /* A flag to indicate if there's a new counter is configured
+ * but not take effective
+ */
+ u32 counter_pending;
};
struct mlx5_ib_cq_buf {
@@ -587,6 +588,9 @@ struct mlx5_ib_mr {
void *descs;
dma_addr_t desc_map;
int ndescs;
+ int data_length;
+ int meta_ndescs;
+ int meta_length;
int max_descs;
int desc_size;
int access_mode;
@@ -605,6 +609,13 @@ struct mlx5_ib_mr {
int access_flags; /* Needed for rereg MR */
struct mlx5_ib_mr *parent;
+ /* Needed for IB_MR_TYPE_INTEGRITY */
+ struct mlx5_ib_mr *pi_mr;
+ struct mlx5_ib_mr *klm_mr;
+ struct mlx5_ib_mr *mtt_mr;
+ u64 data_iova;
+ u64 pi_iova;
+
atomic_t num_leaf_free;
wait_queue_head_t q_leaf_free;
struct mlx5_async_work cb_work;
@@ -929,6 +940,13 @@ struct mlx5_ib_pf_eq {
mempool_t *pool;
};
+struct mlx5_devx_event_table {
+ struct mlx5_nb devx_nb;
+ /* serialize updating the event_xa */
+ struct mutex event_xa_lock;
+ struct xarray event_xa;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
@@ -978,6 +996,7 @@ struct mlx5_ib_dev {
u16 devx_whitelist_uid;
struct mlx5_srq_table srq_table;
struct mlx5_async_ctx async_ctx;
+ struct mlx5_devx_event_table devx_event_table;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1115,10 +1134,9 @@ int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
int buflen, size_t *bc);
int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index,
void *buffer, int buflen, size_t *bc);
-struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
-int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
@@ -1148,8 +1166,15 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg, struct ib_udata *udata);
+struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_sg,
+ u32 max_num_meta_sg);
int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
+int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset);
int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const struct ib_mad_hdr *in, size_t in_mad_size,
@@ -1201,7 +1226,7 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
-int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
+void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
u32 wq_attr_mask, struct ib_udata *udata);
struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
@@ -1311,6 +1336,8 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
+void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev);
+void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev);
const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
extern const struct uapi_definition mlx5_ib_devx_defs[];
extern const struct uapi_definition mlx5_ib_flow_defs[];
@@ -1328,6 +1355,8 @@ static inline int
mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
bool is_user) { return -EOPNOTSUPP; }
static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
+static inline void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) {}
+static inline void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) {}
static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
int *dest_type)
{
@@ -1443,4 +1472,6 @@ void mlx5_ib_put_xlt_emergency_page(void);
int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
struct mlx5_bfreg_info *bfregi, u32 bfregn,
bool dyn_bfreg);
+
+int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 83b452d977d4..20ece6e0b2fc 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1507,10 +1507,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
return 0;
err:
- if (mr->umem) {
- ib_umem_release(mr->umem);
- mr->umem = NULL;
- }
+ ib_umem_release(mr->umem);
+ mr->umem = NULL;
+
clean_mr(dev, mr);
return err;
}
@@ -1606,8 +1605,9 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
synchronize_srcu(&dev->mr_srcu);
/* Destroy all page mappings */
if (umem_odp->page_list)
- mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem),
- ib_umem_end(umem));
+ mlx5_ib_invalidate_range(umem_odp,
+ ib_umem_start(umem_odp),
+ ib_umem_end(umem_odp));
else
mlx5_ib_free_implicit_mr(mr);
/*
@@ -1629,28 +1629,85 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
* remove the DMA mapping.
*/
mlx5_mr_cache_free(dev, mr);
- if (umem) {
- ib_umem_release(umem);
+ ib_umem_release(umem);
+ if (umem)
atomic_sub(npages, &dev->mdev->priv.reg_pages);
- }
+
if (!mr->allocated_from_cache)
kfree(mr);
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
- dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr));
+ struct mlx5_ib_mr *mmr = to_mmr(ibmr);
+
+ if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
+ dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr);
+ dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr);
+ }
+
+ dereg_mr(to_mdev(ibmr->device), mmr);
+
return 0;
}
-struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
+ int access_mode, int page_shift)
+{
+ void *mkc;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
+ MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+ MLX5_SET(mkc, mkc, log_page_size, page_shift);
+}
+
+static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int ndescs, int desc_size, int page_shift,
+ int access_mode, u32 *in, int inlen)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int err;
+
+ mr->access_mode = access_mode;
+ mr->desc_size = desc_size;
+ mr->max_descs = ndescs;
+
+ err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size);
+ if (err)
+ return err;
+
+ mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift);
+
+ err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
+ if (err)
+ goto err_free_descs;
+
+ mr->mmkey.type = MLX5_MKEY_MR;
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
+
+ return 0;
+
+err_free_descs:
+ mlx5_free_priv_descs(mr);
+ return err;
+}
+
+static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
+ u32 max_num_sg, u32 max_num_meta_sg,
+ int desc_size, int access_mode)
+{
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- int ndescs = ALIGN(max_num_sg, 4);
+ int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
+ int page_shift = 0;
struct mlx5_ib_mr *mr;
- void *mkc;
u32 *in;
int err;
@@ -1658,99 +1715,168 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
if (!mr)
return ERR_PTR(-ENOMEM);
+ mr->ibmr.pd = pd;
+ mr->ibmr.device = pd->device;
+
in = kzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_free;
}
+ if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
+ page_shift = PAGE_SHIFT;
+
+ err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift,
+ access_mode, in, inlen);
+ if (err)
+ goto err_free_in;
+
+ mr->umem = NULL;
+ kfree(in);
+
+ return mr;
+
+err_free_in:
+ kfree(in);
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int ndescs, u32 *in, int inlen)
+{
+ return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt),
+ PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in,
+ inlen);
+}
+
+static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int ndescs, u32 *in, int inlen)
+{
+ return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm),
+ 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
+}
+
+static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int max_num_sg, int max_num_meta_sg,
+ u32 *in, int inlen)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ u32 psv_index[2];
+ void *mkc;
+ int err;
+
+ mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
+ if (!mr->sig)
+ return -ENOMEM;
+
+ /* create mem & wire PSVs */
+ err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index);
+ if (err)
+ goto err_free_sig;
+
+ mr->sig->psv_memory.psv_idx = psv_index[0];
+ mr->sig->psv_wire.psv_idx = psv_index[1];
+
+ mr->sig->sig_status_checked = true;
+ mr->sig->sig_err_exists = false;
+ /* Next UMR, Arm SIGERR */
+ ++mr->sig->sigerr_count;
+ mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
+ sizeof(struct mlx5_klm),
+ MLX5_MKC_ACCESS_MODE_KLMS);
+ if (IS_ERR(mr->klm_mr)) {
+ err = PTR_ERR(mr->klm_mr);
+ goto err_destroy_psv;
+ }
+ mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
+ sizeof(struct mlx5_mtt),
+ MLX5_MKC_ACCESS_MODE_MTT);
+ if (IS_ERR(mr->mtt_mr)) {
+ err = PTR_ERR(mr->mtt_mr);
+ goto err_free_klm_mr;
+ }
+
+ /* Set bsf descriptors for mkey */
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
- MLX5_SET(mkc, mkc, free, 1);
- MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, bsf_en, 1);
+ MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
- if (mr_type == IB_MR_TYPE_MEM_REG) {
- mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
- MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
- err = mlx5_alloc_priv_descs(pd->device, mr,
- ndescs, sizeof(struct mlx5_mtt));
- if (err)
- goto err_free_in;
+ err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0,
+ MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
+ if (err)
+ goto err_free_mtt_mr;
- mr->desc_size = sizeof(struct mlx5_mtt);
- mr->max_descs = ndescs;
- } else if (mr_type == IB_MR_TYPE_SG_GAPS) {
- mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
+ return 0;
- err = mlx5_alloc_priv_descs(pd->device, mr,
- ndescs, sizeof(struct mlx5_klm));
- if (err)
- goto err_free_in;
- mr->desc_size = sizeof(struct mlx5_klm);
- mr->max_descs = ndescs;
- } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
- u32 psv_index[2];
-
- MLX5_SET(mkc, mkc, bsf_en, 1);
- MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
- mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
- if (!mr->sig) {
- err = -ENOMEM;
- goto err_free_in;
- }
+err_free_mtt_mr:
+ dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr);
+ mr->mtt_mr = NULL;
+err_free_klm_mr:
+ dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr);
+ mr->klm_mr = NULL;
+err_destroy_psv:
+ if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
+ mr->sig->psv_memory.psv_idx);
+ if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
+ mr->sig->psv_wire.psv_idx);
+err_free_sig:
+ kfree(mr->sig);
- /* create mem & wire PSVs */
- err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
- 2, psv_index);
- if (err)
- goto err_free_sig;
+ return err;
+}
+
+static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type, u32 max_num_sg,
+ u32 max_num_meta_sg)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ int ndescs = ALIGN(max_num_sg, 4);
+ struct mlx5_ib_mr *mr;
+ u32 *in;
+ int err;
- mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
- mr->sig->psv_memory.psv_idx = psv_index[0];
- mr->sig->psv_wire.psv_idx = psv_index[1];
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
- mr->sig->sig_status_checked = true;
- mr->sig->sig_err_exists = false;
- /* Next UMR, Arm SIGERR */
- ++mr->sig->sigerr_count;
- } else {
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ mr->ibmr.device = pd->device;
+ mr->umem = NULL;
+
+ switch (mr_type) {
+ case IB_MR_TYPE_MEM_REG:
+ err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen);
+ break;
+ case IB_MR_TYPE_SG_GAPS:
+ err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen);
+ break;
+ case IB_MR_TYPE_INTEGRITY:
+ err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg,
+ max_num_meta_sg, in, inlen);
+ break;
+ default:
mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
err = -EINVAL;
- goto err_free_in;
}
- MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3);
- MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7);
- MLX5_SET(mkc, mkc, umr_en, 1);
-
- mr->ibmr.device = pd->device;
- err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
if (err)
- goto err_destroy_psv;
+ goto err_free_in;
- mr->mmkey.type = MLX5_MKEY_MR;
- mr->ibmr.lkey = mr->mmkey.key;
- mr->ibmr.rkey = mr->mmkey.key;
- mr->umem = NULL;
kfree(in);
return &mr->ibmr;
-err_destroy_psv:
- if (mr->sig) {
- if (mlx5_core_destroy_psv(dev->mdev,
- mr->sig->psv_memory.psv_idx))
- mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
- mr->sig->psv_memory.psv_idx);
- if (mlx5_core_destroy_psv(dev->mdev,
- mr->sig->psv_wire.psv_idx))
- mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
- mr->sig->psv_wire.psv_idx);
- }
- mlx5_free_priv_descs(mr);
-err_free_sig:
- kfree(mr->sig);
err_free_in:
kfree(in);
err_free:
@@ -1758,6 +1884,19 @@ err_free:
return ERR_PTR(err);
}
+struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg, struct ib_udata *udata)
+{
+ return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
+}
+
+struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_sg, u32 max_num_meta_sg)
+{
+ return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg,
+ max_num_meta_sg);
+}
+
struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata)
{
@@ -1887,16 +2026,53 @@ done:
}
static int
+mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ unsigned int sg_offset = 0;
+ int n = 0;
+
+ mr->meta_length = 0;
+ if (data_sg_nents == 1) {
+ n++;
+ mr->ndescs = 1;
+ if (data_sg_offset)
+ sg_offset = *data_sg_offset;
+ mr->data_length = sg_dma_len(data_sg) - sg_offset;
+ mr->data_iova = sg_dma_address(data_sg) + sg_offset;
+ if (meta_sg_nents == 1) {
+ n++;
+ mr->meta_ndescs = 1;
+ if (meta_sg_offset)
+ sg_offset = *meta_sg_offset;
+ else
+ sg_offset = 0;
+ mr->meta_length = sg_dma_len(meta_sg) - sg_offset;
+ mr->pi_iova = sg_dma_address(meta_sg) + sg_offset;
+ }
+ ibmr->length = mr->data_length + mr->meta_length;
+ }
+
+ return n;
+}
+
+static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
struct scatterlist *sgl,
unsigned short sg_nents,
- unsigned int *sg_offset_p)
+ unsigned int *sg_offset_p,
+ struct scatterlist *meta_sgl,
+ unsigned short meta_sg_nents,
+ unsigned int *meta_sg_offset_p)
{
struct scatterlist *sg = sgl;
struct mlx5_klm *klms = mr->descs;
unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
u32 lkey = mr->ibmr.pd->local_dma_lkey;
- int i;
+ int i, j = 0;
mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
mr->ibmr.length = 0;
@@ -1911,12 +2087,36 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
sg_offset = 0;
}
- mr->ndescs = i;
if (sg_offset_p)
*sg_offset_p = sg_offset;
- return i;
+ mr->ndescs = i;
+ mr->data_length = mr->ibmr.length;
+
+ if (meta_sg_nents) {
+ sg = meta_sgl;
+ sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
+ for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
+ if (unlikely(i + j >= mr->max_descs))
+ break;
+ klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
+ sg_offset);
+ klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
+ sg_offset);
+ klms[i + j].key = cpu_to_be32(lkey);
+ mr->ibmr.length += sg_dma_len(sg) - sg_offset;
+
+ sg_offset = 0;
+ }
+ if (meta_sg_offset_p)
+ *meta_sg_offset_p = sg_offset;
+
+ mr->meta_ndescs = j;
+ mr->meta_length = mr->ibmr.length - mr->data_length;
+ }
+
+ return i + j;
}
static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
@@ -1933,6 +2133,181 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
return 0;
}
+static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ __be64 *descs;
+
+ if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs))
+ return -ENOMEM;
+
+ descs = mr->descs;
+ descs[mr->ndescs + mr->meta_ndescs++] =
+ cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
+
+ return 0;
+}
+
+static int
+mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
+ int n;
+
+ pi_mr->ndescs = 0;
+ pi_mr->meta_ndescs = 0;
+ pi_mr->meta_length = 0;
+
+ ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ pi_mr->ibmr.page_size = ibmr->page_size;
+ n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
+ mlx5_set_page);
+ if (n != data_sg_nents)
+ return n;
+
+ pi_mr->data_iova = pi_mr->ibmr.iova;
+ pi_mr->data_length = pi_mr->ibmr.length;
+ pi_mr->ibmr.length = pi_mr->data_length;
+ ibmr->length = pi_mr->data_length;
+
+ if (meta_sg_nents) {
+ u64 page_mask = ~((u64)ibmr->page_size - 1);
+ u64 iova = pi_mr->data_iova;
+
+ n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
+ meta_sg_offset, mlx5_set_page_pi);
+
+ pi_mr->meta_length = pi_mr->ibmr.length;
+ /*
+ * PI address for the HW is the offset of the metadata address
+ * relative to the first data page address.
+ * It equals to first data page address + size of data pages +
+ * metadata offset at the first metadata page
+ */
+ pi_mr->pi_iova = (iova & page_mask) +
+ pi_mr->ndescs * ibmr->page_size +
+ (pi_mr->ibmr.iova & ~page_mask);
+ /*
+ * In order to use one MTT MR for data and metadata, we register
+ * also the gaps between the end of the data and the start of
+ * the metadata (the sig MR will verify that the HW will access
+ * to right addresses). This mapping is safe because we use
+ * internal mkey for the registration.
+ */
+ pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
+ pi_mr->ibmr.iova = iova;
+ ibmr->length += pi_mr->meta_length;
+ }
+
+ ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ return n;
+}
+
+static int
+mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_mr *pi_mr = mr->klm_mr;
+ int n;
+
+ pi_mr->ndescs = 0;
+ pi_mr->meta_ndescs = 0;
+ pi_mr->meta_length = 0;
+
+ ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
+ meta_sg, meta_sg_nents, meta_sg_offset);
+
+ ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ /* This is zero-based memory region */
+ pi_mr->data_iova = 0;
+ pi_mr->ibmr.iova = 0;
+ pi_mr->pi_iova = pi_mr->data_length;
+ ibmr->length = pi_mr->ibmr.length;
+
+ return n;
+}
+
+int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_mr *pi_mr = NULL;
+ int n;
+
+ WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
+
+ mr->ndescs = 0;
+ mr->data_length = 0;
+ mr->data_iova = 0;
+ mr->meta_ndescs = 0;
+ mr->pi_iova = 0;
+ /*
+ * As a performance optimization, if possible, there is no need to
+ * perform UMR operation to register the data/metadata buffers.
+ * First try to map the sg lists to PA descriptors with local_dma_lkey.
+ * Fallback to UMR only in case of a failure.
+ */
+ n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg, meta_sg_nents,
+ meta_sg_offset);
+ if (n == data_sg_nents + meta_sg_nents)
+ goto out;
+ /*
+ * As a performance optimization, if possible, there is no need to map
+ * the sg lists to KLM descriptors. First try to map the sg lists to MTT
+ * descriptors and fallback to KLM only in case of a failure.
+ * It's more efficient for the HW to work with MTT descriptors
+ * (especially in high load).
+ * Use KLM (indirect access) only if it's mandatory.
+ */
+ pi_mr = mr->mtt_mr;
+ n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg, meta_sg_nents,
+ meta_sg_offset);
+ if (n == data_sg_nents + meta_sg_nents)
+ goto out;
+
+ pi_mr = mr->klm_mr;
+ n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg, meta_sg_nents,
+ meta_sg_offset);
+ if (unlikely(n != data_sg_nents + meta_sg_nents))
+ return -ENOMEM;
+
+out:
+ /* This is zero-based memory region */
+ ibmr->iova = 0;
+ mr->pi_mr = pi_mr;
+ if (pi_mr)
+ ibmr->sig_attrs->meta_length = pi_mr->meta_length;
+ else
+ ibmr->sig_attrs->meta_length = mr->meta_length;
+
+ return 0;
+}
+
int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset)
{
@@ -1946,7 +2321,8 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
DMA_TO_DEVICE);
if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
- n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
+ n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
+ NULL);
else
n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
mlx5_set_page);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 831c450b271a..5b642d81e617 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -150,7 +150,7 @@ static struct ib_umem_odp *odp_lookup(u64 start, u64 length,
if (!rb)
goto not_found;
odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
- if (ib_umem_start(&odp->umem) > start + length)
+ if (ib_umem_start(odp) > start + length)
goto not_found;
}
not_found:
@@ -200,7 +200,7 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
static void mr_leaf_free_action(struct work_struct *work)
{
struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
- int idx = ib_umem_start(&odp->umem) >> MLX5_IMR_MTT_SHIFT;
+ int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
mr->parent = NULL;
@@ -224,7 +224,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT /
sizeof(struct mlx5_mtt)) - 1;
u64 idx = 0, blk_start_idx = 0;
- struct ib_umem *umem;
int in_block = 0;
u64 addr;
@@ -232,15 +231,14 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
pr_err("invalidation called on NULL umem or non-ODP umem\n");
return;
}
- umem = &umem_odp->umem;
mr = umem_odp->private;
if (!mr || !mr->ibmr.pd)
return;
- start = max_t(u64, ib_umem_start(umem), start);
- end = min_t(u64, ib_umem_end(umem), end);
+ start = max_t(u64, ib_umem_start(umem_odp), start);
+ end = min_t(u64, ib_umem_end(umem_odp), end);
/*
* Iteration one - zap the HW's MTTs. The notifiers_count ensures that
@@ -249,8 +247,8 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
* but they will write 0s as well, so no difference in the end result.
*/
- for (addr = start; addr < end; addr += BIT(umem->page_shift)) {
- idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
+ for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) {
+ idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
/*
* Strive to write the MTTs in chunks, but avoid overwriting
* non-existing MTTs. The huristic here can be improved to
@@ -544,13 +542,12 @@ static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end,
void *cookie)
{
struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie;
- struct ib_umem *umem = &umem_odp->umem;
if (mr->parent != imr)
return 0;
- ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
- ib_umem_end(umem));
+ ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+ ib_umem_end(umem_odp));
if (umem_odp->dying)
return 0;
@@ -602,9 +599,9 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
}
next_mr:
- size = min_t(size_t, bcnt, ib_umem_end(&odp->umem) - io_virt);
+ size = min_t(size_t, bcnt, ib_umem_end(odp) - io_virt);
- page_shift = mr->umem->page_shift;
+ page_shift = odp->page_shift;
page_mask = ~(BIT(page_shift) - 1);
start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
access_mask = ODP_READ_ALLOWED_BIT;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 768c7e81f688..2a97619ed603 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -34,6 +34,7 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/rdma_counter.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
#include "ib_rep.h"
@@ -442,9 +443,9 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)
}
size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
- if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN &&
+ if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN &&
ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE)
- return MLX5_SIG_WQE_SIZE;
+ return MLX5_SIG_WQE_SIZE;
else
return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
}
@@ -496,9 +497,6 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
sizeof(struct mlx5_wqe_inline_seg);
attr->cap.max_inline_data = qp->max_inline_data;
- if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
- qp->signature_en = true;
-
wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
@@ -790,8 +788,7 @@ static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
atomic_dec(&dev->delay_drop.rqs_cnt);
mlx5_ib_db_unmap_user(context, &rwq->db);
- if (rwq->umem)
- ib_umem_release(rwq->umem);
+ ib_umem_release(rwq->umem);
}
static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
@@ -977,8 +974,7 @@ err_free:
kvfree(*in);
err_umem:
- if (ubuffer->umem)
- ib_umem_release(ubuffer->umem);
+ ib_umem_release(ubuffer->umem);
err_bfreg:
if (bfregn != MLX5_IB_INVALID_BFREG)
@@ -997,8 +993,7 @@ static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd,
ibucontext);
mlx5_ib_db_unmap_user(context, &qp->db);
- if (base->ubuffer.umem)
- ib_umem_release(base->ubuffer.umem);
+ ib_umem_release(base->ubuffer.umem);
/*
* Free only the BFREGs which are handled by the kernel.
@@ -1042,7 +1037,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
void *qpc;
int err;
- if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN |
+ if (init_attr->create_flags & ~(IB_QP_CREATE_INTEGRITY_EN |
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
IB_QP_CREATE_IPOIB_UD_LSO |
IB_QP_CREATE_NETIF_QP |
@@ -3386,6 +3381,35 @@ static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev,
return tx_port_affinity;
}
+static int __mlx5_ib_qp_set_counter(struct ib_qp *qp,
+ struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_qp_context context = {};
+ struct mlx5_ib_port *mibport = NULL;
+ struct mlx5_ib_qp_base *base;
+ u32 set_id;
+
+ if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id))
+ return 0;
+
+ if (counter) {
+ set_id = counter->id;
+ } else {
+ mibport = &dev->port[mqp->port - 1];
+ set_id = mibport->cnts.set_id;
+ }
+
+ base = &mqp->trans_qp.base;
+ context.qp_counter_set_usr_page &= cpu_to_be32(0xffffff);
+ context.qp_counter_set_usr_page |= cpu_to_be32(set_id << 24);
+ return mlx5_core_qp_modify(dev->mdev,
+ MLX5_CMD_OP_RTS2RTS_QP,
+ MLX5_QP_OPTPAR_COUNTER_SET_ID,
+ &context, &base->mqp);
+}
+
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state,
@@ -3439,6 +3463,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
struct mlx5_ib_port *mibport = NULL;
enum mlx5_qp_state mlx5_cur, mlx5_new;
enum mlx5_qp_optpar optpar;
+ u32 set_id = 0;
int mlx5_st;
int err;
u16 op;
@@ -3601,8 +3626,12 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
port_num = 0;
mibport = &dev->port[port_num];
+ if (ibqp->counter)
+ set_id = ibqp->counter->id;
+ else
+ set_id = mibport->cnts.set_id;
context->qp_counter_set_usr_page |=
- cpu_to_be32((u32)(mibport->cnts.set_id) << 24);
+ cpu_to_be32(set_id << 24);
}
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
@@ -3630,7 +3659,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
raw_qp_param.operation = op;
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- raw_qp_param.rq_q_ctr_id = mibport->cnts.set_id;
+ raw_qp_param.rq_q_ctr_id = set_id;
raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
}
@@ -3707,6 +3736,12 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->db.db[MLX5_SND_DBR] = 0;
}
+ if ((new_state == IB_QPS_RTS) && qp->counter_pending) {
+ err = __mlx5_ib_qp_set_counter(ibqp, ibqp->counter);
+ if (!err)
+ qp->counter_pending = 0;
+ }
+
out:
kfree(context);
return err;
@@ -4170,15 +4205,13 @@ static __be64 sig_mkey_mask(void)
}
static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct mlx5_ib_mr *mr, bool umr_inline)
+ struct mlx5_ib_mr *mr, u8 flags)
{
- int size = mr->ndescs * mr->desc_size;
+ int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
memset(umr, 0, sizeof(*umr));
- umr->flags = MLX5_UMR_CHECK_NOT_FREE;
- if (umr_inline)
- umr->flags |= MLX5_UMR_INLINE;
+ umr->flags = flags;
umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
umr->mkey_mask = frwr_mkey_mask();
}
@@ -4305,7 +4338,7 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
struct mlx5_ib_mr *mr,
u32 key, int access)
{
- int ndescs = ALIGN(mr->ndescs, 8) >> 1;
+ int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1;
memset(seg, 0, sizeof(*seg));
@@ -4356,7 +4389,7 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
struct mlx5_ib_mr *mr,
struct mlx5_ib_pd *pd)
{
- int bcount = mr->desc_size * mr->ndescs;
+ int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs);
dseg->addr = cpu_to_be64(mr->desc_map);
dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
@@ -4549,23 +4582,37 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
return 0;
}
-static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
- struct mlx5_ib_qp *qp, void **seg,
- int *size, void **cur_edge)
+static int set_sig_data_segment(const struct ib_send_wr *send_wr,
+ struct ib_mr *sig_mr,
+ struct ib_sig_attrs *sig_attrs,
+ struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
{
- struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
- struct ib_mr *sig_mr = wr->sig_mr;
struct mlx5_bsf *bsf;
- u32 data_len = wr->wr.sg_list->length;
- u32 data_key = wr->wr.sg_list->lkey;
- u64 data_va = wr->wr.sg_list->addr;
+ u32 data_len;
+ u32 data_key;
+ u64 data_va;
+ u32 prot_len = 0;
+ u32 prot_key = 0;
+ u64 prot_va = 0;
+ bool prot = false;
int ret;
int wqe_size;
+ struct mlx5_ib_mr *mr = to_mmr(sig_mr);
+ struct mlx5_ib_mr *pi_mr = mr->pi_mr;
+
+ data_len = pi_mr->data_length;
+ data_key = pi_mr->ibmr.lkey;
+ data_va = pi_mr->data_iova;
+ if (pi_mr->meta_ndescs) {
+ prot_len = pi_mr->meta_length;
+ prot_key = pi_mr->ibmr.lkey;
+ prot_va = pi_mr->pi_iova;
+ prot = true;
+ }
- if (!wr->prot ||
- (data_key == wr->prot->lkey &&
- data_va == wr->prot->addr &&
- data_len == wr->prot->length)) {
+ if (!prot || (data_key == prot_key && data_va == prot_va &&
+ data_len == prot_len)) {
/**
* Source domain doesn't contain signature information
* or data and protection are interleaved in memory.
@@ -4599,8 +4646,6 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
struct mlx5_stride_block_entry *data_sentry;
struct mlx5_stride_block_entry *prot_sentry;
- u32 prot_key = wr->prot->lkey;
- u64 prot_va = wr->prot->addr;
u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
int prot_size;
@@ -4650,17 +4695,15 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
}
static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
- const struct ib_sig_handover_wr *wr, u32 size,
- u32 length, u32 pdn)
+ struct ib_mr *sig_mr, int access_flags,
+ u32 size, u32 length, u32 pdn)
{
- struct ib_mr *sig_mr = wr->sig_mr;
u32 sig_key = sig_mr->rkey;
u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
memset(seg, 0, sizeof(*seg));
- seg->flags = get_umr_flags(wr->access_flags) |
- MLX5_MKC_ACCESS_MODE_KLMS;
+ seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS;
seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
MLX5_MKEY_BSF_EN | pdn);
@@ -4680,49 +4723,50 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
umr->mkey_mask = sig_mkey_mask();
}
-
-static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
- struct mlx5_ib_qp *qp, void **seg, int *size,
- void **cur_edge)
+static int set_pi_umr_wr(const struct ib_send_wr *send_wr,
+ struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
{
- const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
- struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
+ const struct ib_reg_wr *wr = reg_wr(send_wr);
+ struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr);
+ struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr;
+ struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs;
u32 pdn = get_pd(qp)->pdn;
u32 xlt_size;
int region_len, ret;
- if (unlikely(wr->wr.num_sge != 1) ||
- unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) ||
- unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) ||
+ if (unlikely(send_wr->num_sge != 0) ||
+ unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) ||
+ unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) ||
unlikely(!sig_mr->sig->sig_status_checked))
return -EINVAL;
/* length of the protected region, data + protection */
- region_len = wr->wr.sg_list->length;
- if (wr->prot &&
- (wr->prot->lkey != wr->wr.sg_list->lkey ||
- wr->prot->addr != wr->wr.sg_list->addr ||
- wr->prot->length != wr->wr.sg_list->length))
- region_len += wr->prot->length;
+ region_len = pi_mr->ibmr.length;
/**
* KLM octoword size - if protection was provided
* then we use strided block format (3 octowords),
* else we use single KLM (1 octoword)
**/
- xlt_size = wr->prot ? 0x30 : sizeof(struct mlx5_klm);
+ if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE)
+ xlt_size = 0x30;
+ else
+ xlt_size = sizeof(struct mlx5_klm);
set_sig_umr_segment(*seg, xlt_size);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
- set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn);
+ set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len,
+ pdn);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
- ret = set_sig_data_segment(wr, qp, seg, size, cur_edge);
+ ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size,
+ cur_edge);
if (ret)
return ret;
@@ -4759,12 +4803,14 @@ static int set_psv_wr(struct ib_sig_domain *domain,
static int set_reg_wr(struct mlx5_ib_qp *qp,
const struct ib_reg_wr *wr,
- void **seg, int *size, void **cur_edge)
+ void **seg, int *size, void **cur_edge,
+ bool check_not_free)
{
struct mlx5_ib_mr *mr = to_mmr(wr->mr);
struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
- size_t mr_list_size = mr->ndescs * mr->desc_size;
+ int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
+ u8 flags = 0;
if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
mlx5_ib_warn(to_mdev(qp->ibqp.device),
@@ -4772,7 +4818,12 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
return -EINVAL;
}
- set_reg_umr_seg(*seg, mr, umr_inline);
+ if (check_not_free)
+ flags |= MLX5_UMR_CHECK_NOT_FREE;
+ if (umr_inline)
+ flags |= MLX5_UMR_INLINE;
+
+ set_reg_umr_seg(*seg, mr, flags);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
@@ -4898,8 +4949,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_core_dev *mdev = dev->mdev;
+ struct ib_reg_wr reg_pi_wr;
struct mlx5_ib_qp *qp;
struct mlx5_ib_mr *mr;
+ struct mlx5_ib_mr *pi_mr;
+ struct mlx5_ib_mr pa_pi_mr;
+ struct ib_sig_attrs *sig_attrs;
struct mlx5_wqe_xrc_seg *xrc;
struct mlx5_bf *bf;
void *cur_edge;
@@ -4953,7 +5008,8 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
goto out;
}
- if (wr->opcode == IB_WR_REG_MR) {
+ if (wr->opcode == IB_WR_REG_MR ||
+ wr->opcode == IB_WR_REG_MR_INTEGRITY) {
fence = dev->umr_fence;
next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
} else {
@@ -5003,7 +5059,7 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
qp->sq.wr_data[idx] = IB_WR_REG_MR;
ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
err = set_reg_wr(qp, reg_wr(wr), &seg, &size,
- &cur_edge);
+ &cur_edge, true);
if (err) {
*bad_wr = wr;
goto out;
@@ -5011,26 +5067,82 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
num_sge = 0;
break;
- case IB_WR_REG_SIG_MR:
- qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR;
- mr = to_mmr(sig_handover_wr(wr)->sig_mr);
-
+ case IB_WR_REG_MR_INTEGRITY:
+ qp->sq.wr_data[idx] = IB_WR_REG_MR_INTEGRITY;
+
+ mr = to_mmr(reg_wr(wr)->mr);
+ pi_mr = mr->pi_mr;
+
+ if (pi_mr) {
+ memset(&reg_pi_wr, 0,
+ sizeof(struct ib_reg_wr));
+
+ reg_pi_wr.mr = &pi_mr->ibmr;
+ reg_pi_wr.access = reg_wr(wr)->access;
+ reg_pi_wr.key = pi_mr->ibmr.rkey;
+
+ ctrl->imm = cpu_to_be32(reg_pi_wr.key);
+ /* UMR for data + prot registration */
+ err = set_reg_wr(qp, &reg_pi_wr, &seg,
+ &size, &cur_edge,
+ false);
+ if (err) {
+ *bad_wr = wr;
+ goto out;
+ }
+ finish_wqe(qp, ctrl, seg, size,
+ cur_edge, idx, wr->wr_id,
+ nreq, fence,
+ MLX5_OPCODE_UMR);
+
+ err = begin_wqe(qp, &seg, &ctrl, wr,
+ &idx, &size, &cur_edge,
+ nreq);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+ } else {
+ memset(&pa_pi_mr, 0,
+ sizeof(struct mlx5_ib_mr));
+ /* No UMR, use local_dma_lkey */
+ pa_pi_mr.ibmr.lkey =
+ mr->ibmr.pd->local_dma_lkey;
+
+ pa_pi_mr.ndescs = mr->ndescs;
+ pa_pi_mr.data_length = mr->data_length;
+ pa_pi_mr.data_iova = mr->data_iova;
+ if (mr->meta_ndescs) {
+ pa_pi_mr.meta_ndescs =
+ mr->meta_ndescs;
+ pa_pi_mr.meta_length =
+ mr->meta_length;
+ pa_pi_mr.pi_iova = mr->pi_iova;
+ }
+
+ pa_pi_mr.ibmr.length = mr->ibmr.length;
+ mr->pi_mr = &pa_pi_mr;
+ }
ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
- err = set_sig_umr_wr(wr, qp, &seg, &size,
- &cur_edge);
+ /* UMR for sig MR */
+ err = set_pi_umr_wr(wr, qp, &seg, &size,
+ &cur_edge);
if (err) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
-
finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
wr->wr_id, nreq, fence,
MLX5_OPCODE_UMR);
+
/*
* SET_PSV WQEs are not signaled and solicited
* on error
*/
+ sig_attrs = mr->ibmr.sig_attrs;
err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
&size, &cur_edge, nreq, false,
true);
@@ -5040,19 +5152,18 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
*bad_wr = wr;
goto out;
}
-
- err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem,
- mr->sig->psv_memory.psv_idx, &seg,
- &size);
+ err = set_psv_wr(&sig_attrs->mem,
+ mr->sig->psv_memory.psv_idx,
+ &seg, &size);
if (err) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
-
finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
- wr->wr_id, nreq, fence,
+ wr->wr_id, nreq, next_fence,
MLX5_OPCODE_SET_PSV);
+
err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
&size, &cur_edge, nreq, false,
true);
@@ -5062,20 +5173,20 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
*bad_wr = wr;
goto out;
}
-
- err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire,
- mr->sig->psv_wire.psv_idx, &seg,
- &size);
+ err = set_psv_wr(&sig_attrs->wire,
+ mr->sig->psv_wire.psv_idx,
+ &seg, &size);
if (err) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
-
finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
- wr->wr_id, nreq, fence,
+ wr->wr_id, nreq, next_fence,
MLX5_OPCODE_SET_PSV);
- qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+
+ qp->next_fence =
+ MLX5_FENCE_MODE_INITIATOR_SMALL;
num_sge = 0;
goto skip_psv;
@@ -6047,7 +6158,7 @@ err:
return ERR_PTR(err);
}
-int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
+void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(wq->device);
struct mlx5_ib_rwq *rwq = to_mrwq(wq);
@@ -6055,8 +6166,6 @@ int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
destroy_user_rq(dev, wq->pd, rwq, udata);
kfree(rwq);
-
- return 0;
}
struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
@@ -6367,3 +6476,34 @@ void mlx5_ib_drain_rq(struct ib_qp *qp)
handle_drain_completion(cq, &rdrain, dev);
}
+
+/**
+ * Bind a qp to a counter. If @counter is NULL then bind the qp to
+ * the default counter
+ */
+int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter)
+{
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ int err = 0;
+
+ mutex_lock(&mqp->mutex);
+ if (mqp->state == IB_QPS_RESET) {
+ qp->counter = counter;
+ goto out;
+ }
+
+ if (mqp->state == IB_QPS_RTS) {
+ err = __mlx5_ib_qp_set_counter(qp, counter);
+ if (!err)
+ qp->counter = counter;
+
+ goto out;
+ }
+
+ mqp->counter_pending = 1;
+ qp->counter = counter;
+
+out:
+ mutex_unlock(&mqp->mutex);
+ return err;
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c
index aaf10dd5364d..aef1d274a14e 100644
--- a/drivers/infiniband/hw/mthca/mthca_allocator.c
+++ b/drivers/infiniband/hw/mthca/mthca_allocator.c
@@ -214,8 +214,6 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
dma_unmap_addr_set(&buf->direct, mapping, t);
- memset(buf->direct.buf, 0, size);
-
while (t & ((1 << shift) - 1)) {
--shift;
npages *= 2;
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 8ff0e90d7564..edccfd6e178f 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -482,7 +482,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
if (ret < 0) {
- put_page(pages[0]);
+ put_user_page(pages[0]);
goto out;
}
@@ -490,7 +490,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
mthca_uarc_virt(dev, uar, i));
if (ret) {
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
- put_page(sg_page(&db_tab->page[i].mem));
+ put_user_page(sg_page(&db_tab->page[i].mem));
goto out;
}
@@ -556,7 +556,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
if (db_tab->page[i].uvirt) {
mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1);
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
- put_page(sg_page(&db_tab->page[i].mem));
+ put_user_page(sg_page(&db_tab->page[i].mem));
}
}
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 4f40dfedf920..23554d8bf241 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -601,10 +601,11 @@ static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
return 0;
}
-static struct ib_cq *mthca_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+static int mthca_create_cq(struct ib_cq *ibcq,
+ const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
struct mthca_create_cq ucmd;
struct mthca_cq *cq;
@@ -614,20 +615,20 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev,
udata, struct mthca_ucontext, ibucontext);
if (attr->flags)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (udata) {
- if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
- return ERR_PTR(-EFAULT);
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+ return -EFAULT;
err = mthca_map_user_db(to_mdev(ibdev), &context->uar,
context->db_tab, ucmd.set_db_index,
ucmd.set_db_page);
if (err)
- return ERR_PTR(err);
+ return err;
err = mthca_map_user_db(to_mdev(ibdev), &context->uar,
context->db_tab, ucmd.arm_db_index,
@@ -636,11 +637,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev,
goto err_unmap_set;
}
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq) {
- err = -ENOMEM;
- goto err_unmap_arm;
- }
+ cq = to_mcq(ibcq);
if (udata) {
cq->buf.mr.ibmr.lkey = ucmd.lkey;
@@ -655,20 +652,17 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev,
udata ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
cq);
if (err)
- goto err_free;
+ goto err_unmap_arm;
if (udata && ib_copy_to_udata(udata, &cq->cqn, sizeof(__u32))) {
mthca_free_cq(to_mdev(ibdev), cq);
err = -EFAULT;
- goto err_free;
+ goto err_unmap_arm;
}
cq->resize_buf = NULL;
- return &cq->ibcq;
-
-err_free:
- kfree(cq);
+ return 0;
err_unmap_arm:
if (udata)
@@ -680,7 +674,7 @@ err_unmap_set:
mthca_unmap_user_db(to_mdev(ibdev), &context->uar,
context->db_tab, ucmd.set_db_index);
- return ERR_PTR(err);
+ return err;
}
static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
@@ -804,7 +798,7 @@ out:
return ret;
}
-static int mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+static void mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
if (udata) {
struct mthca_ucontext *context =
@@ -823,9 +817,6 @@ static int mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
to_mcq(cq)->set_ci_db_index);
}
mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
- kfree(cq);
-
- return 0;
}
static inline u32 convert_access(int acc)
@@ -962,8 +953,7 @@ static int mthca_dereg_mr(struct ib_mr *mr, struct ib_udata *udata)
struct mthca_mr *mmr = to_mmr(mr);
mthca_free_mr(to_mdev(mr->device), mmr);
- if (mmr->umem)
- ib_umem_release(mmr->umem);
+ ib_umem_release(mmr->umem);
kfree(mmr);
return 0;
@@ -1153,6 +1143,11 @@ static void get_dev_fw_str(struct ib_device *device, char *str)
}
static const struct ib_device_ops mthca_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_MTHCA,
+ .uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION,
+ .uverbs_no_driver_id_binding = 1,
+
.alloc_pd = mthca_alloc_pd,
.alloc_ucontext = mthca_alloc_ucontext,
.attach_mcast = mthca_multicast_attach,
@@ -1185,6 +1180,7 @@ static const struct ib_device_ops mthca_dev_ops = {
.resize_cq = mthca_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, mthca_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, mthca_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, mthca_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, mthca_ucontext, ibucontext),
};
@@ -1243,9 +1239,6 @@ int mthca_register_device(struct mthca_dev *dev)
if (ret)
return ret;
- dev->ib_dev.owner = THIS_MODULE;
-
- dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION;
dev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -1303,7 +1296,6 @@ int mthca_register_device(struct mthca_dev *dev)
mutex_init(&dev->cap_mask_mutex);
rdma_set_device_sysfs_group(&dev->ib_dev, &mthca_attr_group);
- dev->ib_dev.driver_id = RDMA_DRIVER_MTHCA;
ret = ib_register_device(&dev->ib_dev, "mthca%d");
if (ret)
return ret;
diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig
deleted file mode 100644
index 8245353e8106..000000000000
--- a/drivers/infiniband/hw/nes/Kconfig
+++ /dev/null
@@ -1,16 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-config INFINIBAND_NES
- tristate "NetEffect RNIC Driver"
- depends on PCI && INET
- select LIBCRC32C
- ---help---
- This is the RDMA Network Interface Card (RNIC) driver for
- NetEffect Ethernet Cluster Server Adapters.
-
-config INFINIBAND_NES_DEBUG
- bool "Verbose debugging output"
- depends on INFINIBAND_NES
- default n
- ---help---
- This option enables debug messages from the NetEffect RNIC
- driver. Select this if you are diagnosing a problem.
diff --git a/drivers/infiniband/hw/nes/Makefile b/drivers/infiniband/hw/nes/Makefile
deleted file mode 100644
index 239689ad1f9e..000000000000
--- a/drivers/infiniband/hw/nes/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_INFINIBAND_NES) += iw_nes.o
-
-iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o nes_mgt.o
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
deleted file mode 100644
index 29b324726ea6..000000000000
--- a/drivers/infiniband/hw/nes/nes.c
+++ /dev/null
@@ -1,1211 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/fs.h>
-#include <linux/init.h>
-#include <linux/if_arp.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-#include <rdma/ib_smi.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_pack.h>
-#include <rdma/iw_cm.h>
-
-#include "nes.h"
-
-#include <net/netevent.h>
-#include <net/neighbour.h>
-#include <linux/route.h>
-#include <net/ip_fib.h>
-
-MODULE_AUTHOR("NetEffect");
-MODULE_DESCRIPTION("NetEffect RNIC Low-level iWARP Driver");
-MODULE_LICENSE("Dual BSD/GPL");
-
-int interrupt_mod_interval = 0;
-
-/* Interoperability */
-int mpa_version = 1;
-module_param(mpa_version, int, 0644);
-MODULE_PARM_DESC(mpa_version, "MPA version to be used int MPA Req/Resp (0 or 1)");
-
-/* Interoperability */
-int disable_mpa_crc = 0;
-module_param(disable_mpa_crc, int, 0644);
-MODULE_PARM_DESC(disable_mpa_crc, "Disable checking of MPA CRC");
-
-unsigned int nes_drv_opt = NES_DRV_OPT_DISABLE_INT_MOD | NES_DRV_OPT_ENABLE_PAU;
-module_param(nes_drv_opt, int, 0644);
-MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters");
-
-unsigned int nes_debug_level = 0;
-module_param_named(debug_level, nes_debug_level, uint, 0644);
-MODULE_PARM_DESC(debug_level, "Enable debug output level");
-
-unsigned int wqm_quanta = 0x10000;
-module_param(wqm_quanta, int, 0644);
-MODULE_PARM_DESC(wqm_quanta, "WQM quanta");
-
-static bool limit_maxrdreqsz;
-module_param(limit_maxrdreqsz, bool, 0644);
-MODULE_PARM_DESC(limit_maxrdreqsz, "Limit max read request size to 256 Bytes");
-
-LIST_HEAD(nes_adapter_list);
-static LIST_HEAD(nes_dev_list);
-
-atomic_t qps_destroyed;
-
-static unsigned int ee_flsh_adapter;
-static unsigned int sysfs_nonidx_addr;
-static unsigned int sysfs_idx_addr;
-
-static const struct pci_device_id nes_pci_table[] = {
- { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020), },
- { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020_KR), },
- {0}
-};
-
-MODULE_DEVICE_TABLE(pci, nes_pci_table);
-
-static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *);
-static int nes_net_event(struct notifier_block *, unsigned long, void *);
-static int nes_notifiers_registered;
-
-
-static struct notifier_block nes_inetaddr_notifier = {
- .notifier_call = nes_inetaddr_event
-};
-
-static struct notifier_block nes_net_notifier = {
- .notifier_call = nes_net_event
-};
-
-/**
- * nes_inetaddr_event
- */
-static int nes_inetaddr_event(struct notifier_block *notifier,
- unsigned long event, void *ptr)
-{
- struct in_ifaddr *ifa = ptr;
- struct net_device *event_netdev = ifa->ifa_dev->dev;
- struct nes_device *nesdev;
- struct net_device *netdev;
- struct net_device *upper_dev;
- struct nes_vnic *nesvnic;
- unsigned int is_bonded;
-
- nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address %pI4, netmask %pI4.\n",
- &ifa->ifa_address, &ifa->ifa_mask);
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p. (%s)\n",
- nesdev, nesdev->netdev[0]->name);
- netdev = nesdev->netdev[0];
- nesvnic = netdev_priv(netdev);
- upper_dev = netdev_master_upper_dev_get(netdev);
- is_bonded = netif_is_bond_slave(netdev) &&
- (upper_dev == event_netdev);
- if ((netdev == event_netdev) || is_bonded) {
- if (nesvnic->rdma_enabled == 0) {
- nes_debug(NES_DBG_NETDEV, "Returning without processing event for %s since"
- " RDMA is not enabled.\n",
- netdev->name);
- return NOTIFY_OK;
- }
- /* we have ifa->ifa_address/mask here if we need it */
- switch (event) {
- case NETDEV_DOWN:
- nes_debug(NES_DBG_NETDEV, "event:DOWN\n");
- nes_write_indexed(nesdev,
- NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)), 0);
-
- nes_manage_arp_cache(netdev, netdev->dev_addr,
- ntohl(nesvnic->local_ipaddr), NES_ARP_DELETE);
- nesvnic->local_ipaddr = 0;
- if (is_bonded)
- continue;
- else
- return NOTIFY_OK;
- break;
- case NETDEV_UP:
- nes_debug(NES_DBG_NETDEV, "event:UP\n");
-
- if (nesvnic->local_ipaddr != 0) {
- nes_debug(NES_DBG_NETDEV, "Interface already has local_ipaddr\n");
- return NOTIFY_OK;
- }
- /* fall through */
- case NETDEV_CHANGEADDR:
- /* Add the address to the IP table */
- if (upper_dev) {
- struct in_device *in;
-
- rcu_read_lock();
- in = __in_dev_get_rcu(upper_dev);
- if (in) {
- struct in_ifaddr *ifa;
-
- ifa = rcu_dereference(in->ifa_list);
- if (ifa)
- nesvnic->local_ipaddr = ifa->ifa_address;
- }
- rcu_read_unlock();
- } else {
- nesvnic->local_ipaddr = ifa->ifa_address;
- }
-
- nes_write_indexed(nesdev,
- NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)),
- ntohl(nesvnic->local_ipaddr));
- nes_manage_arp_cache(netdev, netdev->dev_addr,
- ntohl(nesvnic->local_ipaddr), NES_ARP_ADD);
- if (is_bonded)
- continue;
- else
- return NOTIFY_OK;
- break;
- default:
- break;
- }
- }
- }
-
- return NOTIFY_DONE;
-}
-
-
-/**
- * nes_net_event
- */
-static int nes_net_event(struct notifier_block *notifier,
- unsigned long event, void *ptr)
-{
- struct neighbour *neigh = ptr;
- struct nes_device *nesdev;
- struct net_device *netdev;
- struct nes_vnic *nesvnic;
-
- switch (event) {
- case NETEVENT_NEIGH_UPDATE:
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- /* nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p.\n", nesdev); */
- netdev = nesdev->netdev[0];
- nesvnic = netdev_priv(netdev);
- if (netdev == neigh->dev) {
- if (nesvnic->rdma_enabled == 0) {
- nes_debug(NES_DBG_NETDEV, "Skipping device %s since no RDMA\n",
- netdev->name);
- } else {
- if (neigh->nud_state & NUD_VALID) {
- nes_manage_arp_cache(neigh->dev, neigh->ha,
- ntohl(*(__be32 *)neigh->primary_key), NES_ARP_ADD);
- } else {
- nes_manage_arp_cache(neigh->dev, neigh->ha,
- ntohl(*(__be32 *)neigh->primary_key), NES_ARP_DELETE);
- }
- }
- return NOTIFY_OK;
- }
- }
- break;
- default:
- nes_debug(NES_DBG_NETDEV, "NETEVENT_ %lu undefined\n", event);
- break;
- }
-
- return NOTIFY_DONE;
-}
-
-
-/**
- * nes_add_ref
- */
-void nes_add_ref(struct ib_qp *ibqp)
-{
- struct nes_qp *nesqp;
-
- nesqp = to_nesqp(ibqp);
- nes_debug(NES_DBG_QP, "Bumping refcount for QP%u. Pre-inc value = %u\n",
- ibqp->qp_num, atomic_read(&nesqp->refcount));
- atomic_inc(&nesqp->refcount);
-}
-
-static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_request *cqp_request)
-{
- unsigned long flags;
- struct nes_qp *nesqp = cqp_request->cqp_callback_pointer;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
-
- atomic_inc(&qps_destroyed);
-
- /* Free the control structures */
-
- if (nesqp->pbl_vbase) {
- pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size,
- nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase);
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- nesadapter->free_256pbl++;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase);
- nesqp->pbl_vbase = NULL;
-
- } else {
- pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size,
- nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase);
- }
- nes_free_resource(nesadapter, nesadapter->allocated_qps, nesqp->hwqp.qp_id);
-
- nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL;
- kfree(nesqp->allocated_buffer);
-
-}
-
-/**
- * nes_rem_ref
- */
-void nes_rem_ref(struct ib_qp *ibqp)
-{
- u64 u64temp;
- struct nes_qp *nesqp;
- struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- u32 opcode;
-
- nesqp = to_nesqp(ibqp);
-
- if (atomic_read(&nesqp->refcount) == 0) {
- printk(KERN_INFO PFX "%s: Reference count already 0 for QP%d, last aeq = 0x%04X.\n",
- __func__, ibqp->qp_num, nesqp->last_aeq);
- BUG();
- }
-
- if (atomic_dec_and_test(&nesqp->refcount)) {
- if (nesqp->pau_mode)
- nes_destroy_pau_qp(nesdev, nesqp);
-
- /* Destroy the QP */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n");
- return;
- }
- cqp_request->waiting = 0;
- cqp_request->callback = 1;
- cqp_request->cqp_callback = nes_cqp_rem_ref_callback;
- cqp_request->cqp_callback_pointer = nesqp;
- cqp_wqe = &cqp_request->cqp_wqe;
-
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- opcode = NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_IWARP;
-
- if (nesqp->hte_added) {
- opcode |= NES_CQP_QP_DEL_HTE;
- nesqp->hte_added = 0;
- }
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
- u64temp = (u64)nesqp->nesqp_context_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
- nes_post_cqp_request(nesdev, cqp_request);
- }
-}
-
-
-/**
- * nes_get_qp
- */
-struct ib_qp *nes_get_qp(struct ib_device *device, int qpn)
-{
- struct nes_vnic *nesvnic = to_nesvnic(device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
-
- if ((qpn < NES_FIRST_QPN) || (qpn >= (NES_FIRST_QPN + nesadapter->max_qp)))
- return NULL;
-
- return &nesadapter->qp_table[qpn - NES_FIRST_QPN]->ibqp;
-}
-
-
-/**
- * nes_print_macaddr
- */
-static void nes_print_macaddr(struct net_device *netdev)
-{
- nes_debug(NES_DBG_INIT, "%s: %pM, IRQ %u\n",
- netdev->name, netdev->dev_addr, netdev->irq);
-}
-
-/**
- * nes_interrupt - handle interrupts
- */
-static irqreturn_t nes_interrupt(int irq, void *dev_id)
-{
- struct nes_device *nesdev = (struct nes_device *)dev_id;
- int handled = 0;
- u32 int_mask;
- u32 int_req;
- u32 int_stat;
- u32 intf_int_stat;
- u32 timer_stat;
-
- if (nesdev->msi_enabled) {
- /* No need to read the interrupt pending register if msi is enabled */
- handled = 1;
- } else {
- if (unlikely(nesdev->nesadapter->hw_rev == NE020_REV)) {
- /* Master interrupt enable provides synchronization for kicking off bottom half
- when interrupt sharing is going on */
- int_mask = nes_read32(nesdev->regs + NES_INT_MASK);
- if (int_mask & 0x80000000) {
- /* Check interrupt status to see if this might be ours */
- int_stat = nes_read32(nesdev->regs + NES_INT_STAT);
- int_req = nesdev->int_req;
- if (int_stat&int_req) {
- /* if interesting CEQ or AEQ is pending, claim the interrupt */
- if ((int_stat&int_req) & (~(NES_INT_TIMER|NES_INT_INTF))) {
- handled = 1;
- } else {
- if (((int_stat & int_req) & NES_INT_TIMER) == NES_INT_TIMER) {
- /* Timer might be running but might be for another function */
- timer_stat = nes_read32(nesdev->regs + NES_TIMER_STAT);
- if ((timer_stat & nesdev->timer_int_req) != 0) {
- handled = 1;
- }
- }
- if ((((int_stat & int_req) & NES_INT_INTF) == NES_INT_INTF) &&
- (handled == 0)) {
- intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT);
- if ((intf_int_stat & nesdev->intf_int_req) != 0) {
- handled = 1;
- }
- }
- }
- if (handled) {
- nes_write32(nesdev->regs+NES_INT_MASK, int_mask & (~0x80000000));
- int_mask = nes_read32(nesdev->regs+NES_INT_MASK);
- /* Save off the status to save an additional read */
- nesdev->int_stat = int_stat;
- nesdev->napi_isr_ran = 1;
- }
- }
- }
- } else {
- handled = nes_read32(nesdev->regs+NES_INT_PENDING);
- }
- }
-
- if (handled) {
-
- if (nes_napi_isr(nesdev) == 0) {
- tasklet_schedule(&nesdev->dpc_tasklet);
-
- }
- return IRQ_HANDLED;
- } else {
- return IRQ_NONE;
- }
-}
-
-
-/**
- * nes_probe - Device initialization
- */
-static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
-{
- struct net_device *netdev = NULL;
- struct nes_device *nesdev = NULL;
- int ret = 0;
- void __iomem *mmio_regs = NULL;
- u8 hw_rev;
-
- printk(KERN_INFO PFX "NetEffect RNIC driver v%s loading. (%s)\n",
- DRV_VERSION, pci_name(pcidev));
-
- ret = pci_enable_device(pcidev);
- if (ret) {
- printk(KERN_ERR PFX "Unable to enable PCI device. (%s)\n", pci_name(pcidev));
- goto bail0;
- }
-
- nes_debug(NES_DBG_INIT, "BAR0 (@0x%08lX) size = 0x%lX bytes\n",
- (long unsigned int)pci_resource_start(pcidev, BAR_0),
- (long unsigned int)pci_resource_len(pcidev, BAR_0));
- nes_debug(NES_DBG_INIT, "BAR1 (@0x%08lX) size = 0x%lX bytes\n",
- (long unsigned int)pci_resource_start(pcidev, BAR_1),
- (long unsigned int)pci_resource_len(pcidev, BAR_1));
-
- /* Make sure PCI base addr are MMIO */
- if (!(pci_resource_flags(pcidev, BAR_0) & IORESOURCE_MEM) ||
- !(pci_resource_flags(pcidev, BAR_1) & IORESOURCE_MEM)) {
- printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
- ret = -ENODEV;
- goto bail1;
- }
-
- /* Reserve PCI I/O and memory resources */
- ret = pci_request_regions(pcidev, DRV_NAME);
- if (ret) {
- printk(KERN_ERR PFX "Unable to request regions. (%s)\n", pci_name(pcidev));
- goto bail1;
- }
-
- if ((sizeof(dma_addr_t) > 4)) {
- ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(64));
- if (ret < 0) {
- printk(KERN_ERR PFX "64b DMA mask configuration failed\n");
- goto bail2;
- }
- ret = pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(64));
- if (ret) {
- printk(KERN_ERR PFX "64b DMA consistent mask configuration failed\n");
- goto bail2;
- }
- } else {
- ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32));
- if (ret < 0) {
- printk(KERN_ERR PFX "32b DMA mask configuration failed\n");
- goto bail2;
- }
- ret = pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(32));
- if (ret) {
- printk(KERN_ERR PFX "32b DMA consistent mask configuration failed\n");
- goto bail2;
- }
- }
-
- pci_set_master(pcidev);
-
- /* Allocate hardware structure */
- nesdev = kzalloc(sizeof(struct nes_device), GFP_KERNEL);
- if (!nesdev) {
- ret = -ENOMEM;
- goto bail2;
- }
-
- nes_debug(NES_DBG_INIT, "Allocated nes device at %p\n", nesdev);
- nesdev->pcidev = pcidev;
- pci_set_drvdata(pcidev, nesdev);
-
- pci_read_config_byte(pcidev, 0x0008, &hw_rev);
- nes_debug(NES_DBG_INIT, "hw_rev=%u\n", hw_rev);
-
- spin_lock_init(&nesdev->indexed_regs_lock);
-
- /* Remap the PCI registers in adapter BAR0 to kernel VA space */
- mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0),
- pci_resource_len(pcidev, BAR_0));
- if (mmio_regs == NULL) {
- printk(KERN_ERR PFX "Unable to remap BAR0\n");
- ret = -EIO;
- goto bail3;
- }
- nesdev->regs = mmio_regs;
- nesdev->index_reg = 0x50 + (PCI_FUNC(pcidev->devfn)*8) + mmio_regs;
-
- /* Ensure interrupts are disabled */
- nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff);
-
- if (nes_drv_opt & NES_DRV_OPT_ENABLE_MSI) {
- if (!pci_enable_msi(nesdev->pcidev)) {
- nesdev->msi_enabled = 1;
- nes_debug(NES_DBG_INIT, "MSI is enabled for device %s\n",
- pci_name(pcidev));
- } else {
- nes_debug(NES_DBG_INIT, "MSI is disabled by linux for device %s\n",
- pci_name(pcidev));
- }
- } else {
- nes_debug(NES_DBG_INIT, "MSI not requested due to driver options for device %s\n",
- pci_name(pcidev));
- }
-
- nesdev->csr_start = pci_resource_start(nesdev->pcidev, BAR_0);
- nesdev->doorbell_region = pci_resource_start(nesdev->pcidev, BAR_1);
-
- /* Init the adapter */
- nesdev->nesadapter = nes_init_adapter(nesdev, hw_rev);
- if (!nesdev->nesadapter) {
- printk(KERN_ERR PFX "Unable to initialize adapter.\n");
- ret = -ENOMEM;
- goto bail5;
- }
- nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval;
- nesdev->nesadapter->wqm_quanta = wqm_quanta;
-
- /* nesdev->base_doorbell_index =
- nesdev->nesadapter->pd_config_base[PCI_FUNC(nesdev->pcidev->devfn)]; */
- nesdev->base_doorbell_index = 1;
- nesdev->doorbell_start = nesdev->nesadapter->doorbell_start;
- if (nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
- switch (PCI_FUNC(nesdev->pcidev->devfn) %
- nesdev->nesadapter->port_count) {
- case 1:
- nesdev->mac_index = 2;
- break;
- case 2:
- nesdev->mac_index = 1;
- break;
- case 3:
- nesdev->mac_index = 3;
- break;
- case 0:
- default:
- nesdev->mac_index = 0;
- }
- } else {
- nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) %
- nesdev->nesadapter->port_count;
- }
-
- if ((limit_maxrdreqsz ||
- ((nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_GLADIUS) &&
- (hw_rev == NE020_REV1))) &&
- (pcie_get_readrq(pcidev) > 256)) {
- if (pcie_set_readrq(pcidev, 256))
- printk(KERN_ERR PFX "Unable to set max read request"
- " to 256 bytes\n");
- else
- nes_debug(NES_DBG_INIT, "Max read request size set"
- " to 256 bytes\n");
- }
-
- tasklet_init(&nesdev->dpc_tasklet, nes_dpc, (unsigned long)nesdev);
-
- /* bring up the Control QP */
- if (nes_init_cqp(nesdev)) {
- ret = -ENODEV;
- goto bail6;
- }
-
- /* Arm the CCQ */
- nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
- PCI_FUNC(nesdev->pcidev->devfn));
- nes_read32(nesdev->regs+NES_CQE_ALLOC);
-
- /* Enable the interrupts */
- nesdev->int_req = (0x101 << PCI_FUNC(nesdev->pcidev->devfn)) |
- (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16));
- if (PCI_FUNC(nesdev->pcidev->devfn) < 4) {
- nesdev->int_req |= (1 << (PCI_FUNC(nesdev->mac_index)+24));
- }
-
- /* TODO: This really should be the first driver to load, not function 0 */
- if (PCI_FUNC(nesdev->pcidev->devfn) == 0) {
- /* pick up PCI and critical errors if the first driver to load */
- nesdev->intf_int_req = NES_INTF_INT_PCIERR | NES_INTF_INT_CRITERR;
- nesdev->int_req |= NES_INT_INTF;
- } else {
- nesdev->intf_int_req = 0;
- }
- nesdev->intf_int_req |= (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16));
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0, 0);
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 0);
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS2, 0x00001265);
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS4, 0x18021804);
-
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS3, 0x17801790);
-
- /* deal with both periodic and one_shot */
- nesdev->timer_int_req = 0x101 << PCI_FUNC(nesdev->pcidev->devfn);
- nesdev->nesadapter->timer_int_req |= nesdev->timer_int_req;
- nes_debug(NES_DBG_INIT, "setting int_req for function %u, nesdev = 0x%04X, adapter = 0x%04X\n",
- PCI_FUNC(nesdev->pcidev->devfn),
- nesdev->timer_int_req, nesdev->nesadapter->timer_int_req);
-
- nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
-
- list_add_tail(&nesdev->list, &nes_dev_list);
-
- /* Request an interrupt line for the driver */
- ret = request_irq(pcidev->irq, nes_interrupt, IRQF_SHARED, DRV_NAME, nesdev);
- if (ret) {
- printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
- pci_name(pcidev), pcidev->irq);
- goto bail65;
- }
-
- nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
-
- if (nes_notifiers_registered == 0) {
- register_inetaddr_notifier(&nes_inetaddr_notifier);
- register_netevent_notifier(&nes_net_notifier);
- }
- nes_notifiers_registered++;
-
- INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status);
-
- /* Initialize network devices */
- netdev = nes_netdev_init(nesdev, mmio_regs);
- if (netdev == NULL) {
- ret = -ENOMEM;
- goto bail7;
- }
-
- /* Register network device */
- ret = register_netdev(netdev);
- if (ret) {
- printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", ret);
- nes_netdev_destroy(netdev);
- goto bail7;
- }
-
- nes_print_macaddr(netdev);
-
- nesdev->netdev_count++;
- nesdev->nesadapter->netdev_count++;
-
- printk(KERN_INFO PFX "%s: NetEffect RNIC driver successfully loaded.\n",
- pci_name(pcidev));
- return 0;
-
- bail7:
- printk(KERN_ERR PFX "bail7\n");
- while (nesdev->netdev_count > 0) {
- nesdev->netdev_count--;
- nesdev->nesadapter->netdev_count--;
-
- unregister_netdev(nesdev->netdev[nesdev->netdev_count]);
- nes_netdev_destroy(nesdev->netdev[nesdev->netdev_count]);
- }
-
- nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n",
- nesdev->netdev_count, nesdev->nesadapter->netdev_count);
-
- nes_notifiers_registered--;
- if (nes_notifiers_registered == 0) {
- unregister_netevent_notifier(&nes_net_notifier);
- unregister_inetaddr_notifier(&nes_inetaddr_notifier);
- }
-
- list_del(&nesdev->list);
- nes_destroy_cqp(nesdev);
-
- bail65:
- printk(KERN_ERR PFX "bail65\n");
- free_irq(pcidev->irq, nesdev);
- if (nesdev->msi_enabled) {
- pci_disable_msi(pcidev);
- }
- bail6:
- printk(KERN_ERR PFX "bail6\n");
- tasklet_kill(&nesdev->dpc_tasklet);
- /* Deallocate the Adapter Structure */
- nes_destroy_adapter(nesdev->nesadapter);
-
- bail5:
- printk(KERN_ERR PFX "bail5\n");
- iounmap(nesdev->regs);
-
- bail3:
- printk(KERN_ERR PFX "bail3\n");
- kfree(nesdev);
-
- bail2:
- pci_release_regions(pcidev);
-
- bail1:
- pci_disable_device(pcidev);
-
- bail0:
- return ret;
-}
-
-
-/**
- * nes_remove - unload from kernel
- */
-static void nes_remove(struct pci_dev *pcidev)
-{
- struct nes_device *nesdev = pci_get_drvdata(pcidev);
- struct net_device *netdev;
- int netdev_index = 0;
- unsigned long flags;
-
- if (nesdev->netdev_count) {
- netdev = nesdev->netdev[netdev_index];
- if (netdev) {
- netif_stop_queue(netdev);
- unregister_netdev(netdev);
- nes_netdev_destroy(netdev);
-
- nesdev->netdev[netdev_index] = NULL;
- nesdev->netdev_count--;
- nesdev->nesadapter->netdev_count--;
- }
- }
-
- nes_notifiers_registered--;
- if (nes_notifiers_registered == 0) {
- unregister_netevent_notifier(&nes_net_notifier);
- unregister_inetaddr_notifier(&nes_inetaddr_notifier);
- }
-
- list_del(&nesdev->list);
- nes_destroy_cqp(nesdev);
-
- free_irq(pcidev->irq, nesdev);
- tasklet_kill(&nesdev->dpc_tasklet);
-
- spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
- if (nesdev->link_recheck) {
- spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
- cancel_delayed_work_sync(&nesdev->work);
- } else {
- spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
- }
-
- /* Deallocate the Adapter Structure */
- nes_destroy_adapter(nesdev->nesadapter);
-
- if (nesdev->msi_enabled) {
- pci_disable_msi(pcidev);
- }
-
- iounmap(nesdev->regs);
- kfree(nesdev);
-
- /* nes_debug(NES_DBG_SHUTDOWN, "calling pci_release_regions.\n"); */
- pci_release_regions(pcidev);
- pci_disable_device(pcidev);
- pci_set_drvdata(pcidev, NULL);
-}
-
-
-static ssize_t adapter_show(struct device_driver *ddp, char *buf)
-{
- unsigned int devfn = 0xffffffff;
- unsigned char bus_number = 0xff;
- unsigned int i = 0;
- struct nes_device *nesdev;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- devfn = nesdev->pcidev->devfn;
- bus_number = nesdev->pcidev->bus->number;
- break;
- }
- i++;
- }
-
- return snprintf(buf, PAGE_SIZE, "%x:%x\n", bus_number, devfn);
-}
-
-static ssize_t adapter_store(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
-
- ee_flsh_adapter = simple_strtoul(p, &p, 10);
- return strnlen(buf, count);
-}
-
-static ssize_t eeprom_cmd_show(struct device_driver *ddp, char *buf)
-{
- u32 eeprom_cmd = 0xdead;
- u32 i = 0;
- struct nes_device *nesdev;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- eeprom_cmd = nes_read32(nesdev->regs + NES_EEPROM_COMMAND);
- break;
- }
- i++;
- }
- return snprintf(buf, PAGE_SIZE, "0x%x\n", eeprom_cmd);
-}
-
-static ssize_t eeprom_cmd_store(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
- u32 val;
- u32 i = 0;
- struct nes_device *nesdev;
-
- if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
- val = simple_strtoul(p, &p, 16);
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- nes_write32(nesdev->regs + NES_EEPROM_COMMAND, val);
- break;
- }
- i++;
- }
- }
- return strnlen(buf, count);
-}
-
-static ssize_t eeprom_data_show(struct device_driver *ddp, char *buf)
-{
- u32 eeprom_data = 0xdead;
- u32 i = 0;
- struct nes_device *nesdev;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- eeprom_data = nes_read32(nesdev->regs + NES_EEPROM_DATA);
- break;
- }
- i++;
- }
-
- return snprintf(buf, PAGE_SIZE, "0x%x\n", eeprom_data);
-}
-
-static ssize_t eeprom_data_store(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
- u32 val;
- u32 i = 0;
- struct nes_device *nesdev;
-
- if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
- val = simple_strtoul(p, &p, 16);
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- nes_write32(nesdev->regs + NES_EEPROM_DATA, val);
- break;
- }
- i++;
- }
- }
- return strnlen(buf, count);
-}
-
-static ssize_t flash_cmd_show(struct device_driver *ddp, char *buf)
-{
- u32 flash_cmd = 0xdead;
- u32 i = 0;
- struct nes_device *nesdev;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- flash_cmd = nes_read32(nesdev->regs + NES_FLASH_COMMAND);
- break;
- }
- i++;
- }
-
- return snprintf(buf, PAGE_SIZE, "0x%x\n", flash_cmd);
-}
-
-static ssize_t flash_cmd_store(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
- u32 val;
- u32 i = 0;
- struct nes_device *nesdev;
-
- if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
- val = simple_strtoul(p, &p, 16);
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- nes_write32(nesdev->regs + NES_FLASH_COMMAND, val);
- break;
- }
- i++;
- }
- }
- return strnlen(buf, count);
-}
-
-static ssize_t flash_data_show(struct device_driver *ddp, char *buf)
-{
- u32 flash_data = 0xdead;
- u32 i = 0;
- struct nes_device *nesdev;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- flash_data = nes_read32(nesdev->regs + NES_FLASH_DATA);
- break;
- }
- i++;
- }
-
- return snprintf(buf, PAGE_SIZE, "0x%x\n", flash_data);
-}
-
-static ssize_t flash_data_store(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
- u32 val;
- u32 i = 0;
- struct nes_device *nesdev;
-
- if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
- val = simple_strtoul(p, &p, 16);
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- nes_write32(nesdev->regs + NES_FLASH_DATA, val);
- break;
- }
- i++;
- }
- }
- return strnlen(buf, count);
-}
-
-static ssize_t nonidx_addr_show(struct device_driver *ddp, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "0x%x\n", sysfs_nonidx_addr);
-}
-
-static ssize_t nonidx_addr_store(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
-
- if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X')
- sysfs_nonidx_addr = simple_strtoul(p, &p, 16);
-
- return strnlen(buf, count);
-}
-
-static ssize_t nonidx_data_show(struct device_driver *ddp, char *buf)
-{
- u32 nonidx_data = 0xdead;
- u32 i = 0;
- struct nes_device *nesdev;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- nonidx_data = nes_read32(nesdev->regs + sysfs_nonidx_addr);
- break;
- }
- i++;
- }
-
- return snprintf(buf, PAGE_SIZE, "0x%x\n", nonidx_data);
-}
-
-static ssize_t nonidx_data_store(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
- u32 val;
- u32 i = 0;
- struct nes_device *nesdev;
-
- if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
- val = simple_strtoul(p, &p, 16);
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- nes_write32(nesdev->regs + sysfs_nonidx_addr, val);
- break;
- }
- i++;
- }
- }
- return strnlen(buf, count);
-}
-
-static ssize_t idx_addr_show(struct device_driver *ddp, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "0x%x\n", sysfs_idx_addr);
-}
-
-static ssize_t idx_addr_store(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
-
- if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X')
- sysfs_idx_addr = simple_strtoul(p, &p, 16);
-
- return strnlen(buf, count);
-}
-
-static ssize_t idx_data_show(struct device_driver *ddp, char *buf)
-{
- u32 idx_data = 0xdead;
- u32 i = 0;
- struct nes_device *nesdev;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- idx_data = nes_read_indexed(nesdev, sysfs_idx_addr);
- break;
- }
- i++;
- }
-
- return snprintf(buf, PAGE_SIZE, "0x%x\n", idx_data);
-}
-
-static ssize_t idx_data_store(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
- u32 val;
- u32 i = 0;
- struct nes_device *nesdev;
-
- if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
- val = simple_strtoul(p, &p, 16);
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- nes_write_indexed(nesdev, sysfs_idx_addr, val);
- break;
- }
- i++;
- }
- }
- return strnlen(buf, count);
-}
-
-static ssize_t wqm_quanta_show(struct device_driver *ddp, char *buf)
-{
- u32 wqm_quanta_value = 0xdead;
- u32 i = 0;
- struct nes_device *nesdev;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- wqm_quanta_value = nesdev->nesadapter->wqm_quanta;
- break;
- }
- i++;
- }
-
- return snprintf(buf, PAGE_SIZE, "0x%X\n", wqm_quanta_value);
-}
-
-static ssize_t wqm_quanta_store(struct device_driver *ddp, const char *buf,
- size_t count)
-{
- unsigned long wqm_quanta_value;
- u32 wqm_config1;
- u32 i = 0;
- struct nes_device *nesdev;
-
- if (kstrtoul(buf, 0, &wqm_quanta_value) < 0)
- return -EINVAL;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- nesdev->nesadapter->wqm_quanta = wqm_quanta_value;
- wqm_config1 = nes_read_indexed(nesdev,
- NES_IDX_WQM_CONFIG1);
- nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1,
- ((wqm_quanta_value << 1) |
- (wqm_config1 & 0x00000001)));
- break;
- }
- i++;
- }
- return strnlen(buf, count);
-}
-
-static DRIVER_ATTR_RW(adapter);
-static DRIVER_ATTR_RW(eeprom_cmd);
-static DRIVER_ATTR_RW(eeprom_data);
-static DRIVER_ATTR_RW(flash_cmd);
-static DRIVER_ATTR_RW(flash_data);
-static DRIVER_ATTR_RW(nonidx_addr);
-static DRIVER_ATTR_RW(nonidx_data);
-static DRIVER_ATTR_RW(idx_addr);
-static DRIVER_ATTR_RW(idx_data);
-static DRIVER_ATTR_RW(wqm_quanta);
-
-static struct attribute *nes_attrs[] = {
- &driver_attr_adapter.attr,
- &driver_attr_eeprom_cmd.attr,
- &driver_attr_eeprom_data.attr,
- &driver_attr_flash_cmd.attr,
- &driver_attr_flash_data.attr,
- &driver_attr_nonidx_addr.attr,
- &driver_attr_nonidx_data.attr,
- &driver_attr_idx_addr.attr,
- &driver_attr_idx_data.attr,
- &driver_attr_wqm_quanta.attr,
- NULL,
-};
-ATTRIBUTE_GROUPS(nes);
-
-static struct pci_driver nes_pci_driver = {
- .name = DRV_NAME,
- .id_table = nes_pci_table,
- .probe = nes_probe,
- .remove = nes_remove,
- .groups = nes_groups,
-};
-
-
-/**
- * nes_init_module - module initialization entry point
- */
-static int __init nes_init_module(void)
-{
- int retval;
-
- retval = nes_cm_start();
- if (retval) {
- printk(KERN_ERR PFX "Unable to start NetEffect iWARP CM.\n");
- return retval;
- }
- return pci_register_driver(&nes_pci_driver);
-}
-
-
-/**
- * nes_exit_module - module unload entry point
- */
-static void __exit nes_exit_module(void)
-{
- nes_cm_stop();
-
- pci_unregister_driver(&nes_pci_driver);
-}
-
-
-module_init(nes_init_module);
-module_exit(nes_exit_module);
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
deleted file mode 100644
index a895fe980d10..000000000000
--- a/drivers/infiniband/hw/nes/nes.h
+++ /dev/null
@@ -1,574 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __NES_H
-#define __NES_H
-
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/workqueue.h>
-#include <linux/slab.h>
-#include <asm/io.h>
-#include <linux/crc32c.h>
-
-#include <rdma/ib_smi.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_pack.h>
-#include <rdma/rdma_cm.h>
-#include <rdma/iw_cm.h>
-#include <rdma/rdma_netlink.h>
-#include <rdma/iw_portmap.h>
-
-#define NES_SEND_FIRST_WRITE
-
-#define QUEUE_DISCONNECTS
-
-#define DRV_NAME "iw_nes"
-#define DRV_VERSION "1.5.0.1"
-#define PFX DRV_NAME ": "
-
-/*
- * NetEffect PCI vendor id and NE010 PCI device id.
- */
-#ifndef PCI_VENDOR_ID_NETEFFECT /* not in pci.ids yet */
-#define PCI_VENDOR_ID_NETEFFECT 0x1678
-#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100
-#define PCI_DEVICE_ID_NETEFFECT_NE020_KR 0x0110
-#endif
-
-#define NE020_REV 4
-#define NE020_REV1 5
-
-#define BAR_0 0
-#define BAR_1 2
-
-#define RX_BUF_SIZE (1536 + 8)
-#define NES_REG0_SIZE (4 * 1024)
-#define NES_TX_TIMEOUT (6*HZ)
-#define NES_FIRST_QPN 64
-#define NES_SW_CONTEXT_ALIGN 1024
-
-#define NES_MAX_MTU 9000
-
-#define NES_NIC_MAX_NICS 16
-#define NES_MAX_ARP_TABLE_SIZE 4096
-
-#define NES_NIC_CEQ_SIZE 8
-/* NICs will be on a separate CQ */
-#define NES_CCEQ_SIZE ((nesadapter->max_cq / nesadapter->port_count) - 32)
-
-#define NES_MAX_PORT_COUNT 4
-
-#define MAX_DPC_ITERATIONS 128
-
-#define NES_DRV_OPT_ENABLE_MPA_VER_0 0x00000001
-#define NES_DRV_OPT_DISABLE_MPA_CRC 0x00000002
-#define NES_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004
-#define NES_DRV_OPT_DISABLE_INTF 0x00000008
-#define NES_DRV_OPT_ENABLE_MSI 0x00000010
-#define NES_DRV_OPT_DUAL_LOGICAL_PORT 0x00000020
-#define NES_DRV_OPT_SUPRESS_OPTION_BC 0x00000040
-#define NES_DRV_OPT_NO_INLINE_DATA 0x00000080
-#define NES_DRV_OPT_DISABLE_INT_MOD 0x00000100
-#define NES_DRV_OPT_DISABLE_VIRT_WQ 0x00000200
-#define NES_DRV_OPT_ENABLE_PAU 0x00000400
-
-#define NES_AEQ_EVENT_TIMEOUT 2500
-#define NES_DISCONNECT_EVENT_TIMEOUT 2000
-
-/* debug levels */
-/* must match userspace */
-#define NES_DBG_HW 0x00000001
-#define NES_DBG_INIT 0x00000002
-#define NES_DBG_ISR 0x00000004
-#define NES_DBG_PHY 0x00000008
-#define NES_DBG_NETDEV 0x00000010
-#define NES_DBG_CM 0x00000020
-#define NES_DBG_CM1 0x00000040
-#define NES_DBG_NIC_RX 0x00000080
-#define NES_DBG_NIC_TX 0x00000100
-#define NES_DBG_CQP 0x00000200
-#define NES_DBG_MMAP 0x00000400
-#define NES_DBG_MR 0x00000800
-#define NES_DBG_PD 0x00001000
-#define NES_DBG_CQ 0x00002000
-#define NES_DBG_QP 0x00004000
-#define NES_DBG_MOD_QP 0x00008000
-#define NES_DBG_AEQ 0x00010000
-#define NES_DBG_IW_RX 0x00020000
-#define NES_DBG_IW_TX 0x00040000
-#define NES_DBG_SHUTDOWN 0x00080000
-#define NES_DBG_PAU 0x00100000
-#define NES_DBG_NLMSG 0x00200000
-#define NES_DBG_RSVD1 0x10000000
-#define NES_DBG_RSVD2 0x20000000
-#define NES_DBG_RSVD3 0x40000000
-#define NES_DBG_RSVD4 0x80000000
-#define NES_DBG_ALL 0xffffffff
-
-#ifdef CONFIG_INFINIBAND_NES_DEBUG
-#define nes_debug(level, fmt, args...) \
-do { \
- if (level & nes_debug_level) \
- printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \
-} while (0)
-
-#define NES_EVENT_TIMEOUT 1200000
-#else
-#define nes_debug(level, fmt, args...) no_printk(fmt, ##args)
-
-#define NES_EVENT_TIMEOUT 100000
-#endif
-
-#include "nes_hw.h"
-#include "nes_verbs.h"
-#include "nes_context.h"
-#include <rdma/nes-abi.h>
-#include "nes_cm.h"
-#include "nes_mgt.h"
-
-extern int interrupt_mod_interval;
-extern int nes_if_count;
-extern int mpa_version;
-extern int disable_mpa_crc;
-extern unsigned int nes_drv_opt;
-extern unsigned int nes_debug_level;
-extern unsigned int wqm_quanta;
-extern struct list_head nes_adapter_list;
-
-extern atomic_t cm_connects;
-extern atomic_t cm_accepts;
-extern atomic_t cm_disconnects;
-extern atomic_t cm_closes;
-extern atomic_t cm_connecteds;
-extern atomic_t cm_connect_reqs;
-extern atomic_t cm_rejects;
-extern atomic_t mod_qp_timouts;
-extern atomic_t qps_created;
-extern atomic_t qps_destroyed;
-extern atomic_t sw_qps_destroyed;
-extern u32 mh_detected;
-extern u32 mh_pauses_sent;
-extern u32 cm_packets_sent;
-extern u32 cm_packets_bounced;
-extern u32 cm_packets_created;
-extern u32 cm_packets_received;
-extern u32 cm_packets_dropped;
-extern u32 cm_packets_retrans;
-extern atomic_t cm_listens_created;
-extern atomic_t cm_listens_destroyed;
-extern u32 cm_backlog_drops;
-extern atomic_t cm_loopbacks;
-extern atomic_t cm_nodes_created;
-extern atomic_t cm_nodes_destroyed;
-extern atomic_t cm_accel_dropped_pkts;
-extern atomic_t cm_resets_recvd;
-extern atomic_t pau_qps_created;
-extern atomic_t pau_qps_destroyed;
-
-extern u32 int_mod_timer_init;
-extern u32 int_mod_cq_depth_256;
-extern u32 int_mod_cq_depth_128;
-extern u32 int_mod_cq_depth_32;
-extern u32 int_mod_cq_depth_24;
-extern u32 int_mod_cq_depth_16;
-extern u32 int_mod_cq_depth_4;
-extern u32 int_mod_cq_depth_1;
-
-struct nes_device {
- struct nes_adapter *nesadapter;
- void __iomem *regs;
- void __iomem *index_reg;
- struct pci_dev *pcidev;
- struct net_device *netdev[NES_NIC_MAX_NICS];
- u64 link_status_interrupts;
- struct tasklet_struct dpc_tasklet;
- spinlock_t indexed_regs_lock;
- unsigned long csr_start;
- unsigned long doorbell_region;
- unsigned long doorbell_start;
- unsigned long mac_tx_errors;
- unsigned long mac_pause_frames_sent;
- unsigned long mac_pause_frames_received;
- unsigned long mac_rx_errors;
- unsigned long mac_rx_crc_errors;
- unsigned long mac_rx_symbol_err_frames;
- unsigned long mac_rx_jabber_frames;
- unsigned long mac_rx_oversized_frames;
- unsigned long mac_rx_short_frames;
- unsigned long port_rx_discards;
- unsigned long port_tx_discards;
- unsigned int mac_index;
- unsigned int nes_stack_start;
-
- /* Control Structures */
- void *cqp_vbase;
- dma_addr_t cqp_pbase;
- u32 cqp_mem_size;
- u8 ceq_index;
- u8 nic_ceq_index;
- struct nes_hw_cqp cqp;
- struct nes_hw_cq ccq;
- struct list_head cqp_avail_reqs;
- struct list_head cqp_pending_reqs;
- struct nes_cqp_request *nes_cqp_requests;
-
- u32 int_req;
- u32 int_stat;
- u32 timer_int_req;
- u32 timer_only_int_count;
- u32 intf_int_req;
- u32 last_mac_tx_pauses;
- u32 last_used_chunks_tx;
- struct list_head list;
-
- u16 base_doorbell_index;
- u16 currcq_count;
- u16 deepcq_count;
- u8 iw_status;
- u8 msi_enabled;
- u8 netdev_count;
- u8 napi_isr_ran;
- u8 disable_rx_flow_control;
- u8 disable_tx_flow_control;
-
- struct delayed_work work;
- u8 link_recheck;
-};
-
-/* Receive skb private area - must fit in skb->cb area */
-struct nes_rskb_cb {
- u64 busaddr;
- u32 maplen;
- u32 seqnum;
- u8 *data_start;
- struct nes_qp *nesqp;
-};
-
-static inline __le32 get_crc_value(struct nes_v4_quad *nes_quad)
-{
- u32 crc_value;
- crc_value = crc32c(~0, (void *)nes_quad, sizeof (struct nes_v4_quad));
-
- /*
- * With commit ef19454b ("[LIB] crc32c: Keep intermediate crc
- * state in cpu order"), behavior of crc32c changes on
- * big-endian platforms. Our algorithm expects the previous
- * behavior; otherwise we have RDMA connection establishment
- * issue on big-endian.
- */
- return cpu_to_le32(crc_value);
-}
-
-static inline void
-set_wqe_64bit_value(__le32 *wqe_words, u32 index, u64 value)
-{
- wqe_words[index] = cpu_to_le32((u32) value);
- wqe_words[index + 1] = cpu_to_le32(upper_32_bits(value));
-}
-
-static inline void
-set_wqe_32bit_value(__le32 *wqe_words, u32 index, u32 value)
-{
- wqe_words[index] = cpu_to_le32(value);
-}
-
-static inline void
-nes_fill_init_cqp_wqe(struct nes_hw_cqp_wqe *cqp_wqe, struct nes_device *nesdev)
-{
- cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_LEN_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_LOW_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PA_LOW_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PA_HIGH_IDX] = 0;
-}
-
-static inline void
-nes_fill_init_qp_wqe(struct nes_hw_qp_wqe *wqe, struct nes_qp *nesqp, u32 head)
-{
- u32 value;
- value = ((u32)((unsigned long) nesqp)) | head;
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX,
- (u32)(upper_32_bits((unsigned long)(nesqp))));
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, value);
-}
-
-/* Read from memory-mapped device */
-static inline u32 nes_read_indexed(struct nes_device *nesdev, u32 reg_index)
-{
- unsigned long flags;
- void __iomem *addr = nesdev->index_reg;
- u32 value;
-
- spin_lock_irqsave(&nesdev->indexed_regs_lock, flags);
-
- writel(reg_index, addr);
- value = readl((void __iomem *)addr + 4);
-
- spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags);
- return value;
-}
-
-static inline u32 nes_read32(const void __iomem *addr)
-{
- return readl(addr);
-}
-
-static inline u16 nes_read16(const void __iomem *addr)
-{
- return readw(addr);
-}
-
-static inline u8 nes_read8(const void __iomem *addr)
-{
- return readb(addr);
-}
-
-/* Write to memory-mapped device */
-static inline void nes_write_indexed(struct nes_device *nesdev, u32 reg_index, u32 val)
-{
- unsigned long flags;
- void __iomem *addr = nesdev->index_reg;
-
- spin_lock_irqsave(&nesdev->indexed_regs_lock, flags);
-
- writel(reg_index, addr);
- writel(val, (void __iomem *)addr + 4);
-
- spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags);
-}
-
-static inline void nes_write32(void __iomem *addr, u32 val)
-{
- writel(val, addr);
-}
-
-static inline void nes_write16(void __iomem *addr, u16 val)
-{
- writew(val, addr);
-}
-
-static inline void nes_write8(void __iomem *addr, u8 val)
-{
- writeb(val, addr);
-}
-
-enum nes_resource {
- NES_RESOURCE_MW = 1,
- NES_RESOURCE_FAST_MR,
- NES_RESOURCE_PHYS_MR,
- NES_RESOURCE_USER_MR,
- NES_RESOURCE_PD,
- NES_RESOURCE_QP,
- NES_RESOURCE_CQ,
- NES_RESOURCE_ARP
-};
-
-static inline int nes_alloc_resource(struct nes_adapter *nesadapter,
- unsigned long *resource_array, u32 max_resources,
- u32 *req_resource_num, u32 *next, enum nes_resource resource_type)
-{
- unsigned long flags;
- u32 resource_num;
-
- spin_lock_irqsave(&nesadapter->resource_lock, flags);
-
- resource_num = find_next_zero_bit(resource_array, max_resources, *next);
- if (resource_num >= max_resources) {
- resource_num = find_first_zero_bit(resource_array, max_resources);
- if (resource_num >= max_resources) {
- printk(KERN_ERR PFX "%s: No available resources [type=%u].\n", __func__, resource_type);
- spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
- return -EMFILE;
- }
- }
- set_bit(resource_num, resource_array);
- *next = resource_num+1;
- if (*next == max_resources) {
- *next = 0;
- }
- spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
- *req_resource_num = resource_num;
-
- return 0;
-}
-
-static inline int nes_is_resource_allocated(struct nes_adapter *nesadapter,
- unsigned long *resource_array, u32 resource_num)
-{
- unsigned long flags;
- int bit_is_set;
-
- spin_lock_irqsave(&nesadapter->resource_lock, flags);
-
- bit_is_set = test_bit(resource_num, resource_array);
- nes_debug(NES_DBG_HW, "resource_num %u is%s allocated.\n",
- resource_num, (bit_is_set ? "": " not"));
- spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
-
- return bit_is_set;
-}
-
-static inline void nes_free_resource(struct nes_adapter *nesadapter,
- unsigned long *resource_array, u32 resource_num)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&nesadapter->resource_lock, flags);
- clear_bit(resource_num, resource_array);
- spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
-}
-
-static inline struct nes_vnic *to_nesvnic(struct ib_device *ibdev)
-{
- return container_of(ibdev, struct nes_ib_device, ibdev)->nesvnic;
-}
-
-static inline struct nes_pd *to_nespd(struct ib_pd *ibpd)
-{
- return container_of(ibpd, struct nes_pd, ibpd);
-}
-
-static inline struct nes_ucontext *to_nesucontext(struct ib_ucontext *ibucontext)
-{
- return container_of(ibucontext, struct nes_ucontext, ibucontext);
-}
-
-static inline struct nes_mr *to_nesmr(struct ib_mr *ibmr)
-{
- return container_of(ibmr, struct nes_mr, ibmr);
-}
-
-static inline struct nes_mr *to_nesmr_from_ibfmr(struct ib_fmr *ibfmr)
-{
- return container_of(ibfmr, struct nes_mr, ibfmr);
-}
-
-static inline struct nes_mr *to_nesmw(struct ib_mw *ibmw)
-{
- return container_of(ibmw, struct nes_mr, ibmw);
-}
-
-static inline struct nes_fmr *to_nesfmr(struct nes_mr *nesmr)
-{
- return container_of(nesmr, struct nes_fmr, nesmr);
-}
-
-static inline struct nes_cq *to_nescq(struct ib_cq *ibcq)
-{
- return container_of(ibcq, struct nes_cq, ibcq);
-}
-
-static inline struct nes_qp *to_nesqp(struct ib_qp *ibqp)
-{
- return container_of(ibqp, struct nes_qp, ibqp);
-}
-
-
-
-/* nes.c */
-void nes_add_ref(struct ib_qp *);
-void nes_rem_ref(struct ib_qp *);
-struct ib_qp *nes_get_qp(struct ib_device *, int);
-
-
-/* nes_hw.c */
-struct nes_adapter *nes_init_adapter(struct nes_device *, u8);
-void nes_nic_init_timer_defaults(struct nes_device *, u8);
-void nes_destroy_adapter(struct nes_adapter *);
-int nes_init_cqp(struct nes_device *);
-int nes_init_phy(struct nes_device *);
-int nes_init_nic_qp(struct nes_device *, struct net_device *);
-void nes_destroy_nic_qp(struct nes_vnic *);
-int nes_napi_isr(struct nes_device *);
-void nes_dpc(unsigned long);
-void nes_nic_ce_handler(struct nes_device *, struct nes_hw_nic_cq *);
-void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *);
-int nes_destroy_cqp(struct nes_device *);
-int nes_nic_cm_xmit(struct sk_buff *, struct net_device *);
-void nes_recheck_link_status(struct work_struct *work);
-void nes_terminate_timeout(struct timer_list *t);
-
-/* nes_nic.c */
-struct net_device *nes_netdev_init(struct nes_device *, void __iomem *);
-void nes_netdev_destroy(struct net_device *);
-int nes_nic_cm_xmit(struct sk_buff *, struct net_device *);
-
-/* nes_cm.c */
-void *nes_cm_create(struct net_device *);
-int nes_cm_recv(struct sk_buff *, struct net_device *);
-void nes_update_arp(unsigned char *, u32, u32, u16, u16);
-void nes_manage_arp_cache(struct net_device *, unsigned char *, u32, u32);
-void nes_sock_release(struct nes_qp *, unsigned long *);
-void flush_wqes(struct nes_device *nesdev, struct nes_qp *, u32, u32);
-int nes_manage_apbvt(struct nes_vnic *, u32, u32, u32);
-int nes_cm_disconn(struct nes_qp *);
-void nes_cm_disconn_worker(void *);
-
-/* nes_verbs.c */
-int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32, u32);
-int nes_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
-struct nes_ib_device *nes_init_ofa_device(struct net_device *);
-void nes_port_ibevent(struct nes_vnic *nesvnic);
-void nes_destroy_ofa_device(struct nes_ib_device *);
-int nes_register_ofa_device(struct nes_ib_device *);
-
-/* nes_util.c */
-int nes_read_eeprom_values(struct nes_device *, struct nes_adapter *);
-void nes_write_1G_phy_reg(struct nes_device *, u8, u8, u16);
-void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *);
-void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16);
-void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16);
-struct nes_cqp_request *nes_get_cqp_request(struct nes_device *);
-void nes_free_cqp_request(struct nes_device *nesdev,
- struct nes_cqp_request *cqp_request);
-void nes_put_cqp_request(struct nes_device *nesdev,
- struct nes_cqp_request *cqp_request);
-void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *);
-int nes_arp_table(struct nes_device *, u32, u8 *, u32);
-void nes_mh_fix(struct timer_list *t);
-void nes_clc(struct timer_list *t);
-void nes_dump_mem(unsigned int, void *, int);
-u32 nes_crc32(u32, u32, u32, u32, u8 *, u32, u32, u32);
-
-#endif /* __NES_H */
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
deleted file mode 100644
index 62bf986eba67..000000000000
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ /dev/null
@@ -1,3992 +0,0 @@
-/*
- * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-
-#define TCPOPT_TIMESTAMP 8
-
-#include <linux/atomic.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/if_arp.h>
-#include <linux/if_vlan.h>
-#include <linux/notifier.h>
-#include <linux/net.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/time.h>
-#include <linux/delay.h>
-#include <linux/etherdevice.h>
-#include <linux/netdevice.h>
-#include <linux/random.h>
-#include <linux/list.h>
-#include <linux/threads.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <net/arp.h>
-#include <net/neighbour.h>
-#include <net/route.h>
-#include <net/ip_fib.h>
-#include <net/secure_seq.h>
-#include <net/tcp.h>
-#include <linux/fcntl.h>
-
-#include "nes.h"
-
-u32 cm_packets_sent;
-u32 cm_packets_bounced;
-u32 cm_packets_dropped;
-u32 cm_packets_retrans;
-u32 cm_packets_created;
-u32 cm_packets_received;
-atomic_t cm_listens_created;
-atomic_t cm_listens_destroyed;
-u32 cm_backlog_drops;
-atomic_t cm_loopbacks;
-atomic_t cm_nodes_created;
-atomic_t cm_nodes_destroyed;
-atomic_t cm_accel_dropped_pkts;
-atomic_t cm_resets_recvd;
-
-static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *);
-static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, struct nes_vnic *, struct nes_cm_info *);
-static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *);
-static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *, struct nes_vnic *, u16, void *, struct nes_cm_info *);
-static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *);
-static int mini_cm_accept(struct nes_cm_core *, struct nes_cm_node *);
-static int mini_cm_reject(struct nes_cm_core *, struct nes_cm_node *);
-static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *);
-static int mini_cm_dealloc_core(struct nes_cm_core *);
-static int mini_cm_get(struct nes_cm_core *);
-static int mini_cm_set(struct nes_cm_core *, u32, u32);
-
-static void form_cm_frame(struct sk_buff *, struct nes_cm_node *, void *, u32, void *, u32, u8);
-static int add_ref_cm_node(struct nes_cm_node *);
-static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *);
-
-static int nes_cm_disconn_true(struct nes_qp *);
-static int nes_cm_post_event(struct nes_cm_event *event);
-static int nes_disconnect(struct nes_qp *nesqp, int abrupt);
-static void nes_disconnect_worker(struct work_struct *work);
-
-static int send_mpa_request(struct nes_cm_node *, struct sk_buff *);
-static int send_mpa_reject(struct nes_cm_node *);
-static int send_syn(struct nes_cm_node *, u32, struct sk_buff *);
-static int send_reset(struct nes_cm_node *, struct sk_buff *);
-static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb);
-static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb);
-static void process_packet(struct nes_cm_node *, struct sk_buff *, struct nes_cm_core *);
-
-static void active_open_err(struct nes_cm_node *, struct sk_buff *, int);
-static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int);
-static void cleanup_retrans_entry(struct nes_cm_node *);
-static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *);
-static void free_retrans_entry(struct nes_cm_node *cm_node);
-static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, struct sk_buff *skb, int optionsize, int passive);
-
-/* CM event handler functions */
-static void cm_event_connected(struct nes_cm_event *);
-static void cm_event_connect_error(struct nes_cm_event *);
-static void cm_event_reset(struct nes_cm_event *);
-static void cm_event_mpa_req(struct nes_cm_event *);
-static void cm_event_mpa_reject(struct nes_cm_event *);
-static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node);
-
-/* MPA build functions */
-static int cm_build_mpa_frame(struct nes_cm_node *, u8 **, u16 *, u8 *, u8);
-static void build_mpa_v2(struct nes_cm_node *, void *, u8);
-static void build_mpa_v1(struct nes_cm_node *, void *, u8);
-static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **);
-
-static void print_core(struct nes_cm_core *core);
-static void record_ird_ord(struct nes_cm_node *, u16, u16);
-
-/* External CM API Interface */
-/* instance of function pointers for client API */
-/* set address of this instance to cm_core->cm_ops at cm_core alloc */
-static const struct nes_cm_ops nes_cm_api = {
- .accelerated = mini_cm_accelerated,
- .listen = mini_cm_listen,
- .stop_listener = mini_cm_del_listen,
- .connect = mini_cm_connect,
- .close = mini_cm_close,
- .accept = mini_cm_accept,
- .reject = mini_cm_reject,
- .recv_pkt = mini_cm_recv_pkt,
- .destroy_cm_core = mini_cm_dealloc_core,
- .get = mini_cm_get,
- .set = mini_cm_set
-};
-
-static struct nes_cm_core *g_cm_core;
-
-atomic_t cm_connects;
-atomic_t cm_accepts;
-atomic_t cm_disconnects;
-atomic_t cm_closes;
-atomic_t cm_connecteds;
-atomic_t cm_connect_reqs;
-atomic_t cm_rejects;
-
-int nes_add_ref_cm_node(struct nes_cm_node *cm_node)
-{
- return add_ref_cm_node(cm_node);
-}
-
-int nes_rem_ref_cm_node(struct nes_cm_node *cm_node)
-{
- return rem_ref_cm_node(cm_node->cm_core, cm_node);
-}
-/**
- * create_event
- */
-static struct nes_cm_event *create_event(struct nes_cm_node * cm_node,
- enum nes_cm_event_type type)
-{
- struct nes_cm_event *event;
-
- if (!cm_node->cm_id)
- return NULL;
-
- /* allocate an empty event */
- event = kzalloc(sizeof(*event), GFP_ATOMIC);
-
- if (!event)
- return NULL;
-
- event->type = type;
- event->cm_node = cm_node;
- event->cm_info.rem_addr = cm_node->rem_addr;
- event->cm_info.loc_addr = cm_node->loc_addr;
- event->cm_info.rem_port = cm_node->rem_port;
- event->cm_info.loc_port = cm_node->loc_port;
- event->cm_info.cm_id = cm_node->cm_id;
-
- nes_debug(NES_DBG_CM, "cm_node=%p Created event=%p, type=%u, "
- "dst_addr=%08x[%x], src_addr=%08x[%x]\n",
- cm_node, event, type, event->cm_info.loc_addr,
- event->cm_info.loc_port, event->cm_info.rem_addr,
- event->cm_info.rem_port);
-
- nes_cm_post_event(event);
- return event;
-}
-
-
-/**
- * send_mpa_request
- */
-static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb)
-{
- u8 start_addr = 0;
- u8 *start_ptr = &start_addr;
- u8 **start_buff = &start_ptr;
- u16 buff_len = 0;
-
- if (!skb) {
- nes_debug(NES_DBG_CM, "skb set to NULL\n");
- return -1;
- }
-
- /* send an MPA Request frame */
- cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REQUEST);
- form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK);
-
- return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
-}
-
-
-
-static int send_mpa_reject(struct nes_cm_node *cm_node)
-{
- struct sk_buff *skb = NULL;
- u8 start_addr = 0;
- u8 *start_ptr = &start_addr;
- u8 **start_buff = &start_ptr;
- u16 buff_len = 0;
- struct ietf_mpa_v1 *mpa_frame;
-
- skb = dev_alloc_skb(MAX_CM_BUFFER);
- if (!skb) {
- nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
- return -ENOMEM;
- }
-
- /* send an MPA reject frame */
- cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REPLY);
- mpa_frame = (struct ietf_mpa_v1 *)*start_buff;
- mpa_frame->flags |= IETF_MPA_FLAGS_REJECT;
- form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK | SET_FIN);
-
- cm_node->state = NES_CM_STATE_FIN_WAIT1;
- return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
-}
-
-
-/**
- * recv_mpa - process a received TCP pkt, we are expecting an
- * IETF MPA frame
- */
-static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
- u32 len)
-{
- struct ietf_mpa_v1 *mpa_frame;
- struct ietf_mpa_v2 *mpa_v2_frame;
- struct ietf_rtr_msg *rtr_msg;
- int mpa_hdr_len;
- int priv_data_len;
-
- *type = NES_MPA_REQUEST_ACCEPT;
-
- /* assume req frame is in tcp data payload */
- if (len < sizeof(struct ietf_mpa_v1)) {
- nes_debug(NES_DBG_CM, "The received ietf buffer was too small (%x)\n", len);
- return -EINVAL;
- }
-
- /* points to the beginning of the frame, which could be MPA V1 or V2 */
- mpa_frame = (struct ietf_mpa_v1 *)buffer;
- mpa_hdr_len = sizeof(struct ietf_mpa_v1);
- priv_data_len = ntohs(mpa_frame->priv_data_len);
-
- /* make sure mpa private data len is less than 512 bytes */
- if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) {
- nes_debug(NES_DBG_CM, "The received Length of Private"
- " Data field exceeds 512 octets\n");
- return -EINVAL;
- }
- /*
- * make sure MPA receiver interoperate with the
- * received MPA version and MPA key information
- *
- */
- if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) {
- nes_debug(NES_DBG_CM, "The received mpa version"
- " is not supported\n");
- return -EINVAL;
- }
- /*
- * backwards compatibility only
- */
- if (mpa_frame->rev > cm_node->mpa_frame_rev) {
- nes_debug(NES_DBG_CM, "The received mpa version"
- " can not be interoperated\n");
- return -EINVAL;
- } else {
- cm_node->mpa_frame_rev = mpa_frame->rev;
- }
-
- if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
- if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) {
- nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
- return -EINVAL;
- }
- } else {
- if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) {
- nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
- return -EINVAL;
- }
- }
-
- if (priv_data_len + mpa_hdr_len != len) {
- nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
- " complete (%x + %x != %x)\n",
- priv_data_len, mpa_hdr_len, len);
- return -EINVAL;
- }
- /* make sure it does not exceed the max size */
- if (len > MAX_CM_BUFFER) {
- nes_debug(NES_DBG_CM, "The received ietf buffer was too large"
- " (%x + %x != %x)\n",
- priv_data_len, mpa_hdr_len, len);
- return -EINVAL;
- }
-
- cm_node->mpa_frame_size = priv_data_len;
-
- switch (mpa_frame->rev) {
- case IETF_MPA_V2: {
- u16 ird_size;
- u16 ord_size;
- u16 rtr_ctrl_ird;
- u16 rtr_ctrl_ord;
-
- mpa_v2_frame = (struct ietf_mpa_v2 *)buffer;
- mpa_hdr_len += IETF_RTR_MSG_SIZE;
- cm_node->mpa_frame_size -= IETF_RTR_MSG_SIZE;
- rtr_msg = &mpa_v2_frame->rtr_msg;
-
- /* parse rtr message */
- rtr_ctrl_ird = ntohs(rtr_msg->ctrl_ird);
- rtr_ctrl_ord = ntohs(rtr_msg->ctrl_ord);
- ird_size = rtr_ctrl_ird & IETF_NO_IRD_ORD;
- ord_size = rtr_ctrl_ord & IETF_NO_IRD_ORD;
-
- if (!(rtr_ctrl_ird & IETF_PEER_TO_PEER)) {
- /* send reset */
- return -EINVAL;
- }
- if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD)
- cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD;
-
- if (cm_node->mpav2_ird_ord != IETF_NO_IRD_ORD) {
- /* responder */
- if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
- /* we are still negotiating */
- if (ord_size > NES_MAX_IRD) {
- cm_node->ird_size = NES_MAX_IRD;
- } else {
- cm_node->ird_size = ord_size;
- if (ord_size == 0 &&
- (rtr_ctrl_ord & IETF_RDMA0_READ)) {
- cm_node->ird_size = 1;
- nes_debug(NES_DBG_CM,
- "%s: Remote peer doesn't support RDMA0_READ (ord=%u)\n",
- __func__, ord_size);
- }
- }
- if (ird_size > NES_MAX_ORD)
- cm_node->ord_size = NES_MAX_ORD;
- else
- cm_node->ord_size = ird_size;
- } else { /* initiator */
- if (ord_size > NES_MAX_IRD) {
- nes_debug(NES_DBG_CM,
- "%s: Unable to support the requested (ord =%u)\n",
- __func__, ord_size);
- return -EINVAL;
- }
- cm_node->ird_size = ord_size;
-
- if (ird_size > NES_MAX_ORD) {
- cm_node->ord_size = NES_MAX_ORD;
- } else {
- if (ird_size == 0 &&
- (rtr_ctrl_ord & IETF_RDMA0_READ)) {
- nes_debug(NES_DBG_CM,
- "%s: Remote peer doesn't support RDMA0_READ (ird=%u)\n",
- __func__, ird_size);
- return -EINVAL;
- } else {
- cm_node->ord_size = ird_size;
- }
- }
- }
- }
-
- if (rtr_ctrl_ord & IETF_RDMA0_READ) {
- cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
-
- } else if (rtr_ctrl_ord & IETF_RDMA0_WRITE) {
- cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
- } else { /* Not supported RDMA0 operation */
- return -EINVAL;
- }
- break;
- }
- case IETF_MPA_V1:
- default:
- break;
- }
-
- /* copy entire MPA frame to our cm_node's frame */
- memcpy(cm_node->mpa_frame_buf, buffer + mpa_hdr_len, cm_node->mpa_frame_size);
-
- if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT)
- *type = NES_MPA_REQUEST_REJECT;
- return 0;
-}
-
-
-/**
- * form_cm_frame - get a free packet and build empty frame Use
- * node info to build.
- */
-static void form_cm_frame(struct sk_buff *skb,
- struct nes_cm_node *cm_node, void *options, u32 optionsize,
- void *data, u32 datasize, u8 flags)
-{
- struct tcphdr *tcph;
- struct iphdr *iph;
- struct ethhdr *ethh;
- u8 *buf;
- u16 packetsize = sizeof(*iph);
-
- packetsize += sizeof(*tcph);
- packetsize += optionsize + datasize;
-
- skb_trim(skb, 0);
- memset(skb->data, 0x00, ETH_HLEN + sizeof(*iph) + sizeof(*tcph));
-
- buf = skb_put(skb, packetsize + ETH_HLEN);
-
- ethh = (struct ethhdr *)buf;
- buf += ETH_HLEN;
-
- iph = (struct iphdr *)buf;
- buf += sizeof(*iph);
- tcph = (struct tcphdr *)buf;
- skb_reset_mac_header(skb);
- skb_set_network_header(skb, ETH_HLEN);
- skb_set_transport_header(skb, ETH_HLEN + sizeof(*iph));
- buf += sizeof(*tcph);
-
- skb->ip_summed = CHECKSUM_PARTIAL;
- if (!(cm_node->netdev->features & NETIF_F_IP_CSUM))
- skb->ip_summed = CHECKSUM_NONE;
- skb->protocol = htons(0x800);
- skb->data_len = 0;
- skb->mac_len = ETH_HLEN;
-
- memcpy(ethh->h_dest, cm_node->rem_mac, ETH_ALEN);
- memcpy(ethh->h_source, cm_node->loc_mac, ETH_ALEN);
- ethh->h_proto = htons(0x0800);
-
- iph->version = IPVERSION;
- iph->ihl = 5; /* 5 * 4Byte words, IP headr len */
- iph->tos = 0;
- iph->tot_len = htons(packetsize);
- iph->id = htons(++cm_node->tcp_cntxt.loc_id);
-
- iph->frag_off = htons(0x4000);
- iph->ttl = 0x40;
- iph->protocol = 0x06; /* IPPROTO_TCP */
-
- iph->saddr = htonl(cm_node->loc_addr);
- iph->daddr = htonl(cm_node->rem_addr);
-
- tcph->source = htons(cm_node->loc_port);
- tcph->dest = htons(cm_node->rem_port);
- tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
-
- if (flags & SET_ACK) {
- cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
- tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
- tcph->ack = 1;
- } else {
- tcph->ack_seq = 0;
- }
-
- if (flags & SET_SYN) {
- cm_node->tcp_cntxt.loc_seq_num++;
- tcph->syn = 1;
- } else {
- cm_node->tcp_cntxt.loc_seq_num += datasize;
- }
-
- if (flags & SET_FIN) {
- cm_node->tcp_cntxt.loc_seq_num++;
- tcph->fin = 1;
- }
-
- if (flags & SET_RST)
- tcph->rst = 1;
-
- tcph->doff = (u16)((sizeof(*tcph) + optionsize + 3) >> 2);
- tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd);
- tcph->urg_ptr = 0;
- if (optionsize)
- memcpy(buf, options, optionsize);
- buf += optionsize;
- if (datasize)
- memcpy(buf, data, datasize);
-
- skb_shinfo(skb)->nr_frags = 0;
- cm_packets_created++;
-}
-
-/**
- * print_core - dump a cm core
- */
-static void print_core(struct nes_cm_core *core)
-{
- nes_debug(NES_DBG_CM, "---------------------------------------------\n");
- nes_debug(NES_DBG_CM, "CM Core -- (core = %p )\n", core);
- if (!core)
- return;
- nes_debug(NES_DBG_CM, "---------------------------------------------\n");
-
- nes_debug(NES_DBG_CM, "State : %u \n", core->state);
-
- nes_debug(NES_DBG_CM, "Listen Nodes : %u \n", atomic_read(&core->listen_node_cnt));
- nes_debug(NES_DBG_CM, "Active Nodes : %u \n", atomic_read(&core->node_cnt));
-
- nes_debug(NES_DBG_CM, "core : %p \n", core);
-
- nes_debug(NES_DBG_CM, "-------------- end core ---------------\n");
-}
-
-static void record_ird_ord(struct nes_cm_node *cm_node,
- u16 conn_ird, u16 conn_ord)
-{
- if (conn_ird > NES_MAX_IRD)
- conn_ird = NES_MAX_IRD;
-
- if (conn_ord > NES_MAX_ORD)
- conn_ord = NES_MAX_ORD;
-
- cm_node->ird_size = conn_ird;
- cm_node->ord_size = conn_ord;
-}
-
-/**
- * cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame
- */
-static int cm_build_mpa_frame(struct nes_cm_node *cm_node, u8 **start_buff,
- u16 *buff_len, u8 *pci_mem, u8 mpa_key)
-{
- int ret = 0;
-
- *start_buff = (pci_mem) ? pci_mem : &cm_node->mpa_frame_buf[0];
-
- switch (cm_node->mpa_frame_rev) {
- case IETF_MPA_V1:
- *start_buff = (u8 *)*start_buff + sizeof(struct ietf_rtr_msg);
- *buff_len = sizeof(struct ietf_mpa_v1) + cm_node->mpa_frame_size;
- build_mpa_v1(cm_node, *start_buff, mpa_key);
- break;
- case IETF_MPA_V2:
- *buff_len = sizeof(struct ietf_mpa_v2) + cm_node->mpa_frame_size;
- build_mpa_v2(cm_node, *start_buff, mpa_key);
- break;
- default:
- ret = -EINVAL;
- }
- return ret;
-}
-
-/**
- * build_mpa_v2 - build a MPA V2 frame
- */
-static void build_mpa_v2(struct nes_cm_node *cm_node,
- void *start_addr, u8 mpa_key)
-{
- struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr;
- struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
- u16 ctrl_ird;
- u16 ctrl_ord;
-
- /* initialize the upper 5 bytes of the frame */
- build_mpa_v1(cm_node, start_addr, mpa_key);
- mpa_frame->flags |= IETF_MPA_V2_FLAG; /* set a bit to indicate MPA V2 */
- mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE);
-
- /* initialize RTR msg */
- if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
- ctrl_ird = IETF_NO_IRD_ORD;
- ctrl_ord = IETF_NO_IRD_ORD;
- } else {
- ctrl_ird = cm_node->ird_size & IETF_NO_IRD_ORD;
- ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD;
- }
- ctrl_ird |= IETF_PEER_TO_PEER;
-
- switch (mpa_key) {
- case MPA_KEY_REQUEST:
- ctrl_ord |= IETF_RDMA0_WRITE;
- ctrl_ord |= IETF_RDMA0_READ;
- break;
- case MPA_KEY_REPLY:
- switch (cm_node->send_rdma0_op) {
- case SEND_RDMA_WRITE_ZERO:
- ctrl_ord |= IETF_RDMA0_WRITE;
- break;
- case SEND_RDMA_READ_ZERO:
- ctrl_ord |= IETF_RDMA0_READ;
- break;
- }
- }
- rtr_msg->ctrl_ird = htons(ctrl_ird);
- rtr_msg->ctrl_ord = htons(ctrl_ord);
-}
-
-/**
- * build_mpa_v1 - build a MPA V1 frame
- */
-static void build_mpa_v1(struct nes_cm_node *cm_node, void *start_addr, u8 mpa_key)
-{
- struct ietf_mpa_v1 *mpa_frame = (struct ietf_mpa_v1 *)start_addr;
-
- switch (mpa_key) {
- case MPA_KEY_REQUEST:
- memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
- break;
- case MPA_KEY_REPLY:
- memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
- break;
- }
- mpa_frame->flags = IETF_MPA_FLAGS_CRC;
- mpa_frame->rev = cm_node->mpa_frame_rev;
- mpa_frame->priv_data_len = htons(cm_node->mpa_frame_size);
-}
-
-static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_addr)
-{
- u64 u64temp;
- struct nes_qp *nesqp = *nesqp_addr;
- struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0];
-
- u64temp = (unsigned long)nesqp->nesuqp_addr;
- u64temp |= NES_SW_CONTEXT_ALIGN >> 1;
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp);
-
- wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0;
-
- switch (cm_node->send_rdma0_op) {
- case SEND_RDMA_WRITE_ZERO:
- nes_debug(NES_DBG_CM, "Sending first write.\n");
- wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
- cpu_to_le32(NES_IWARP_SQ_OP_RDMAW);
- wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
- break;
-
- case SEND_RDMA_READ_ZERO:
- default:
- if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO)
- WARN(1, "Unsupported RDMA0 len operation=%u\n",
- cm_node->send_rdma0_op);
- nes_debug(NES_DBG_CM, "Sending first rdma operation.\n");
- wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
- cpu_to_le32(NES_IWARP_SQ_OP_RDMAR);
- wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX] = 1;
- wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_HIGH_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_STAG_IDX] = 1;
- wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 1;
- break;
- }
-
- if (nesqp->sq_kmapped) {
- nesqp->sq_kmapped = 0;
- kunmap(nesqp->page);
- }
-
- /*use the reserved spot on the WQ for the extra first WQE*/
- nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
- NES_QPCONTEXT_ORDIRD_WRPDU |
- NES_QPCONTEXT_ORDIRD_ALSMM));
- nesqp->skip_lsmm = 1;
- nesqp->hwqp.sq_tail = 0;
-}
-
-/**
- * schedule_nes_timer
- * note - cm_node needs to be protected before calling this. Encase in:
- * rem_ref_cm_node(cm_core, cm_node);add_ref_cm_node(cm_node);
- */
-int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
- enum nes_timer_type type, int send_retrans,
- int close_when_complete)
-{
- unsigned long flags;
- struct nes_cm_core *cm_core = cm_node->cm_core;
- struct nes_timer_entry *new_send;
- int ret = 0;
-
- new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
- if (!new_send)
- return -ENOMEM;
-
- /* new_send->timetosend = currenttime */
- new_send->retrycount = NES_DEFAULT_RETRYS;
- new_send->retranscount = NES_DEFAULT_RETRANS;
- new_send->skb = skb;
- new_send->timetosend = jiffies;
- new_send->type = type;
- new_send->netdev = cm_node->netdev;
- new_send->send_retrans = send_retrans;
- new_send->close_when_complete = close_when_complete;
-
- if (type == NES_TIMER_TYPE_CLOSE) {
- new_send->timetosend += (HZ / 10);
- if (cm_node->recv_entry) {
- kfree(new_send);
- WARN_ON(1);
- return -EINVAL;
- }
- cm_node->recv_entry = new_send;
- }
-
- if (type == NES_TIMER_TYPE_SEND) {
- new_send->seq_num = ntohl(tcp_hdr(skb)->seq);
- refcount_inc(&new_send->skb->users);
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- cm_node->send_entry = new_send;
- add_ref_cm_node(cm_node);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- new_send->timetosend = jiffies + NES_RETRY_TIMEOUT;
-
- ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev);
- if (ret != NETDEV_TX_OK) {
- nes_debug(NES_DBG_CM, "Error sending packet %p "
- "(jiffies = %lu)\n", new_send, jiffies);
- new_send->timetosend = jiffies;
- ret = NETDEV_TX_OK;
- } else {
- cm_packets_sent++;
- if (!send_retrans) {
- cleanup_retrans_entry(cm_node);
- if (close_when_complete)
- rem_ref_cm_node(cm_core, cm_node);
- return ret;
- }
- }
- }
-
- if (!timer_pending(&cm_core->tcp_timer))
- mod_timer(&cm_core->tcp_timer, new_send->timetosend);
-
- return ret;
-}
-
-static void nes_retrans_expired(struct nes_cm_node *cm_node)
-{
- struct iw_cm_id *cm_id = cm_node->cm_id;
- enum nes_cm_node_state state = cm_node->state;
- cm_node->state = NES_CM_STATE_CLOSED;
-
- switch (state) {
- case NES_CM_STATE_SYN_RCVD:
- case NES_CM_STATE_CLOSING:
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- break;
- case NES_CM_STATE_LAST_ACK:
- case NES_CM_STATE_FIN_WAIT1:
- if (cm_node->cm_id)
- cm_id->rem_ref(cm_id);
- send_reset(cm_node, NULL);
- break;
- default:
- add_ref_cm_node(cm_node);
- send_reset(cm_node, NULL);
- create_event(cm_node, NES_CM_EVENT_ABORTED);
- }
-}
-
-static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node)
-{
- struct nes_timer_entry *recv_entry = cm_node->recv_entry;
- struct iw_cm_id *cm_id = cm_node->cm_id;
- struct nes_qp *nesqp;
- unsigned long qplockflags;
-
- if (!recv_entry)
- return;
- nesqp = (struct nes_qp *)recv_entry->skb;
- if (nesqp) {
- spin_lock_irqsave(&nesqp->lock, qplockflags);
- if (nesqp->cm_id) {
- nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, "
- "refcount = %d: HIT A "
- "NES_TIMER_TYPE_CLOSE with something "
- "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
- atomic_read(&nesqp->refcount));
- nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
- nesqp->ibqp_state = IB_QPS_ERR;
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_cm_disconn(nesqp);
- } else {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, "
- "refcount = %d: HIT A "
- "NES_TIMER_TYPE_CLOSE with nothing "
- "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
- atomic_read(&nesqp->refcount));
- }
- } else if (rem_node) {
- /* TIME_WAIT state */
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- }
- if (cm_node->cm_id)
- cm_id->rem_ref(cm_id);
- kfree(recv_entry);
- cm_node->recv_entry = NULL;
-}
-
-/**
- * nes_cm_timer_tick
- */
-static void nes_cm_timer_tick(struct timer_list *unused)
-{
- unsigned long flags;
- unsigned long nexttimeout = jiffies + NES_LONG_TIME;
- struct nes_cm_node *cm_node;
- struct nes_timer_entry *send_entry, *recv_entry;
- struct list_head *list_core_temp;
- struct list_head *list_node;
- struct nes_cm_core *cm_core = g_cm_core;
- u32 settimer = 0;
- unsigned long timetosend;
- int ret = NETDEV_TX_OK;
-
- struct list_head timer_list;
-
- INIT_LIST_HEAD(&timer_list);
- spin_lock_irqsave(&cm_core->ht_lock, flags);
-
- list_for_each_safe(list_node, list_core_temp,
- &cm_core->connected_nodes) {
- cm_node = container_of(list_node, struct nes_cm_node, list);
- if ((cm_node->recv_entry) || (cm_node->send_entry)) {
- add_ref_cm_node(cm_node);
- list_add(&cm_node->timer_entry, &timer_list);
- }
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- list_for_each_safe(list_node, list_core_temp, &timer_list) {
- cm_node = container_of(list_node, struct nes_cm_node,
- timer_entry);
- recv_entry = cm_node->recv_entry;
-
- if (recv_entry) {
- if (time_after(recv_entry->timetosend, jiffies)) {
- if (nexttimeout > recv_entry->timetosend ||
- !settimer) {
- nexttimeout = recv_entry->timetosend;
- settimer = 1;
- }
- } else {
- handle_recv_entry(cm_node, 1);
- }
- }
-
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- do {
- send_entry = cm_node->send_entry;
- if (!send_entry)
- break;
- if (time_after(send_entry->timetosend, jiffies)) {
- if (cm_node->state != NES_CM_STATE_TSA) {
- if ((nexttimeout >
- send_entry->timetosend) ||
- !settimer) {
- nexttimeout =
- send_entry->timetosend;
- settimer = 1;
- }
- } else {
- free_retrans_entry(cm_node);
- }
- break;
- }
-
- if ((cm_node->state == NES_CM_STATE_TSA) ||
- (cm_node->state == NES_CM_STATE_CLOSED)) {
- free_retrans_entry(cm_node);
- break;
- }
-
- if (!send_entry->retranscount ||
- !send_entry->retrycount) {
- cm_packets_dropped++;
- free_retrans_entry(cm_node);
-
- spin_unlock_irqrestore(
- &cm_node->retrans_list_lock, flags);
- nes_retrans_expired(cm_node);
- cm_node->state = NES_CM_STATE_CLOSED;
- spin_lock_irqsave(&cm_node->retrans_list_lock,
- flags);
- break;
- }
- refcount_inc(&send_entry->skb->users);
- cm_packets_retrans++;
- nes_debug(NES_DBG_CM, "Retransmitting send_entry %p "
- "for node %p, jiffies = %lu, time to send = "
- "%lu, retranscount = %u, send_entry->seq_num = "
- "0x%08X, cm_node->tcp_cntxt.rem_ack_num = "
- "0x%08X\n", send_entry, cm_node, jiffies,
- send_entry->timetosend,
- send_entry->retranscount,
- send_entry->seq_num,
- cm_node->tcp_cntxt.rem_ack_num);
-
- spin_unlock_irqrestore(&cm_node->retrans_list_lock,
- flags);
- ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev);
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- if (ret != NETDEV_TX_OK) {
- nes_debug(NES_DBG_CM, "rexmit failed for "
- "node=%p\n", cm_node);
- cm_packets_bounced++;
- send_entry->retrycount--;
- nexttimeout = jiffies + NES_SHORT_TIME;
- settimer = 1;
- break;
- } else {
- cm_packets_sent++;
- }
- nes_debug(NES_DBG_CM, "Packet Sent: retrans count = "
- "%u, retry count = %u.\n",
- send_entry->retranscount,
- send_entry->retrycount);
- if (send_entry->send_retrans) {
- send_entry->retranscount--;
- timetosend = (NES_RETRY_TIMEOUT <<
- (NES_DEFAULT_RETRANS - send_entry->retranscount));
-
- send_entry->timetosend = jiffies +
- min(timetosend, NES_MAX_TIMEOUT);
- if (nexttimeout > send_entry->timetosend ||
- !settimer) {
- nexttimeout = send_entry->timetosend;
- settimer = 1;
- }
- } else {
- int close_when_complete;
- close_when_complete =
- send_entry->close_when_complete;
- nes_debug(NES_DBG_CM, "cm_node=%p state=%d\n",
- cm_node, cm_node->state);
- free_retrans_entry(cm_node);
- if (close_when_complete)
- rem_ref_cm_node(cm_node->cm_core,
- cm_node);
- }
- } while (0);
-
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- }
-
- if (settimer) {
- if (!timer_pending(&cm_core->tcp_timer))
- mod_timer(&cm_core->tcp_timer, nexttimeout);
- }
-}
-
-
-/**
- * send_syn
- */
-static int send_syn(struct nes_cm_node *cm_node, u32 sendack,
- struct sk_buff *skb)
-{
- int ret;
- int flags = SET_SYN;
- char optionsbuffer[sizeof(struct option_mss) +
- sizeof(struct option_windowscale) + sizeof(struct option_base) +
- TCP_OPTIONS_PADDING];
-
- int optionssize = 0;
- /* Sending MSS option */
- union all_known_options *options;
-
- if (!cm_node)
- return -EINVAL;
-
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_mss.optionnum = OPTION_NUMBER_MSS;
- options->as_mss.length = sizeof(struct option_mss);
- options->as_mss.mss = htons(cm_node->tcp_cntxt.mss);
- optionssize += sizeof(struct option_mss);
-
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_windowscale.optionnum = OPTION_NUMBER_WINDOW_SCALE;
- options->as_windowscale.length = sizeof(struct option_windowscale);
- options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale;
- optionssize += sizeof(struct option_windowscale);
-
- if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt)) {
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_base.optionnum = OPTION_NUMBER_WRITE0;
- options->as_base.length = sizeof(struct option_base);
- optionssize += sizeof(struct option_base);
- /* we need the size to be a multiple of 4 */
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_end = 1;
- optionssize += 1;
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_end = 1;
- optionssize += 1;
- }
-
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_end = OPTION_NUMBER_END;
- optionssize += 1;
-
- if (!skb)
- skb = dev_alloc_skb(MAX_CM_BUFFER);
- if (!skb) {
- nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
- return -1;
- }
-
- if (sendack)
- flags |= SET_ACK;
-
- form_cm_frame(skb, cm_node, optionsbuffer, optionssize, NULL, 0, flags);
- ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
-
- return ret;
-}
-
-
-/**
- * send_reset
- */
-static int send_reset(struct nes_cm_node *cm_node, struct sk_buff *skb)
-{
- int ret;
- int flags = SET_RST | SET_ACK;
-
- if (!skb)
- skb = dev_alloc_skb(MAX_CM_BUFFER);
- if (!skb) {
- nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
- return -ENOMEM;
- }
-
- form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, flags);
- ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 1);
-
- return ret;
-}
-
-
-/**
- * send_ack
- */
-static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb)
-{
- int ret;
-
- if (!skb)
- skb = dev_alloc_skb(MAX_CM_BUFFER);
-
- if (!skb) {
- nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
- return -1;
- }
-
- form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, SET_ACK);
- ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 0);
-
- return ret;
-}
-
-
-/**
- * send_fin
- */
-static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb)
-{
- int ret;
-
- /* if we didn't get a frame get one */
- if (!skb)
- skb = dev_alloc_skb(MAX_CM_BUFFER);
-
- if (!skb) {
- nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
- return -1;
- }
-
- form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, SET_ACK | SET_FIN);
- ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
-
- return ret;
-}
-
-
-/**
- * find_node - find a cm node that matches the reference cm node
- */
-static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
- u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr)
-{
- unsigned long flags;
- struct list_head *hte;
- struct nes_cm_node *cm_node;
-
- /* get a handle on the hte */
- hte = &cm_core->connected_nodes;
-
- /* walk list and find cm_node associated with this session ID */
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_entry(cm_node, hte, list) {
- /* compare quad, return node handle if a match */
- nes_debug(NES_DBG_CM, "finding node %x:%x =? %x:%x ^ %x:%x =? %x:%x\n",
- cm_node->loc_addr, cm_node->loc_port,
- loc_addr, loc_port,
- cm_node->rem_addr, cm_node->rem_port,
- rem_addr, rem_port);
- if ((cm_node->loc_addr == loc_addr) &&
- (cm_node->loc_port == loc_port) &&
- (cm_node->rem_addr == rem_addr) &&
- (cm_node->rem_port == rem_port)) {
- add_ref_cm_node(cm_node);
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- return cm_node;
- }
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- /* no owner node */
- return NULL;
-}
-
-
-/**
- * find_listener - find a cm node listening on this addr-port pair
- */
-static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
- nes_addr_t dst_addr, u16 dst_port,
- enum nes_cm_listener_state listener_state)
-{
- unsigned long flags;
- struct nes_cm_listener *listen_node;
- nes_addr_t listen_addr;
- u16 listen_port;
-
- /* walk list and find cm_node associated with this session ID */
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- list_for_each_entry(listen_node, &cm_core->listen_list.list, list) {
- listen_addr = listen_node->loc_addr;
- listen_port = listen_node->loc_port;
-
- /* compare node pair, return node handle if a match */
- if (((listen_addr == dst_addr) ||
- listen_addr == 0x00000000) &&
- (listen_port == dst_port) &&
- (listener_state & listen_node->listener_state)) {
- atomic_inc(&listen_node->ref_count);
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- return listen_node;
- }
- }
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
-
- /* no listener */
- return NULL;
-}
-
-/**
- * add_hte_node - add a cm node to the hash table
- */
-static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
-{
- unsigned long flags;
- struct list_head *hte;
-
- if (!cm_node || !cm_core)
- return -EINVAL;
-
- nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n",
- cm_node);
-
- spin_lock_irqsave(&cm_core->ht_lock, flags);
-
- /* get a handle on the hash table element (list head for this slot) */
- hte = &cm_core->connected_nodes;
- list_add_tail(&cm_node->list, hte);
- atomic_inc(&cm_core->ht_node_cnt);
-
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- return 0;
-}
-
-
-/**
- * mini_cm_dec_refcnt_listen
- */
-static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
- struct nes_cm_listener *listener, int free_hanging_nodes)
-{
- int ret = -EINVAL;
- int err = 0;
- unsigned long flags;
- struct list_head *list_pos = NULL;
- struct list_head *list_temp = NULL;
- struct nes_cm_node *cm_node = NULL;
- struct list_head reset_list;
-
- nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, "
- "refcnt=%d\n", listener, free_hanging_nodes,
- atomic_read(&listener->ref_count));
- /* free non-accelerated child nodes for this listener */
- INIT_LIST_HEAD(&reset_list);
- if (free_hanging_nodes) {
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_safe(list_pos, list_temp,
- &g_cm_core->connected_nodes) {
- cm_node = container_of(list_pos, struct nes_cm_node,
- list);
- if ((cm_node->listener == listener) &&
- (!cm_node->accelerated)) {
- add_ref_cm_node(cm_node);
- list_add(&cm_node->reset_entry, &reset_list);
- }
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- }
-
- list_for_each_safe(list_pos, list_temp, &reset_list) {
- cm_node = container_of(list_pos, struct nes_cm_node,
- reset_entry);
- {
- struct nes_cm_node *loopback = cm_node->loopbackpartner;
- enum nes_cm_node_state old_state;
- if (NES_CM_STATE_FIN_WAIT1 <= cm_node->state) {
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- } else {
- if (!loopback) {
- cleanup_retrans_entry(cm_node);
- err = send_reset(cm_node, NULL);
- if (err) {
- cm_node->state =
- NES_CM_STATE_CLOSED;
- WARN_ON(1);
- } else {
- old_state = cm_node->state;
- cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
- if (old_state != NES_CM_STATE_MPAREQ_RCVD)
- rem_ref_cm_node(
- cm_node->cm_core,
- cm_node);
- }
- } else {
- struct nes_cm_event event;
-
- event.cm_node = loopback;
- event.cm_info.rem_addr =
- loopback->rem_addr;
- event.cm_info.loc_addr =
- loopback->loc_addr;
- event.cm_info.rem_port =
- loopback->rem_port;
- event.cm_info.loc_port =
- loopback->loc_port;
- event.cm_info.cm_id = loopback->cm_id;
- add_ref_cm_node(loopback);
- loopback->state = NES_CM_STATE_CLOSED;
- cm_event_connect_error(&event);
- cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
-
- rem_ref_cm_node(cm_node->cm_core,
- cm_node);
-
- }
- }
- }
- }
-
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- if (!atomic_dec_return(&listener->ref_count)) {
- list_del(&listener->list);
-
- /* decrement our listen node count */
- atomic_dec(&cm_core->listen_node_cnt);
-
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
-
- if (listener->nesvnic) {
- nes_manage_apbvt(listener->nesvnic,
- listener->loc_port,
- PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn),
- NES_MANAGE_APBVT_DEL);
-
- nes_debug(NES_DBG_NLMSG,
- "Delete APBVT loc_port = %04X\n",
- listener->loc_port);
- }
-
- nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener);
-
- kfree(listener);
- listener = NULL;
- ret = 0;
- atomic_inc(&cm_listens_destroyed);
- } else {
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- }
- if (listener) {
- if (atomic_read(&listener->pend_accepts_cnt) > 0)
- nes_debug(NES_DBG_CM, "destroying listener (%p)"
- " with non-zero pending accepts=%u\n",
- listener, atomic_read(&listener->pend_accepts_cnt));
- }
-
- return ret;
-}
-
-
-/**
- * mini_cm_del_listen
- */
-static int mini_cm_del_listen(struct nes_cm_core *cm_core,
- struct nes_cm_listener *listener)
-{
- listener->listener_state = NES_CM_LISTENER_PASSIVE_STATE;
- listener->cm_id = NULL; /* going to be destroyed pretty soon */
- return mini_cm_dec_refcnt_listen(cm_core, listener, 1);
-}
-
-
-/**
- * mini_cm_accelerated
- */
-static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
- struct nes_cm_node *cm_node)
-{
- cm_node->accelerated = true;
-
- if (cm_node->accept_pend) {
- BUG_ON(!cm_node->listener);
- atomic_dec(&cm_node->listener->pend_accepts_cnt);
- cm_node->accept_pend = 0;
- BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0);
- }
-
- if (!timer_pending(&cm_core->tcp_timer))
- mod_timer(&cm_core->tcp_timer, (jiffies + NES_SHORT_TIME));
-
- return 0;
-}
-
-
-/**
- * nes_addr_resolve_neigh
- */
-static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpindex)
-{
- struct rtable *rt;
- struct neighbour *neigh;
- int rc = arpindex;
- struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
- __be32 dst_ipaddr = htonl(dst_ip);
-
- rt = ip_route_output(&init_net, dst_ipaddr, nesvnic->local_ipaddr, 0, 0);
- if (IS_ERR(rt)) {
- printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n",
- __func__, dst_ip);
- return rc;
- }
-
- neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr);
-
- rcu_read_lock();
- if (neigh) {
- if (neigh->nud_state & NUD_VALID) {
- nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X"
- " is %pM, Gateway is 0x%08X \n", dst_ip,
- neigh->ha, ntohl(rt->rt_gw4));
-
- if (arpindex >= 0) {
- if (ether_addr_equal(nesadapter->arp_table[arpindex].mac_addr, neigh->ha)) {
- /* Mac address same as in nes_arp_table */
- goto out;
- }
-
- nes_manage_arp_cache(nesvnic->netdev,
- nesadapter->arp_table[arpindex].mac_addr,
- dst_ip, NES_ARP_DELETE);
- }
-
- nes_manage_arp_cache(nesvnic->netdev, neigh->ha,
- dst_ip, NES_ARP_ADD);
- rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL,
- NES_ARP_RESOLVE);
- } else {
- neigh_event_send(neigh, NULL);
- }
- }
-out:
- rcu_read_unlock();
-
- if (neigh)
- neigh_release(neigh);
-
- ip_rt_put(rt);
- return rc;
-}
-
-/**
- * make_cm_node - create a new instance of a cm node
- */
-static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
- struct nes_vnic *nesvnic, struct nes_cm_info *cm_info,
- struct nes_cm_listener *listener)
-{
- struct nes_cm_node *cm_node;
- int oldarpindex = 0;
- int arpindex = 0;
- struct nes_device *nesdev;
- struct nes_adapter *nesadapter;
-
- /* create an hte and cm_node for this instance */
- cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC);
- if (!cm_node)
- return NULL;
-
- /* set our node specific transport info */
- if (listener) {
- cm_node->loc_addr = listener->loc_addr;
- cm_node->loc_port = listener->loc_port;
- } else {
- cm_node->loc_addr = cm_info->loc_addr;
- cm_node->loc_port = cm_info->loc_port;
- }
- cm_node->rem_addr = cm_info->rem_addr;
- cm_node->rem_port = cm_info->rem_port;
-
- cm_node->mpa_frame_rev = mpa_version;
- cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
- cm_node->mpav2_ird_ord = 0;
- cm_node->ird_size = 0;
- cm_node->ord_size = 0;
-
- nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n",
- &cm_node->loc_addr, cm_node->loc_port,
- &cm_node->rem_addr, cm_node->rem_port);
- cm_node->listener = listener;
- if (listener)
- cm_node->tos = listener->tos;
- cm_node->netdev = nesvnic->netdev;
- cm_node->cm_id = cm_info->cm_id;
- memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN);
-
- nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", cm_node->listener,
- cm_node->cm_id);
-
- spin_lock_init(&cm_node->retrans_list_lock);
-
- cm_node->loopbackpartner = NULL;
- atomic_set(&cm_node->ref_count, 1);
- /* associate our parent CM core */
- cm_node->cm_core = cm_core;
- cm_node->tcp_cntxt.loc_id = NES_CM_DEF_LOCAL_ID;
- cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
- cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >>
- NES_CM_DEFAULT_RCV_WND_SCALE;
- cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr),
- htonl(cm_node->rem_addr),
- htons(cm_node->loc_port),
- htons(cm_node->rem_port));
- cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) -
- sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN;
- cm_node->tcp_cntxt.rcv_nxt = 0;
- /* get a unique session ID , add thread_id to an upcounter to handle race */
- atomic_inc(&cm_core->node_cnt);
- cm_node->conn_type = cm_info->conn_type;
- cm_node->apbvt_set = 0;
- cm_node->accept_pend = 0;
-
- cm_node->nesvnic = nesvnic;
- /* get some device handles, for arp lookup */
- nesdev = nesvnic->nesdev;
- nesadapter = nesdev->nesadapter;
-
- cm_node->loopbackpartner = NULL;
-
- /* get the mac addr for the remote node */
- oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr,
- NULL, NES_ARP_RESOLVE);
- arpindex = nes_addr_resolve_neigh(nesvnic, cm_node->rem_addr,
- oldarpindex);
- if (arpindex < 0) {
- kfree(cm_node);
- return NULL;
- }
-
- /* copy the mac addr to node context */
- memcpy(cm_node->rem_mac, nesadapter->arp_table[arpindex].mac_addr, ETH_ALEN);
- nes_debug(NES_DBG_CM, "Remote mac addr from arp table: %pM\n",
- cm_node->rem_mac);
-
- add_hte_node(cm_core, cm_node);
- atomic_inc(&cm_nodes_created);
-
- return cm_node;
-}
-
-
-/**
- * add_ref_cm_node - destroy an instance of a cm node
- */
-static int add_ref_cm_node(struct nes_cm_node *cm_node)
-{
- atomic_inc(&cm_node->ref_count);
- return 0;
-}
-
-
-/**
- * rem_ref_cm_node - destroy an instance of a cm node
- */
-static int rem_ref_cm_node(struct nes_cm_core *cm_core,
- struct nes_cm_node *cm_node)
-{
- unsigned long flags;
- struct nes_qp *nesqp;
-
- if (!cm_node)
- return -EINVAL;
-
- spin_lock_irqsave(&cm_node->cm_core->ht_lock, flags);
- if (atomic_dec_return(&cm_node->ref_count)) {
- spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
- return 0;
- }
- list_del(&cm_node->list);
- atomic_dec(&cm_core->ht_node_cnt);
- spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
-
- /* if the node is destroyed before connection was accelerated */
- if (!cm_node->accelerated && cm_node->accept_pend) {
- BUG_ON(!cm_node->listener);
- atomic_dec(&cm_node->listener->pend_accepts_cnt);
- BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0);
- }
- WARN_ON(cm_node->send_entry);
- if (cm_node->recv_entry)
- handle_recv_entry(cm_node, 0);
- if (cm_node->listener) {
- mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0);
- } else {
- if (cm_node->apbvt_set && cm_node->nesvnic) {
- nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port,
- PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn),
- NES_MANAGE_APBVT_DEL);
- }
- nes_debug(NES_DBG_NLMSG, "Delete APBVT loc_port = %04X\n",
- cm_node->loc_port);
- }
-
- atomic_dec(&cm_core->node_cnt);
- atomic_inc(&cm_nodes_destroyed);
- nesqp = cm_node->nesqp;
- if (nesqp) {
- nesqp->cm_node = NULL;
- nes_rem_ref(&nesqp->ibqp);
- cm_node->nesqp = NULL;
- }
-
- kfree(cm_node);
- return 0;
-}
-
-/**
- * process_options
- */
-static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc,
- u32 optionsize, u32 syn_packet)
-{
- u32 tmp;
- u32 offset = 0;
- union all_known_options *all_options;
- char got_mss_option = 0;
-
- while (offset < optionsize) {
- all_options = (union all_known_options *)(optionsloc + offset);
- switch (all_options->as_base.optionnum) {
- case OPTION_NUMBER_END:
- offset = optionsize;
- break;
- case OPTION_NUMBER_NONE:
- offset += 1;
- continue;
- case OPTION_NUMBER_MSS:
- nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d "
- "Size: %d\n", __func__,
- all_options->as_mss.length, offset, optionsize);
- got_mss_option = 1;
- if (all_options->as_mss.length != 4) {
- return 1;
- } else {
- tmp = ntohs(all_options->as_mss.mss);
- if (tmp > 0 && tmp <
- cm_node->tcp_cntxt.mss)
- cm_node->tcp_cntxt.mss = tmp;
- }
- break;
- case OPTION_NUMBER_WINDOW_SCALE:
- cm_node->tcp_cntxt.snd_wscale =
- all_options->as_windowscale.shiftcount;
- break;
- default:
- nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n",
- all_options->as_base.optionnum);
- break;
- }
- offset += all_options->as_base.length;
- }
- if ((!got_mss_option) && (syn_packet))
- cm_node->tcp_cntxt.mss = NES_CM_DEFAULT_MSS;
- return 0;
-}
-
-static void drop_packet(struct sk_buff *skb)
-{
- atomic_inc(&cm_accel_dropped_pkts);
- dev_kfree_skb_any(skb);
-}
-
-static void handle_fin_pkt(struct nes_cm_node *cm_node)
-{
- nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. "
- "refcnt=%d\n", cm_node, cm_node->state,
- atomic_read(&cm_node->ref_count));
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_RCVD:
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_MPAREJ_RCVD:
- cm_node->tcp_cntxt.rcv_nxt++;
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_LAST_ACK;
- send_fin(cm_node, NULL);
- break;
- case NES_CM_STATE_MPAREQ_SENT:
- create_event(cm_node, NES_CM_EVENT_ABORTED);
- cm_node->tcp_cntxt.rcv_nxt++;
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_CLOSED;
- add_ref_cm_node(cm_node);
- send_reset(cm_node, NULL);
- break;
- case NES_CM_STATE_FIN_WAIT1:
- cm_node->tcp_cntxt.rcv_nxt++;
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_CLOSING;
- send_ack(cm_node, NULL);
- /* Wait for ACK as this is simultaneous close..
- * After we receive ACK, do not send anything..
- * Just rm the node.. Done.. */
- break;
- case NES_CM_STATE_FIN_WAIT2:
- cm_node->tcp_cntxt.rcv_nxt++;
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_TIME_WAIT;
- send_ack(cm_node, NULL);
- schedule_nes_timer(cm_node, NULL, NES_TIMER_TYPE_CLOSE, 1, 0);
- break;
- case NES_CM_STATE_TIME_WAIT:
- cm_node->tcp_cntxt.rcv_nxt++;
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_CLOSED;
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- break;
- case NES_CM_STATE_TSA:
- default:
- nes_debug(NES_DBG_CM, "Error Rcvd FIN for node-%p state = %d\n",
- cm_node, cm_node->state);
- break;
- }
-}
-
-
-static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
- struct tcphdr *tcph)
-{
-
- int reset = 0; /* whether to send reset in case of err.. */
- atomic_inc(&cm_resets_recvd);
- nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u."
- " refcnt=%d\n", cm_node, cm_node->state,
- atomic_read(&cm_node->ref_count));
- cleanup_retrans_entry(cm_node);
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_MPAREQ_SENT:
- nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
- "listener=%p state=%d\n", __func__, __LINE__, cm_node,
- cm_node->listener, cm_node->state);
- switch (cm_node->mpa_frame_rev) {
- case IETF_MPA_V2:
- cm_node->mpa_frame_rev = IETF_MPA_V1;
- /* send a syn and goto syn sent state */
- cm_node->state = NES_CM_STATE_SYN_SENT;
- if (send_syn(cm_node, 0, NULL)) {
- active_open_err(cm_node, skb, reset);
- }
- break;
- case IETF_MPA_V1:
- default:
- active_open_err(cm_node, skb, reset);
- break;
- }
- break;
- case NES_CM_STATE_MPAREQ_RCVD:
- atomic_inc(&cm_node->passive_state);
- dev_kfree_skb_any(skb);
- break;
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_SYN_RCVD:
- case NES_CM_STATE_LISTENING:
- nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__);
- passive_open_err(cm_node, skb, reset);
- break;
- case NES_CM_STATE_TSA:
- active_open_err(cm_node, skb, reset);
- break;
- case NES_CM_STATE_CLOSED:
- drop_packet(skb);
- break;
- case NES_CM_STATE_FIN_WAIT2:
- case NES_CM_STATE_FIN_WAIT1:
- case NES_CM_STATE_LAST_ACK:
- cm_node->cm_id->rem_ref(cm_node->cm_id);
- /* fall through */
- case NES_CM_STATE_TIME_WAIT:
- cm_node->state = NES_CM_STATE_CLOSED;
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- drop_packet(skb);
- break;
- default:
- drop_packet(skb);
- break;
- }
-}
-
-
-static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb)
-{
- int ret = 0;
- int datasize = skb->len;
- u8 *dataloc = skb->data;
-
- enum nes_cm_event_type type = NES_CM_EVENT_UNKNOWN;
- u32 res_type;
-
- ret = parse_mpa(cm_node, dataloc, &res_type, datasize);
- if (ret) {
- nes_debug(NES_DBG_CM, "didn't like MPA Request\n");
- if (cm_node->state == NES_CM_STATE_MPAREQ_SENT) {
- nes_debug(NES_DBG_CM, "%s[%u] create abort for "
- "cm_node=%p listener=%p state=%d\n", __func__,
- __LINE__, cm_node, cm_node->listener,
- cm_node->state);
- active_open_err(cm_node, skb, 1);
- } else {
- passive_open_err(cm_node, skb, 1);
- }
- return;
- }
-
- switch (cm_node->state) {
- case NES_CM_STATE_ESTABLISHED:
- if (res_type == NES_MPA_REQUEST_REJECT)
- /*BIG problem as we are receiving the MPA.. So should
- * not be REJECT.. This is Passive Open.. We can
- * only receive it Reject for Active Open...*/
- WARN_ON(1);
- cm_node->state = NES_CM_STATE_MPAREQ_RCVD;
- type = NES_CM_EVENT_MPA_REQ;
- atomic_set(&cm_node->passive_state,
- NES_PASSIVE_STATE_INDICATED);
- break;
- case NES_CM_STATE_MPAREQ_SENT:
- cleanup_retrans_entry(cm_node);
- if (res_type == NES_MPA_REQUEST_REJECT) {
- type = NES_CM_EVENT_MPA_REJECT;
- cm_node->state = NES_CM_STATE_MPAREJ_RCVD;
- } else {
- type = NES_CM_EVENT_CONNECTED;
- cm_node->state = NES_CM_STATE_TSA;
- }
- send_ack(cm_node, NULL);
- break;
- default:
- WARN_ON(1);
- break;
- }
- dev_kfree_skb_any(skb);
- create_event(cm_node, type);
-}
-
-static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb)
-{
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_MPAREQ_SENT:
- nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
- "listener=%p state=%d\n", __func__, __LINE__, cm_node,
- cm_node->listener, cm_node->state);
- active_open_err(cm_node, skb, 1);
- break;
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_SYN_RCVD:
- passive_open_err(cm_node, skb, 1);
- break;
- case NES_CM_STATE_TSA:
- default:
- drop_packet(skb);
- }
-}
-
-static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph,
- struct sk_buff *skb)
-{
- int err;
-
- err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num)) ? 0 : 1;
- if (err)
- active_open_err(cm_node, skb, 1);
-
- return err;
-}
-
-static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph,
- struct sk_buff *skb)
-{
- int err = 0;
- u32 seq;
- u32 ack_seq;
- u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num;
- u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
- u32 rcv_wnd;
-
- seq = ntohl(tcph->seq);
- ack_seq = ntohl(tcph->ack_seq);
- rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
- if (ack_seq != loc_seq_num)
- err = 1;
- else if (!between(seq, rcv_nxt, (rcv_nxt + rcv_wnd)))
- err = 1;
- if (err) {
- nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
- "listener=%p state=%d\n", __func__, __LINE__, cm_node,
- cm_node->listener, cm_node->state);
- indicate_pkt_err(cm_node, skb);
- nes_debug(NES_DBG_CM, "seq ERROR cm_node =%p seq=0x%08X "
- "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt,
- rcv_wnd);
- }
- return err;
-}
-
-/*
- * handle_syn_pkt() is for Passive node. The syn packet is received when a node
- * is created with a listener or it may comein as rexmitted packet which in
- * that case will be just dropped.
- */
-static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
- struct tcphdr *tcph)
-{
- int ret;
- u32 inc_sequence;
- int optionsize;
-
- optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
- skb_trim(skb, 0);
- inc_sequence = ntohl(tcph->seq);
-
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_MPAREQ_SENT:
- /* Rcvd syn on active open connection*/
- active_open_err(cm_node, skb, 1);
- break;
- case NES_CM_STATE_LISTENING:
- /* Passive OPEN */
- if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
- cm_node->listener->backlog) {
- nes_debug(NES_DBG_CM, "drop syn due to backlog "
- "pressure \n");
- cm_backlog_drops++;
- passive_open_err(cm_node, skb, 0);
- break;
- }
- ret = handle_tcp_options(cm_node, tcph, skb, optionsize,
- 1);
- if (ret) {
- passive_open_err(cm_node, skb, 0);
- /* drop pkt */
- break;
- }
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
- BUG_ON(cm_node->send_entry);
- cm_node->accept_pend = 1;
- atomic_inc(&cm_node->listener->pend_accepts_cnt);
-
- cm_node->state = NES_CM_STATE_SYN_RCVD;
- send_syn(cm_node, 1, skb);
- break;
- case NES_CM_STATE_CLOSED:
- cleanup_retrans_entry(cm_node);
- add_ref_cm_node(cm_node);
- send_reset(cm_node, skb);
- break;
- case NES_CM_STATE_TSA:
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_FIN_WAIT1:
- case NES_CM_STATE_FIN_WAIT2:
- case NES_CM_STATE_MPAREQ_RCVD:
- case NES_CM_STATE_LAST_ACK:
- case NES_CM_STATE_CLOSING:
- case NES_CM_STATE_UNKNOWN:
- default:
- drop_packet(skb);
- break;
- }
-}
-
-static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
- struct tcphdr *tcph)
-{
- int ret;
- u32 inc_sequence;
- int optionsize;
-
- optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
- skb_trim(skb, 0);
- inc_sequence = ntohl(tcph->seq);
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_SENT:
- cleanup_retrans_entry(cm_node);
- /* active open */
- if (check_syn(cm_node, tcph, skb))
- return;
- cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
- /* setup options */
- ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 0);
- if (ret) {
- nes_debug(NES_DBG_CM, "cm_node=%p tcp_options failed\n",
- cm_node);
- break;
- }
- cleanup_retrans_entry(cm_node);
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
- send_mpa_request(cm_node, skb);
- cm_node->state = NES_CM_STATE_MPAREQ_SENT;
- break;
- case NES_CM_STATE_MPAREQ_RCVD:
- /* passive open, so should not be here */
- passive_open_err(cm_node, skb, 1);
- break;
- case NES_CM_STATE_LISTENING:
- cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_CLOSED;
- send_reset(cm_node, skb);
- break;
- case NES_CM_STATE_CLOSED:
- cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
- cleanup_retrans_entry(cm_node);
- add_ref_cm_node(cm_node);
- send_reset(cm_node, skb);
- break;
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_FIN_WAIT1:
- case NES_CM_STATE_FIN_WAIT2:
- case NES_CM_STATE_LAST_ACK:
- case NES_CM_STATE_TSA:
- case NES_CM_STATE_CLOSING:
- case NES_CM_STATE_UNKNOWN:
- case NES_CM_STATE_MPAREQ_SENT:
- default:
- drop_packet(skb);
- break;
- }
-}
-
-static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
- struct tcphdr *tcph)
-{
- int datasize = 0;
- u32 inc_sequence;
- int ret = 0;
- int optionsize;
-
- optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
-
- if (check_seq(cm_node, tcph, skb))
- return -EINVAL;
-
- skb_pull(skb, tcph->doff << 2);
- inc_sequence = ntohl(tcph->seq);
- datasize = skb->len;
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_RCVD:
- /* Passive OPEN */
- cleanup_retrans_entry(cm_node);
- ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 1);
- if (ret)
- break;
- cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
- cm_node->state = NES_CM_STATE_ESTABLISHED;
- if (datasize) {
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
- handle_rcv_mpa(cm_node, skb);
- } else { /* rcvd ACK only */
- dev_kfree_skb_any(skb);
- }
- break;
- case NES_CM_STATE_ESTABLISHED:
- /* Passive OPEN */
- cleanup_retrans_entry(cm_node);
- if (datasize) {
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
- handle_rcv_mpa(cm_node, skb);
- } else {
- drop_packet(skb);
- }
- break;
- case NES_CM_STATE_MPAREQ_SENT:
- cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
- if (datasize) {
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
- handle_rcv_mpa(cm_node, skb);
- } else { /* Could be just an ack pkt.. */
- dev_kfree_skb_any(skb);
- }
- break;
- case NES_CM_STATE_LISTENING:
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_CLOSED;
- send_reset(cm_node, skb);
- break;
- case NES_CM_STATE_CLOSED:
- cleanup_retrans_entry(cm_node);
- add_ref_cm_node(cm_node);
- send_reset(cm_node, skb);
- break;
- case NES_CM_STATE_LAST_ACK:
- case NES_CM_STATE_CLOSING:
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_CLOSED;
- cm_node->cm_id->rem_ref(cm_node->cm_id);
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- drop_packet(skb);
- break;
- case NES_CM_STATE_FIN_WAIT1:
- cleanup_retrans_entry(cm_node);
- drop_packet(skb);
- cm_node->state = NES_CM_STATE_FIN_WAIT2;
- break;
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_FIN_WAIT2:
- case NES_CM_STATE_TSA:
- case NES_CM_STATE_MPAREQ_RCVD:
- case NES_CM_STATE_UNKNOWN:
- default:
- cleanup_retrans_entry(cm_node);
- drop_packet(skb);
- break;
- }
- return ret;
-}
-
-
-
-static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
- struct sk_buff *skb, int optionsize, int passive)
-{
- u8 *optionsloc = (u8 *)&tcph[1];
-
- if (optionsize) {
- if (process_options(cm_node, optionsloc, optionsize,
- (u32)tcph->syn)) {
- nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n",
- __func__, cm_node);
- if (passive)
- passive_open_err(cm_node, skb, 1);
- else
- active_open_err(cm_node, skb, 1);
- return 1;
- }
- }
-
- cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) <<
- cm_node->tcp_cntxt.snd_wscale;
-
- if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
- cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
- return 0;
-}
-
-/*
- * active_open_err() will send reset() if flag set..
- * It will also send ABORT event.
- */
-static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
- int reset)
-{
- cleanup_retrans_entry(cm_node);
- if (reset) {
- nes_debug(NES_DBG_CM, "ERROR active err called for cm_node=%p, "
- "state=%d\n", cm_node, cm_node->state);
- add_ref_cm_node(cm_node);
- send_reset(cm_node, skb);
- } else {
- dev_kfree_skb_any(skb);
- }
-
- cm_node->state = NES_CM_STATE_CLOSED;
- create_event(cm_node, NES_CM_EVENT_ABORTED);
-}
-
-/*
- * passive_open_err() will either do a reset() or will free up the skb and
- * remove the cm_node.
- */
-static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
- int reset)
-{
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_CLOSED;
- if (reset) {
- nes_debug(NES_DBG_CM, "passive_open_err sending RST for "
- "cm_node=%p state =%d\n", cm_node, cm_node->state);
- send_reset(cm_node, skb);
- } else {
- dev_kfree_skb_any(skb);
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- }
-}
-
-/*
- * free_retrans_entry() routines assumes that the retrans_list_lock has
- * been acquired before calling.
- */
-static void free_retrans_entry(struct nes_cm_node *cm_node)
-{
- struct nes_timer_entry *send_entry;
-
- send_entry = cm_node->send_entry;
- if (send_entry) {
- cm_node->send_entry = NULL;
- dev_kfree_skb_any(send_entry->skb);
- kfree(send_entry);
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- }
-}
-
-static void cleanup_retrans_entry(struct nes_cm_node *cm_node)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- free_retrans_entry(cm_node);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
-}
-
-/**
- * process_packet
- * Returns skb if to be freed, else it will return NULL if already used..
- */
-static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
- struct nes_cm_core *cm_core)
-{
- enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN;
- struct tcphdr *tcph = tcp_hdr(skb);
- u32 fin_set = 0;
- int ret = 0;
-
- skb_pull(skb, ip_hdr(skb)->ihl << 2);
-
- nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d "
- "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn,
- tcph->ack, tcph->rst, tcph->fin);
-
- if (tcph->rst) {
- pkt_type = NES_PKT_TYPE_RST;
- } else if (tcph->syn) {
- pkt_type = NES_PKT_TYPE_SYN;
- if (tcph->ack)
- pkt_type = NES_PKT_TYPE_SYNACK;
- } else if (tcph->ack) {
- pkt_type = NES_PKT_TYPE_ACK;
- }
- if (tcph->fin)
- fin_set = 1;
-
- switch (pkt_type) {
- case NES_PKT_TYPE_SYN:
- handle_syn_pkt(cm_node, skb, tcph);
- break;
- case NES_PKT_TYPE_SYNACK:
- handle_synack_pkt(cm_node, skb, tcph);
- break;
- case NES_PKT_TYPE_ACK:
- ret = handle_ack_pkt(cm_node, skb, tcph);
- if (fin_set && !ret)
- handle_fin_pkt(cm_node);
- break;
- case NES_PKT_TYPE_RST:
- handle_rst_pkt(cm_node, skb, tcph);
- break;
- default:
- if ((fin_set) && (!check_seq(cm_node, tcph, skb)))
- handle_fin_pkt(cm_node);
- drop_packet(skb);
- break;
- }
-}
-
-/**
- * mini_cm_listen - create a listen node with params
- */
-static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
- struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
-{
- struct nes_cm_listener *listener;
- unsigned long flags;
-
- nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n",
- cm_info->loc_addr, cm_info->loc_port);
-
- /* cannot have multiple matching listeners */
- listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port,
- NES_CM_LISTENER_EITHER_STATE);
-
- if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) {
- /* find automatically incs ref count ??? */
- atomic_dec(&listener->ref_count);
- nes_debug(NES_DBG_CM, "Not creating listener since it already exists\n");
- return NULL;
- }
-
- if (!listener) {
- /* create a CM listen node (1/2 node to compare incoming traffic to) */
- listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
- if (!listener)
- return NULL;
-
- listener->loc_addr = cm_info->loc_addr;
- listener->loc_port = cm_info->loc_port;
- listener->reused_node = 0;
-
- atomic_set(&listener->ref_count, 1);
- }
- /* pasive case */
- /* find already inc'ed the ref count */
- else {
- listener->reused_node = 1;
- }
-
- listener->cm_id = cm_info->cm_id;
- atomic_set(&listener->pend_accepts_cnt, 0);
- listener->cm_core = cm_core;
- listener->nesvnic = nesvnic;
- atomic_inc(&cm_core->node_cnt);
-
- listener->conn_type = cm_info->conn_type;
- listener->backlog = cm_info->backlog;
- listener->listener_state = NES_CM_LISTENER_ACTIVE_STATE;
-
- if (!listener->reused_node) {
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- list_add(&listener->list, &cm_core->listen_list.list);
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- atomic_inc(&cm_core->listen_node_cnt);
- }
-
- nes_debug(NES_DBG_CM, "Api - listen(): addr=0x%08X, port=0x%04x,"
- " listener = %p, backlog = %d, cm_id = %p.\n",
- cm_info->loc_addr, cm_info->loc_port,
- listener, listener->backlog, listener->cm_id);
-
- return listener;
-}
-
-
-/**
- * mini_cm_connect - make a connection node with params
- */
-static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
- struct nes_vnic *nesvnic, u16 private_data_len,
- void *private_data, struct nes_cm_info *cm_info)
-{
- int ret = 0;
- struct nes_cm_node *cm_node;
- struct nes_cm_listener *loopbackremotelistener;
- struct nes_cm_node *loopbackremotenode;
- struct nes_cm_info loopback_cm_info;
- u8 *start_buff;
-
- /* create a CM connection node */
- cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL);
- if (!cm_node)
- return NULL;
-
- /* set our node side to client (active) side */
- cm_node->tcp_cntxt.client = 1;
- cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
-
- if (cm_info->loc_addr == cm_info->rem_addr) {
- loopbackremotelistener = find_listener(cm_core,
- cm_node->loc_addr, cm_node->rem_port,
- NES_CM_LISTENER_ACTIVE_STATE);
- if (loopbackremotelistener == NULL) {
- create_event(cm_node, NES_CM_EVENT_ABORTED);
- } else {
- loopback_cm_info = *cm_info;
- loopback_cm_info.loc_port = cm_info->rem_port;
- loopback_cm_info.rem_port = cm_info->loc_port;
- loopback_cm_info.loc_port =
- cm_info->rem_port;
- loopback_cm_info.rem_port =
- cm_info->loc_port;
- loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
- loopbackremotenode = make_cm_node(cm_core, nesvnic,
- &loopback_cm_info, loopbackremotelistener);
- if (!loopbackremotenode) {
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- return NULL;
- }
- atomic_inc(&cm_loopbacks);
- loopbackremotenode->loopbackpartner = cm_node;
- loopbackremotenode->tcp_cntxt.rcv_wscale =
- NES_CM_DEFAULT_RCV_WND_SCALE;
- cm_node->loopbackpartner = loopbackremotenode;
- memcpy(loopbackremotenode->mpa_frame_buf, private_data,
- private_data_len);
- loopbackremotenode->mpa_frame_size = private_data_len;
-
- /* we are done handling this state. */
- /* set node to a TSA state */
- cm_node->state = NES_CM_STATE_TSA;
- cm_node->tcp_cntxt.rcv_nxt =
- loopbackremotenode->tcp_cntxt.loc_seq_num;
- loopbackremotenode->tcp_cntxt.rcv_nxt =
- cm_node->tcp_cntxt.loc_seq_num;
- cm_node->tcp_cntxt.max_snd_wnd =
- loopbackremotenode->tcp_cntxt.rcv_wnd;
- loopbackremotenode->tcp_cntxt.max_snd_wnd =
- cm_node->tcp_cntxt.rcv_wnd;
- cm_node->tcp_cntxt.snd_wnd =
- loopbackremotenode->tcp_cntxt.rcv_wnd;
- loopbackremotenode->tcp_cntxt.snd_wnd =
- cm_node->tcp_cntxt.rcv_wnd;
- cm_node->tcp_cntxt.snd_wscale =
- loopbackremotenode->tcp_cntxt.rcv_wscale;
- loopbackremotenode->tcp_cntxt.snd_wscale =
- cm_node->tcp_cntxt.rcv_wscale;
- loopbackremotenode->state = NES_CM_STATE_MPAREQ_RCVD;
- create_event(loopbackremotenode, NES_CM_EVENT_MPA_REQ);
- }
- return cm_node;
- }
-
- start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2);
- cm_node->mpa_frame_size = private_data_len;
-
- memcpy(start_buff, private_data, private_data_len);
-
- /* send a syn and goto syn sent state */
- cm_node->state = NES_CM_STATE_SYN_SENT;
- ret = send_syn(cm_node, 0, NULL);
-
- if (ret) {
- /* error in sending the syn free up the cm_node struct */
- nes_debug(NES_DBG_CM, "Api - connect() FAILED: dest "
- "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n",
- cm_node->rem_addr, cm_node->rem_port, cm_node,
- cm_node->cm_id);
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- cm_node = NULL;
- }
-
- if (cm_node) {
- nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X,"
- "port=0x%04x, cm_node=%p, cm_id = %p.\n",
- cm_node->rem_addr, cm_node->rem_port, cm_node,
- cm_node->cm_id);
- }
-
- return cm_node;
-}
-
-
-/**
- * mini_cm_accept - accept a connection
- * This function is never called
- */
-static int mini_cm_accept(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
-{
- return 0;
-}
-
-
-/**
- * mini_cm_reject - reject and teardown a connection
- */
-static int mini_cm_reject(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
-{
- int ret = 0;
- int err = 0;
- int passive_state;
- struct nes_cm_event event;
- struct iw_cm_id *cm_id = cm_node->cm_id;
- struct nes_cm_node *loopback = cm_node->loopbackpartner;
-
- nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n",
- __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state);
-
- if (cm_node->tcp_cntxt.client)
- return ret;
- cleanup_retrans_entry(cm_node);
-
- if (!loopback) {
- passive_state = atomic_add_return(1, &cm_node->passive_state);
- if (passive_state == NES_SEND_RESET_EVENT) {
- cm_node->state = NES_CM_STATE_CLOSED;
- rem_ref_cm_node(cm_core, cm_node);
- } else {
- if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
- rem_ref_cm_node(cm_core, cm_node);
- } else {
- ret = send_mpa_reject(cm_node);
- if (ret) {
- cm_node->state = NES_CM_STATE_CLOSED;
- err = send_reset(cm_node, NULL);
- if (err)
- WARN_ON(1);
- } else {
- cm_id->add_ref(cm_id);
- }
- }
- }
- } else {
- cm_node->cm_id = NULL;
- if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
- rem_ref_cm_node(cm_core, cm_node);
- rem_ref_cm_node(cm_core, loopback);
- } else {
- event.cm_node = loopback;
- event.cm_info.rem_addr = loopback->rem_addr;
- event.cm_info.loc_addr = loopback->loc_addr;
- event.cm_info.rem_port = loopback->rem_port;
- event.cm_info.loc_port = loopback->loc_port;
- event.cm_info.cm_id = loopback->cm_id;
- cm_event_mpa_reject(&event);
- rem_ref_cm_node(cm_core, cm_node);
- loopback->state = NES_CM_STATE_CLOSING;
-
- cm_id = loopback->cm_id;
- rem_ref_cm_node(cm_core, loopback);
- cm_id->rem_ref(cm_id);
- }
- }
-
- return ret;
-}
-
-
-/**
- * mini_cm_close
- */
-static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
-{
- int ret = 0;
-
- if (!cm_core || !cm_node)
- return -EINVAL;
-
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_RCVD:
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_ACCEPTING:
- case NES_CM_STATE_MPAREQ_SENT:
- case NES_CM_STATE_MPAREQ_RCVD:
- cleanup_retrans_entry(cm_node);
- send_reset(cm_node, NULL);
- break;
- case NES_CM_STATE_CLOSE_WAIT:
- cm_node->state = NES_CM_STATE_LAST_ACK;
- send_fin(cm_node, NULL);
- break;
- case NES_CM_STATE_FIN_WAIT1:
- case NES_CM_STATE_FIN_WAIT2:
- case NES_CM_STATE_LAST_ACK:
- case NES_CM_STATE_TIME_WAIT:
- case NES_CM_STATE_CLOSING:
- ret = -1;
- break;
- case NES_CM_STATE_LISTENING:
- cleanup_retrans_entry(cm_node);
- send_reset(cm_node, NULL);
- break;
- case NES_CM_STATE_MPAREJ_RCVD:
- case NES_CM_STATE_UNKNOWN:
- case NES_CM_STATE_INITED:
- case NES_CM_STATE_CLOSED:
- case NES_CM_STATE_LISTENER_DESTROYED:
- ret = rem_ref_cm_node(cm_core, cm_node);
- break;
- case NES_CM_STATE_TSA:
- if (cm_node->send_entry)
- printk(KERN_ERR "ERROR Close got called from STATE_TSA "
- "send_entry=%p\n", cm_node->send_entry);
- ret = rem_ref_cm_node(cm_core, cm_node);
- break;
- }
- return ret;
-}
-
-
-/**
- * recv_pkt - recv an ETHERNET packet, and process it through CM
- * node state machine
- */
-static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
- struct nes_vnic *nesvnic, struct sk_buff *skb)
-{
- struct nes_cm_node *cm_node = NULL;
- struct nes_cm_listener *listener = NULL;
- struct iphdr *iph;
- struct tcphdr *tcph;
- struct nes_cm_info nfo;
- int skb_handled = 1;
- __be32 tmp_daddr, tmp_saddr;
-
- if (!skb)
- return 0;
- if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr))
- return 0;
-
- iph = (struct iphdr *)skb->data;
- tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr));
-
- nfo.loc_addr = ntohl(iph->daddr);
- nfo.loc_port = ntohs(tcph->dest);
- nfo.rem_addr = ntohl(iph->saddr);
- nfo.rem_port = ntohs(tcph->source);
-
- tmp_daddr = cpu_to_be32(iph->daddr);
- tmp_saddr = cpu_to_be32(iph->saddr);
-
- nes_debug(NES_DBG_CM, "Received packet: dest=%pI4:0x%04X src=%pI4:0x%04X\n",
- &tmp_daddr, tcph->dest, &tmp_saddr, tcph->source);
-
- do {
- cm_node = find_node(cm_core,
- nfo.rem_port, nfo.rem_addr,
- nfo.loc_port, nfo.loc_addr);
-
- if (!cm_node) {
- /* Only type of packet accepted are for */
- /* the PASSIVE open (syn only) */
- if ((!tcph->syn) || (tcph->ack)) {
- skb_handled = 0;
- break;
- }
- listener = find_listener(cm_core, nfo.loc_addr,
- nfo.loc_port,
- NES_CM_LISTENER_ACTIVE_STATE);
- if (!listener) {
- nfo.cm_id = NULL;
- nfo.conn_type = 0;
- nes_debug(NES_DBG_CM, "Unable to find listener for the pkt\n");
- skb_handled = 0;
- break;
- }
- nfo.cm_id = listener->cm_id;
- nfo.conn_type = listener->conn_type;
- cm_node = make_cm_node(cm_core, nesvnic, &nfo,
- listener);
- if (!cm_node) {
- nes_debug(NES_DBG_CM, "Unable to allocate "
- "node\n");
- cm_packets_dropped++;
- atomic_dec(&listener->ref_count);
- dev_kfree_skb_any(skb);
- break;
- }
- if (!tcph->rst && !tcph->fin) {
- cm_node->state = NES_CM_STATE_LISTENING;
- } else {
- cm_packets_dropped++;
- rem_ref_cm_node(cm_core, cm_node);
- dev_kfree_skb_any(skb);
- break;
- }
- add_ref_cm_node(cm_node);
- } else if (cm_node->state == NES_CM_STATE_TSA) {
- if (cm_node->nesqp->pau_mode)
- nes_queue_mgt_skbs(skb, nesvnic, cm_node->nesqp);
- else {
- rem_ref_cm_node(cm_core, cm_node);
- atomic_inc(&cm_accel_dropped_pkts);
- dev_kfree_skb_any(skb);
- }
- break;
- }
- skb_reset_network_header(skb);
- skb_set_transport_header(skb, sizeof(*tcph));
- skb->len = ntohs(iph->tot_len);
- process_packet(cm_node, skb, cm_core);
- rem_ref_cm_node(cm_core, cm_node);
- } while (0);
- return skb_handled;
-}
-
-
-/**
- * nes_cm_alloc_core - allocate a top level instance of a cm core
- */
-static struct nes_cm_core *nes_cm_alloc_core(void)
-{
- struct nes_cm_core *cm_core;
-
- /* setup the CM core */
- /* alloc top level core control structure */
- cm_core = kzalloc(sizeof(*cm_core), GFP_KERNEL);
- if (!cm_core)
- return NULL;
-
- INIT_LIST_HEAD(&cm_core->connected_nodes);
- timer_setup(&cm_core->tcp_timer, nes_cm_timer_tick, 0);
-
- cm_core->mtu = NES_CM_DEFAULT_MTU;
- cm_core->state = NES_CM_STATE_INITED;
- cm_core->free_tx_pkt_max = NES_CM_DEFAULT_FREE_PKTS;
-
- atomic_set(&cm_core->events_posted, 0);
-
- cm_core->api = &nes_cm_api;
-
- spin_lock_init(&cm_core->ht_lock);
- spin_lock_init(&cm_core->listen_list_lock);
-
- INIT_LIST_HEAD(&cm_core->listen_list.list);
-
- nes_debug(NES_DBG_CM, "Init CM Core completed -- cm_core=%p\n", cm_core);
-
- nes_debug(NES_DBG_CM, "Enable QUEUE EVENTS\n");
- cm_core->event_wq = alloc_ordered_workqueue("nesewq", 0);
- if (!cm_core->event_wq)
- goto out_free_cmcore;
- cm_core->post_event = nes_cm_post_event;
- nes_debug(NES_DBG_CM, "Enable QUEUE DISCONNECTS\n");
- cm_core->disconn_wq = alloc_ordered_workqueue("nesdwq", 0);
- if (!cm_core->disconn_wq)
- goto out_free_wq;
-
- print_core(cm_core);
- return cm_core;
-
-out_free_wq:
- destroy_workqueue(cm_core->event_wq);
-out_free_cmcore:
- kfree(cm_core);
- return NULL;
-}
-
-
-/**
- * mini_cm_dealloc_core - deallocate a top level instance of a cm core
- */
-static int mini_cm_dealloc_core(struct nes_cm_core *cm_core)
-{
- nes_debug(NES_DBG_CM, "De-Alloc CM Core (%p)\n", cm_core);
-
- if (!cm_core)
- return -EINVAL;
-
- barrier();
-
- if (timer_pending(&cm_core->tcp_timer))
- del_timer(&cm_core->tcp_timer);
-
- destroy_workqueue(cm_core->event_wq);
- destroy_workqueue(cm_core->disconn_wq);
- nes_debug(NES_DBG_CM, "\n");
- kfree(cm_core);
-
- return 0;
-}
-
-
-/**
- * mini_cm_get
- */
-static int mini_cm_get(struct nes_cm_core *cm_core)
-{
- return cm_core->state;
-}
-
-
-/**
- * mini_cm_set
- */
-static int mini_cm_set(struct nes_cm_core *cm_core, u32 type, u32 value)
-{
- int ret = 0;
-
- switch (type) {
- case NES_CM_SET_PKT_SIZE:
- cm_core->mtu = value;
- break;
- case NES_CM_SET_FREE_PKT_Q_SIZE:
- cm_core->free_tx_pkt_max = value;
- break;
- default:
- /* unknown set option */
- ret = -EINVAL;
- }
-
- return ret;
-}
-
-
-/**
- * nes_cm_init_tsa_conn setup HW; MPA frames must be
- * successfully exchanged when this is called
- */
-static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_node)
-{
- int ret = 0;
-
- if (!nesqp)
- return -EINVAL;
-
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_IPV4 |
- NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG |
- NES_QPCONTEXT_MISC_DROS);
-
- if (cm_node->tcp_cntxt.snd_wscale || cm_node->tcp_cntxt.rcv_wscale)
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WSCALE);
-
- nesqp->nesqp_context->misc2 |= cpu_to_le32(64 << NES_QPCONTEXT_MISC2_TTL_SHIFT);
-
- nesqp->nesqp_context->misc2 |= cpu_to_le32(
- cm_node->tos << NES_QPCONTEXT_MISC2_TOS_SHIFT);
-
- nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16);
-
- nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32(
- (u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT);
-
- nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32(
- (cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) &
- NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK);
-
- nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32(
- (cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) &
- NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK);
-
- nesqp->nesqp_context->keepalive = cpu_to_le32(0x80);
- nesqp->nesqp_context->ts_recent = 0;
- nesqp->nesqp_context->ts_age = 0;
- nesqp->nesqp_context->snd_nxt = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
- nesqp->nesqp_context->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd);
- nesqp->nesqp_context->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
- nesqp->nesqp_context->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd <<
- cm_node->tcp_cntxt.rcv_wscale);
- nesqp->nesqp_context->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
- nesqp->nesqp_context->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
- nesqp->nesqp_context->srtt = 0;
- nesqp->nesqp_context->rttvar = cpu_to_le32(0x6);
- nesqp->nesqp_context->ssthresh = cpu_to_le32(0x3FFFC000);
- nesqp->nesqp_context->cwnd = cpu_to_le32(2 * cm_node->tcp_cntxt.mss);
- nesqp->nesqp_context->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
- nesqp->nesqp_context->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
- nesqp->nesqp_context->max_snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd);
-
- nes_debug(NES_DBG_CM, "QP%u: rcv_nxt = 0x%08X, snd_nxt = 0x%08X,"
- " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n",
- nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
- le32_to_cpu(nesqp->nesqp_context->snd_nxt),
- cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale),
- le32_to_cpu(nesqp->nesqp_context->rcv_wnd),
- le32_to_cpu(nesqp->nesqp_context->misc));
- nes_debug(NES_DBG_CM, " snd_wnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->snd_wnd));
- nes_debug(NES_DBG_CM, " snd_cwnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->cwnd));
- nes_debug(NES_DBG_CM, " max_swnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->max_snd_wnd));
-
- nes_debug(NES_DBG_CM, "Change cm_node state to TSA\n");
- cm_node->state = NES_CM_STATE_TSA;
-
- return ret;
-}
-
-
-/**
- * nes_cm_disconn
- */
-int nes_cm_disconn(struct nes_qp *nesqp)
-{
- struct disconn_work *work;
-
- work = kzalloc(sizeof *work, GFP_ATOMIC);
- if (!work)
- return -ENOMEM; /* Timer will clean up */
-
- nes_add_ref(&nesqp->ibqp);
- work->nesqp = nesqp;
- INIT_WORK(&work->work, nes_disconnect_worker);
- queue_work(g_cm_core->disconn_wq, &work->work);
- return 0;
-}
-
-
-/**
- * nes_disconnect_worker
- */
-static void nes_disconnect_worker(struct work_struct *work)
-{
- struct disconn_work *dwork = container_of(work, struct disconn_work, work);
- struct nes_qp *nesqp = dwork->nesqp;
-
- kfree(dwork);
- nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n",
- nesqp->last_aeq, nesqp->hwqp.qp_id);
- nes_cm_disconn_true(nesqp);
- nes_rem_ref(&nesqp->ibqp);
-}
-
-
-/**
- * nes_cm_disconn_true
- */
-static int nes_cm_disconn_true(struct nes_qp *nesqp)
-{
- unsigned long flags;
- int ret = 0;
- struct iw_cm_id *cm_id;
- struct iw_cm_event cm_event;
- struct nes_vnic *nesvnic;
- u16 last_ae;
- u8 original_hw_tcp_state;
- u8 original_ibqp_state;
- int disconn_status = 0;
- int issue_disconn = 0;
- int issue_close = 0;
- int issue_flush = 0;
- u32 flush_q = NES_CQP_FLUSH_RQ;
- struct ib_event ibevent;
-
- if (!nesqp) {
- nes_debug(NES_DBG_CM, "disconnect_worker nesqp is NULL\n");
- return -1;
- }
-
- spin_lock_irqsave(&nesqp->lock, flags);
- cm_id = nesqp->cm_id;
- /* make sure we havent already closed this connection */
- if (!cm_id) {
- nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n",
- nesqp->hwqp.qp_id);
- spin_unlock_irqrestore(&nesqp->lock, flags);
- return -1;
- }
-
- nesvnic = to_nesvnic(nesqp->ibqp.device);
- nes_debug(NES_DBG_CM, "Disconnecting QP%u\n", nesqp->hwqp.qp_id);
-
- original_hw_tcp_state = nesqp->hw_tcp_state;
- original_ibqp_state = nesqp->ibqp_state;
- last_ae = nesqp->last_aeq;
-
- if (nesqp->term_flags) {
- issue_disconn = 1;
- issue_close = 1;
- nesqp->cm_id = NULL;
- del_timer(&nesqp->terminate_timer);
- if (nesqp->flush_issued == 0) {
- nesqp->flush_issued = 1;
- issue_flush = 1;
- }
- } else if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
- ((original_ibqp_state == IB_QPS_RTS) &&
- (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
- issue_disconn = 1;
- if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET)
- disconn_status = -ECONNRESET;
- }
-
- if (((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
- (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) ||
- (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) ||
- (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
- issue_close = 1;
- nesqp->cm_id = NULL;
- if (nesqp->flush_issued == 0) {
- nesqp->flush_issued = 1;
- issue_flush = 1;
- }
- }
-
- spin_unlock_irqrestore(&nesqp->lock, flags);
-
- if ((issue_flush) && (nesqp->destroyed == 0)) {
- /* Flush the queue(s) */
- if (nesqp->hw_iwarp_state >= NES_AEQE_IWARP_STATE_TERMINATE)
- flush_q |= NES_CQP_FLUSH_SQ;
- flush_wqes(nesvnic->nesdev, nesqp, flush_q, 1);
-
- if (nesqp->term_flags) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.event = nesqp->terminate_eventtype;
- ibevent.element.qp = &nesqp->ibqp;
- if (nesqp->ibqp.event_handler)
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
- }
- }
-
- if ((cm_id) && (cm_id->event_handler)) {
- if (issue_disconn) {
- atomic_inc(&cm_disconnects);
- cm_event.event = IW_CM_EVENT_DISCONNECT;
- cm_event.status = disconn_status;
- cm_event.local_addr = cm_id->m_local_addr;
- cm_event.remote_addr = cm_id->m_remote_addr;
- cm_event.private_data = NULL;
- cm_event.private_data_len = 0;
-
- nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event"
- " for QP%u, SQ Head = %u, SQ Tail = %u. "
- "cm_id = %p, refcount = %u.\n",
- nesqp->hwqp.qp_id, nesqp->hwqp.sq_head,
- nesqp->hwqp.sq_tail, cm_id,
- atomic_read(&nesqp->refcount));
-
- ret = cm_id->event_handler(cm_id, &cm_event);
- if (ret)
- nes_debug(NES_DBG_CM, "OFA CM event_handler "
- "returned, ret=%d\n", ret);
- }
-
- if (issue_close) {
- atomic_inc(&cm_closes);
- nes_disconnect(nesqp, 1);
-
- cm_id->provider_data = nesqp;
- /* Send up the close complete event */
- cm_event.event = IW_CM_EVENT_CLOSE;
- cm_event.status = 0;
- cm_event.provider_data = cm_id->provider_data;
- cm_event.local_addr = cm_id->m_local_addr;
- cm_event.remote_addr = cm_id->m_remote_addr;
- cm_event.private_data = NULL;
- cm_event.private_data_len = 0;
-
- ret = cm_id->event_handler(cm_id, &cm_event);
- if (ret)
- nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
-
- cm_id->rem_ref(cm_id);
- }
- }
-
- return 0;
-}
-
-
-/**
- * nes_disconnect
- */
-static int nes_disconnect(struct nes_qp *nesqp, int abrupt)
-{
- int ret = 0;
- struct nes_vnic *nesvnic;
- struct nes_device *nesdev;
- struct nes_ib_device *nesibdev;
-
- nesvnic = to_nesvnic(nesqp->ibqp.device);
- if (!nesvnic)
- return -EINVAL;
-
- nesdev = nesvnic->nesdev;
- nesibdev = nesvnic->nesibdev;
-
- nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
- netdev_refcnt_read(nesvnic->netdev));
-
- if (nesqp->active_conn) {
-
- /* indicate this connection is NOT active */
- nesqp->active_conn = 0;
- } else {
- /* Need to free the Last Streaming Mode Message */
- if (nesqp->ietf_frame) {
- if (nesqp->lsmm_mr)
- nesibdev->ibdev.ops.dereg_mr(nesqp->lsmm_mr,
- NULL);
- pci_free_consistent(nesdev->pcidev,
- nesqp->private_data_len + nesqp->ietf_frame_size,
- nesqp->ietf_frame, nesqp->ietf_frame_pbase);
- }
- }
-
- /* close the CM node down if it is still active */
- if (nesqp->cm_node) {
- nes_debug(NES_DBG_CM, "Call close API\n");
-
- g_cm_core->api->close(g_cm_core, nesqp->cm_node);
- }
-
- return ret;
-}
-
-
-/**
- * nes_accept
- */
-int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
-{
- u64 u64temp;
- struct ib_qp *ibqp;
- struct nes_qp *nesqp;
- struct nes_vnic *nesvnic;
- struct nes_device *nesdev;
- struct nes_cm_node *cm_node;
- struct nes_adapter *adapter;
- struct ib_qp_attr attr;
- struct iw_cm_event cm_event;
- struct nes_hw_qp_wqe *wqe;
- struct nes_v4_quad nes_quad;
- u32 crc_value;
- int ret;
- int passive_state;
- struct ib_mr *ibmr = NULL;
- struct nes_pd *nespd;
- u64 tagged_offset;
- u8 mpa_frame_offset = 0;
- struct ietf_mpa_v2 *mpa_v2_frame;
- u8 start_addr = 0;
- u8 *start_ptr = &start_addr;
- u8 **start_buff = &start_ptr;
- u16 buff_len = 0;
- struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
- struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
-
- ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
- if (!ibqp)
- return -EINVAL;
-
- /* get all our handles */
- nesqp = to_nesqp(ibqp);
- nesvnic = to_nesvnic(nesqp->ibqp.device);
- nesdev = nesvnic->nesdev;
- adapter = nesdev->nesadapter;
-
- cm_node = (struct nes_cm_node *)cm_id->provider_data;
- nes_debug(NES_DBG_CM, "nes_accept: cm_node= %p nesvnic=%p, netdev=%p,"
- "%s\n", cm_node, nesvnic, nesvnic->netdev,
- nesvnic->netdev->name);
-
- if (NES_CM_STATE_LISTENER_DESTROYED == cm_node->state) {
- if (cm_node->loopbackpartner)
- rem_ref_cm_node(cm_node->cm_core, cm_node->loopbackpartner);
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- return -EINVAL;
- }
-
- passive_state = atomic_add_return(1, &cm_node->passive_state);
- if (passive_state == NES_SEND_RESET_EVENT) {
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- return -ECONNRESET;
- }
- /* associate the node with the QP */
- nesqp->cm_node = (void *)cm_node;
- cm_node->nesqp = nesqp;
-
-
- nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n",
- nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener);
- atomic_inc(&cm_accepts);
-
- nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
- netdev_refcnt_read(nesvnic->netdev));
-
- nesqp->ietf_frame_size = sizeof(struct ietf_mpa_v2);
- /* allocate the ietf frame and space for private data */
- nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev,
- nesqp->ietf_frame_size + conn_param->private_data_len,
- &nesqp->ietf_frame_pbase);
-
- if (!nesqp->ietf_frame) {
- nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n");
- return -ENOMEM;
- }
- mpa_v2_frame = (struct ietf_mpa_v2 *)nesqp->ietf_frame;
-
- if (cm_node->mpa_frame_rev == IETF_MPA_V1)
- mpa_frame_offset = 4;
-
- if (cm_node->mpa_frame_rev == IETF_MPA_V1 ||
- cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
- record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
- }
-
- memcpy(mpa_v2_frame->priv_data, conn_param->private_data,
- conn_param->private_data_len);
-
- cm_build_mpa_frame(cm_node, start_buff, &buff_len, nesqp->ietf_frame, MPA_KEY_REPLY);
- nesqp->private_data_len = conn_param->private_data_len;
-
- /* setup our first outgoing iWarp send WQE (the IETF frame response) */
- wqe = &nesqp->hwqp.sq_vbase[0];
-
- if (raddr->sin_addr.s_addr != laddr->sin_addr.s_addr) {
- u64temp = (unsigned long)nesqp;
- nespd = nesqp->nespd;
- tagged_offset = (u64)(unsigned long)*start_buff;
- ibmr = nes_reg_phys_mr(&nespd->ibpd,
- nesqp->ietf_frame_pbase + mpa_frame_offset,
- buff_len, IB_ACCESS_LOCAL_WRITE,
- &tagged_offset);
- if (IS_ERR(ibmr)) {
- nes_debug(NES_DBG_CM, "Unable to register memory region"
- "for lSMM for cm_node = %p \n",
- cm_node);
- pci_free_consistent(nesdev->pcidev,
- nesqp->private_data_len + nesqp->ietf_frame_size,
- nesqp->ietf_frame, nesqp->ietf_frame_pbase);
- return PTR_ERR(ibmr);
- }
-
- ibmr->pd = &nespd->ibpd;
- ibmr->device = nespd->ibpd.device;
- nesqp->lsmm_mr = ibmr;
-
- u64temp |= NES_SW_CONTEXT_ALIGN >> 1;
- set_wqe_64bit_value(wqe->wqe_words,
- NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
- u64temp);
- wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
- cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING |
- NES_IWARP_SQ_WQE_WRPDU);
- wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] =
- cpu_to_le32(buff_len);
- set_wqe_64bit_value(wqe->wqe_words,
- NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
- (u64)(unsigned long)(*start_buff));
- wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] =
- cpu_to_le32(buff_len);
- wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey;
- if (nesqp->sq_kmapped) {
- nesqp->sq_kmapped = 0;
- kunmap(nesqp->page);
- }
-
- nesqp->nesqp_context->ird_ord_sizes |=
- cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
- NES_QPCONTEXT_ORDIRD_WRPDU);
- } else {
- nesqp->nesqp_context->ird_ord_sizes |=
- cpu_to_le32(NES_QPCONTEXT_ORDIRD_WRPDU);
- }
- nesqp->skip_lsmm = 1;
-
- /* Cache the cm_id in the qp */
- nesqp->cm_id = cm_id;
- cm_node->cm_id = cm_id;
-
- /* nesqp->cm_node = (void *)cm_id->provider_data; */
- cm_id->provider_data = nesqp;
- nesqp->active_conn = 0;
-
- if (cm_node->state == NES_CM_STATE_TSA)
- nes_debug(NES_DBG_CM, "Already state = TSA for cm_node=%p\n",
- cm_node);
-
- nes_cm_init_tsa_conn(nesqp, cm_node);
-
- nesqp->nesqp_context->tcpPorts[0] =
- cpu_to_le16(cm_node->loc_port);
- nesqp->nesqp_context->tcpPorts[1] =
- cpu_to_le16(cm_node->rem_port);
-
- nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->rem_addr);
-
- nesqp->nesqp_context->misc2 |= cpu_to_le32(
- (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
- NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
-
- nesqp->nesqp_context->arp_index_vlan |=
- cpu_to_le32(nes_arp_table(nesdev,
- le32_to_cpu(nesqp->nesqp_context->ip0), NULL,
- NES_ARP_RESOLVE) << 16);
-
- nesqp->nesqp_context->ts_val_delta = cpu_to_le32(
- jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
-
- nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id);
-
- nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(
- ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT));
- nesqp->nesqp_context->ird_ord_sizes |=
- cpu_to_le32((u32)cm_node->ord_size);
-
- memset(&nes_quad, 0, sizeof(nes_quad));
- nes_quad.DstIpAdrIndex =
- cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
- nes_quad.SrcIpadr = htonl(cm_node->rem_addr);
- nes_quad.TcpPorts[0] = htons(cm_node->rem_port);
- nes_quad.TcpPorts[1] = htons(cm_node->loc_port);
-
- /* Produce hash key */
- crc_value = get_crc_value(&nes_quad);
- nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
- nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n",
- nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask);
-
- nesqp->hte_index &= adapter->hte_index_mask;
- nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
-
- cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
-
- nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = "
- "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + "
- "private data length=%u.\n", nesqp->hwqp.qp_id,
- ntohl(raddr->sin_addr.s_addr), ntohs(raddr->sin_port),
- ntohl(laddr->sin_addr.s_addr), ntohs(laddr->sin_port),
- le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
- le32_to_cpu(nesqp->nesqp_context->snd_nxt),
- buff_len);
-
- /* notify OF layer that accept event was successful */
- cm_id->add_ref(cm_id);
- nes_add_ref(&nesqp->ibqp);
-
- cm_event.event = IW_CM_EVENT_ESTABLISHED;
- cm_event.status = 0;
- cm_event.provider_data = (void *)nesqp;
- cm_event.local_addr = cm_id->m_local_addr;
- cm_event.remote_addr = cm_id->m_remote_addr;
- cm_event.private_data = NULL;
- cm_event.private_data_len = 0;
- cm_event.ird = cm_node->ird_size;
- cm_event.ord = cm_node->ord_size;
-
- ret = cm_id->event_handler(cm_id, &cm_event);
- attr.qp_state = IB_QPS_RTS;
- nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
- if (cm_node->loopbackpartner) {
- cm_node->loopbackpartner->mpa_frame_size =
- nesqp->private_data_len;
- /* copy entire MPA frame to our cm_node's frame */
- memcpy(cm_node->loopbackpartner->mpa_frame_buf,
- conn_param->private_data, conn_param->private_data_len);
- create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED);
- }
- if (ret)
- printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
- "ret=%d\n", __func__, __LINE__, ret);
-
- return 0;
-}
-
-
-/**
- * nes_reject
- */
-int nes_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
- struct nes_cm_node *cm_node;
- struct nes_cm_node *loopback;
- struct nes_cm_core *cm_core;
- u8 *start_buff;
-
- atomic_inc(&cm_rejects);
- cm_node = (struct nes_cm_node *)cm_id->provider_data;
- loopback = cm_node->loopbackpartner;
- cm_core = cm_node->cm_core;
- cm_node->cm_id = cm_id;
-
- if (pdata_len + sizeof(struct ietf_mpa_v2) > MAX_CM_BUFFER)
- return -EINVAL;
-
- if (loopback) {
- memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len);
- loopback->mpa_frame.priv_data_len = pdata_len;
- loopback->mpa_frame_size = pdata_len;
- } else {
- start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2);
- cm_node->mpa_frame_size = pdata_len;
- memcpy(start_buff, pdata, pdata_len);
- }
- return cm_core->api->reject(cm_core, cm_node);
-}
-
-
-/**
- * nes_connect
- * setup and launch cm connect node
- */
-int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
-{
- struct ib_qp *ibqp;
- struct nes_qp *nesqp;
- struct nes_vnic *nesvnic;
- struct nes_device *nesdev;
- struct nes_cm_node *cm_node;
- struct nes_cm_info cm_info;
- int apbvt_set = 0;
- struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
- struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
-
- if (cm_id->remote_addr.ss_family != AF_INET)
- return -ENOSYS;
- ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
- if (!ibqp)
- return -EINVAL;
- nesqp = to_nesqp(ibqp);
- if (!nesqp)
- return -EINVAL;
- nesvnic = to_nesvnic(nesqp->ibqp.device);
- if (!nesvnic)
- return -EINVAL;
- nesdev = nesvnic->nesdev;
- if (!nesdev)
- return -EINVAL;
-
- if (!laddr->sin_port || !raddr->sin_port)
- return -EINVAL;
-
- nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = "
- "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
- ntohl(nesvnic->local_ipaddr), ntohl(raddr->sin_addr.s_addr),
- ntohs(raddr->sin_port), ntohl(laddr->sin_addr.s_addr),
- ntohs(laddr->sin_port));
-
- atomic_inc(&cm_connects);
- nesqp->active_conn = 1;
-
- /* cache the cm_id in the qp */
- nesqp->cm_id = cm_id;
- cm_id->provider_data = nesqp;
- nesqp->private_data_len = conn_param->private_data_len;
-
- nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord);
- nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
- conn_param->private_data_len);
-
- /* set up the connection params for the node */
- cm_info.loc_addr = ntohl(laddr->sin_addr.s_addr);
- cm_info.loc_port = ntohs(laddr->sin_port);
- cm_info.rem_addr = ntohl(raddr->sin_addr.s_addr);
- cm_info.rem_port = ntohs(raddr->sin_port);
- cm_info.cm_id = cm_id;
- cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
-
- if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) {
- nes_manage_apbvt(nesvnic, cm_info.loc_port,
- PCI_FUNC(nesdev->pcidev->devfn),
- NES_MANAGE_APBVT_ADD);
- apbvt_set = 1;
- }
-
- cm_id->add_ref(cm_id);
-
- /* create a connect CM node connection */
- cm_node = g_cm_core->api->connect(g_cm_core, nesvnic,
- conn_param->private_data_len, (void *)conn_param->private_data,
- &cm_info);
- if (!cm_node) {
- if (apbvt_set)
- nes_manage_apbvt(nesvnic, cm_info.loc_port,
- PCI_FUNC(nesdev->pcidev->devfn),
- NES_MANAGE_APBVT_DEL);
-
- nes_debug(NES_DBG_NLMSG, "Delete loc_port = %04X\n",
- cm_info.loc_port);
- cm_id->rem_ref(cm_id);
- return -ENOMEM;
- }
-
- record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
- if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
- cm_node->ord_size == 0)
- cm_node->ord_size = 1;
-
- cm_node->apbvt_set = apbvt_set;
- cm_node->tos = cm_id->tos;
- nesqp->cm_node = cm_node;
- cm_node->nesqp = nesqp;
- nes_add_ref(&nesqp->ibqp);
-
- return 0;
-}
-
-
-/**
- * nes_create_listen
- */
-int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
-{
- struct nes_vnic *nesvnic;
- struct nes_cm_listener *cm_node;
- struct nes_cm_info cm_info;
- int err;
- struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
-
- nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n",
- cm_id, ntohs(laddr->sin_port));
-
- if (cm_id->m_local_addr.ss_family != AF_INET)
- return -ENOSYS;
- nesvnic = to_nesvnic(cm_id->device);
- if (!nesvnic)
- return -EINVAL;
-
- nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n",
- nesvnic, nesvnic->netdev, nesvnic->netdev->name);
-
- nes_debug(NES_DBG_CM, "nesvnic->local_ipaddr=0x%08x, sin_addr.s_addr=0x%08x\n",
- nesvnic->local_ipaddr, laddr->sin_addr.s_addr);
-
- /* setup listen params in our api call struct */
- cm_info.loc_addr = ntohl(nesvnic->local_ipaddr);
- cm_info.loc_port = ntohs(laddr->sin_port);
- cm_info.backlog = backlog;
- cm_info.cm_id = cm_id;
-
- cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
-
- cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info);
- if (!cm_node) {
- printk(KERN_ERR "%s[%u] Error returned from listen API call\n",
- __func__, __LINE__);
- return -ENOMEM;
- }
-
- cm_id->provider_data = cm_node;
- cm_node->tos = cm_id->tos;
-
- if (!cm_node->reused_node) {
- err = nes_manage_apbvt(nesvnic, cm_node->loc_port,
- PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
- NES_MANAGE_APBVT_ADD);
- if (err) {
- printk(KERN_ERR "nes_manage_apbvt call returned %d.\n",
- err);
- g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node);
- return err;
- }
- atomic_inc(&cm_listens_created);
- }
-
- cm_id->add_ref(cm_id);
- cm_id->provider_data = (void *)cm_node;
-
-
- return 0;
-}
-
-
-/**
- * nes_destroy_listen
- */
-int nes_destroy_listen(struct iw_cm_id *cm_id)
-{
- if (cm_id->provider_data)
- g_cm_core->api->stop_listener(g_cm_core, cm_id->provider_data);
- else
- nes_debug(NES_DBG_CM, "cm_id->provider_data was NULL\n");
-
- cm_id->rem_ref(cm_id);
-
- return 0;
-}
-
-
-/**
- * nes_cm_recv
- */
-int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice)
-{
- int rc = 0;
-
- cm_packets_received++;
- if ((g_cm_core) && (g_cm_core->api))
- rc = g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb);
- else
- nes_debug(NES_DBG_CM, "Unable to process packet for CM,"
- " cm is not setup properly.\n");
-
- return rc;
-}
-
-
-/**
- * nes_cm_start
- * Start and init a cm core module
- */
-int nes_cm_start(void)
-{
- nes_debug(NES_DBG_CM, "\n");
- /* create the primary CM core, pass this handle to subsequent core inits */
- g_cm_core = nes_cm_alloc_core();
- if (g_cm_core)
- return 0;
- else
- return -ENOMEM;
-}
-
-
-/**
- * nes_cm_stop
- * stop and dealloc all cm core instances
- */
-int nes_cm_stop(void)
-{
- g_cm_core->api->destroy_cm_core(g_cm_core);
- return 0;
-}
-
-
-/**
- * cm_event_connected
- * handle a connected event, setup QPs and HW
- */
-static void cm_event_connected(struct nes_cm_event *event)
-{
- struct nes_qp *nesqp;
- struct nes_vnic *nesvnic;
- struct nes_device *nesdev;
- struct nes_cm_node *cm_node;
- struct nes_adapter *nesadapter;
- struct ib_qp_attr attr;
- struct iw_cm_id *cm_id;
- struct iw_cm_event cm_event;
- struct nes_v4_quad nes_quad;
- u32 crc_value;
- int ret;
- struct sockaddr_in *laddr;
- struct sockaddr_in *raddr;
- struct sockaddr_in *cm_event_laddr;
-
- /* get all our handles */
- cm_node = event->cm_node;
- cm_id = cm_node->cm_id;
- nes_debug(NES_DBG_CM, "cm_event_connected - %p - cm_id = %p\n", cm_node, cm_id);
- nesqp = (struct nes_qp *)cm_id->provider_data;
- nesvnic = to_nesvnic(nesqp->ibqp.device);
- nesdev = nesvnic->nesdev;
- nesadapter = nesdev->nesadapter;
- laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
- raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
- cm_event_laddr = (struct sockaddr_in *)&cm_event.local_addr;
-
- if (nesqp->destroyed)
- return;
- atomic_inc(&cm_connecteds);
- nes_debug(NES_DBG_CM, "QP%u attempting to connect to 0x%08X:0x%04X on"
- " local port 0x%04X. jiffies = %lu.\n",
- nesqp->hwqp.qp_id, ntohl(raddr->sin_addr.s_addr),
- ntohs(raddr->sin_port), ntohs(laddr->sin_port), jiffies);
-
- nes_cm_init_tsa_conn(nesqp, cm_node);
-
- /* set the QP tsa context */
- nesqp->nesqp_context->tcpPorts[0] =
- cpu_to_le16(cm_node->loc_port);
- nesqp->nesqp_context->tcpPorts[1] =
- cpu_to_le16(cm_node->rem_port);
- nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->rem_addr);
-
- nesqp->nesqp_context->misc2 |= cpu_to_le32(
- (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
- NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
- nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32(
- nes_arp_table(nesdev,
- le32_to_cpu(nesqp->nesqp_context->ip0),
- NULL, NES_ARP_RESOLVE) << 16);
- nesqp->nesqp_context->ts_val_delta = cpu_to_le32(
- jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
- nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id);
- nesqp->nesqp_context->ird_ord_sizes |=
- cpu_to_le32((u32)1 <<
- NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT);
- nesqp->nesqp_context->ird_ord_sizes |=
- cpu_to_le32((u32)cm_node->ord_size);
-
- /* Adjust tail for not having a LSMM */
- /*nesqp->hwqp.sq_tail = 1;*/
-
- build_rdma0_msg(cm_node, &nesqp);
-
- nes_write32(nesdev->regs + NES_WQE_ALLOC,
- (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
-
- memset(&nes_quad, 0, sizeof(nes_quad));
-
- nes_quad.DstIpAdrIndex =
- cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
- nes_quad.SrcIpadr = htonl(cm_node->rem_addr);
- nes_quad.TcpPorts[0] = htons(cm_node->rem_port);
- nes_quad.TcpPorts[1] = htons(cm_node->loc_port);
-
- /* Produce hash key */
- crc_value = get_crc_value(&nes_quad);
- nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
- nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n",
- nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask);
-
- nesqp->hte_index &= nesadapter->hte_index_mask;
- nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
-
- nesqp->ietf_frame = &cm_node->mpa_frame;
- nesqp->private_data_len = (u8)cm_node->mpa_frame_size;
- cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
-
- /* notify OF layer we successfully created the requested connection */
- cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
- cm_event.status = 0;
- cm_event.provider_data = cm_id->provider_data;
- cm_event_laddr->sin_family = AF_INET;
- cm_event_laddr->sin_port = laddr->sin_port;
- cm_event.remote_addr = cm_id->m_remote_addr;
-
- cm_event.private_data = (void *)event->cm_node->mpa_frame_buf;
- cm_event.private_data_len = (u8)event->cm_node->mpa_frame_size;
- cm_event.ird = cm_node->ird_size;
- cm_event.ord = cm_node->ord_size;
-
- cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr);
- ret = cm_id->event_handler(cm_id, &cm_event);
- nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
-
- if (ret)
- printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
- "ret=%d\n", __func__, __LINE__, ret);
- attr.qp_state = IB_QPS_RTS;
- nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
-
- nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = "
- "%lu\n", nesqp->hwqp.qp_id, jiffies);
-
- return;
-}
-
-
-/**
- * cm_event_connect_error
- */
-static void cm_event_connect_error(struct nes_cm_event *event)
-{
- struct nes_qp *nesqp;
- struct iw_cm_id *cm_id;
- struct iw_cm_event cm_event;
- /* struct nes_cm_info cm_info; */
- int ret;
-
- if (!event->cm_node)
- return;
-
- cm_id = event->cm_node->cm_id;
- if (!cm_id)
- return;
-
- nes_debug(NES_DBG_CM, "cm_node=%p, cm_id=%p\n", event->cm_node, cm_id);
- nesqp = cm_id->provider_data;
-
- if (!nesqp)
- return;
-
- /* notify OF layer about this connection error event */
- /* cm_id->rem_ref(cm_id); */
- nesqp->cm_id = NULL;
- cm_id->provider_data = NULL;
- cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
- cm_event.status = -ECONNRESET;
- cm_event.provider_data = cm_id->provider_data;
- cm_event.local_addr = cm_id->m_local_addr;
- cm_event.remote_addr = cm_id->m_remote_addr;
- cm_event.private_data = NULL;
- cm_event.private_data_len = 0;
-
-#ifdef CONFIG_INFINIBAND_NES_DEBUG
- {
- struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *)
- &cm_event.local_addr;
- struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *)
- &cm_event.remote_addr;
- nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, remote_addr=%08x\n",
- cm_event_laddr->sin_addr.s_addr, cm_event_raddr->sin_addr.s_addr);
- }
-#endif
-
- ret = cm_id->event_handler(cm_id, &cm_event);
- nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
- if (ret)
- printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
- "ret=%d\n", __func__, __LINE__, ret);
- cm_id->rem_ref(cm_id);
-
- rem_ref_cm_node(event->cm_node->cm_core, event->cm_node);
- return;
-}
-
-
-/**
- * cm_event_reset
- */
-static void cm_event_reset(struct nes_cm_event *event)
-{
- struct nes_qp *nesqp;
- struct iw_cm_id *cm_id;
- struct iw_cm_event cm_event;
- /* struct nes_cm_info cm_info; */
- int ret;
-
- if (!event->cm_node)
- return;
-
- if (!event->cm_node->cm_id)
- return;
-
- cm_id = event->cm_node->cm_id;
-
- nes_debug(NES_DBG_CM, "%p - cm_id = %p\n", event->cm_node, cm_id);
- nesqp = cm_id->provider_data;
- if (!nesqp)
- return;
-
- nesqp->cm_id = NULL;
- /* cm_id->provider_data = NULL; */
- cm_event.event = IW_CM_EVENT_DISCONNECT;
- cm_event.status = -ECONNRESET;
- cm_event.provider_data = cm_id->provider_data;
- cm_event.local_addr = cm_id->m_local_addr;
- cm_event.remote_addr = cm_id->m_remote_addr;
- cm_event.private_data = NULL;
- cm_event.private_data_len = 0;
-
- cm_id->add_ref(cm_id);
- ret = cm_id->event_handler(cm_id, &cm_event);
- atomic_inc(&cm_closes);
- cm_event.event = IW_CM_EVENT_CLOSE;
- cm_event.status = 0;
- cm_event.provider_data = cm_id->provider_data;
- cm_event.local_addr = cm_id->m_local_addr;
- cm_event.remote_addr = cm_id->m_remote_addr;
- cm_event.private_data = NULL;
- cm_event.private_data_len = 0;
- nes_debug(NES_DBG_CM, "NODE %p Generating CLOSE\n", event->cm_node);
- ret = cm_id->event_handler(cm_id, &cm_event);
-
- nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
-
-
- /* notify OF layer about this connection error event */
- cm_id->rem_ref(cm_id);
-
- return;
-}
-
-
-/**
- * cm_event_mpa_req
- */
-static void cm_event_mpa_req(struct nes_cm_event *event)
-{
- struct iw_cm_id *cm_id;
- struct iw_cm_event cm_event;
- int ret;
- struct nes_cm_node *cm_node;
- struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *)
- &cm_event.local_addr;
- struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *)
- &cm_event.remote_addr;
-
- cm_node = event->cm_node;
- if (!cm_node)
- return;
- cm_id = cm_node->cm_id;
-
- atomic_inc(&cm_connect_reqs);
- nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n",
- cm_node, cm_id, jiffies);
-
- cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
- cm_event.status = 0;
- cm_event.provider_data = (void *)cm_node;
-
- cm_event_laddr->sin_family = AF_INET;
- cm_event_laddr->sin_port = htons(event->cm_info.loc_port);
- cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr);
-
- cm_event_raddr->sin_family = AF_INET;
- cm_event_raddr->sin_port = htons(event->cm_info.rem_port);
- cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
- cm_event.private_data = cm_node->mpa_frame_buf;
- cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
- if (cm_node->mpa_frame_rev == IETF_MPA_V1) {
- cm_event.ird = NES_MAX_IRD;
- cm_event.ord = NES_MAX_ORD;
- } else {
- cm_event.ird = cm_node->ird_size;
- cm_event.ord = cm_node->ord_size;
- }
-
- ret = cm_id->event_handler(cm_id, &cm_event);
- if (ret)
- printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n",
- __func__, __LINE__, ret);
- return;
-}
-
-
-static void cm_event_mpa_reject(struct nes_cm_event *event)
-{
- struct iw_cm_id *cm_id;
- struct iw_cm_event cm_event;
- struct nes_cm_node *cm_node;
- int ret;
- struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *)
- &cm_event.local_addr;
- struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *)
- &cm_event.remote_addr;
-
- cm_node = event->cm_node;
- if (!cm_node)
- return;
- cm_id = cm_node->cm_id;
-
- atomic_inc(&cm_connect_reqs);
- nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n",
- cm_node, cm_id, jiffies);
-
- cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
- cm_event.status = -ECONNREFUSED;
- cm_event.provider_data = cm_id->provider_data;
-
- cm_event_laddr->sin_family = AF_INET;
- cm_event_laddr->sin_port = htons(event->cm_info.loc_port);
- cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr);
-
- cm_event_raddr->sin_family = AF_INET;
- cm_event_raddr->sin_port = htons(event->cm_info.rem_port);
- cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
-
- cm_event.private_data = cm_node->mpa_frame_buf;
- cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
-
- nes_debug(NES_DBG_CM, "call CM_EVENT_MPA_REJECTED, local_addr=%08x, "
- "remove_addr=%08x\n",
- cm_event_laddr->sin_addr.s_addr,
- cm_event_raddr->sin_addr.s_addr);
-
- ret = cm_id->event_handler(cm_id, &cm_event);
- if (ret)
- printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n",
- __func__, __LINE__, ret);
-
- return;
-}
-
-
-static void nes_cm_event_handler(struct work_struct *);
-
-/**
- * nes_cm_post_event
- * post an event to the cm event handler
- */
-static int nes_cm_post_event(struct nes_cm_event *event)
-{
- atomic_inc(&event->cm_node->cm_core->events_posted);
- add_ref_cm_node(event->cm_node);
- event->cm_info.cm_id->add_ref(event->cm_info.cm_id);
- INIT_WORK(&event->event_work, nes_cm_event_handler);
- nes_debug(NES_DBG_CM, "cm_node=%p queue_work, event=%p\n",
- event->cm_node, event);
-
- queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
-
- nes_debug(NES_DBG_CM, "Exit\n");
- return 0;
-}
-
-
-/**
- * nes_cm_event_handler
- * worker function to handle cm events
- * will free instance of nes_cm_event
- */
-static void nes_cm_event_handler(struct work_struct *work)
-{
- struct nes_cm_event *event = container_of(work, struct nes_cm_event,
- event_work);
- struct nes_cm_core *cm_core;
-
- if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core))
- return;
-
- cm_core = event->cm_node->cm_core;
- nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n",
- event, event->type, atomic_read(&cm_core->events_posted));
-
- switch (event->type) {
- case NES_CM_EVENT_MPA_REQ:
- cm_event_mpa_req(event);
- nes_debug(NES_DBG_CM, "cm_node=%p CM Event: MPA REQUEST\n",
- event->cm_node);
- break;
- case NES_CM_EVENT_RESET:
- nes_debug(NES_DBG_CM, "cm_node = %p CM Event: RESET\n",
- event->cm_node);
- cm_event_reset(event);
- break;
- case NES_CM_EVENT_CONNECTED:
- if ((!event->cm_node->cm_id) ||
- (event->cm_node->state != NES_CM_STATE_TSA))
- break;
- cm_event_connected(event);
- nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n");
- break;
- case NES_CM_EVENT_MPA_REJECT:
- if ((!event->cm_node->cm_id) ||
- (event->cm_node->state == NES_CM_STATE_TSA))
- break;
- cm_event_mpa_reject(event);
- nes_debug(NES_DBG_CM, "CM Event: REJECT\n");
- break;
-
- case NES_CM_EVENT_ABORTED:
- if ((!event->cm_node->cm_id) ||
- (event->cm_node->state == NES_CM_STATE_TSA))
- break;
- cm_event_connect_error(event);
- nes_debug(NES_DBG_CM, "CM Event: ABORTED\n");
- break;
- case NES_CM_EVENT_DROPPED_PKT:
- nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n");
- break;
- default:
- nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n");
- break;
- }
-
- atomic_dec(&cm_core->events_posted);
- event->cm_info.cm_id->rem_ref(event->cm_info.cm_id);
- rem_ref_cm_node(cm_core, event->cm_node);
- kfree(event);
-
- return;
-}
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
deleted file mode 100644
index b9cc02b4e8d5..000000000000
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ /dev/null
@@ -1,470 +0,0 @@
-/*
- * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef NES_CM_H
-#define NES_CM_H
-
-#define QUEUE_EVENTS
-
-#define NES_MANAGE_APBVT_DEL 0
-#define NES_MANAGE_APBVT_ADD 1
-
-#define NES_MPA_REQUEST_ACCEPT 1
-#define NES_MPA_REQUEST_REJECT 2
-
-/* IETF MPA -- defines, enums, structs */
-#define IEFT_MPA_KEY_REQ "MPA ID Req Frame"
-#define IEFT_MPA_KEY_REP "MPA ID Rep Frame"
-#define IETF_MPA_KEY_SIZE 16
-#define IETF_MPA_VERSION 1
-#define IETF_MAX_PRIV_DATA_LEN 512
-#define IETF_MPA_FRAME_SIZE 20
-#define IETF_RTR_MSG_SIZE 4
-#define IETF_MPA_V2_FLAG 0x10
-
-/* IETF RTR MSG Fields */
-#define IETF_PEER_TO_PEER 0x8000
-#define IETF_FLPDU_ZERO_LEN 0x4000
-#define IETF_RDMA0_WRITE 0x8000
-#define IETF_RDMA0_READ 0x4000
-#define IETF_NO_IRD_ORD 0x3FFF
-#define NES_MAX_IRD 0x40
-#define NES_MAX_ORD 0x7F
-
-enum ietf_mpa_flags {
- IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */
- IETF_MPA_FLAGS_CRC = 0x40, /* receive Markers */
- IETF_MPA_FLAGS_REJECT = 0x20, /* Reject */
-};
-
-struct ietf_mpa_v1 {
- u8 key[IETF_MPA_KEY_SIZE];
- u8 flags;
- u8 rev;
- __be16 priv_data_len;
- u8 priv_data[0];
-};
-
-#define ietf_mpa_req_resp_frame ietf_mpa_frame
-
-struct ietf_rtr_msg {
- __be16 ctrl_ird;
- __be16 ctrl_ord;
-};
-
-struct ietf_mpa_v2 {
- u8 key[IETF_MPA_KEY_SIZE];
- u8 flags;
- u8 rev;
- __be16 priv_data_len;
- struct ietf_rtr_msg rtr_msg;
- u8 priv_data[0];
-};
-
-struct nes_v4_quad {
- u32 rsvd0;
- __le32 DstIpAdrIndex; /* Only most significant 5 bits are valid */
- __be32 SrcIpadr;
- __be16 TcpPorts[2]; /* src is low, dest is high */
-};
-
-struct nes_cm_node;
-enum nes_timer_type {
- NES_TIMER_TYPE_SEND,
- NES_TIMER_TYPE_RECV,
- NES_TIMER_NODE_CLEANUP,
- NES_TIMER_TYPE_CLOSE,
-};
-
-#define NES_PASSIVE_STATE_INDICATED 0
-#define NES_DO_NOT_SEND_RESET_EVENT 1
-#define NES_SEND_RESET_EVENT 2
-
-#define MAX_NES_IFS 4
-
-#define SET_ACK 1
-#define SET_SYN 2
-#define SET_FIN 4
-#define SET_RST 8
-
-#define TCP_OPTIONS_PADDING 3
-
-struct option_base {
- u8 optionnum;
- u8 length;
-};
-
-enum option_numbers {
- OPTION_NUMBER_END,
- OPTION_NUMBER_NONE,
- OPTION_NUMBER_MSS,
- OPTION_NUMBER_WINDOW_SCALE,
- OPTION_NUMBER_SACK_PERM,
- OPTION_NUMBER_SACK,
- OPTION_NUMBER_WRITE0 = 0xbc
-};
-
-struct option_mss {
- u8 optionnum;
- u8 length;
- __be16 mss;
-};
-
-struct option_windowscale {
- u8 optionnum;
- u8 length;
- u8 shiftcount;
-};
-
-union all_known_options {
- char as_end;
- struct option_base as_base;
- struct option_mss as_mss;
- struct option_windowscale as_windowscale;
-};
-
-struct nes_timer_entry {
- struct list_head list;
- unsigned long timetosend; /* jiffies */
- struct sk_buff *skb;
- u32 type;
- u32 retrycount;
- u32 retranscount;
- u32 context;
- u32 seq_num;
- u32 send_retrans;
- int close_when_complete;
- struct net_device *netdev;
-};
-
-#define NES_DEFAULT_RETRYS 64
-#define NES_DEFAULT_RETRANS 8
-#ifdef CONFIG_INFINIBAND_NES_DEBUG
-#define NES_RETRY_TIMEOUT (1000*HZ/1000)
-#else
-#define NES_RETRY_TIMEOUT (3000*HZ/1000)
-#endif
-#define NES_SHORT_TIME (10)
-#define NES_LONG_TIME (2000*HZ/1000)
-#define NES_MAX_TIMEOUT ((unsigned long) (12*HZ))
-
-#define NES_CM_HASHTABLE_SIZE 1024
-#define NES_CM_TCP_TIMER_INTERVAL 3000
-#define NES_CM_DEFAULT_MTU 1540
-#define NES_CM_DEFAULT_FRAME_CNT 10
-#define NES_CM_THREAD_STACK_SIZE 256
-#define NES_CM_DEFAULT_RCV_WND 64240 // before we know that window scaling is allowed
-#define NES_CM_DEFAULT_RCV_WND_SCALED 256960 // after we know that window scaling is allowed
-#define NES_CM_DEFAULT_RCV_WND_SCALE 2
-#define NES_CM_DEFAULT_FREE_PKTS 0x000A
-#define NES_CM_FREE_PKT_LO_WATERMARK 2
-
-#define NES_CM_DEFAULT_MSS 536
-
-#define NES_CM_DEF_SEQ 0x159bf75f
-#define NES_CM_DEF_LOCAL_ID 0x3b47
-
-#define NES_CM_DEF_SEQ2 0x18ed5740
-#define NES_CM_DEF_LOCAL_ID2 0xb807
-#define MAX_CM_BUFFER (IETF_MPA_FRAME_SIZE + IETF_RTR_MSG_SIZE + IETF_MAX_PRIV_DATA_LEN)
-
-typedef u32 nes_addr_t;
-
-#define nes_cm_tsa_context nes_qp_context
-
-struct nes_qp;
-
-/* cm node transition states */
-enum nes_cm_node_state {
- NES_CM_STATE_UNKNOWN,
- NES_CM_STATE_INITED,
- NES_CM_STATE_LISTENING,
- NES_CM_STATE_SYN_RCVD,
- NES_CM_STATE_SYN_SENT,
- NES_CM_STATE_ONE_SIDE_ESTABLISHED,
- NES_CM_STATE_ESTABLISHED,
- NES_CM_STATE_ACCEPTING,
- NES_CM_STATE_MPAREQ_SENT,
- NES_CM_STATE_MPAREQ_RCVD,
- NES_CM_STATE_MPAREJ_RCVD,
- NES_CM_STATE_TSA,
- NES_CM_STATE_FIN_WAIT1,
- NES_CM_STATE_FIN_WAIT2,
- NES_CM_STATE_CLOSE_WAIT,
- NES_CM_STATE_TIME_WAIT,
- NES_CM_STATE_LAST_ACK,
- NES_CM_STATE_CLOSING,
- NES_CM_STATE_LISTENER_DESTROYED,
- NES_CM_STATE_CLOSED
-};
-
-enum mpa_frame_version {
- IETF_MPA_V1 = 1,
- IETF_MPA_V2 = 2
-};
-
-enum mpa_frame_key {
- MPA_KEY_REQUEST,
- MPA_KEY_REPLY
-};
-
-enum send_rdma0 {
- SEND_RDMA_READ_ZERO = 1,
- SEND_RDMA_WRITE_ZERO = 2
-};
-
-enum nes_tcpip_pkt_type {
- NES_PKT_TYPE_UNKNOWN,
- NES_PKT_TYPE_SYN,
- NES_PKT_TYPE_SYNACK,
- NES_PKT_TYPE_ACK,
- NES_PKT_TYPE_FIN,
- NES_PKT_TYPE_RST
-};
-
-
-/* type of nes connection */
-enum nes_cm_conn_type {
- NES_CM_IWARP_CONN_TYPE,
-};
-
-/* CM context params */
-struct nes_cm_tcp_context {
- u8 client;
-
- u32 loc_seq_num;
- u32 loc_ack_num;
- u32 rem_ack_num;
- u32 rcv_nxt;
-
- u32 loc_id;
- u32 rem_id;
-
- u32 snd_wnd;
- u32 max_snd_wnd;
-
- u32 rcv_wnd;
- u32 mss;
- u8 snd_wscale;
- u8 rcv_wscale;
-
- struct nes_cm_tsa_context tsa_cntxt;
-};
-
-
-enum nes_cm_listener_state {
- NES_CM_LISTENER_PASSIVE_STATE = 1,
- NES_CM_LISTENER_ACTIVE_STATE = 2,
- NES_CM_LISTENER_EITHER_STATE = 3
-};
-
-struct nes_cm_listener {
- struct list_head list;
- struct nes_cm_core *cm_core;
- u8 loc_mac[ETH_ALEN];
- nes_addr_t loc_addr;
- u16 loc_port;
- struct iw_cm_id *cm_id;
- enum nes_cm_conn_type conn_type;
- atomic_t ref_count;
- struct nes_vnic *nesvnic;
- atomic_t pend_accepts_cnt;
- int backlog;
- enum nes_cm_listener_state listener_state;
- u32 reused_node;
- u8 tos;
-};
-
-/* per connection node and node state information */
-struct nes_cm_node {
- nes_addr_t loc_addr, rem_addr;
- u16 loc_port, rem_port;
-
- u8 loc_mac[ETH_ALEN];
- u8 rem_mac[ETH_ALEN];
-
- enum nes_cm_node_state state;
- struct nes_cm_tcp_context tcp_cntxt;
- struct nes_cm_core *cm_core;
- struct sk_buff_head resend_list;
- atomic_t ref_count;
- struct net_device *netdev;
-
- struct nes_cm_node *loopbackpartner;
-
- struct nes_timer_entry *send_entry;
- struct nes_timer_entry *recv_entry;
- spinlock_t retrans_list_lock;
- enum send_rdma0 send_rdma0_op;
-
- union {
- struct ietf_mpa_v1 mpa_frame;
- struct ietf_mpa_v2 mpa_v2_frame;
- u8 mpa_frame_buf[MAX_CM_BUFFER];
- };
- enum mpa_frame_version mpa_frame_rev;
- u16 ird_size;
- u16 ord_size;
- u16 mpav2_ird_ord;
-
- u16 mpa_frame_size;
- struct iw_cm_id *cm_id;
- struct list_head list;
- bool accelerated;
- struct nes_cm_listener *listener;
- enum nes_cm_conn_type conn_type;
- struct nes_vnic *nesvnic;
- int apbvt_set;
- int accept_pend;
- struct list_head timer_entry;
- struct list_head reset_entry;
- struct nes_qp *nesqp;
- atomic_t passive_state;
- u8 tos;
-};
-
-/* structure for client or CM to fill when making CM api calls. */
-/* - only need to set relevant data, based on op. */
-struct nes_cm_info {
- union {
- struct iw_cm_id *cm_id;
- struct net_device *netdev;
- };
-
- u16 loc_port;
- u16 rem_port;
- nes_addr_t loc_addr;
- nes_addr_t rem_addr;
- enum nes_cm_conn_type conn_type;
- int backlog;
-};
-
-/* CM event codes */
-enum nes_cm_event_type {
- NES_CM_EVENT_UNKNOWN,
- NES_CM_EVENT_ESTABLISHED,
- NES_CM_EVENT_MPA_REQ,
- NES_CM_EVENT_MPA_CONNECT,
- NES_CM_EVENT_MPA_ACCEPT,
- NES_CM_EVENT_MPA_REJECT,
- NES_CM_EVENT_MPA_ESTABLISHED,
- NES_CM_EVENT_CONNECTED,
- NES_CM_EVENT_CLOSED,
- NES_CM_EVENT_RESET,
- NES_CM_EVENT_DROPPED_PKT,
- NES_CM_EVENT_CLOSE_IMMED,
- NES_CM_EVENT_CLOSE_HARD,
- NES_CM_EVENT_CLOSE_CLEAN,
- NES_CM_EVENT_ABORTED,
- NES_CM_EVENT_SEND_FIRST
-};
-
-/* event to post to CM event handler */
-struct nes_cm_event {
- enum nes_cm_event_type type;
-
- struct nes_cm_info cm_info;
- struct work_struct event_work;
- struct nes_cm_node *cm_node;
-};
-
-struct nes_cm_core {
- enum nes_cm_node_state state;
-
- atomic_t listen_node_cnt;
- struct nes_cm_node listen_list;
- spinlock_t listen_list_lock;
-
- u32 mtu;
- u32 free_tx_pkt_max;
- u32 rx_pkt_posted;
- atomic_t ht_node_cnt;
- struct list_head connected_nodes;
- /* struct list_head hashtable[NES_CM_HASHTABLE_SIZE]; */
- spinlock_t ht_lock;
-
- struct timer_list tcp_timer;
-
- const struct nes_cm_ops *api;
-
- int (*post_event)(struct nes_cm_event *event);
- atomic_t events_posted;
- struct workqueue_struct *event_wq;
- struct workqueue_struct *disconn_wq;
-
- atomic_t node_cnt;
- u64 aborted_connects;
- u32 options;
-
- struct nes_cm_node *current_listen_node;
-};
-
-
-#define NES_CM_SET_PKT_SIZE (1 << 1)
-#define NES_CM_SET_FREE_PKT_Q_SIZE (1 << 2)
-
-/* CM ops/API for client interface */
-struct nes_cm_ops {
- int (*accelerated)(struct nes_cm_core *, struct nes_cm_node *);
- struct nes_cm_listener * (*listen)(struct nes_cm_core *, struct nes_vnic *,
- struct nes_cm_info *);
- int (*stop_listener)(struct nes_cm_core *, struct nes_cm_listener *);
- struct nes_cm_node * (*connect)(struct nes_cm_core *,
- struct nes_vnic *, u16, void *,
- struct nes_cm_info *);
- int (*close)(struct nes_cm_core *, struct nes_cm_node *);
- int (*accept)(struct nes_cm_core *, struct nes_cm_node *);
- int (*reject)(struct nes_cm_core *, struct nes_cm_node *);
- int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
- struct sk_buff *);
- int (*destroy_cm_core)(struct nes_cm_core *);
- int (*get)(struct nes_cm_core *);
- int (*set)(struct nes_cm_core *, u32, u32);
-};
-
-int schedule_nes_timer(struct nes_cm_node *, struct sk_buff *,
- enum nes_timer_type, int, int);
-
-int nes_accept(struct iw_cm_id *, struct iw_cm_conn_param *);
-int nes_reject(struct iw_cm_id *, const void *, u8);
-int nes_connect(struct iw_cm_id *, struct iw_cm_conn_param *);
-int nes_create_listen(struct iw_cm_id *, int);
-int nes_destroy_listen(struct iw_cm_id *);
-
-int nes_cm_recv(struct sk_buff *, struct net_device *);
-int nes_cm_start(void);
-int nes_cm_stop(void);
-int nes_add_ref_cm_node(struct nes_cm_node *cm_node);
-int nes_rem_ref_cm_node(struct nes_cm_node *cm_node);
-
-#endif /* NES_CM_H */
diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h
deleted file mode 100644
index a69eef16d72d..000000000000
--- a/drivers/infiniband/hw/nes/nes_context.h
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef NES_CONTEXT_H
-#define NES_CONTEXT_H
-
-struct nes_qp_context {
- __le32 misc;
- __le32 cqs;
- __le32 sq_addr_low;
- __le32 sq_addr_high;
- __le32 rq_addr_low;
- __le32 rq_addr_high;
- __le32 misc2;
- __le16 tcpPorts[2];
- __le32 ip0;
- __le32 ip1;
- __le32 ip2;
- __le32 ip3;
- __le32 mss;
- __le32 arp_index_vlan;
- __le32 tcp_state_flow_label;
- __le32 pd_index_wscale;
- __le32 keepalive;
- u32 ts_recent;
- u32 ts_age;
- __le32 snd_nxt;
- __le32 snd_wnd;
- __le32 rcv_nxt;
- __le32 rcv_wnd;
- __le32 snd_max;
- __le32 snd_una;
- u32 srtt;
- __le32 rttvar;
- __le32 ssthresh;
- __le32 cwnd;
- __le32 snd_wl1;
- __le32 snd_wl2;
- __le32 max_snd_wnd;
- __le32 ts_val_delta;
- u32 retransmit;
- u32 probe_cnt;
- u32 hte_index;
- __le32 q2_addr_low;
- __le32 q2_addr_high;
- __le32 ird_index;
- u32 Rsvd3;
- __le32 ird_ord_sizes;
- u32 mrkr_offset;
- __le32 aeq_token_low;
- __le32 aeq_token_high;
-};
-
-/* QP Context Misc Field */
-
-#define NES_QPCONTEXT_MISC_IWARP_VER_MASK 0x00000003
-#define NES_QPCONTEXT_MISC_IWARP_VER_SHIFT 0
-#define NES_QPCONTEXT_MISC_EFB_SIZE_MASK 0x000000C0
-#define NES_QPCONTEXT_MISC_EFB_SIZE_SHIFT 6
-#define NES_QPCONTEXT_MISC_RQ_SIZE_MASK 0x00000300
-#define NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT 8
-#define NES_QPCONTEXT_MISC_SQ_SIZE_MASK 0x00000c00
-#define NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT 10
-#define NES_QPCONTEXT_MISC_PCI_FCN_MASK 0x00007000
-#define NES_QPCONTEXT_MISC_PCI_FCN_SHIFT 12
-#define NES_QPCONTEXT_MISC_DUP_ACKS_MASK 0x00070000
-#define NES_QPCONTEXT_MISC_DUP_ACKS_SHIFT 16
-
-enum nes_qp_context_misc_bits {
- NES_QPCONTEXT_MISC_RX_WQE_SIZE = 0x00000004,
- NES_QPCONTEXT_MISC_IPV4 = 0x00000008,
- NES_QPCONTEXT_MISC_DO_NOT_FRAG = 0x00000010,
- NES_QPCONTEXT_MISC_INSERT_VLAN = 0x00000020,
- NES_QPCONTEXT_MISC_DROS = 0x00008000,
- NES_QPCONTEXT_MISC_WSCALE = 0x00080000,
- NES_QPCONTEXT_MISC_KEEPALIVE = 0x00100000,
- NES_QPCONTEXT_MISC_TIMESTAMP = 0x00200000,
- NES_QPCONTEXT_MISC_SACK = 0x00400000,
- NES_QPCONTEXT_MISC_RDMA_WRITE_EN = 0x00800000,
- NES_QPCONTEXT_MISC_RDMA_READ_EN = 0x01000000,
- NES_QPCONTEXT_MISC_WBIND_EN = 0x10000000,
- NES_QPCONTEXT_MISC_FAST_REGISTER_EN = 0x20000000,
- NES_QPCONTEXT_MISC_PRIV_EN = 0x40000000,
- NES_QPCONTEXT_MISC_NO_NAGLE = 0x80000000
-};
-
-enum nes_qp_acc_wq_sizes {
- HCONTEXT_TSA_WQ_SIZE_4 = 0,
- HCONTEXT_TSA_WQ_SIZE_32 = 1,
- HCONTEXT_TSA_WQ_SIZE_128 = 2,
- HCONTEXT_TSA_WQ_SIZE_512 = 3
-};
-
-/* QP Context Misc2 Fields */
-#define NES_QPCONTEXT_MISC2_TTL_MASK 0x000000ff
-#define NES_QPCONTEXT_MISC2_TTL_SHIFT 0
-#define NES_QPCONTEXT_MISC2_HOP_LIMIT_MASK 0x000000ff
-#define NES_QPCONTEXT_MISC2_HOP_LIMIT_SHIFT 0
-#define NES_QPCONTEXT_MISC2_LIMIT_MASK 0x00000300
-#define NES_QPCONTEXT_MISC2_LIMIT_SHIFT 8
-#define NES_QPCONTEXT_MISC2_NIC_INDEX_MASK 0x0000fc00
-#define NES_QPCONTEXT_MISC2_NIC_INDEX_SHIFT 10
-#define NES_QPCONTEXT_MISC2_SRC_IP_MASK 0x001f0000
-#define NES_QPCONTEXT_MISC2_SRC_IP_SHIFT 16
-#define NES_QPCONTEXT_MISC2_TOS_MASK 0xff000000
-#define NES_QPCONTEXT_MISC2_TOS_SHIFT 24
-#define NES_QPCONTEXT_MISC2_TRAFFIC_CLASS_MASK 0xff000000
-#define NES_QPCONTEXT_MISC2_TRAFFIC_CLASS_SHIFT 24
-
-/* QP Context Tcp State/Flow Label Fields */
-#define NES_QPCONTEXT_TCPFLOW_FLOW_LABEL_MASK 0x000fffff
-#define NES_QPCONTEXT_TCPFLOW_FLOW_LABEL_SHIFT 0
-#define NES_QPCONTEXT_TCPFLOW_TCP_STATE_MASK 0xf0000000
-#define NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT 28
-
-enum nes_qp_tcp_state {
- NES_QPCONTEXT_TCPSTATE_CLOSED = 1,
- NES_QPCONTEXT_TCPSTATE_EST = 5,
- NES_QPCONTEXT_TCPSTATE_TIME_WAIT = 11,
-};
-
-/* QP Context PD Index/wscale Fields */
-#define NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK 0x0000000f
-#define NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT 0
-#define NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK 0x00000f00
-#define NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT 8
-#define NES_QPCONTEXT_PDWSCALE_PDINDEX_MASK 0xffff0000
-#define NES_QPCONTEXT_PDWSCALE_PDINDEX_SHIFT 16
-
-/* QP Context Keepalive Fields */
-#define NES_QPCONTEXT_KEEPALIVE_DELTA_MASK 0x0000ffff
-#define NES_QPCONTEXT_KEEPALIVE_DELTA_SHIFT 0
-#define NES_QPCONTEXT_KEEPALIVE_PROBE_CNT_MASK 0x00ff0000
-#define NES_QPCONTEXT_KEEPALIVE_PROBE_CNT_SHIFT 16
-#define NES_QPCONTEXT_KEEPALIVE_INTV_MASK 0xff000000
-#define NES_QPCONTEXT_KEEPALIVE_INTV_SHIFT 24
-
-/* QP Context ORD/IRD Fields */
-#define NES_QPCONTEXT_ORDIRD_ORDSIZE_MASK 0x0000007f
-#define NES_QPCONTEXT_ORDIRD_ORDSIZE_SHIFT 0
-#define NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK 0x00030000
-#define NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT 16
-#define NES_QPCONTEXT_ORDIRD_IWARP_MODE_MASK 0x30000000
-#define NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT 28
-
-enum nes_ord_ird_bits {
- NES_QPCONTEXT_ORDIRD_WRPDU = 0x02000000,
- NES_QPCONTEXT_ORDIRD_LSMM_PRESENT = 0x04000000,
- NES_QPCONTEXT_ORDIRD_ALSMM = 0x08000000,
- NES_QPCONTEXT_ORDIRD_AAH = 0x40000000,
- NES_QPCONTEXT_ORDIRD_RNMC = 0x80000000
-};
-
-enum nes_iwarp_qp_state {
- NES_QPCONTEXT_IWARP_STATE_NONEXIST = 0,
- NES_QPCONTEXT_IWARP_STATE_IDLE = 1,
- NES_QPCONTEXT_IWARP_STATE_RTS = 2,
- NES_QPCONTEXT_IWARP_STATE_CLOSING = 3,
- NES_QPCONTEXT_IWARP_STATE_TERMINATE = 5,
- NES_QPCONTEXT_IWARP_STATE_ERROR = 6
-};
-
-
-#endif /* NES_CONTEXT_H */
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
deleted file mode 100644
index 5517e392bc01..000000000000
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ /dev/null
@@ -1,3887 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/if_vlan.h>
-#include <linux/slab.h>
-
-#include "nes.h"
-
-static int wide_ppm_offset;
-module_param(wide_ppm_offset, int, 0644);
-MODULE_PARM_DESC(wide_ppm_offset, "Increase CX4 interface clock ppm offset, 0=100ppm (default), 1=300ppm");
-
-static u32 crit_err_count;
-u32 int_mod_timer_init;
-u32 int_mod_cq_depth_256;
-u32 int_mod_cq_depth_128;
-u32 int_mod_cq_depth_32;
-u32 int_mod_cq_depth_24;
-u32 int_mod_cq_depth_16;
-u32 int_mod_cq_depth_4;
-u32 int_mod_cq_depth_1;
-static const u8 nes_max_critical_error_count = 100;
-#include "nes_cm.h"
-
-static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq);
-static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count);
-static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
- struct nes_adapter *nesadapter, u8 OneG_Mode);
-static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq);
-static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq);
-static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq);
-static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
- struct nes_hw_aeqe *aeqe);
-static void process_critical_error(struct nes_device *nesdev);
-static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
-static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
-static void nes_terminate_start_timer(struct nes_qp *nesqp);
-
-static const char *const nes_iwarp_state_str[] = {
- "Non-Existent",
- "Idle",
- "RTS",
- "Closing",
- "RSVD1",
- "Terminate",
- "Error",
- "RSVD2",
-};
-
-static const char *const nes_tcp_state_str[] = {
- "Non-Existent",
- "Closed",
- "Listen",
- "SYN Sent",
- "SYN Rcvd",
- "Established",
- "Close Wait",
- "FIN Wait 1",
- "Closing",
- "Last Ack",
- "FIN Wait 2",
- "Time Wait",
- "RSVD1",
- "RSVD2",
- "RSVD3",
- "RSVD4",
-};
-
-static inline void print_ip(struct nes_cm_node *cm_node)
-{
- unsigned char *rem_addr;
- if (cm_node) {
- rem_addr = (unsigned char *)&cm_node->rem_addr;
- printk(KERN_ERR PFX "Remote IP addr: %pI4\n", rem_addr);
- }
-}
-
-/**
- * nes_nic_init_timer_defaults
- */
-void nes_nic_init_timer_defaults(struct nes_device *nesdev, u8 jumbomode)
-{
- unsigned long flags;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
-
- spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
-
- shared_timer->timer_in_use_min = NES_NIC_FAST_TIMER_LOW;
- shared_timer->timer_in_use_max = NES_NIC_FAST_TIMER_HIGH;
- if (jumbomode) {
- shared_timer->threshold_low = DEFAULT_JUMBO_NES_QL_LOW;
- shared_timer->threshold_target = DEFAULT_JUMBO_NES_QL_TARGET;
- shared_timer->threshold_high = DEFAULT_JUMBO_NES_QL_HIGH;
- } else {
- shared_timer->threshold_low = DEFAULT_NES_QL_LOW;
- shared_timer->threshold_target = DEFAULT_NES_QL_TARGET;
- shared_timer->threshold_high = DEFAULT_NES_QL_HIGH;
- }
-
- /* todo use netdev->mtu to set thresholds */
- spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
-}
-
-
-/**
- * nes_nic_init_timer
- */
-static void nes_nic_init_timer(struct nes_device *nesdev)
-{
- unsigned long flags;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
-
- spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
-
- if (shared_timer->timer_in_use_old == 0) {
- nesdev->deepcq_count = 0;
- shared_timer->timer_direction_upward = 0;
- shared_timer->timer_direction_downward = 0;
- shared_timer->timer_in_use = NES_NIC_FAST_TIMER;
- shared_timer->timer_in_use_old = 0;
-
- }
- if (shared_timer->timer_in_use != shared_timer->timer_in_use_old) {
- shared_timer->timer_in_use_old = shared_timer->timer_in_use;
- nes_write32(nesdev->regs+NES_PERIODIC_CONTROL,
- 0x80000000 | ((u32)(shared_timer->timer_in_use*8)));
- }
- /* todo use netdev->mtu to set thresholds */
- spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
-}
-
-
-/**
- * nes_nic_tune_timer
- */
-static void nes_nic_tune_timer(struct nes_device *nesdev)
-{
- unsigned long flags;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
- u16 cq_count = nesdev->currcq_count;
-
- spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
-
- if (shared_timer->cq_count_old <= cq_count)
- shared_timer->cq_direction_downward = 0;
- else
- shared_timer->cq_direction_downward++;
- shared_timer->cq_count_old = cq_count;
- if (shared_timer->cq_direction_downward > NES_NIC_CQ_DOWNWARD_TREND) {
- if (cq_count <= shared_timer->threshold_low &&
- shared_timer->threshold_low > 4) {
- shared_timer->threshold_low = shared_timer->threshold_low/2;
- shared_timer->cq_direction_downward=0;
- nesdev->currcq_count = 0;
- spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
- return;
- }
- }
-
- if (cq_count > 1) {
- nesdev->deepcq_count += cq_count;
- if (cq_count <= shared_timer->threshold_low) { /* increase timer gently */
- shared_timer->timer_direction_upward++;
- shared_timer->timer_direction_downward = 0;
- } else if (cq_count <= shared_timer->threshold_target) { /* balanced */
- shared_timer->timer_direction_upward = 0;
- shared_timer->timer_direction_downward = 0;
- } else if (cq_count <= shared_timer->threshold_high) { /* decrease timer gently */
- shared_timer->timer_direction_downward++;
- shared_timer->timer_direction_upward = 0;
- } else if (cq_count <= (shared_timer->threshold_high) * 2) {
- shared_timer->timer_in_use -= 2;
- shared_timer->timer_direction_upward = 0;
- shared_timer->timer_direction_downward++;
- } else {
- shared_timer->timer_in_use -= 4;
- shared_timer->timer_direction_upward = 0;
- shared_timer->timer_direction_downward++;
- }
-
- if (shared_timer->timer_direction_upward > 3 ) { /* using history */
- shared_timer->timer_in_use += 3;
- shared_timer->timer_direction_upward = 0;
- shared_timer->timer_direction_downward = 0;
- }
- if (shared_timer->timer_direction_downward > 5) { /* using history */
- shared_timer->timer_in_use -= 4 ;
- shared_timer->timer_direction_downward = 0;
- shared_timer->timer_direction_upward = 0;
- }
- }
-
- /* boundary checking */
- if (shared_timer->timer_in_use > shared_timer->threshold_high)
- shared_timer->timer_in_use = shared_timer->threshold_high;
- else if (shared_timer->timer_in_use < shared_timer->threshold_low)
- shared_timer->timer_in_use = shared_timer->threshold_low;
-
- nesdev->currcq_count = 0;
-
- spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
-}
-
-
-/**
- * nes_init_adapter - initialize adapter
- */
-struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
- struct nes_adapter *nesadapter = NULL;
- unsigned long num_pds;
- u32 u32temp;
- u32 port_count;
- u16 max_rq_wrs;
- u16 max_sq_wrs;
- u32 max_mr;
- u32 max_256pbl;
- u32 max_4kpbl;
- u32 max_qp;
- u32 max_irrq;
- u32 max_cq;
- u32 hte_index_mask;
- u32 adapter_size;
- u32 arp_table_size;
- u16 vendor_id;
- u16 device_id;
- u8 OneG_Mode;
- u8 func_index;
-
- /* search the list of existing adapters */
- list_for_each_entry(nesadapter, &nes_adapter_list, list) {
- nes_debug(NES_DBG_INIT, "Searching Adapter list for PCI devfn = 0x%X,"
- " adapter PCI slot/bus = %u/%u, pci devices PCI slot/bus = %u/%u, .\n",
- nesdev->pcidev->devfn,
- PCI_SLOT(nesadapter->devfn),
- nesadapter->bus_number,
- PCI_SLOT(nesdev->pcidev->devfn),
- nesdev->pcidev->bus->number );
- if ((PCI_SLOT(nesadapter->devfn) == PCI_SLOT(nesdev->pcidev->devfn)) &&
- (nesadapter->bus_number == nesdev->pcidev->bus->number)) {
- nesadapter->ref_count++;
- return nesadapter;
- }
- }
-
- /* no adapter found */
- num_pds = pci_resource_len(nesdev->pcidev, BAR_1) >> PAGE_SHIFT;
- if ((hw_rev != NE020_REV) && (hw_rev != NE020_REV1)) {
- nes_debug(NES_DBG_INIT, "NE020 driver detected unknown hardware revision 0x%x\n",
- hw_rev);
- return NULL;
- }
-
- nes_debug(NES_DBG_INIT, "Determine Soft Reset, QP_control=0x%x, CPU0=0x%x, CPU1=0x%x, CPU2=0x%x\n",
- nes_read_indexed(nesdev, NES_IDX_QP_CONTROL + PCI_FUNC(nesdev->pcidev->devfn) * 8),
- nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS),
- nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS + 4),
- nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS + 8));
-
- nes_debug(NES_DBG_INIT, "Reset and init NE020\n");
-
-
- if ((port_count = nes_reset_adapter_ne020(nesdev, &OneG_Mode)) == 0)
- return NULL;
-
- max_qp = nes_read_indexed(nesdev, NES_IDX_QP_CTX_SIZE);
- nes_debug(NES_DBG_INIT, "QP_CTX_SIZE=%u\n", max_qp);
-
- u32temp = nes_read_indexed(nesdev, NES_IDX_QUAD_HASH_TABLE_SIZE);
- if (max_qp > ((u32)1 << (u32temp & 0x001f))) {
- nes_debug(NES_DBG_INIT, "Reducing Max QPs to %u due to hash table size = 0x%08X\n",
- max_qp, u32temp);
- max_qp = (u32)1 << (u32temp & 0x001f);
- }
-
- hte_index_mask = ((u32)1 << ((u32temp & 0x001f)+1))-1;
- nes_debug(NES_DBG_INIT, "Max QP = %u, hte_index_mask = 0x%08X.\n",
- max_qp, hte_index_mask);
-
- u32temp = nes_read_indexed(nesdev, NES_IDX_IRRQ_COUNT);
-
- max_irrq = 1 << (u32temp & 0x001f);
-
- if (max_qp > max_irrq) {
- max_qp = max_irrq;
- nes_debug(NES_DBG_INIT, "Reducing Max QPs to %u due to Available Q1s.\n",
- max_qp);
- }
-
- /* there should be no reason to allocate more pds than qps */
- if (num_pds > max_qp)
- num_pds = max_qp;
-
- u32temp = nes_read_indexed(nesdev, NES_IDX_MRT_SIZE);
- max_mr = (u32)8192 << (u32temp & 0x7);
-
- u32temp = nes_read_indexed(nesdev, NES_IDX_PBL_REGION_SIZE);
- max_256pbl = (u32)1 << (u32temp & 0x0000001f);
- max_4kpbl = (u32)1 << ((u32temp >> 16) & 0x0000001f);
- max_cq = nes_read_indexed(nesdev, NES_IDX_CQ_CTX_SIZE);
-
- u32temp = nes_read_indexed(nesdev, NES_IDX_ARP_CACHE_SIZE);
- arp_table_size = 1 << u32temp;
-
- adapter_size = (sizeof(struct nes_adapter) +
- (sizeof(unsigned long)-1)) & (~(sizeof(unsigned long)-1));
- adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp);
- adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr);
- adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_cq);
- adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(num_pds);
- adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(arp_table_size);
- adapter_size += sizeof(struct nes_qp **) * max_qp;
-
- /* allocate a new adapter struct */
- nesadapter = kzalloc(adapter_size, GFP_KERNEL);
- if (!nesadapter)
- return NULL;
-
- nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n",
- nesadapter, (u32)sizeof(struct nes_adapter), adapter_size);
-
- if (nes_read_eeprom_values(nesdev, nesadapter)) {
- printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
- kfree(nesadapter);
- return NULL;
- }
-
- nesadapter->vendor_id = (((u32) nesadapter->mac_addr_high) << 8) |
- (nesadapter->mac_addr_low >> 24);
-
- pci_bus_read_config_word(nesdev->pcidev->bus, nesdev->pcidev->devfn,
- PCI_DEVICE_ID, &device_id);
- nesadapter->vendor_part_id = device_id;
-
- if (nes_init_serdes(nesdev, hw_rev, port_count, nesadapter,
- OneG_Mode)) {
- kfree(nesadapter);
- return NULL;
- }
- nes_init_csr_ne020(nesdev, hw_rev, port_count);
-
- memset(nesadapter->pft_mcast_map, 255,
- sizeof nesadapter->pft_mcast_map);
-
- /* populate the new nesadapter */
- nesadapter->nesdev = nesdev;
- nesadapter->devfn = nesdev->pcidev->devfn;
- nesadapter->bus_number = nesdev->pcidev->bus->number;
- nesadapter->ref_count = 1;
- nesadapter->timer_int_req = 0xffff0000;
- nesadapter->OneG_Mode = OneG_Mode;
- nesadapter->doorbell_start = nesdev->doorbell_region;
-
- /* nesadapter->tick_delta = clk_divisor; */
- nesadapter->hw_rev = hw_rev;
- nesadapter->port_count = port_count;
-
- nesadapter->max_qp = max_qp;
- nesadapter->hte_index_mask = hte_index_mask;
- nesadapter->max_irrq = max_irrq;
- nesadapter->max_mr = max_mr;
- nesadapter->max_256pbl = max_256pbl - 1;
- nesadapter->max_4kpbl = max_4kpbl - 1;
- nesadapter->max_cq = max_cq;
- nesadapter->free_256pbl = max_256pbl - 1;
- nesadapter->free_4kpbl = max_4kpbl - 1;
- nesadapter->max_pd = num_pds;
- nesadapter->arp_table_size = arp_table_size;
-
- nesadapter->et_pkt_rate_low = NES_TIMER_ENABLE_LIMIT;
- if (nes_drv_opt & NES_DRV_OPT_DISABLE_INT_MOD) {
- nesadapter->et_use_adaptive_rx_coalesce = 0;
- nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT;
- nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval;
- } else {
- nesadapter->et_use_adaptive_rx_coalesce = 1;
- nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC;
- nesadapter->et_rx_coalesce_usecs_irq = 0;
- printk(PFX "%s: Using Adaptive Interrupt Moderation\n", __func__);
- }
- /* Setup and enable the periodic timer */
- if (nesadapter->et_rx_coalesce_usecs_irq)
- nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x80000000 |
- ((u32)(nesadapter->et_rx_coalesce_usecs_irq * 8)));
- else
- nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x00000000);
-
- nesadapter->base_pd = 1;
-
- nesadapter->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
- IB_DEVICE_MEM_WINDOW |
- IB_DEVICE_MEM_MGT_EXTENSIONS;
-
- nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
- [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
- nesadapter->allocated_cqs = &nesadapter->allocated_qps[BITS_TO_LONGS(max_qp)];
- nesadapter->allocated_mrs = &nesadapter->allocated_cqs[BITS_TO_LONGS(max_cq)];
- nesadapter->allocated_pds = &nesadapter->allocated_mrs[BITS_TO_LONGS(max_mr)];
- nesadapter->allocated_arps = &nesadapter->allocated_pds[BITS_TO_LONGS(num_pds)];
- nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
-
-
- /* mark the usual suspect QPs, MR and CQs as in use */
- for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) {
- set_bit(u32temp, nesadapter->allocated_qps);
- set_bit(u32temp, nesadapter->allocated_cqs);
- }
- set_bit(0, nesadapter->allocated_mrs);
-
- for (u32temp = 0; u32temp < 20; u32temp++)
- set_bit(u32temp, nesadapter->allocated_pds);
- u32temp = nes_read_indexed(nesdev, NES_IDX_QP_MAX_CFG_SIZES);
-
- max_rq_wrs = ((u32temp >> 8) & 3);
- switch (max_rq_wrs) {
- case 0:
- max_rq_wrs = 4;
- break;
- case 1:
- max_rq_wrs = 16;
- break;
- case 2:
- max_rq_wrs = 32;
- break;
- case 3:
- max_rq_wrs = 512;
- break;
- }
-
- max_sq_wrs = (u32temp & 3);
- switch (max_sq_wrs) {
- case 0:
- max_sq_wrs = 4;
- break;
- case 1:
- max_sq_wrs = 16;
- break;
- case 2:
- max_sq_wrs = 32;
- break;
- case 3:
- max_sq_wrs = 512;
- break;
- }
- nesadapter->max_qp_wr = min(max_rq_wrs, max_sq_wrs);
- nesadapter->max_irrq_wr = (u32temp >> 16) & 3;
-
- nesadapter->max_sge = 4;
- nesadapter->max_cqe = 32766;
-
- if (nes_read_eeprom_values(nesdev, nesadapter)) {
- printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
- kfree(nesadapter);
- return NULL;
- }
-
- u32temp = nes_read_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG);
- nes_write_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG,
- (u32temp & 0xff000000) | (nesadapter->tcp_timer_core_clk_divisor & 0x00ffffff));
-
- /* setup port configuration */
- if (nesadapter->port_count == 1) {
- nesadapter->log_port = 0x00000000;
- if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT)
- nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000002);
- else
- nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003);
- } else {
- if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
- nesadapter->log_port = 0x000000D8;
- } else {
- if (nesadapter->port_count == 2)
- nesadapter->log_port = 0x00000044;
- else
- nesadapter->log_port = 0x000000e4;
- }
- nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003);
- }
-
- nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT,
- nesadapter->log_port);
- nes_debug(NES_DBG_INIT, "Probe time, LOG2PHY=%u\n",
- nes_read_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT));
-
- spin_lock_init(&nesadapter->resource_lock);
- spin_lock_init(&nesadapter->phy_lock);
- spin_lock_init(&nesadapter->pbl_lock);
- spin_lock_init(&nesadapter->periodic_timer_lock);
-
- INIT_LIST_HEAD(&nesadapter->nesvnic_list[0]);
- INIT_LIST_HEAD(&nesadapter->nesvnic_list[1]);
- INIT_LIST_HEAD(&nesadapter->nesvnic_list[2]);
- INIT_LIST_HEAD(&nesadapter->nesvnic_list[3]);
-
- if ((!nesadapter->OneG_Mode) && (nesadapter->port_count == 2)) {
- u32 pcs_control_status0, pcs_control_status1;
- u32 reset_value;
- u32 i = 0;
- u32 int_cnt = 0;
- u32 ext_cnt = 0;
- unsigned long flags;
- u32 j = 0;
-
- pcs_control_status0 = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0);
- pcs_control_status1 = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
-
- for (i = 0; i < NES_MAX_LINK_CHECK; i++) {
- pcs_control_status0 = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0);
- pcs_control_status1 = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
- if ((0x0F000100 == (pcs_control_status0 & 0x0F000100))
- || (0x0F000100 == (pcs_control_status1 & 0x0F000100)))
- int_cnt++;
- usleep_range(1000, 2000);
- }
- if (int_cnt > 1) {
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8);
- mh_detected++;
- reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
- reset_value |= 0x0000003d;
- nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value);
-
- while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET)
- & 0x00000040) != 0x00000040) && (j++ < 5000));
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
-
- pcs_control_status0 = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0);
- pcs_control_status1 = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
-
- for (i = 0; i < NES_MAX_LINK_CHECK; i++) {
- pcs_control_status0 = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0);
- pcs_control_status1 = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
- if ((0x0F000100 == (pcs_control_status0 & 0x0F000100))
- || (0x0F000100 == (pcs_control_status1 & 0x0F000100))) {
- if (++ext_cnt > int_cnt) {
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1,
- 0x0000F088);
- mh_detected++;
- reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
- reset_value |= 0x0000003d;
- nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value);
-
- while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET)
- & 0x00000040) != 0x00000040) && (j++ < 5000));
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
- break;
- }
- }
- usleep_range(1000, 2000);
- }
- }
- }
-
- if (nesadapter->hw_rev == NE020_REV) {
- timer_setup(&nesadapter->mh_timer, nes_mh_fix, 0);
- nesadapter->mh_timer.expires = jiffies + (HZ/5); /* 1 second */
- add_timer(&nesadapter->mh_timer);
- } else {
- nes_write32(nesdev->regs+NES_INTF_INT_STAT, 0x0f000000);
- }
-
- timer_setup(&nesadapter->lc_timer, nes_clc, 0);
- nesadapter->lc_timer.expires = jiffies + 3600 * HZ; /* 1 hour */
- add_timer(&nesadapter->lc_timer);
-
- list_add_tail(&nesadapter->list, &nes_adapter_list);
-
- for (func_index = 0; func_index < 8; func_index++) {
- pci_bus_read_config_word(nesdev->pcidev->bus,
- PCI_DEVFN(PCI_SLOT(nesdev->pcidev->devfn),
- func_index), 0, &vendor_id);
- if (vendor_id == 0xffff)
- break;
- }
- nes_debug(NES_DBG_INIT, "%s %d functions found for %s.\n", __func__,
- func_index, pci_name(nesdev->pcidev));
- nesadapter->adapter_fcn_count = func_index;
-
- return nesadapter;
-}
-
-
-/**
- * nes_reset_adapter_ne020
- */
-static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode)
-{
- u32 port_count;
- u32 u32temp;
- u32 i;
-
- u32temp = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
- port_count = ((u32temp & 0x00000300) >> 8) + 1;
- /* TODO: assuming that both SERDES are set the same for now */
- *OneG_Mode = (u32temp & 0x00003c00) ? 0 : 1;
- nes_debug(NES_DBG_INIT, "Initial Software Reset = 0x%08X, port_count=%u\n",
- u32temp, port_count);
- if (*OneG_Mode)
- nes_debug(NES_DBG_INIT, "Running in 1G mode.\n");
- u32temp &= 0xff00ffc0;
- switch (port_count) {
- case 1:
- u32temp |= 0x00ee0000;
- break;
- case 2:
- u32temp |= 0x00cc0000;
- break;
- case 4:
- u32temp |= 0x00000000;
- break;
- default:
- return 0;
- break;
- }
-
- /* check and do full reset if needed */
- if (nes_read_indexed(nesdev, NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))) {
- nes_debug(NES_DBG_INIT, "Issuing Full Soft reset = 0x%08X\n", u32temp | 0xd);
- nes_write32(nesdev->regs+NES_SOFTWARE_RESET, u32temp | 0xd);
-
- i = 0;
- while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000)
- mdelay(1);
- if (i > 10000) {
- nes_debug(NES_DBG_INIT, "Did not see full soft reset done.\n");
- return 0;
- }
-
- i = 0;
- while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000)
- mdelay(1);
- if (i > 10000) {
- printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n",
- nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS));
- return 0;
- }
- }
-
- /* port reset */
- switch (port_count) {
- case 1:
- u32temp |= 0x00ee0010;
- break;
- case 2:
- u32temp |= 0x00cc0030;
- break;
- case 4:
- u32temp |= 0x00000030;
- break;
- }
-
- nes_debug(NES_DBG_INIT, "Issuing Port Soft reset = 0x%08X\n", u32temp | 0xd);
- nes_write32(nesdev->regs+NES_SOFTWARE_RESET, u32temp | 0xd);
-
- i = 0;
- while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000)
- mdelay(1);
- if (i > 10000) {
- nes_debug(NES_DBG_INIT, "Did not see port soft reset done.\n");
- return 0;
- }
-
- /* serdes 0 */
- i = 0;
- while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0)
- & 0x0000000f)) != 0x0000000f) && i++ < 5000)
- mdelay(1);
- if (i > 5000) {
- nes_debug(NES_DBG_INIT, "Serdes 0 not ready, status=%x\n", u32temp);
- return 0;
- }
-
- /* serdes 1 */
- if (port_count > 1) {
- i = 0;
- while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1)
- & 0x0000000f)) != 0x0000000f) && i++ < 5000)
- mdelay(1);
- if (i > 5000) {
- nes_debug(NES_DBG_INIT, "Serdes 1 not ready, status=%x\n", u32temp);
- return 0;
- }
- }
-
- return port_count;
-}
-
-
-/**
- * nes_init_serdes
- */
-static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
- struct nes_adapter *nesadapter, u8 OneG_Mode)
-{
- int i;
- u32 u32temp;
- u32 sds;
-
- if (hw_rev != NE020_REV) {
- /* init serdes 0 */
- switch (nesadapter->phy_type[0]) {
- case NES_PHY_TYPE_CX4:
- if (wide_ppm_offset)
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000FFFAA);
- else
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
- break;
- case NES_PHY_TYPE_KR:
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000);
- break;
- case NES_PHY_TYPE_PUMA_1G:
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
- sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0);
- sds |= 0x00000100;
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds);
- break;
- default:
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
- break;
- }
-
- if (!OneG_Mode)
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000);
-
- if (port_count < 2)
- return 0;
-
- /* init serdes 1 */
- if (!(OneG_Mode && (nesadapter->phy_type[1] != NES_PHY_TYPE_PUMA_1G)))
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF);
-
- switch (nesadapter->phy_type[1]) {
- case NES_PHY_TYPE_ARGUS:
- case NES_PHY_TYPE_SFP_D:
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000);
- break;
- case NES_PHY_TYPE_CX4:
- if (wide_ppm_offset)
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000FFFAA);
- break;
- case NES_PHY_TYPE_KR:
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000);
- break;
- case NES_PHY_TYPE_PUMA_1G:
- sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
- sds |= 0x000000100;
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds);
- }
- if (!OneG_Mode) {
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000);
- sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
- sds &= 0xFFFFFFBF;
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds);
- }
- } else {
- /* init serdes 0 */
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008);
- i = 0;
- while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0)
- & 0x0000000f)) != 0x0000000f) && i++ < 5000)
- mdelay(1);
- if (i > 5000) {
- nes_debug(NES_DBG_PHY, "Init: serdes 0 not ready, status=%x\n", u32temp);
- return 1;
- }
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x000bdef7);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE0, 0x9ce73000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE0, 0x0ff00000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET0, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS0, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0, 0x00000000);
- if (OneG_Mode)
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0182222);
- else
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0042222);
-
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000ff);
- if (port_count > 1) {
- /* init serdes 1 */
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x00000048);
- i = 0;
- while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1)
- & 0x0000000f)) != 0x0000000f) && (i++ < 5000))
- mdelay(1);
- if (i > 5000) {
- printk("%s: Init: serdes 1 not ready, status=%x\n", __func__, u32temp);
- /* return 1; */
- }
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x000bdef7);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE1, 0x9ce73000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE1, 0x0ff00000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET1, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS1, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL1, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL1, 0xf0002222);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000ff);
- }
- }
- return 0;
-}
-
-
-/**
- * nes_init_csr_ne020
- * Initialize registers for ne020 hardware
- */
-static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count)
-{
- u32 u32temp;
-
- nes_debug(NES_DBG_INIT, "port_count=%d\n", port_count);
-
- nes_write_indexed(nesdev, 0x000001E4, 0x00000007);
- /* nes_write_indexed(nesdev, 0x000001E8, 0x000208C4); */
- nes_write_indexed(nesdev, 0x000001E8, 0x00020874);
- nes_write_indexed(nesdev, 0x000001D8, 0x00048002);
- /* nes_write_indexed(nesdev, 0x000001D8, 0x0004B002); */
- nes_write_indexed(nesdev, 0x000001FC, 0x00050005);
- nes_write_indexed(nesdev, 0x00000600, 0x55555555);
- nes_write_indexed(nesdev, 0x00000604, 0x55555555);
-
- /* TODO: move these MAC register settings to NIC bringup */
- nes_write_indexed(nesdev, 0x00002000, 0x00000001);
- nes_write_indexed(nesdev, 0x00002004, 0x00000001);
- nes_write_indexed(nesdev, 0x00002008, 0x0000FFFF);
- nes_write_indexed(nesdev, 0x0000200C, 0x00000001);
- nes_write_indexed(nesdev, 0x00002010, 0x000003c1);
- nes_write_indexed(nesdev, 0x0000201C, 0x75345678);
- if (port_count > 1) {
- nes_write_indexed(nesdev, 0x00002200, 0x00000001);
- nes_write_indexed(nesdev, 0x00002204, 0x00000001);
- nes_write_indexed(nesdev, 0x00002208, 0x0000FFFF);
- nes_write_indexed(nesdev, 0x0000220C, 0x00000001);
- nes_write_indexed(nesdev, 0x00002210, 0x000003c1);
- nes_write_indexed(nesdev, 0x0000221C, 0x75345678);
- nes_write_indexed(nesdev, 0x00000908, 0x20000001);
- }
- if (port_count > 2) {
- nes_write_indexed(nesdev, 0x00002400, 0x00000001);
- nes_write_indexed(nesdev, 0x00002404, 0x00000001);
- nes_write_indexed(nesdev, 0x00002408, 0x0000FFFF);
- nes_write_indexed(nesdev, 0x0000240C, 0x00000001);
- nes_write_indexed(nesdev, 0x00002410, 0x000003c1);
- nes_write_indexed(nesdev, 0x0000241C, 0x75345678);
- nes_write_indexed(nesdev, 0x00000910, 0x20000001);
-
- nes_write_indexed(nesdev, 0x00002600, 0x00000001);
- nes_write_indexed(nesdev, 0x00002604, 0x00000001);
- nes_write_indexed(nesdev, 0x00002608, 0x0000FFFF);
- nes_write_indexed(nesdev, 0x0000260C, 0x00000001);
- nes_write_indexed(nesdev, 0x00002610, 0x000003c1);
- nes_write_indexed(nesdev, 0x0000261C, 0x75345678);
- nes_write_indexed(nesdev, 0x00000918, 0x20000001);
- }
-
- nes_write_indexed(nesdev, 0x00005000, 0x00018000);
- /* nes_write_indexed(nesdev, 0x00005000, 0x00010000); */
- nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1, (wqm_quanta << 1) |
- 0x00000001);
- nes_write_indexed(nesdev, 0x00005008, 0x1F1F1F1F);
- nes_write_indexed(nesdev, 0x00005010, 0x1F1F1F1F);
- nes_write_indexed(nesdev, 0x00005018, 0x1F1F1F1F);
- nes_write_indexed(nesdev, 0x00005020, 0x1F1F1F1F);
- nes_write_indexed(nesdev, 0x00006090, 0xFFFFFFFF);
-
- /* TODO: move this to code, get from EEPROM */
- nes_write_indexed(nesdev, 0x00000900, 0x20000001);
- nes_write_indexed(nesdev, 0x000060C0, 0x0000028e);
- nes_write_indexed(nesdev, 0x000060C8, 0x00000020);
-
- nes_write_indexed(nesdev, 0x000001EC, 0x7b2625a0);
- /* nes_write_indexed(nesdev, 0x000001EC, 0x5f2625a0); */
-
- if (hw_rev != NE020_REV) {
- u32temp = nes_read_indexed(nesdev, 0x000008e8);
- u32temp |= 0x80000000;
- nes_write_indexed(nesdev, 0x000008e8, u32temp);
- u32temp = nes_read_indexed(nesdev, 0x000021f8);
- u32temp &= 0x7fffffff;
- u32temp |= 0x7fff0010;
- nes_write_indexed(nesdev, 0x000021f8, u32temp);
- if (port_count > 1) {
- u32temp = nes_read_indexed(nesdev, 0x000023f8);
- u32temp &= 0x7fffffff;
- u32temp |= 0x7fff0010;
- nes_write_indexed(nesdev, 0x000023f8, u32temp);
- }
- }
-}
-
-
-/**
- * nes_destroy_adapter - destroy the adapter structure
- */
-void nes_destroy_adapter(struct nes_adapter *nesadapter)
-{
- struct nes_adapter *tmp_adapter;
-
- list_for_each_entry(tmp_adapter, &nes_adapter_list, list) {
- nes_debug(NES_DBG_SHUTDOWN, "Nes Adapter list entry = 0x%p.\n",
- tmp_adapter);
- }
-
- nesadapter->ref_count--;
- if (!nesadapter->ref_count) {
- if (nesadapter->hw_rev == NE020_REV) {
- del_timer(&nesadapter->mh_timer);
- }
- del_timer(&nesadapter->lc_timer);
-
- list_del(&nesadapter->list);
- kfree(nesadapter);
- }
-}
-
-
-/**
- * nes_init_cqp
- */
-int nes_init_cqp(struct nes_device *nesdev)
-{
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_hw_cqp_qp_context *cqp_qp_context;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_hw_ceq *ceq;
- struct nes_hw_ceq *nic_ceq;
- struct nes_hw_aeq *aeq;
- void *vmem;
- dma_addr_t pmem;
- u32 count=0;
- u32 cqp_head;
- u64 u64temp;
- u32 u32temp;
-
- /* allocate CQP memory */
- /* Need to add max_cq to the aeq size once cq overflow checking is added back */
- /* SQ is 512 byte aligned, others are 256 byte aligned */
- nesdev->cqp_mem_size = 512 +
- (sizeof(struct nes_hw_cqp_wqe) * NES_CQP_SQ_SIZE) +
- (sizeof(struct nes_hw_cqe) * NES_CCQ_SIZE) +
- max(((u32)sizeof(struct nes_hw_ceqe) * NES_CCEQ_SIZE), (u32)256) +
- max(((u32)sizeof(struct nes_hw_ceqe) * NES_NIC_CEQ_SIZE), (u32)256) +
- (sizeof(struct nes_hw_aeqe) * nesadapter->max_qp) +
- sizeof(struct nes_hw_cqp_qp_context);
-
- nesdev->cqp_vbase = pci_zalloc_consistent(nesdev->pcidev,
- nesdev->cqp_mem_size,
- &nesdev->cqp_pbase);
- if (!nesdev->cqp_vbase) {
- nes_debug(NES_DBG_INIT, "Unable to allocate memory for host descriptor rings\n");
- return -ENOMEM;
- }
-
- /* Allocate a twice the number of CQP requests as the SQ size */
- nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) *
- 2 * NES_CQP_SQ_SIZE, GFP_KERNEL);
- if (!nesdev->nes_cqp_requests) {
- pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase,
- nesdev->cqp.sq_pbase);
- return -ENOMEM;
- }
-
- nes_debug(NES_DBG_INIT, "Allocated CQP structures at %p (phys = %016lX), size = %u.\n",
- nesdev->cqp_vbase, (unsigned long)nesdev->cqp_pbase, nesdev->cqp_mem_size);
-
- spin_lock_init(&nesdev->cqp.lock);
- init_waitqueue_head(&nesdev->cqp.waitq);
-
- /* Setup Various Structures */
- vmem = (void *)(((unsigned long)nesdev->cqp_vbase + (512 - 1)) &
- ~(unsigned long)(512 - 1));
- pmem = (dma_addr_t)(((unsigned long long)nesdev->cqp_pbase + (512 - 1)) &
- ~(unsigned long long)(512 - 1));
-
- nesdev->cqp.sq_vbase = vmem;
- nesdev->cqp.sq_pbase = pmem;
- nesdev->cqp.sq_size = NES_CQP_SQ_SIZE;
- nesdev->cqp.sq_head = 0;
- nesdev->cqp.sq_tail = 0;
- nesdev->cqp.qp_id = PCI_FUNC(nesdev->pcidev->devfn);
-
- vmem += (sizeof(struct nes_hw_cqp_wqe) * nesdev->cqp.sq_size);
- pmem += (sizeof(struct nes_hw_cqp_wqe) * nesdev->cqp.sq_size);
-
- nesdev->ccq.cq_vbase = vmem;
- nesdev->ccq.cq_pbase = pmem;
- nesdev->ccq.cq_size = NES_CCQ_SIZE;
- nesdev->ccq.cq_head = 0;
- nesdev->ccq.ce_handler = nes_cqp_ce_handler;
- nesdev->ccq.cq_number = PCI_FUNC(nesdev->pcidev->devfn);
-
- vmem += (sizeof(struct nes_hw_cqe) * nesdev->ccq.cq_size);
- pmem += (sizeof(struct nes_hw_cqe) * nesdev->ccq.cq_size);
-
- nesdev->ceq_index = PCI_FUNC(nesdev->pcidev->devfn);
- ceq = &nesadapter->ceq[nesdev->ceq_index];
- ceq->ceq_vbase = vmem;
- ceq->ceq_pbase = pmem;
- ceq->ceq_size = NES_CCEQ_SIZE;
- ceq->ceq_head = 0;
-
- vmem += max(((u32)sizeof(struct nes_hw_ceqe) * ceq->ceq_size), (u32)256);
- pmem += max(((u32)sizeof(struct nes_hw_ceqe) * ceq->ceq_size), (u32)256);
-
- nesdev->nic_ceq_index = PCI_FUNC(nesdev->pcidev->devfn) + 8;
- nic_ceq = &nesadapter->ceq[nesdev->nic_ceq_index];
- nic_ceq->ceq_vbase = vmem;
- nic_ceq->ceq_pbase = pmem;
- nic_ceq->ceq_size = NES_NIC_CEQ_SIZE;
- nic_ceq->ceq_head = 0;
-
- vmem += max(((u32)sizeof(struct nes_hw_ceqe) * nic_ceq->ceq_size), (u32)256);
- pmem += max(((u32)sizeof(struct nes_hw_ceqe) * nic_ceq->ceq_size), (u32)256);
-
- aeq = &nesadapter->aeq[PCI_FUNC(nesdev->pcidev->devfn)];
- aeq->aeq_vbase = vmem;
- aeq->aeq_pbase = pmem;
- aeq->aeq_size = nesadapter->max_qp;
- aeq->aeq_head = 0;
-
- /* Setup QP Context */
- vmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size);
- pmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size);
-
- cqp_qp_context = vmem;
- cqp_qp_context->context_words[0] =
- cpu_to_le32((PCI_FUNC(nesdev->pcidev->devfn) << 12) + (2 << 10));
- cqp_qp_context->context_words[1] = 0;
- cqp_qp_context->context_words[2] = cpu_to_le32((u32)nesdev->cqp.sq_pbase);
- cqp_qp_context->context_words[3] = cpu_to_le32(((u64)nesdev->cqp.sq_pbase) >> 32);
-
-
- /* Write the address to Create CQP */
- if ((sizeof(dma_addr_t) > 4)) {
- nes_write_indexed(nesdev,
- NES_IDX_CREATE_CQP_HIGH + (PCI_FUNC(nesdev->pcidev->devfn) * 8),
- ((u64)pmem) >> 32);
- } else {
- nes_write_indexed(nesdev,
- NES_IDX_CREATE_CQP_HIGH + (PCI_FUNC(nesdev->pcidev->devfn) * 8), 0);
- }
- nes_write_indexed(nesdev,
- NES_IDX_CREATE_CQP_LOW + (PCI_FUNC(nesdev->pcidev->devfn) * 8),
- (u32)pmem);
-
- INIT_LIST_HEAD(&nesdev->cqp_avail_reqs);
- INIT_LIST_HEAD(&nesdev->cqp_pending_reqs);
-
- for (count = 0; count < 2*NES_CQP_SQ_SIZE; count++) {
- init_waitqueue_head(&nesdev->nes_cqp_requests[count].waitq);
- list_add_tail(&nesdev->nes_cqp_requests[count].list, &nesdev->cqp_avail_reqs);
- }
-
- /* Write Create CCQ WQE */
- cqp_head = nesdev->cqp.sq_head++;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- (NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
- NES_CQP_CQ_CHK_OVERFLOW | ((u32)nesdev->ccq.cq_size << 16)));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
- (nesdev->ccq.cq_number |
- ((u32)nesdev->ceq_index << 16)));
- u64temp = (u64)nesdev->ccq.cq_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0;
- u64temp = (unsigned long)&nesdev->ccq;
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] =
- cpu_to_le32((u32)(u64temp >> 1));
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
- cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
-
- /* Write Create CEQ WQE */
- cqp_head = nesdev->cqp.sq_head++;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- (NES_CQP_CREATE_CEQ + ((u32)nesdev->ceq_index << 8)));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX, ceq->ceq_size);
- u64temp = (u64)ceq->ceq_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
-
- /* Write Create AEQ WQE */
- cqp_head = nesdev->cqp.sq_head++;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- (NES_CQP_CREATE_AEQ + ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8)));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_AEQ_WQE_ELEMENT_COUNT_IDX, aeq->aeq_size);
- u64temp = (u64)aeq->aeq_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
-
- /* Write Create NIC CEQ WQE */
- cqp_head = nesdev->cqp.sq_head++;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- (NES_CQP_CREATE_CEQ + ((u32)nesdev->nic_ceq_index << 8)));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX, nic_ceq->ceq_size);
- u64temp = (u64)nic_ceq->ceq_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
-
- /* Poll until CCQP done */
- count = 0;
- do {
- if (count++ > 1000) {
- printk(KERN_ERR PFX "Error creating CQP\n");
- pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size,
- nesdev->cqp_vbase, nesdev->cqp_pbase);
- return -1;
- }
- udelay(10);
- } while (!(nes_read_indexed(nesdev,
- NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn) * 8)) & (1 << 8)));
-
- nes_debug(NES_DBG_INIT, "CQP Status = 0x%08X\n", nes_read_indexed(nesdev,
- NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)));
-
- u32temp = 0x04800000;
- nes_write32(nesdev->regs+NES_WQE_ALLOC, u32temp | nesdev->cqp.qp_id);
-
- /* wait for the CCQ, CEQ, and AEQ to get created */
- count = 0;
- do {
- if (count++ > 1000) {
- printk(KERN_ERR PFX "Error creating CCQ, CEQ, and AEQ\n");
- pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size,
- nesdev->cqp_vbase, nesdev->cqp_pbase);
- return -1;
- }
- udelay(10);
- } while (((nes_read_indexed(nesdev,
- NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15<<8)) != (15<<8)));
-
- /* dump the QP status value */
- nes_debug(NES_DBG_INIT, "QP Status = 0x%08X\n", nes_read_indexed(nesdev,
- NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)));
-
- nesdev->cqp.sq_tail++;
-
- return 0;
-}
-
-
-/**
- * nes_destroy_cqp
- */
-int nes_destroy_cqp(struct nes_device *nesdev)
-{
- struct nes_hw_cqp_wqe *cqp_wqe;
- u32 count = 0;
- u32 cqp_head;
- unsigned long flags;
-
- do {
- if (count++ > 1000)
- break;
- udelay(10);
- } while (!(nesdev->cqp.sq_head == nesdev->cqp.sq_tail));
-
- /* Reset CCQ */
- nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_RESET |
- nesdev->ccq.cq_number);
-
- /* Disable device interrupts */
- nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff);
-
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
-
- /* Destroy the AEQ */
- cqp_head = nesdev->cqp.sq_head++;
- nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_AEQ |
- ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8));
- cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0;
-
- /* Destroy the NIC CEQ */
- cqp_head = nesdev->cqp.sq_head++;
- nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ |
- ((u32)nesdev->nic_ceq_index << 8));
-
- /* Destroy the CEQ */
- cqp_head = nesdev->cqp.sq_head++;
- nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ |
- (nesdev->ceq_index << 8));
-
- /* Destroy the CCQ */
- cqp_head = nesdev->cqp.sq_head++;
- nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CQ);
- cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->ccq.cq_number |
- ((u32)nesdev->ceq_index << 16));
-
- /* Destroy CQP */
- cqp_head = nesdev->cqp.sq_head++;
- nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_QP |
- NES_CQP_QP_TYPE_CQP);
- cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->cqp.qp_id);
-
- barrier();
- /* Ring doorbell (5 WQEs) */
- nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x05800000 | nesdev->cqp.qp_id);
-
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-
- /* wait for the CCQ, CEQ, and AEQ to get destroyed */
- count = 0;
- do {
- if (count++ > 1000) {
- printk(KERN_ERR PFX "Function%d: Error destroying CCQ, CEQ, and AEQ\n",
- PCI_FUNC(nesdev->pcidev->devfn));
- break;
- }
- udelay(10);
- } while (((nes_read_indexed(nesdev,
- NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15 << 8)) != 0));
-
- /* dump the QP status value */
- nes_debug(NES_DBG_SHUTDOWN, "Function%d: QP Status = 0x%08X\n",
- PCI_FUNC(nesdev->pcidev->devfn),
- nes_read_indexed(nesdev,
- NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)));
-
- kfree(nesdev->nes_cqp_requests);
-
- /* Free the control structures */
- pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase,
- nesdev->cqp.sq_pbase);
-
- return 0;
-}
-
-
-/**
- * nes_init_1g_phy
- */
-static int nes_init_1g_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index)
-{
- u32 counter = 0;
- u16 phy_data;
- int ret = 0;
-
- nes_read_1G_phy_reg(nesdev, 1, phy_index, &phy_data);
- nes_write_1G_phy_reg(nesdev, 23, phy_index, 0xb000);
-
- /* Reset the PHY */
- nes_write_1G_phy_reg(nesdev, 0, phy_index, 0x8000);
- udelay(100);
- counter = 0;
- do {
- nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
- if (counter++ > 100) {
- ret = -1;
- break;
- }
- } while (phy_data & 0x8000);
-
- /* Setting no phy loopback */
- phy_data &= 0xbfff;
- phy_data |= 0x1140;
- nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data);
- nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
- nes_read_1G_phy_reg(nesdev, 0x17, phy_index, &phy_data);
- nes_read_1G_phy_reg(nesdev, 0x1e, phy_index, &phy_data);
-
- /* Setting the interrupt mask */
- nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data);
- nes_write_1G_phy_reg(nesdev, 0x19, phy_index, 0xffee);
- nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data);
-
- /* turning on flow control */
- nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
- nes_write_1G_phy_reg(nesdev, 4, phy_index, (phy_data & ~(0x03E0)) | 0xc00);
- nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
-
- /* Clear Half duplex */
- nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data);
- nes_write_1G_phy_reg(nesdev, 9, phy_index, phy_data & ~(0x0100));
- nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data);
-
- nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
- nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data | 0x0300);
-
- return ret;
-}
-
-
-/**
- * nes_init_2025_phy
- */
-static int nes_init_2025_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index)
-{
- u32 temp_phy_data = 0;
- u32 temp_phy_data2 = 0;
- u32 counter = 0;
- u32 sds;
- u32 mac_index = nesdev->mac_index;
- int ret = 0;
- unsigned int first_attempt = 1;
-
- /* Check firmware heartbeat */
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- udelay(1500);
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
- temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-
- if (temp_phy_data != temp_phy_data2) {
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- if ((temp_phy_data & 0xff) > 0x20)
- return 0;
- printk(PFX "Reinitialize external PHY\n");
- }
-
- /* no heartbeat, configure the PHY */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0000);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
-
- switch (phy_type) {
- case NES_PHY_TYPE_ARGUS:
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0008);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0001);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
-
- /* setup LEDs */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009);
- break;
-
- case NES_PHY_TYPE_SFP_D:
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x0004);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0038);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
-
- /* setup LEDs */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009);
- break;
-
- case NES_PHY_TYPE_KR:
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0010);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0080);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
-
- /* setup LEDs */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x000B);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x0003);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0004);
-
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0022, 0x406D);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0023, 0x0020);
- break;
- }
-
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0028, 0xA528);
-
- /* Bring PHY out of reset */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0002);
-
- /* Check for heartbeat */
- counter = 0;
- mdelay(690);
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- do {
- if (counter++ > 150) {
- printk(PFX "No PHY heartbeat\n");
- break;
- }
- mdelay(1);
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
- temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- } while (temp_phy_data2 == temp_phy_data);
-
- /* wait for tracking */
- counter = 0;
- do {
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- if (counter++ > 300) {
- if (((temp_phy_data & 0xff) == 0x0) && first_attempt) {
- first_attempt = 0;
- counter = 0;
- /* reset AMCC PHY and try again */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040);
- continue;
- } else {
- ret = 1;
- break;
- }
- }
- mdelay(10);
- } while ((temp_phy_data & 0xff) < 0x30);
-
- /* setup signal integrity */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00D, 0x00FE);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00E, 0x0032);
- if (phy_type == NES_PHY_TYPE_KR) {
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x000C);
- } else {
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x0002);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc314, 0x0063);
- }
-
- /* reset serdes */
- sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200);
- sds |= 0x1;
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds);
- sds &= 0xfffffffe;
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds);
-
- counter = 0;
- while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040)
- && (counter++ < 5000))
- ;
-
- return ret;
-}
-
-
-/**
- * nes_init_phy
- */
-int nes_init_phy(struct nes_device *nesdev)
-{
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 mac_index = nesdev->mac_index;
- u32 tx_config = 0;
- unsigned long flags;
- u8 phy_type = nesadapter->phy_type[mac_index];
- u8 phy_index = nesadapter->phy_index[mac_index];
- int ret = 0;
-
- tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
- if (phy_type == NES_PHY_TYPE_1G) {
- /* setup 1G MDIO operation */
- tx_config &= 0xFFFFFFE3;
- tx_config |= 0x04;
- } else {
- /* setup 10G MDIO operation */
- tx_config &= 0xFFFFFFE3;
- tx_config |= 0x1D;
- }
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
-
- spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
-
- switch (phy_type) {
- case NES_PHY_TYPE_1G:
- ret = nes_init_1g_phy(nesdev, phy_type, phy_index);
- break;
- case NES_PHY_TYPE_ARGUS:
- case NES_PHY_TYPE_SFP_D:
- case NES_PHY_TYPE_KR:
- ret = nes_init_2025_phy(nesdev, phy_type, phy_index);
- break;
- }
-
- spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
-
- return ret;
-}
-
-
-/**
- * nes_replenish_nic_rq
- */
-static void nes_replenish_nic_rq(struct nes_vnic *nesvnic)
-{
- unsigned long flags;
- dma_addr_t bus_address;
- struct sk_buff *skb;
- struct nes_hw_nic_rq_wqe *nic_rqe;
- struct nes_hw_nic *nesnic;
- struct nes_device *nesdev;
- struct nes_rskb_cb *cb;
- u32 rx_wqes_posted = 0;
-
- nesnic = &nesvnic->nic;
- nesdev = nesvnic->nesdev;
- spin_lock_irqsave(&nesnic->rq_lock, flags);
- if (nesnic->replenishing_rq !=0) {
- if (((nesnic->rq_size-1) == atomic_read(&nesvnic->rx_skbs_needed)) &&
- (atomic_read(&nesvnic->rx_skb_timer_running) == 0)) {
- atomic_set(&nesvnic->rx_skb_timer_running, 1);
- spin_unlock_irqrestore(&nesnic->rq_lock, flags);
- nesvnic->rq_wqes_timer.expires = jiffies + (HZ/2); /* 1/2 second */
- add_timer(&nesvnic->rq_wqes_timer);
- } else
- spin_unlock_irqrestore(&nesnic->rq_lock, flags);
- return;
- }
- nesnic->replenishing_rq = 1;
- spin_unlock_irqrestore(&nesnic->rq_lock, flags);
- do {
- skb = dev_alloc_skb(nesvnic->max_frame_size);
- if (skb) {
- skb->dev = nesvnic->netdev;
-
- bus_address = pci_map_single(nesdev->pcidev,
- skb->data, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- cb->busaddr = bus_address;
- cb->maplen = nesvnic->max_frame_size;
-
- nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_head];
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] =
- cpu_to_le32(nesvnic->max_frame_size);
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] =
- cpu_to_le32((u32)bus_address);
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] =
- cpu_to_le32((u32)((u64)bus_address >> 32));
- nesnic->rx_skb[nesnic->rq_head] = skb;
- nesnic->rq_head++;
- nesnic->rq_head &= nesnic->rq_size - 1;
- atomic_dec(&nesvnic->rx_skbs_needed);
- barrier();
- if (++rx_wqes_posted == 255) {
- nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id);
- rx_wqes_posted = 0;
- }
- } else {
- spin_lock_irqsave(&nesnic->rq_lock, flags);
- if (((nesnic->rq_size-1) == atomic_read(&nesvnic->rx_skbs_needed)) &&
- (atomic_read(&nesvnic->rx_skb_timer_running) == 0)) {
- atomic_set(&nesvnic->rx_skb_timer_running, 1);
- spin_unlock_irqrestore(&nesnic->rq_lock, flags);
- nesvnic->rq_wqes_timer.expires = jiffies + (HZ/2); /* 1/2 second */
- add_timer(&nesvnic->rq_wqes_timer);
- } else
- spin_unlock_irqrestore(&nesnic->rq_lock, flags);
- break;
- }
- } while (atomic_read(&nesvnic->rx_skbs_needed));
- barrier();
- if (rx_wqes_posted)
- nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id);
- nesnic->replenishing_rq = 0;
-}
-
-
-/**
- * nes_rq_wqes_timeout
- */
-static void nes_rq_wqes_timeout(struct timer_list *t)
-{
- struct nes_vnic *nesvnic = from_timer(nesvnic, t, rq_wqes_timer);
- printk("%s: Timer fired.\n", __func__);
- atomic_set(&nesvnic->rx_skb_timer_running, 0);
- if (atomic_read(&nesvnic->rx_skbs_needed))
- nes_replenish_nic_rq(nesvnic);
-}
-
-
-/**
- * nes_init_nic_qp
- */
-int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
-{
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_hw_nic_sq_wqe *nic_sqe;
- struct nes_hw_nic_qp_context *nic_context;
- struct sk_buff *skb;
- struct nes_hw_nic_rq_wqe *nic_rqe;
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- unsigned long flags;
- void *vmem;
- dma_addr_t pmem;
- u64 u64temp;
- int ret;
- u32 cqp_head;
- u32 counter;
- u32 wqe_count;
- struct nes_rskb_cb *cb;
- u8 jumbomode=0;
-
- /* Allocate fragment, SQ, RQ, and CQ; Reuse CEQ based on the PCI function */
- nesvnic->nic_mem_size = 256 +
- (NES_NIC_WQ_SIZE * sizeof(struct nes_first_frag)) +
- (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe)) +
- (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe)) +
- (NES_NIC_WQ_SIZE * 2 * sizeof(struct nes_hw_nic_cqe)) +
- sizeof(struct nes_hw_nic_qp_context);
-
- nesvnic->nic_vbase = pci_zalloc_consistent(nesdev->pcidev,
- nesvnic->nic_mem_size,
- &nesvnic->nic_pbase);
- if (!nesvnic->nic_vbase) {
- nes_debug(NES_DBG_INIT, "Unable to allocate memory for NIC host descriptor rings\n");
- return -ENOMEM;
- }
- nes_debug(NES_DBG_INIT, "Allocated NIC QP structures at %p (phys = %016lX), size = %u.\n",
- nesvnic->nic_vbase, (unsigned long)nesvnic->nic_pbase, nesvnic->nic_mem_size);
-
- vmem = (void *)(((unsigned long)nesvnic->nic_vbase + (256 - 1)) &
- ~(unsigned long)(256 - 1));
- pmem = (dma_addr_t)(((unsigned long long)nesvnic->nic_pbase + (256 - 1)) &
- ~(unsigned long long)(256 - 1));
-
- /* Setup the first Fragment buffers */
- nesvnic->nic.first_frag_vbase = vmem;
-
- for (counter = 0; counter < NES_NIC_WQ_SIZE; counter++) {
- nesvnic->nic.frag_paddr[counter] = pmem;
- pmem += sizeof(struct nes_first_frag);
- }
-
- /* setup the SQ */
- vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_first_frag));
-
- nesvnic->nic.sq_vbase = (void *)vmem;
- nesvnic->nic.sq_pbase = pmem;
- nesvnic->nic.sq_head = 0;
- nesvnic->nic.sq_tail = 0;
- nesvnic->nic.sq_size = NES_NIC_WQ_SIZE;
- for (counter = 0; counter < NES_NIC_WQ_SIZE; counter++) {
- nic_sqe = &nesvnic->nic.sq_vbase[counter];
- nic_sqe->wqe_words[NES_NIC_SQ_WQE_MISC_IDX] =
- cpu_to_le32(NES_NIC_SQ_WQE_DISABLE_CHKSUM |
- NES_NIC_SQ_WQE_COMPLETION);
- nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX] =
- cpu_to_le32((u32)NES_FIRST_FRAG_SIZE << 16);
- nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX] =
- cpu_to_le32((u32)nesvnic->nic.frag_paddr[counter]);
- nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX] =
- cpu_to_le32((u32)((u64)nesvnic->nic.frag_paddr[counter] >> 32));
- }
-
- nesvnic->get_cqp_request = nes_get_cqp_request;
- nesvnic->post_cqp_request = nes_post_cqp_request;
- nesvnic->mcrq_mcast_filter = NULL;
-
- spin_lock_init(&nesvnic->nic.rq_lock);
-
- /* setup the RQ */
- vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe));
- pmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe));
-
-
- nesvnic->nic.rq_vbase = vmem;
- nesvnic->nic.rq_pbase = pmem;
- nesvnic->nic.rq_head = 0;
- nesvnic->nic.rq_tail = 0;
- nesvnic->nic.rq_size = NES_NIC_WQ_SIZE;
-
- /* setup the CQ */
- vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe));
- pmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe));
-
- if (nesdev->nesadapter->netdev_count > 2)
- nesvnic->mcrq_qp_id = nesvnic->nic_index + 32;
- else
- nesvnic->mcrq_qp_id = nesvnic->nic.qp_id + 4;
-
- nesvnic->nic_cq.cq_vbase = vmem;
- nesvnic->nic_cq.cq_pbase = pmem;
- nesvnic->nic_cq.cq_head = 0;
- nesvnic->nic_cq.cq_size = NES_NIC_WQ_SIZE * 2;
-
- nesvnic->nic_cq.ce_handler = nes_nic_napi_ce_handler;
-
- /* Send CreateCQ request to CQP */
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- cqp_head = nesdev->cqp.sq_head;
-
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
-
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(
- NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
- ((u32)nesvnic->nic_cq.cq_size << 16));
- cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(
- nesvnic->nic_cq.cq_number | ((u32)nesdev->nic_ceq_index << 16));
- u64temp = (u64)nesvnic->nic_cq.cq_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0;
- u64temp = (unsigned long)&nesvnic->nic_cq;
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp >> 1));
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
- cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
- if (++cqp_head >= nesdev->cqp.sq_size)
- cqp_head = 0;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
-
- /* Send CreateQP request to CQP */
- nic_context = (void *)(&nesvnic->nic_cq.cq_vbase[nesvnic->nic_cq.cq_size]);
- nic_context->context_words[NES_NIC_CTX_MISC_IDX] =
- cpu_to_le32((u32)NES_NIC_CTX_SIZE |
- ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 12));
- nes_debug(NES_DBG_INIT, "RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n",
- nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE),
- nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE));
- if (nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0) {
- nic_context->context_words[NES_NIC_CTX_MISC_IDX] |= cpu_to_le32(NES_NIC_BACK_STORE);
- }
-
- u64temp = (u64)nesvnic->nic.sq_pbase;
- nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
- nic_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
- u64temp = (u64)nesvnic->nic.rq_pbase;
- nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
- nic_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
-
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP |
- NES_CQP_QP_TYPE_NIC);
- cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesvnic->nic.qp_id);
- u64temp = (u64)nesvnic->nic_cq.cq_pbase +
- (nesvnic->nic_cq.cq_size * sizeof(struct nes_hw_nic_cqe));
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
-
- if (++cqp_head >= nesdev->cqp.sq_size)
- cqp_head = 0;
- nesdev->cqp.sq_head = cqp_head;
-
- barrier();
-
- /* Ring doorbell (2 WQEs) */
- nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
-
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- nes_debug(NES_DBG_INIT, "Waiting for create NIC QP%u to complete.\n",
- nesvnic->nic.qp_id);
-
- ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_INIT, "Create NIC QP%u completed, wait_event_timeout ret = %u.\n",
- nesvnic->nic.qp_id, ret);
- if (!ret) {
- nes_debug(NES_DBG_INIT, "NIC QP%u create timeout expired\n", nesvnic->nic.qp_id);
- pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase,
- nesvnic->nic_pbase);
- return -EIO;
- }
-
- /* Populate the RQ */
- for (counter = 0; counter < (NES_NIC_WQ_SIZE - 1); counter++) {
- skb = dev_alloc_skb(nesvnic->max_frame_size);
- if (!skb) {
- nes_debug(NES_DBG_INIT, "%s: out of memory for receive skb\n", netdev->name);
-
- nes_destroy_nic_qp(nesvnic);
- return -ENOMEM;
- }
-
- skb->dev = netdev;
-
- pmem = pci_map_single(nesdev->pcidev, skb->data,
- nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- cb->busaddr = pmem;
- cb->maplen = nesvnic->max_frame_size;
-
- nic_rqe = &nesvnic->nic.rq_vbase[counter];
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size);
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem);
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32));
- nesvnic->nic.rx_skb[counter] = skb;
- }
-
- wqe_count = NES_NIC_WQ_SIZE - 1;
- nesvnic->nic.rq_head = wqe_count;
- barrier();
- do {
- counter = min(wqe_count, ((u32)255));
- wqe_count -= counter;
- nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter << 24) | nesvnic->nic.qp_id);
- } while (wqe_count);
- timer_setup(&nesvnic->rq_wqes_timer, nes_rq_wqes_timeout, 0);
- nes_debug(NES_DBG_INIT, "NAPI support Enabled\n");
- if (nesdev->nesadapter->et_use_adaptive_rx_coalesce)
- {
- nes_nic_init_timer(nesdev);
- if (netdev->mtu > 1500)
- jumbomode = 1;
- nes_nic_init_timer_defaults(nesdev, jumbomode);
- }
- if ((nesdev->nesadapter->allow_unaligned_fpdus) &&
- (nes_init_mgt_qp(nesdev, netdev, nesvnic))) {
- nes_debug(NES_DBG_INIT, "%s: Out of memory for pau nic\n",
- netdev->name);
- nes_destroy_nic_qp(nesvnic);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-
-/**
- * nes_destroy_nic_qp
- */
-void nes_destroy_nic_qp(struct nes_vnic *nesvnic)
-{
- u64 u64temp;
- dma_addr_t bus_address;
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_hw_nic_sq_wqe *nic_sqe;
- __le16 *wqe_fragment_length;
- u16 wqe_fragment_index;
- u32 cqp_head;
- u32 wqm_cfg0;
- unsigned long flags;
- struct sk_buff *rx_skb;
- struct nes_rskb_cb *cb;
- int ret;
-
- if (nesdev->nesadapter->allow_unaligned_fpdus)
- nes_destroy_mgt(nesvnic);
-
- /* clear wqe stall before destroying NIC QP */
- wqm_cfg0 = nes_read_indexed(nesdev, NES_IDX_WQM_CONFIG0);
- nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0 & 0xFFFF7FFF);
-
- /* Free remaining NIC receive buffers */
- while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) {
- rx_skb = nesvnic->nic.rx_skb[nesvnic->nic.rq_tail];
- cb = (struct nes_rskb_cb *)&rx_skb->cb[0];
- pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen,
- PCI_DMA_FROMDEVICE);
-
- dev_kfree_skb(nesvnic->nic.rx_skb[nesvnic->nic.rq_tail++]);
- nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1);
- }
-
- /* Free remaining NIC transmit buffers */
- while (nesvnic->nic.sq_head != nesvnic->nic.sq_tail) {
- nic_sqe = &nesvnic->nic.sq_vbase[nesvnic->nic.sq_tail];
- wqe_fragment_index = 1;
- wqe_fragment_length = (__le16 *)
- &nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
- /* bump past the vlan tag */
- wqe_fragment_length++;
- if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) {
- u64temp = (u64)le32_to_cpu(
- nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+
- wqe_fragment_index*2]);
- u64temp += ((u64)le32_to_cpu(
- nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX
- + wqe_fragment_index*2]))<<32;
- bus_address = (dma_addr_t)u64temp;
- if (test_and_clear_bit(nesvnic->nic.sq_tail,
- nesvnic->nic.first_frag_overflow)) {
- pci_unmap_single(nesdev->pcidev,
- bus_address,
- le16_to_cpu(wqe_fragment_length[
- wqe_fragment_index++]),
- PCI_DMA_TODEVICE);
- }
- for (; wqe_fragment_index < 5; wqe_fragment_index++) {
- if (wqe_fragment_length[wqe_fragment_index]) {
- u64temp = le32_to_cpu(
- nic_sqe->wqe_words[
- NES_NIC_SQ_WQE_FRAG0_LOW_IDX+
- wqe_fragment_index*2]);
- u64temp += ((u64)le32_to_cpu(
- nic_sqe->wqe_words[
- NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+
- wqe_fragment_index*2]))<<32;
- bus_address = (dma_addr_t)u64temp;
- pci_unmap_page(nesdev->pcidev,
- bus_address,
- le16_to_cpu(
- wqe_fragment_length[
- wqe_fragment_index]),
- PCI_DMA_TODEVICE);
- } else
- break;
- }
- }
- if (nesvnic->nic.tx_skb[nesvnic->nic.sq_tail])
- dev_kfree_skb(
- nesvnic->nic.tx_skb[nesvnic->nic.sq_tail]);
-
- nesvnic->nic.sq_tail = (nesvnic->nic.sq_tail + 1)
- & (nesvnic->nic.sq_size - 1);
- }
-
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
-
- /* Destroy NIC QP */
- cqp_head = nesdev->cqp.sq_head;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
-
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- (NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
- nesvnic->nic.qp_id);
-
- if (++cqp_head >= nesdev->cqp.sq_size)
- cqp_head = 0;
-
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
-
- /* Destroy NIC CQ */
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- (NES_CQP_DESTROY_CQ | ((u32)nesvnic->nic_cq.cq_size << 16)));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
- (nesvnic->nic_cq.cq_number | ((u32)nesdev->nic_ceq_index << 16)));
-
- if (++cqp_head >= nesdev->cqp.sq_size)
- cqp_head = 0;
-
- nesdev->cqp.sq_head = cqp_head;
- barrier();
-
- /* Ring doorbell (2 WQEs) */
- nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
-
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u,"
- " cqp.sq_tail=%u, cqp.sq_size=%u\n",
- cqp_head, nesdev->cqp.sq_head,
- nesdev->cqp.sq_tail, nesdev->cqp.sq_size);
-
- ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
- NES_EVENT_TIMEOUT);
-
- nes_debug(NES_DBG_SHUTDOWN, "Destroy NIC QP returned, wait_event_timeout ret = %u, cqp_head=%u,"
- " cqp.sq_head=%u, cqp.sq_tail=%u\n",
- ret, cqp_head, nesdev->cqp.sq_head, nesdev->cqp.sq_tail);
- if (!ret) {
- nes_debug(NES_DBG_SHUTDOWN, "NIC QP%u destroy timeout expired\n",
- nesvnic->nic.qp_id);
- }
-
- pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase,
- nesvnic->nic_pbase);
-
- /* restore old wqm_cfg0 value */
- nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0);
-}
-
-/**
- * nes_napi_isr
- */
-int nes_napi_isr(struct nes_device *nesdev)
-{
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 int_stat;
-
- if (nesdev->napi_isr_ran) {
- /* interrupt status has already been read in ISR */
- int_stat = nesdev->int_stat;
- } else {
- int_stat = nes_read32(nesdev->regs + NES_INT_STAT);
- nesdev->int_stat = int_stat;
- nesdev->napi_isr_ran = 1;
- }
-
- int_stat &= nesdev->int_req;
- /* iff NIC, process here, else wait for DPC */
- if ((int_stat) && ((int_stat & 0x0000ff00) == int_stat)) {
- nesdev->napi_isr_ran = 0;
- nes_write32(nesdev->regs + NES_INT_STAT,
- (int_stat &
- ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 | NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3)));
-
- /* Process the CEQs */
- nes_process_ceq(nesdev, &nesdev->nesadapter->ceq[nesdev->nic_ceq_index]);
-
- if (unlikely((((nesadapter->et_rx_coalesce_usecs_irq) &&
- (!nesadapter->et_use_adaptive_rx_coalesce)) ||
- ((nesadapter->et_use_adaptive_rx_coalesce) &&
- (nesdev->deepcq_count > nesadapter->et_pkt_rate_low))))) {
- if ((nesdev->int_req & NES_INT_TIMER) == 0) {
- /* Enable Periodic timer interrupts */
- nesdev->int_req |= NES_INT_TIMER;
- /* ack any pending periodic timer interrupts so we don't get an immediate interrupt */
- /* TODO: need to also ack other unused periodic timer values, get from nesadapter */
- nes_write32(nesdev->regs+NES_TIMER_STAT,
- nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req));
- nes_write32(nesdev->regs+NES_INTF_INT_MASK,
- ~(nesdev->intf_int_req | NES_INTF_PERIODIC_TIMER));
- }
-
- if (unlikely(nesadapter->et_use_adaptive_rx_coalesce))
- {
- nes_nic_init_timer(nesdev);
- }
- /* Enable interrupts, except CEQs */
- nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
- } else {
- /* Enable interrupts, make sure timer is off */
- nesdev->int_req &= ~NES_INT_TIMER;
- nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
- nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
- }
- nesdev->deepcq_count = 0;
- return 1;
- } else {
- return 0;
- }
-}
-
-static void process_critical_error(struct nes_device *nesdev)
-{
- u32 debug_error;
- u32 nes_idx_debug_error_masks0 = 0;
- u16 error_module = 0;
-
- debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS);
- printk(KERN_ERR PFX "Critical Error reported by device!!! 0x%02X\n",
- (u16)debug_error);
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS,
- 0x01010000 | (debug_error & 0x0000ffff));
- if (crit_err_count++ > 10)
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17);
- error_module = (u16) (debug_error & 0x1F00) >> 8;
- if (++nesdev->nesadapter->crit_error_count[error_module-1] >=
- nes_max_critical_error_count) {
- printk(KERN_ERR PFX "Masking off critical error for module "
- "0x%02X\n", (u16)error_module);
- nes_idx_debug_error_masks0 = nes_read_indexed(nesdev,
- NES_IDX_DEBUG_ERROR_MASKS0);
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0,
- nes_idx_debug_error_masks0 | (1 << error_module));
- }
-}
-/**
- * nes_dpc
- */
-void nes_dpc(unsigned long param)
-{
- struct nes_device *nesdev = (struct nes_device *)param;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 counter;
- u32 loop_counter = 0;
- u32 int_status_bit;
- u32 int_stat;
- u32 timer_stat;
- u32 temp_int_stat;
- u32 intf_int_stat;
- u32 processed_intf_int = 0;
- u16 processed_timer_int = 0;
- u16 completion_ints = 0;
- u16 timer_ints = 0;
-
- /* nes_debug(NES_DBG_ISR, "\n"); */
-
- do {
- timer_stat = 0;
- if (nesdev->napi_isr_ran) {
- nesdev->napi_isr_ran = 0;
- int_stat = nesdev->int_stat;
- } else
- int_stat = nes_read32(nesdev->regs+NES_INT_STAT);
- if (processed_intf_int != 0)
- int_stat &= nesdev->int_req & ~NES_INT_INTF;
- else
- int_stat &= nesdev->int_req;
- if (processed_timer_int == 0) {
- processed_timer_int = 1;
- if (int_stat & NES_INT_TIMER) {
- timer_stat = nes_read32(nesdev->regs + NES_TIMER_STAT);
- if ((timer_stat & nesdev->timer_int_req) == 0) {
- int_stat &= ~NES_INT_TIMER;
- }
- }
- } else {
- int_stat &= ~NES_INT_TIMER;
- }
-
- if (int_stat) {
- if (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0|
- NES_INT_MAC1|NES_INT_MAC2 | NES_INT_MAC3)) {
- /* Ack the interrupts */
- nes_write32(nesdev->regs+NES_INT_STAT,
- (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0|
- NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3)));
- }
-
- temp_int_stat = int_stat;
- for (counter = 0, int_status_bit = 1; counter < 16; counter++) {
- if (int_stat & int_status_bit) {
- nes_process_ceq(nesdev, &nesadapter->ceq[counter]);
- temp_int_stat &= ~int_status_bit;
- completion_ints = 1;
- }
- if (!(temp_int_stat & 0x0000ffff))
- break;
- int_status_bit <<= 1;
- }
-
- /* Process the AEQ for this pci function */
- int_status_bit = 1 << (16 + PCI_FUNC(nesdev->pcidev->devfn));
- if (int_stat & int_status_bit) {
- nes_process_aeq(nesdev, &nesadapter->aeq[PCI_FUNC(nesdev->pcidev->devfn)]);
- }
-
- /* Process the MAC interrupt for this pci function */
- int_status_bit = 1 << (24 + nesdev->mac_index);
- if (int_stat & int_status_bit) {
- nes_process_mac_intr(nesdev, nesdev->mac_index);
- }
-
- if (int_stat & NES_INT_TIMER) {
- if (timer_stat & nesdev->timer_int_req) {
- nes_write32(nesdev->regs + NES_TIMER_STAT,
- (timer_stat & nesdev->timer_int_req) |
- ~(nesdev->nesadapter->timer_int_req));
- timer_ints = 1;
- }
- }
-
- if (int_stat & NES_INT_INTF) {
- processed_intf_int = 1;
- intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT);
- intf_int_stat &= nesdev->intf_int_req;
- if (NES_INTF_INT_CRITERR & intf_int_stat) {
- process_critical_error(nesdev);
- }
- if (NES_INTF_INT_PCIERR & intf_int_stat) {
- printk(KERN_ERR PFX "PCI Error reported by device!!!\n");
- BUG();
- }
- if (NES_INTF_INT_AEQ_OFLOW & intf_int_stat) {
- printk(KERN_ERR PFX "AEQ Overflow reported by device!!!\n");
- BUG();
- }
- nes_write32(nesdev->regs+NES_INTF_INT_STAT, intf_int_stat);
- }
-
- if (int_stat & NES_INT_TSW) {
- }
- }
- /* Don't use the interface interrupt bit stay in loop */
- int_stat &= ~NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 |
- NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3;
- } while ((int_stat != 0) && (loop_counter++ < MAX_DPC_ITERATIONS));
-
- if (timer_ints == 1) {
- if ((nesadapter->et_rx_coalesce_usecs_irq) || (nesadapter->et_use_adaptive_rx_coalesce)) {
- if (completion_ints == 0) {
- nesdev->timer_only_int_count++;
- if (nesdev->timer_only_int_count>=nesadapter->timer_int_limit) {
- nesdev->timer_only_int_count = 0;
- nesdev->int_req &= ~NES_INT_TIMER;
- nes_write32(nesdev->regs + NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
- nes_write32(nesdev->regs + NES_INT_MASK, ~nesdev->int_req);
- } else {
- nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
- }
- } else {
- if (unlikely(nesadapter->et_use_adaptive_rx_coalesce))
- {
- nes_nic_init_timer(nesdev);
- }
- nesdev->timer_only_int_count = 0;
- nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
- }
- } else {
- nesdev->timer_only_int_count = 0;
- nesdev->int_req &= ~NES_INT_TIMER;
- nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
- nes_write32(nesdev->regs+NES_TIMER_STAT,
- nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req));
- nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
- }
- } else {
- if ( (completion_ints == 1) &&
- (((nesadapter->et_rx_coalesce_usecs_irq) &&
- (!nesadapter->et_use_adaptive_rx_coalesce)) ||
- ((nesdev->deepcq_count > nesadapter->et_pkt_rate_low) &&
- (nesadapter->et_use_adaptive_rx_coalesce) )) ) {
- /* nes_debug(NES_DBG_ISR, "Enabling periodic timer interrupt.\n" ); */
- nesdev->timer_only_int_count = 0;
- nesdev->int_req |= NES_INT_TIMER;
- nes_write32(nesdev->regs+NES_TIMER_STAT,
- nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req));
- nes_write32(nesdev->regs+NES_INTF_INT_MASK,
- ~(nesdev->intf_int_req | NES_INTF_PERIODIC_TIMER));
- nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
- } else {
- nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
- }
- }
- nesdev->deepcq_count = 0;
-}
-
-
-/**
- * nes_process_ceq
- */
-static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq)
-{
- u64 u64temp;
- struct nes_hw_cq *cq;
- u32 head;
- u32 ceq_size;
-
- /* nes_debug(NES_DBG_CQ, "\n"); */
- head = ceq->ceq_head;
- ceq_size = ceq->ceq_size;
-
- do {
- if (le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]) &
- NES_CEQE_VALID) {
- u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]))) << 32) |
- ((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_LOW_IDX])));
- u64temp <<= 1;
- cq = *((struct nes_hw_cq **)&u64temp);
- /* nes_debug(NES_DBG_CQ, "pCQ = %p\n", cq); */
- barrier();
- ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX] = 0;
-
- /* call the event handler */
- cq->ce_handler(nesdev, cq);
-
- if (++head >= ceq_size)
- head = 0;
- } else {
- break;
- }
-
- } while (1);
-
- ceq->ceq_head = head;
-}
-
-
-/**
- * nes_process_aeq
- */
-static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq)
-{
- /* u64 u64temp; */
- u32 head;
- u32 aeq_size;
- u32 aeqe_misc;
- u32 aeqe_cq_id;
- struct nes_hw_aeqe volatile *aeqe;
-
- head = aeq->aeq_head;
- aeq_size = aeq->aeq_size;
-
- do {
- aeqe = &aeq->aeq_vbase[head];
- if ((le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]) & NES_AEQE_VALID) == 0)
- break;
- aeqe_misc = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
- aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]);
- if (aeqe_misc & (NES_AEQE_QP|NES_AEQE_CQ)) {
- if (aeqe_cq_id >= NES_FIRST_QPN) {
- /* dealing with an accelerated QP related AE */
- /*
- * u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX]))) << 32) |
- * ((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX])));
- */
- nes_process_iwarp_aeqe(nesdev, (struct nes_hw_aeqe *)aeqe);
- } else {
- /* TODO: dealing with a CQP related AE */
- nes_debug(NES_DBG_AEQ, "Processing CQP related AE, misc = 0x%04X\n",
- (u16)(aeqe_misc >> 16));
- }
- }
-
- aeqe->aeqe_words[NES_AEQE_MISC_IDX] = 0;
-
- if (++head >= aeq_size)
- head = 0;
-
- nes_write32(nesdev->regs + NES_AEQ_ALLOC, 1 << 16);
- }
- while (1);
- aeq->aeq_head = head;
-}
-
-static void nes_reset_link(struct nes_device *nesdev, u32 mac_index)
-{
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 reset_value;
- u32 i=0;
- u32 u32temp;
-
- if (nesadapter->hw_rev == NE020_REV) {
- return;
- }
- mh_detected++;
-
- reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
-
- if ((mac_index == 0) || ((mac_index == 1) && (nesadapter->OneG_Mode)))
- reset_value |= 0x0000001d;
- else
- reset_value |= 0x0000002d;
-
- if (4 <= (nesadapter->link_interrupt_count[mac_index] / ((u16)NES_MAX_LINK_INTERRUPTS))) {
- if ((!nesadapter->OneG_Mode) && (nesadapter->port_count == 2)) {
- nesadapter->link_interrupt_count[0] = 0;
- nesadapter->link_interrupt_count[1] = 0;
- u32temp = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
- if (0x00000040 & u32temp)
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088);
- else
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8);
-
- reset_value |= 0x0000003d;
- }
- nesadapter->link_interrupt_count[mac_index] = 0;
- }
-
- nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value);
-
- while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET)
- & 0x00000040) != 0x00000040) && (i++ < 5000));
-
- if (0x0000003d == (reset_value & 0x0000003d)) {
- u32 pcs_control_status0, pcs_control_status1;
-
- for (i = 0; i < 10; i++) {
- pcs_control_status0 = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0);
- pcs_control_status1 = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
- if (((0x0F000000 == (pcs_control_status0 & 0x0F000000))
- && (pcs_control_status0 & 0x00100000))
- || ((0x0F000000 == (pcs_control_status1 & 0x0F000000))
- && (pcs_control_status1 & 0x00100000)))
- continue;
- else
- break;
- }
- if (10 == i) {
- u32temp = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
- if (0x00000040 & u32temp)
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088);
- else
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8);
-
- nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value);
-
- while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET)
- & 0x00000040) != 0x00000040) && (i++ < 5000));
- }
- }
-}
-
-/**
- * nes_process_mac_intr
- */
-static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
-{
- unsigned long flags;
- u32 pcs_control_status;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_vnic *nesvnic;
- u32 mac_status;
- u32 mac_index = nesdev->mac_index;
- u32 u32temp;
- u16 phy_data;
- u16 temp_phy_data;
- u32 pcs_val = 0x0f0f0000;
- u32 pcs_mask = 0x0f1f0000;
- u32 cdr_ctrl;
-
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
- if (nesadapter->mac_sw_state[mac_number] != NES_MAC_SW_IDLE) {
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
- return;
- }
- nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_INTERRUPT;
-
- /* ack the MAC interrupt */
- mac_status = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (mac_index * 0x200));
- /* Clear the interrupt */
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (mac_index * 0x200), mac_status);
-
- nes_debug(NES_DBG_PHY, "MAC%u interrupt status = 0x%X.\n", mac_number, mac_status);
-
- if (mac_status & (NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT)) {
- nesdev->link_status_interrupts++;
- if (0 == (++nesadapter->link_interrupt_count[mac_index] % ((u16)NES_MAX_LINK_INTERRUPTS)))
- nes_reset_link(nesdev, mac_index);
-
- /* read the PHY interrupt status register */
- if ((nesadapter->OneG_Mode) &&
- (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
- do {
- nes_read_1G_phy_reg(nesdev, 0x1a,
- nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy%d data from register 0x1a = 0x%X.\n",
- nesadapter->phy_index[mac_index], phy_data);
- } while (phy_data&0x8000);
-
- temp_phy_data = 0;
- do {
- nes_read_1G_phy_reg(nesdev, 0x11,
- nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy%d data from register 0x11 = 0x%X.\n",
- nesadapter->phy_index[mac_index], phy_data);
- if (temp_phy_data == phy_data)
- break;
- temp_phy_data = phy_data;
- } while (1);
-
- nes_read_1G_phy_reg(nesdev, 0x1e,
- nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy%d data from register 0x1e = 0x%X.\n",
- nesadapter->phy_index[mac_index], phy_data);
-
- nes_read_1G_phy_reg(nesdev, 1,
- nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "1G phy%u data from register 1 = 0x%X\n",
- nesadapter->phy_index[mac_index], phy_data);
-
- if (temp_phy_data & 0x1000) {
- nes_debug(NES_DBG_PHY, "The Link is up according to the PHY\n");
- phy_data = 4;
- } else {
- nes_debug(NES_DBG_PHY, "The Link is down according to the PHY\n");
- }
- }
- nes_debug(NES_DBG_PHY, "Eth SERDES Common Status: 0=0x%08X, 1=0x%08X\n",
- nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0),
- nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0+0x200));
-
- if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_PUMA_1G) {
- switch (mac_index) {
- case 1:
- case 3:
- pcs_control_status = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
- break;
- default:
- pcs_control_status = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0);
- break;
- }
- } else {
- pcs_control_status = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200));
- pcs_control_status = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200));
- }
-
- nes_debug(NES_DBG_PHY, "PCS PHY Control/Status%u: 0x%08X\n",
- mac_index, pcs_control_status);
- if ((nesadapter->OneG_Mode) &&
- (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
- u32temp = 0x01010000;
- if (nesadapter->port_count > 2) {
- u32temp |= 0x02020000;
- }
- if ((pcs_control_status & u32temp)!= u32temp) {
- phy_data = 0;
- nes_debug(NES_DBG_PHY, "PCS says the link is down\n");
- }
- } else {
- switch (nesadapter->phy_type[mac_index]) {
- case NES_PHY_TYPE_ARGUS:
- case NES_PHY_TYPE_SFP_D:
- case NES_PHY_TYPE_KR:
- /* clear the alarms */
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc002);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc005);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc006);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9004);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9005);
- /* check link status */
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021);
- nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021);
- phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-
- phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0;
-
- nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
- __func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
- break;
-
- case NES_PHY_TYPE_PUMA_1G:
- if (mac_index < 2)
- pcs_val = pcs_mask = 0x01010000;
- else
- pcs_val = pcs_mask = 0x02020000;
- /* fall through */
- default:
- phy_data = (pcs_val == (pcs_control_status & pcs_mask)) ? 0x4 : 0x0;
- break;
- }
- }
-
- if (phy_data & 0x0004) {
- if (wide_ppm_offset &&
- (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_CX4) &&
- (nesadapter->hw_rev != NE020_REV)) {
- cdr_ctrl = nes_read_indexed(nesdev,
- NES_IDX_ETH_SERDES_CDR_CONTROL0 +
- mac_index * 0x200);
- nes_write_indexed(nesdev,
- NES_IDX_ETH_SERDES_CDR_CONTROL0 +
- mac_index * 0x200,
- cdr_ctrl | 0x000F0000);
- }
- nesadapter->mac_link_down[mac_index] = 0;
- list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
- nes_debug(NES_DBG_PHY, "The Link is UP!!. linkup was %d\n",
- nesvnic->linkup);
- if (nesvnic->linkup == 0) {
- printk(PFX "The Link is now up for port %s, netdev %p.\n",
- nesvnic->netdev->name, nesvnic->netdev);
- if (netif_queue_stopped(nesvnic->netdev))
- netif_start_queue(nesvnic->netdev);
- nesvnic->linkup = 1;
- netif_carrier_on(nesvnic->netdev);
-
- spin_lock(&nesvnic->port_ibevent_lock);
- if (nesvnic->of_device_registered) {
- if (nesdev->iw_status == 0) {
- nesdev->iw_status = 1;
- nes_port_ibevent(nesvnic);
- }
- }
- spin_unlock(&nesvnic->port_ibevent_lock);
- }
- }
- } else {
- if (wide_ppm_offset &&
- (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_CX4) &&
- (nesadapter->hw_rev != NE020_REV)) {
- cdr_ctrl = nes_read_indexed(nesdev,
- NES_IDX_ETH_SERDES_CDR_CONTROL0 +
- mac_index * 0x200);
- nes_write_indexed(nesdev,
- NES_IDX_ETH_SERDES_CDR_CONTROL0 +
- mac_index * 0x200,
- cdr_ctrl & 0xFFF0FFFF);
- }
- nesadapter->mac_link_down[mac_index] = 1;
- list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
- nes_debug(NES_DBG_PHY, "The Link is Down!!. linkup was %d\n",
- nesvnic->linkup);
- if (nesvnic->linkup == 1) {
- printk(PFX "The Link is now down for port %s, netdev %p.\n",
- nesvnic->netdev->name, nesvnic->netdev);
- if (!(netif_queue_stopped(nesvnic->netdev)))
- netif_stop_queue(nesvnic->netdev);
- nesvnic->linkup = 0;
- netif_carrier_off(nesvnic->netdev);
-
- spin_lock(&nesvnic->port_ibevent_lock);
- if (nesvnic->of_device_registered) {
- if (nesdev->iw_status == 1) {
- nesdev->iw_status = 0;
- nes_port_ibevent(nesvnic);
- }
- }
- spin_unlock(&nesvnic->port_ibevent_lock);
- }
- }
- }
- if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_SFP_D) {
- nesdev->link_recheck = 1;
- mod_delayed_work(system_wq, &nesdev->work,
- NES_LINK_RECHECK_DELAY);
- }
- }
-
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
-
- nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_IDLE;
-}
-
-void nes_recheck_link_status(struct work_struct *work)
-{
- unsigned long flags;
- struct nes_device *nesdev = container_of(work, struct nes_device, work.work);
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_vnic *nesvnic;
- u32 mac_index = nesdev->mac_index;
- u16 phy_data;
- u16 temp_phy_data;
-
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
-
- /* check link status */
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021);
- nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021);
- phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-
- phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0;
-
- nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
- __func__, phy_data,
- nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
-
- if (phy_data & 0x0004) {
- nesadapter->mac_link_down[mac_index] = 0;
- list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
- if (nesvnic->linkup == 0) {
- printk(PFX "The Link is now up for port %s, netdev %p.\n",
- nesvnic->netdev->name, nesvnic->netdev);
- if (netif_queue_stopped(nesvnic->netdev))
- netif_start_queue(nesvnic->netdev);
- nesvnic->linkup = 1;
- netif_carrier_on(nesvnic->netdev);
-
- spin_lock(&nesvnic->port_ibevent_lock);
- if (nesvnic->of_device_registered) {
- if (nesdev->iw_status == 0) {
- nesdev->iw_status = 1;
- nes_port_ibevent(nesvnic);
- }
- }
- spin_unlock(&nesvnic->port_ibevent_lock);
- }
- }
-
- } else {
- nesadapter->mac_link_down[mac_index] = 1;
- list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
- if (nesvnic->linkup == 1) {
- printk(PFX "The Link is now down for port %s, netdev %p.\n",
- nesvnic->netdev->name, nesvnic->netdev);
- if (!(netif_queue_stopped(nesvnic->netdev)))
- netif_stop_queue(nesvnic->netdev);
- nesvnic->linkup = 0;
- netif_carrier_off(nesvnic->netdev);
-
- spin_lock(&nesvnic->port_ibevent_lock);
- if (nesvnic->of_device_registered) {
- if (nesdev->iw_status == 1) {
- nesdev->iw_status = 0;
- nes_port_ibevent(nesvnic);
- }
- }
- spin_unlock(&nesvnic->port_ibevent_lock);
- }
- }
- }
- if (nesdev->link_recheck++ < NES_LINK_RECHECK_MAX)
- schedule_delayed_work(&nesdev->work, NES_LINK_RECHECK_DELAY);
- else
- nesdev->link_recheck = 0;
-
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
-}
-
-
-static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
-{
- struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq);
-
- napi_schedule(&nesvnic->napi);
-}
-
-
-/* The MAX_RQES_TO_PROCESS defines how many max read requests to complete before
-* getting out of nic_ce_handler
-*/
-#define MAX_RQES_TO_PROCESS 384
-
-/**
- * nes_nic_ce_handler
- */
-void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
-{
- u64 u64temp;
- dma_addr_t bus_address;
- struct nes_hw_nic *nesnic;
- struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq);
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_hw_nic_rq_wqe *nic_rqe;
- struct nes_hw_nic_sq_wqe *nic_sqe;
- struct sk_buff *skb;
- struct sk_buff *rx_skb;
- struct nes_rskb_cb *cb;
- __le16 *wqe_fragment_length;
- u32 head;
- u32 cq_size;
- u32 rx_pkt_size;
- u32 cqe_count=0;
- u32 cqe_errv;
- u32 cqe_misc;
- u16 wqe_fragment_index = 1; /* first fragment (0) is used by copy buffer */
- u16 vlan_tag;
- u16 pkt_type;
- u16 rqes_processed = 0;
- u8 sq_cqes = 0;
-
- head = cq->cq_head;
- cq_size = cq->cq_size;
- cq->cqes_pending = 1;
- do {
- if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) &
- NES_NIC_CQE_VALID) {
- nesnic = &nesvnic->nic;
- cqe_misc = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]);
- if (cqe_misc & NES_NIC_CQE_SQ) {
- sq_cqes++;
- wqe_fragment_index = 1;
- nic_sqe = &nesnic->sq_vbase[nesnic->sq_tail];
- skb = nesnic->tx_skb[nesnic->sq_tail];
- wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
- /* bump past the vlan tag */
- wqe_fragment_length++;
- if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) {
- u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX +
- wqe_fragment_index * 2]);
- u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX +
- wqe_fragment_index * 2])) << 32;
- bus_address = (dma_addr_t)u64temp;
- if (test_and_clear_bit(nesnic->sq_tail, nesnic->first_frag_overflow)) {
- pci_unmap_single(nesdev->pcidev,
- bus_address,
- le16_to_cpu(wqe_fragment_length[wqe_fragment_index++]),
- PCI_DMA_TODEVICE);
- }
- for (; wqe_fragment_index < 5; wqe_fragment_index++) {
- if (wqe_fragment_length[wqe_fragment_index]) {
- u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX +
- wqe_fragment_index * 2]);
- u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX
- + wqe_fragment_index * 2])) <<32;
- bus_address = (dma_addr_t)u64temp;
- pci_unmap_page(nesdev->pcidev,
- bus_address,
- le16_to_cpu(wqe_fragment_length[wqe_fragment_index]),
- PCI_DMA_TODEVICE);
- } else
- break;
- }
- }
- if (skb)
- dev_kfree_skb_any(skb);
- nesnic->sq_tail++;
- nesnic->sq_tail &= nesnic->sq_size-1;
- if (sq_cqes > 128) {
- barrier();
- /* restart the queue if it had been stopped */
- if (netif_queue_stopped(nesvnic->netdev))
- netif_wake_queue(nesvnic->netdev);
- sq_cqes = 0;
- }
- } else {
- rqes_processed ++;
-
- cq->rx_cqes_completed++;
- cq->rx_pkts_indicated++;
- rx_pkt_size = cqe_misc & 0x0000ffff;
- nic_rqe = &nesnic->rq_vbase[nesnic->rq_tail];
- /* Get the skb */
- rx_skb = nesnic->rx_skb[nesnic->rq_tail];
- nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_tail];
- bus_address = (dma_addr_t)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
- bus_address += ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32;
- pci_unmap_single(nesdev->pcidev, bus_address,
- nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
- cb = (struct nes_rskb_cb *)&rx_skb->cb[0];
- cb->busaddr = 0;
- /* rx_skb->tail = rx_skb->data + rx_pkt_size; */
- /* rx_skb->len = rx_pkt_size; */
- rx_skb->len = 0; /* TODO: see if this is necessary */
- skb_put(rx_skb, rx_pkt_size);
- rx_skb->protocol = eth_type_trans(rx_skb, nesvnic->netdev);
- nesnic->rq_tail++;
- nesnic->rq_tail &= nesnic->rq_size - 1;
-
- atomic_inc(&nesvnic->rx_skbs_needed);
- if (atomic_read(&nesvnic->rx_skbs_needed) > (nesvnic->nic.rq_size>>1)) {
- nes_write32(nesdev->regs+NES_CQE_ALLOC,
- cq->cq_number | (cqe_count << 16));
- /* nesadapter->tune_timer.cq_count += cqe_count; */
- nesdev->currcq_count += cqe_count;
- cqe_count = 0;
- nes_replenish_nic_rq(nesvnic);
- }
- pkt_type = (u16)(le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX]));
- cqe_errv = (cqe_misc & NES_NIC_CQE_ERRV_MASK) >> NES_NIC_CQE_ERRV_SHIFT;
- rx_skb->ip_summed = CHECKSUM_NONE;
-
- if ((NES_PKT_TYPE_TCPV4_BITS == (pkt_type & NES_PKT_TYPE_TCPV4_MASK)) ||
- (NES_PKT_TYPE_UDPV4_BITS == (pkt_type & NES_PKT_TYPE_UDPV4_MASK))) {
- if ((cqe_errv &
- (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR |
- NES_NIC_ERRV_BITS_IPH_ERR | NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) {
- if (nesvnic->netdev->features & NETIF_F_RXCSUM)
- rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
- } else
- nes_debug(NES_DBG_CQ, "%s: unsuccessfully checksummed TCP or UDP packet."
- " errv = 0x%X, pkt_type = 0x%X.\n",
- nesvnic->netdev->name, cqe_errv, pkt_type);
-
- } else if ((pkt_type & NES_PKT_TYPE_IPV4_MASK) == NES_PKT_TYPE_IPV4_BITS) {
- if ((cqe_errv &
- (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_IPH_ERR |
- NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) {
- if (nesvnic->netdev->features & NETIF_F_RXCSUM) {
- rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
- /* nes_debug(NES_DBG_CQ, "%s: Reporting successfully checksummed IPv4 packet.\n",
- nesvnic->netdev->name); */
- }
- } else
- nes_debug(NES_DBG_CQ, "%s: unsuccessfully checksummed TCP or UDP packet."
- " errv = 0x%X, pkt_type = 0x%X.\n",
- nesvnic->netdev->name, cqe_errv, pkt_type);
- }
- /* nes_debug(NES_DBG_CQ, "pkt_type=%x, APBVT_MASK=%x\n",
- pkt_type, (pkt_type & NES_PKT_TYPE_APBVT_MASK)); */
-
- if ((pkt_type & NES_PKT_TYPE_APBVT_MASK) == NES_PKT_TYPE_APBVT_BITS) {
- if (nes_cm_recv(rx_skb, nesvnic->netdev))
- rx_skb = NULL;
- }
- if (rx_skb == NULL)
- goto skip_rx_indicate0;
-
-
- if (cqe_misc & NES_NIC_CQE_TAG_VALID) {
- vlan_tag = (u16)(le32_to_cpu(
- cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX])
- >> 16);
- nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n",
- nesvnic->netdev->name, vlan_tag);
-
- __vlan_hwaccel_put_tag(rx_skb, htons(ETH_P_8021Q), vlan_tag);
- }
- napi_gro_receive(&nesvnic->napi, rx_skb);
-
-skip_rx_indicate0:
- ;
- /* nesvnic->netstats.rx_packets++; */
- /* nesvnic->netstats.rx_bytes += rx_pkt_size; */
- }
-
- cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX] = 0;
- /* Accounting... */
- cqe_count++;
- if (++head >= cq_size)
- head = 0;
- if (cqe_count == 255) {
- /* Replenish Nic CQ */
- nes_write32(nesdev->regs+NES_CQE_ALLOC,
- cq->cq_number | (cqe_count << 16));
- /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */
- nesdev->currcq_count += cqe_count;
- cqe_count = 0;
- }
-
- if (cq->rx_cqes_completed >= nesvnic->budget)
- break;
- } else {
- cq->cqes_pending = 0;
- break;
- }
-
- } while (1);
-
- if (sq_cqes) {
- barrier();
- /* restart the queue if it had been stopped */
- if (netif_queue_stopped(nesvnic->netdev))
- netif_wake_queue(nesvnic->netdev);
- }
- cq->cq_head = head;
- /* nes_debug(NES_DBG_CQ, "CQ%u Processed = %u cqes, new head = %u.\n",
- cq->cq_number, cqe_count, cq->cq_head); */
- cq->cqe_allocs_pending = cqe_count;
- if (unlikely(nesadapter->et_use_adaptive_rx_coalesce))
- {
- /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */
- nesdev->currcq_count += cqe_count;
- nes_nic_tune_timer(nesdev);
- }
- if (atomic_read(&nesvnic->rx_skbs_needed))
- nes_replenish_nic_rq(nesvnic);
-}
-
-
-
-/**
- * nes_cqp_ce_handler
- */
-static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
-{
- u64 u64temp;
- unsigned long flags;
- struct nes_hw_cqp *cqp = NULL;
- struct nes_cqp_request *cqp_request;
- struct nes_hw_cqp_wqe *cqp_wqe;
- u32 head;
- u32 cq_size;
- u32 cqe_count=0;
- u32 error_code;
- u32 opcode;
- u32 ctx_index;
- /* u32 counter; */
-
- head = cq->cq_head;
- cq_size = cq->cq_size;
-
- do {
- /* process the CQE */
- /* nes_debug(NES_DBG_CQP, "head=%u cqe_words=%08X\n", head,
- le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])); */
-
- opcode = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]);
- if (opcode & NES_CQE_VALID) {
- cqp = &nesdev->cqp;
-
- error_code = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX]);
- if (error_code) {
- nes_debug(NES_DBG_CQP, "Bad Completion code for opcode 0x%02X from CQP,"
- " Major/Minor codes = 0x%04X:%04X.\n",
- le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])&0x3f,
- (u16)(error_code >> 16),
- (u16)error_code);
- }
-
- u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head].
- cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) |
- ((u64)(le32_to_cpu(cq->cq_vbase[head].
- cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX])));
-
- cqp_request = (struct nes_cqp_request *)(unsigned long)u64temp;
- if (cqp_request) {
- if (cqp_request->waiting) {
- /* nes_debug(NES_DBG_CQP, "%s: Waking up requestor\n"); */
- cqp_request->major_code = (u16)(error_code >> 16);
- cqp_request->minor_code = (u16)error_code;
- barrier();
- cqp_request->request_done = 1;
- wake_up(&cqp_request->waitq);
- nes_put_cqp_request(nesdev, cqp_request);
- } else {
- if (cqp_request->callback)
- cqp_request->cqp_callback(nesdev, cqp_request);
- nes_free_cqp_request(nesdev, cqp_request);
- }
- } else {
- wake_up(&nesdev->cqp.waitq);
- }
-
- cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
- nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (1 << 16));
- if (++cqp->sq_tail >= cqp->sq_size)
- cqp->sq_tail = 0;
-
- /* Accounting... */
- cqe_count++;
- if (++head >= cq_size)
- head = 0;
- } else {
- break;
- }
- } while (1);
- cq->cq_head = head;
-
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- while ((!list_empty(&nesdev->cqp_pending_reqs)) &&
- ((((nesdev->cqp.sq_tail+nesdev->cqp.sq_size)-nesdev->cqp.sq_head) &
- (nesdev->cqp.sq_size - 1)) != 1)) {
- cqp_request = list_entry(nesdev->cqp_pending_reqs.next,
- struct nes_cqp_request, list);
- list_del_init(&cqp_request->list);
- head = nesdev->cqp.sq_head++;
- nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
- cqp_wqe = &nesdev->cqp.sq_vbase[head];
- memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
- barrier();
-
- opcode = le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]);
- if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT)
- ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX;
- else
- ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX;
- cqp_wqe->wqe_words[ctx_index] =
- cpu_to_le32((u32)((unsigned long)cqp_request));
- cqp_wqe->wqe_words[ctx_index + 1] =
- cpu_to_le32((u32)(upper_32_bits((unsigned long)cqp_request)));
- nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) put on CQPs SQ wqe%u.\n",
- cqp_request, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, head);
- /* Ring doorbell (1 WQEs) */
- barrier();
- nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id);
- }
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-
- /* Arm the CCQ */
- nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
- cq->cq_number);
- nes_read32(nesdev->regs+NES_CQE_ALLOC);
-}
-
-static u8 *locate_mpa(u8 *pkt, u32 aeq_info)
-{
- if (aeq_info & NES_AEQE_Q2_DATA_ETHERNET) {
- /* skip over ethernet header */
- pkt += ETH_HLEN;
-
- /* Skip over IP and TCP headers */
- pkt += 4 * (pkt[0] & 0x0f);
- pkt += 4 * ((pkt[12] >> 4) & 0x0f);
- }
- return pkt;
-}
-
-/* Determine if incoming error pkt is rdma layer */
-static u32 iwarp_opcode(struct nes_qp *nesqp, u32 aeq_info)
-{
- u8 *pkt;
- u16 *mpa;
- u32 opcode = 0xffffffff;
-
- if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
- pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
- mpa = (u16 *)locate_mpa(pkt, aeq_info);
- opcode = be16_to_cpu(mpa[1]) & 0xf;
- }
-
- return opcode;
-}
-
-/* Build iWARP terminate header */
-static int nes_bld_terminate_hdr(struct nes_qp *nesqp, u16 async_event_id, u32 aeq_info)
-{
- u8 *pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
- u16 ddp_seg_len;
- int copy_len = 0;
- u8 is_tagged = 0;
- u8 flush_code = 0;
- struct nes_terminate_hdr *termhdr;
-
- termhdr = (struct nes_terminate_hdr *)nesqp->hwqp.q2_vbase;
- memset(termhdr, 0, 64);
-
- if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
-
- /* Use data from offending packet to fill in ddp & rdma hdrs */
- pkt = locate_mpa(pkt, aeq_info);
- ddp_seg_len = be16_to_cpu(*(u16 *)pkt);
- if (ddp_seg_len) {
- copy_len = 2;
- termhdr->hdrct = DDP_LEN_FLAG;
- if (pkt[2] & 0x80) {
- is_tagged = 1;
- if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) {
- copy_len += TERM_DDP_LEN_TAGGED;
- termhdr->hdrct |= DDP_HDR_FLAG;
- }
- } else {
- if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) {
- copy_len += TERM_DDP_LEN_UNTAGGED;
- termhdr->hdrct |= DDP_HDR_FLAG;
- }
-
- if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN)) {
- if ((pkt[3] & RDMA_OPCODE_MASK) == RDMA_READ_REQ_OPCODE) {
- copy_len += TERM_RDMA_LEN;
- termhdr->hdrct |= RDMA_HDR_FLAG;
- }
- }
- }
- }
- }
-
- switch (async_event_id) {
- case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
- switch (iwarp_opcode(nesqp, aeq_info)) {
- case IWARP_OPCODE_WRITE:
- flush_code = IB_WC_LOC_PROT_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
- termhdr->error_code = DDP_TAGGED_INV_STAG;
- break;
- default:
- flush_code = IB_WC_REM_ACCESS_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
- termhdr->error_code = RDMAP_INV_STAG;
- }
- break;
- case NES_AEQE_AEID_AMP_INVALID_STAG:
- flush_code = IB_WC_REM_ACCESS_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
- termhdr->error_code = RDMAP_INV_STAG;
- break;
- case NES_AEQE_AEID_AMP_BAD_QP:
- flush_code = IB_WC_LOC_QP_OP_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
- termhdr->error_code = DDP_UNTAGGED_INV_QN;
- break;
- case NES_AEQE_AEID_AMP_BAD_STAG_KEY:
- case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
- switch (iwarp_opcode(nesqp, aeq_info)) {
- case IWARP_OPCODE_SEND_INV:
- case IWARP_OPCODE_SEND_SE_INV:
- flush_code = IB_WC_REM_OP_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
- termhdr->error_code = RDMAP_CANT_INV_STAG;
- break;
- default:
- flush_code = IB_WC_REM_ACCESS_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
- termhdr->error_code = RDMAP_INV_STAG;
- }
- break;
- case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
- if (aeq_info & (NES_AEQE_Q2_DATA_ETHERNET | NES_AEQE_Q2_DATA_MPA)) {
- flush_code = IB_WC_LOC_PROT_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
- termhdr->error_code = DDP_TAGGED_BOUNDS;
- } else {
- flush_code = IB_WC_REM_ACCESS_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
- termhdr->error_code = RDMAP_INV_BOUNDS;
- }
- break;
- case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION:
- case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
- case NES_AEQE_AEID_PRIV_OPERATION_DENIED:
- flush_code = IB_WC_REM_ACCESS_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
- termhdr->error_code = RDMAP_ACCESS;
- break;
- case NES_AEQE_AEID_AMP_TO_WRAP:
- flush_code = IB_WC_REM_ACCESS_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
- termhdr->error_code = RDMAP_TO_WRAP;
- break;
- case NES_AEQE_AEID_AMP_BAD_PD:
- switch (iwarp_opcode(nesqp, aeq_info)) {
- case IWARP_OPCODE_WRITE:
- flush_code = IB_WC_LOC_PROT_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
- termhdr->error_code = DDP_TAGGED_UNASSOC_STAG;
- break;
- case IWARP_OPCODE_SEND_INV:
- case IWARP_OPCODE_SEND_SE_INV:
- flush_code = IB_WC_REM_ACCESS_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
- termhdr->error_code = RDMAP_CANT_INV_STAG;
- break;
- default:
- flush_code = IB_WC_REM_ACCESS_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
- termhdr->error_code = RDMAP_UNASSOC_STAG;
- }
- break;
- case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
- flush_code = IB_WC_LOC_LEN_ERR;
- termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP;
- termhdr->error_code = MPA_MARKER;
- break;
- case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
- flush_code = IB_WC_GENERAL_ERR;
- termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP;
- termhdr->error_code = MPA_CRC;
- break;
- case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE:
- case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
- flush_code = IB_WC_LOC_LEN_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC;
- termhdr->error_code = DDP_CATASTROPHIC_LOCAL;
- break;
- case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC:
- case NES_AEQE_AEID_DDP_NO_L_BIT:
- flush_code = IB_WC_FATAL_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC;
- termhdr->error_code = DDP_CATASTROPHIC_LOCAL;
- break;
- case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN:
- case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
- flush_code = IB_WC_GENERAL_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
- termhdr->error_code = DDP_UNTAGGED_INV_MSN_RANGE;
- break;
- case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
- flush_code = IB_WC_LOC_LEN_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
- termhdr->error_code = DDP_UNTAGGED_INV_TOO_LONG;
- break;
- case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION:
- flush_code = IB_WC_GENERAL_ERR;
- if (is_tagged) {
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
- termhdr->error_code = DDP_TAGGED_INV_DDP_VER;
- } else {
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
- termhdr->error_code = DDP_UNTAGGED_INV_DDP_VER;
- }
- break;
- case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
- flush_code = IB_WC_GENERAL_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
- termhdr->error_code = DDP_UNTAGGED_INV_MO;
- break;
- case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
- flush_code = IB_WC_REM_OP_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
- termhdr->error_code = DDP_UNTAGGED_INV_MSN_NO_BUF;
- break;
- case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
- flush_code = IB_WC_GENERAL_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
- termhdr->error_code = DDP_UNTAGGED_INV_QN;
- break;
- case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION:
- flush_code = IB_WC_GENERAL_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
- termhdr->error_code = RDMAP_INV_RDMAP_VER;
- break;
- case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE:
- flush_code = IB_WC_LOC_QP_OP_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
- termhdr->error_code = RDMAP_UNEXPECTED_OP;
- break;
- default:
- flush_code = IB_WC_FATAL_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
- termhdr->error_code = RDMAP_UNSPECIFIED;
- break;
- }
-
- if (copy_len)
- memcpy(termhdr + 1, pkt, copy_len);
-
- if ((flush_code) && ((NES_AEQE_INBOUND_RDMA & aeq_info) == 0)) {
- if (aeq_info & NES_AEQE_SQ)
- nesqp->term_sq_flush_code = flush_code;
- else
- nesqp->term_rq_flush_code = flush_code;
- }
-
- return sizeof(struct nes_terminate_hdr) + copy_len;
-}
-
-static void nes_terminate_connection(struct nes_device *nesdev, struct nes_qp *nesqp,
- struct nes_hw_aeqe *aeqe, enum ib_event_type eventtype)
-{
- u64 context;
- unsigned long flags;
- u32 aeq_info;
- u16 async_event_id;
- u8 tcp_state;
- u8 iwarp_state;
- u32 termlen = 0;
- u32 mod_qp_flags = NES_CQP_QP_IWARP_STATE_TERMINATE |
- NES_CQP_QP_TERM_DONT_SEND_FIN;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
-
- if (nesqp->term_flags & NES_TERM_SENT)
- return; /* Sanity check */
-
- aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
- tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
- iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
- async_event_id = (u16)aeq_info;
-
- context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
- aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
- if (!context) {
- WARN_ON(!context);
- return;
- }
-
- nesqp = (struct nes_qp *)(unsigned long)context;
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- nesqp->terminate_eventtype = eventtype;
- spin_unlock_irqrestore(&nesqp->lock, flags);
-
- if (nesadapter->send_term_ok)
- termlen = nes_bld_terminate_hdr(nesqp, async_event_id, aeq_info);
- else
- mod_qp_flags |= NES_CQP_QP_TERM_DONT_SEND_TERM_MSG;
-
- if (!nesdev->iw_status) {
- nesqp->term_flags = NES_TERM_DONE;
- nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_ERROR, 0, 0);
- nes_cm_disconn(nesqp);
- } else {
- nes_terminate_start_timer(nesqp);
- nesqp->term_flags |= NES_TERM_SENT;
- nes_hw_modify_qp(nesdev, nesqp, mod_qp_flags, termlen, 0);
- }
-}
-
-static void nes_terminate_send_fin(struct nes_device *nesdev,
- struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe)
-{
- u32 aeq_info;
- u16 async_event_id;
- u8 tcp_state;
- u8 iwarp_state;
- unsigned long flags;
-
- aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
- tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
- iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
- async_event_id = (u16)aeq_info;
-
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
-
- /* Send the fin only */
- nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_TERMINATE |
- NES_CQP_QP_TERM_DONT_SEND_TERM_MSG, 0, 0);
-}
-
-/* Cleanup after a terminate sent or received */
-static void nes_terminate_done(struct nes_qp *nesqp, int timeout_occurred)
-{
- u32 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
- unsigned long flags;
- struct nes_vnic *nesvnic = to_nesvnic(nesqp->ibqp.device);
- struct nes_device *nesdev = nesvnic->nesdev;
- u8 first_time = 0;
-
- spin_lock_irqsave(&nesqp->lock, flags);
- if (nesqp->hte_added) {
- nesqp->hte_added = 0;
- next_iwarp_state |= NES_CQP_QP_DEL_HTE;
- }
-
- first_time = (nesqp->term_flags & NES_TERM_DONE) == 0;
- nesqp->term_flags |= NES_TERM_DONE;
- spin_unlock_irqrestore(&nesqp->lock, flags);
-
- /* Make sure we go through this only once */
- if (first_time) {
- if (timeout_occurred == 0)
- del_timer(&nesqp->terminate_timer);
- else
- next_iwarp_state |= NES_CQP_QP_RESET;
-
- nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
- nes_cm_disconn(nesqp);
- }
-}
-
-static void nes_terminate_received(struct nes_device *nesdev,
- struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe)
-{
- u32 aeq_info;
- u8 *pkt;
- u32 *mpa;
- u8 ddp_ctl;
- u8 rdma_ctl;
- u16 aeq_id = 0;
-
- aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
- if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
- /* Terminate is not a performance path so the silicon */
- /* did not validate the frame - do it now */
- pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
- mpa = (u32 *)locate_mpa(pkt, aeq_info);
- ddp_ctl = (be32_to_cpu(mpa[0]) >> 8) & 0xff;
- rdma_ctl = be32_to_cpu(mpa[0]) & 0xff;
- if ((ddp_ctl & 0xc0) != 0x40)
- aeq_id = NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC;
- else if ((ddp_ctl & 0x03) != 1)
- aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION;
- else if (be32_to_cpu(mpa[2]) != 2)
- aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_QN;
- else if (be32_to_cpu(mpa[3]) != 1)
- aeq_id = NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN;
- else if (be32_to_cpu(mpa[4]) != 0)
- aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_MO;
- else if ((rdma_ctl & 0xc0) != 0x40)
- aeq_id = NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION;
-
- if (aeq_id) {
- /* Bad terminate recvd - send back a terminate */
- aeq_info = (aeq_info & 0xffff0000) | aeq_id;
- aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info);
- nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
- return;
- }
- }
-
- nesqp->term_flags |= NES_TERM_RCVD;
- nesqp->terminate_eventtype = IB_EVENT_QP_FATAL;
- nes_terminate_start_timer(nesqp);
- nes_terminate_send_fin(nesdev, nesqp, aeqe);
-}
-
-/* Timeout routine in case terminate fails to complete */
-void nes_terminate_timeout(struct timer_list *t)
-{
- struct nes_qp *nesqp = from_timer(nesqp, t, terminate_timer);
-
- nes_terminate_done(nesqp, 1);
-}
-
-/* Set a timer in case hw cannot complete the terminate sequence */
-static void nes_terminate_start_timer(struct nes_qp *nesqp)
-{
- mod_timer(&nesqp->terminate_timer, (jiffies + HZ));
-}
-
-/**
- * nes_process_iwarp_aeqe
- */
-static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
- struct nes_hw_aeqe *aeqe)
-{
- u64 context;
- unsigned long flags;
- struct nes_qp *nesqp;
- struct nes_hw_cq *hw_cq;
- struct nes_cq *nescq;
- int resource_allocated;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 aeq_info;
- u32 next_iwarp_state = 0;
- u32 aeqe_cq_id;
- u16 async_event_id;
- u8 tcp_state;
- u8 iwarp_state;
- struct ib_event ibevent;
-
- nes_debug(NES_DBG_AEQ, "\n");
- aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
- if ((NES_AEQE_INBOUND_RDMA & aeq_info) || (!(NES_AEQE_QP & aeq_info))) {
- context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
- context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
- } else {
- context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
- aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
- BUG_ON(!context);
- }
-
- /* context is nesqp unless async_event_id == CQ ERROR */
- nesqp = (struct nes_qp *)(unsigned long)context;
- async_event_id = (u16)aeq_info;
- tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
- iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
- nes_debug(NES_DBG_AEQ, "aeid = 0x%04X, qp-cq id = %d, aeqe = %p,"
- " Tcp state = %s, iWARP state = %s\n",
- async_event_id,
- le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe,
- nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]);
-
- aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]);
- if (aeq_info & NES_AEQE_QP) {
- if (!nes_is_resource_allocated(nesadapter,
- nesadapter->allocated_qps,
- aeqe_cq_id))
- return;
- }
-
- switch (async_event_id) {
- case NES_AEQE_AEID_LLP_FIN_RECEIVED:
- if (nesqp->term_flags)
- return; /* Ignore it, wait for close complete */
-
- if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
- if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) &&
- (nesqp->ibqp_state == IB_QPS_RTS)) {
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
- nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
- nes_cm_disconn(nesqp);
- }
- nesqp->cm_id->add_ref(nesqp->cm_id);
- schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp,
- NES_TIMER_TYPE_CLOSE, 1, 0);
- nes_debug(NES_DBG_AEQ, "QP%u Not decrementing QP refcount (%d),"
- " need ae to finish up, original_last_aeq = 0x%04X."
- " last_aeq = 0x%04X, scheduling timer. TCP state = %d\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
- async_event_id, nesqp->last_aeq, tcp_state);
- }
- break;
- case NES_AEQE_AEID_LLP_CLOSE_COMPLETE:
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_cm_disconn(nesqp);
- break;
-
- case NES_AEQE_AEID_RESET_SENT:
- tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- nesqp->hte_added = 0;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE;
- nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
- nes_cm_disconn(nesqp);
- break;
-
- case NES_AEQE_AEID_LLP_CONNECTION_RESET:
- if (atomic_read(&nesqp->close_timer_started))
- return;
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_cm_disconn(nesqp);
- break;
-
- case NES_AEQE_AEID_TERMINATE_SENT:
- nes_terminate_send_fin(nesdev, nesqp, aeqe);
- break;
-
- case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
- nes_terminate_received(nesdev, nesqp, aeqe);
- break;
-
- case NES_AEQE_AEID_AMP_BAD_STAG_KEY:
- case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
- case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
- case NES_AEQE_AEID_AMP_INVALID_STAG:
- case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION:
- case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
- case NES_AEQE_AEID_PRIV_OPERATION_DENIED:
- case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
- case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
- case NES_AEQE_AEID_AMP_TO_WRAP:
- printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_ACCESS_ERR\n",
- nesqp->hwqp.qp_id, async_event_id);
- nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR);
- break;
-
- case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE:
- case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
- case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
- case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
- if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) {
- aeq_info &= 0xffff0000;
- aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE;
- aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info);
- }
- /* fall through */
- case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE:
- case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES:
- case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
- case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
- case NES_AEQE_AEID_AMP_BAD_QP:
- case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
- case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC:
- case NES_AEQE_AEID_DDP_NO_L_BIT:
- case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN:
- case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
- case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION:
- case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION:
- case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE:
- case NES_AEQE_AEID_AMP_BAD_PD:
- case NES_AEQE_AEID_AMP_FASTREG_SHARED:
- case NES_AEQE_AEID_AMP_FASTREG_VALID_STAG:
- case NES_AEQE_AEID_AMP_FASTREG_MW_STAG:
- case NES_AEQE_AEID_AMP_FASTREG_INVALID_RIGHTS:
- case NES_AEQE_AEID_AMP_FASTREG_PBL_TABLE_OVERFLOW:
- case NES_AEQE_AEID_AMP_FASTREG_INVALID_LENGTH:
- case NES_AEQE_AEID_AMP_INVALIDATE_SHARED:
- case NES_AEQE_AEID_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS:
- case NES_AEQE_AEID_AMP_MWBIND_VALID_STAG:
- case NES_AEQE_AEID_AMP_MWBIND_OF_MR_STAG:
- case NES_AEQE_AEID_AMP_MWBIND_TO_ZERO_BASED_STAG:
- case NES_AEQE_AEID_AMP_MWBIND_TO_MW_STAG:
- case NES_AEQE_AEID_AMP_MWBIND_INVALID_RIGHTS:
- case NES_AEQE_AEID_AMP_MWBIND_INVALID_BOUNDS:
- case NES_AEQE_AEID_AMP_MWBIND_TO_INVALID_PARENT:
- case NES_AEQE_AEID_AMP_MWBIND_BIND_DISABLED:
- case NES_AEQE_AEID_BAD_CLOSE:
- case NES_AEQE_AEID_RDMA_READ_WHILE_ORD_ZERO:
- case NES_AEQE_AEID_STAG_ZERO_INVALID:
- case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST:
- case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
- printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_FATAL\n",
- nesqp->hwqp.qp_id, async_event_id);
- print_ip(nesqp->cm_node);
- if (!atomic_read(&nesqp->close_timer_started))
- nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
- break;
-
- case NES_AEQE_AEID_CQ_OPERATION_ERROR:
- context <<= 1;
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u, %p\n",
- le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), (void *)(unsigned long)context);
- resource_allocated = nes_is_resource_allocated(nesadapter, nesadapter->allocated_cqs,
- le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]));
- if (resource_allocated) {
- printk(KERN_ERR PFX "%s: Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u\n",
- __func__, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]));
- hw_cq = (struct nes_hw_cq *)(unsigned long)context;
- if (hw_cq) {
- nescq = container_of(hw_cq, struct nes_cq, hw_cq);
- if (nescq->ibcq.event_handler) {
- ibevent.device = nescq->ibcq.device;
- ibevent.event = IB_EVENT_CQ_ERR;
- ibevent.element.cq = &nescq->ibcq;
- nescq->ibcq.event_handler(&ibevent, nescq->ibcq.cq_context);
- }
- }
- }
- break;
-
- default:
- nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n",
- async_event_id);
- break;
- }
-
-}
-
-/**
- * nes_iwarp_ce_handler
- */
-void nes_iwarp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *hw_cq)
-{
- struct nes_cq *nescq = container_of(hw_cq, struct nes_cq, hw_cq);
-
- /* nes_debug(NES_DBG_CQ, "Processing completion event for iWARP CQ%u.\n",
- nescq->hw_cq.cq_number); */
- nes_write32(nesdev->regs+NES_CQ_ACK, nescq->hw_cq.cq_number);
-
- if (nescq->ibcq.comp_handler)
- nescq->ibcq.comp_handler(&nescq->ibcq, nescq->ibcq.cq_context);
-
- return;
-}
-
-
-/**
- * nes_manage_apbvt()
- */
-int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port,
- u32 nic_index, u32 add_port)
-{
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- int ret = 0;
- u16 major_code;
-
- /* Send manage APBVT request to CQP */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n");
- return -ENOMEM;
- }
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
-
- nes_debug(NES_DBG_QP, "%s APBV for local port=%u(0x%04x), nic_index=%u\n",
- (add_port == NES_MANAGE_APBVT_ADD) ? "ADD" : "DEL",
- accel_local_port, accel_local_port, nic_index);
-
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, (NES_CQP_MANAGE_APBVT |
- ((add_port == NES_MANAGE_APBVT_ADD) ? NES_CQP_APBVT_ADD : 0)));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
- ((nic_index << NES_CQP_APBVT_NIC_SHIFT) | accel_local_port));
-
- nes_debug(NES_DBG_QP, "Waiting for CQP completion for APBVT.\n");
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- if (add_port == NES_MANAGE_APBVT_ADD)
- ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_QP, "Completed, ret=%u, CQP Major:Minor codes = 0x%04X:0x%04X\n",
- ret, cqp_request->major_code, cqp_request->minor_code);
- major_code = cqp_request->major_code;
-
- nes_put_cqp_request(nesdev, cqp_request);
-
- if (!ret)
- return -ETIME;
- else if (major_code)
- return -EIO;
- else
- return 0;
-}
-
-
-/**
- * nes_manage_arp_cache
- */
-void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr,
- u32 ip_addr, u32 action)
-{
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev;
- struct nes_cqp_request *cqp_request;
- int arp_index;
-
- nesdev = nesvnic->nesdev;
- arp_index = nes_arp_table(nesdev, ip_addr, mac_addr, action);
- if (arp_index == -1) {
- return;
- }
-
- /* update the ARP entry */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_NETDEV, "Failed to get a cqp_request.\n");
- return;
- }
- cqp_request->waiting = 0;
- cqp_wqe = &cqp_request->cqp_wqe;
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
-
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(
- NES_CQP_MANAGE_ARP_CACHE | NES_CQP_ARP_PERM);
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(
- (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_CQP_ARP_AEQ_INDEX_SHIFT);
- cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(arp_index);
-
- if (action == NES_ARP_ADD) {
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_ARP_VALID);
- cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = cpu_to_le32(
- (((u32)mac_addr[2]) << 24) | (((u32)mac_addr[3]) << 16) |
- (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]);
- cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = cpu_to_le32(
- (((u32)mac_addr[0]) << 8) | (u32)mac_addr[1]);
- } else {
- cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = 0;
- }
-
- nes_debug(NES_DBG_NETDEV, "Not waiting for CQP, cqp.sq_head=%u, cqp.sq_tail=%u\n",
- nesdev->cqp.sq_head, nesdev->cqp.sq_tail);
-
- atomic_set(&cqp_request->refcount, 1);
- nes_post_cqp_request(nesdev, cqp_request);
-}
-
-
-/**
- * flush_wqes
- */
-void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
- u32 which_wq, u32 wait_completion)
-{
- struct nes_cqp_request *cqp_request;
- struct nes_hw_cqp_wqe *cqp_wqe;
- u32 sq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH;
- u32 rq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH;
- int ret;
-
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n");
- return;
- }
- if (wait_completion) {
- cqp_request->waiting = 1;
- atomic_set(&cqp_request->refcount, 2);
- } else {
- cqp_request->waiting = 0;
- }
- cqp_wqe = &cqp_request->cqp_wqe;
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
-
- /* If wqe in error was identified, set code to be put into cqe */
- if ((nesqp->term_sq_flush_code) && (which_wq & NES_CQP_FLUSH_SQ)) {
- which_wq |= NES_CQP_FLUSH_MAJ_MIN;
- sq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_sq_flush_code;
- nesqp->term_sq_flush_code = 0;
- }
-
- if ((nesqp->term_rq_flush_code) && (which_wq & NES_CQP_FLUSH_RQ)) {
- which_wq |= NES_CQP_FLUSH_MAJ_MIN;
- rq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_rq_flush_code;
- nesqp->term_rq_flush_code = 0;
- }
-
- if (which_wq & NES_CQP_FLUSH_MAJ_MIN) {
- cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_SQ_CODE] = cpu_to_le32(sq_code);
- cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_RQ_CODE] = cpu_to_le32(rq_code);
- }
-
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] =
- cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq);
- cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id);
-
- nes_post_cqp_request(nesdev, cqp_request);
-
- if (wait_completion) {
- /* Wait for CQP */
- ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_QP, "Flush SQ QP WQEs completed, ret=%u,"
- " CQP Major:Minor codes = 0x%04X:0x%04X\n",
- ret, cqp_request->major_code, cqp_request->minor_code);
- nes_put_cqp_request(nesdev, cqp_request);
- }
-}
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
deleted file mode 100644
index 3c56470816a8..000000000000
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ /dev/null
@@ -1,1380 +0,0 @@
-/*
-* Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenIB.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*/
-
-#ifndef __NES_HW_H
-#define __NES_HW_H
-
-#define NES_PHY_TYPE_CX4 1
-#define NES_PHY_TYPE_1G 2
-#define NES_PHY_TYPE_ARGUS 4
-#define NES_PHY_TYPE_PUMA_1G 5
-#define NES_PHY_TYPE_PUMA_10G 6
-#define NES_PHY_TYPE_GLADIUS 7
-#define NES_PHY_TYPE_SFP_D 8
-#define NES_PHY_TYPE_KR 9
-
-#define NES_MULTICAST_PF_MAX 8
-#define NES_A0 3
-
-#define NES_ENABLE_PAU 0x07000001
-#define NES_DISABLE_PAU 0x07000000
-#define NES_PAU_COUNTER 10
-#define NES_CQP_OPCODE_MASK 0x3f
-
-enum pci_regs {
- NES_INT_STAT = 0x0000,
- NES_INT_MASK = 0x0004,
- NES_INT_PENDING = 0x0008,
- NES_INTF_INT_STAT = 0x000C,
- NES_INTF_INT_MASK = 0x0010,
- NES_TIMER_STAT = 0x0014,
- NES_PERIODIC_CONTROL = 0x0018,
- NES_ONE_SHOT_CONTROL = 0x001C,
- NES_EEPROM_COMMAND = 0x0020,
- NES_EEPROM_DATA = 0x0024,
- NES_FLASH_COMMAND = 0x0028,
- NES_FLASH_DATA = 0x002C,
- NES_SOFTWARE_RESET = 0x0030,
- NES_CQ_ACK = 0x0034,
- NES_WQE_ALLOC = 0x0040,
- NES_CQE_ALLOC = 0x0044,
- NES_AEQ_ALLOC = 0x0048
-};
-
-enum indexed_regs {
- NES_IDX_CREATE_CQP_LOW = 0x0000,
- NES_IDX_CREATE_CQP_HIGH = 0x0004,
- NES_IDX_QP_CONTROL = 0x0040,
- NES_IDX_FLM_CONTROL = 0x0080,
- NES_IDX_INT_CPU_STATUS = 0x00a0,
- NES_IDX_GPR_TRIGGER = 0x00bc,
- NES_IDX_GPIO_CONTROL = 0x00f0,
- NES_IDX_GPIO_DATA = 0x00f4,
- NES_IDX_GPR2 = 0x010c,
- NES_IDX_TCP_CONFIG0 = 0x01e4,
- NES_IDX_TCP_TIMER_CONFIG = 0x01ec,
- NES_IDX_TCP_NOW = 0x01f0,
- NES_IDX_QP_MAX_CFG_SIZES = 0x0200,
- NES_IDX_QP_CTX_SIZE = 0x0218,
- NES_IDX_TCP_TIMER_SIZE0 = 0x0238,
- NES_IDX_TCP_TIMER_SIZE1 = 0x0240,
- NES_IDX_ARP_CACHE_SIZE = 0x0258,
- NES_IDX_CQ_CTX_SIZE = 0x0260,
- NES_IDX_MRT_SIZE = 0x0278,
- NES_IDX_PBL_REGION_SIZE = 0x0280,
- NES_IDX_IRRQ_COUNT = 0x02b0,
- NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x02f0,
- NES_IDX_RX_WINDOW_BUFFER_SIZE = 0x0300,
- NES_IDX_DST_IP_ADDR = 0x0400,
- NES_IDX_PCIX_DIAG = 0x08e8,
- NES_IDX_MPP_DEBUG = 0x0a00,
- NES_IDX_PORT_RX_DISCARDS = 0x0a30,
- NES_IDX_PORT_TX_DISCARDS = 0x0a34,
- NES_IDX_MPP_LB_DEBUG = 0x0b00,
- NES_IDX_DENALI_CTL_22 = 0x1058,
- NES_IDX_MAC_TX_CONTROL = 0x2000,
- NES_IDX_MAC_TX_CONFIG = 0x2004,
- NES_IDX_MAC_TX_PAUSE_QUANTA = 0x2008,
- NES_IDX_MAC_RX_CONTROL = 0x200c,
- NES_IDX_MAC_RX_CONFIG = 0x2010,
- NES_IDX_MAC_EXACT_MATCH_BOTTOM = 0x201c,
- NES_IDX_MAC_MDIO_CONTROL = 0x2084,
- NES_IDX_MAC_TX_OCTETS_LOW = 0x2100,
- NES_IDX_MAC_TX_OCTETS_HIGH = 0x2104,
- NES_IDX_MAC_TX_FRAMES_LOW = 0x2108,
- NES_IDX_MAC_TX_FRAMES_HIGH = 0x210c,
- NES_IDX_MAC_TX_PAUSE_FRAMES = 0x2118,
- NES_IDX_MAC_TX_ERRORS = 0x2138,
- NES_IDX_MAC_RX_OCTETS_LOW = 0x213c,
- NES_IDX_MAC_RX_OCTETS_HIGH = 0x2140,
- NES_IDX_MAC_RX_FRAMES_LOW = 0x2144,
- NES_IDX_MAC_RX_FRAMES_HIGH = 0x2148,
- NES_IDX_MAC_RX_BC_FRAMES_LOW = 0x214c,
- NES_IDX_MAC_RX_MC_FRAMES_HIGH = 0x2150,
- NES_IDX_MAC_RX_PAUSE_FRAMES = 0x2154,
- NES_IDX_MAC_RX_SHORT_FRAMES = 0x2174,
- NES_IDX_MAC_RX_OVERSIZED_FRAMES = 0x2178,
- NES_IDX_MAC_RX_JABBER_FRAMES = 0x217c,
- NES_IDX_MAC_RX_CRC_ERR_FRAMES = 0x2180,
- NES_IDX_MAC_RX_LENGTH_ERR_FRAMES = 0x2184,
- NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES = 0x2188,
- NES_IDX_MAC_INT_STATUS = 0x21f0,
- NES_IDX_MAC_INT_MASK = 0x21f4,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 = 0x2800,
- NES_IDX_PHY_PCS_CONTROL_STATUS1 = 0x2a00,
- NES_IDX_ETH_SERDES_COMMON_CONTROL0 = 0x2808,
- NES_IDX_ETH_SERDES_COMMON_CONTROL1 = 0x2a08,
- NES_IDX_ETH_SERDES_COMMON_STATUS0 = 0x280c,
- NES_IDX_ETH_SERDES_COMMON_STATUS1 = 0x2a0c,
- NES_IDX_ETH_SERDES_TX_EMP0 = 0x2810,
- NES_IDX_ETH_SERDES_TX_EMP1 = 0x2a10,
- NES_IDX_ETH_SERDES_TX_DRIVE0 = 0x2814,
- NES_IDX_ETH_SERDES_TX_DRIVE1 = 0x2a14,
- NES_IDX_ETH_SERDES_RX_MODE0 = 0x2818,
- NES_IDX_ETH_SERDES_RX_MODE1 = 0x2a18,
- NES_IDX_ETH_SERDES_RX_SIGDET0 = 0x281c,
- NES_IDX_ETH_SERDES_RX_SIGDET1 = 0x2a1c,
- NES_IDX_ETH_SERDES_BYPASS0 = 0x2820,
- NES_IDX_ETH_SERDES_BYPASS1 = 0x2a20,
- NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0 = 0x2824,
- NES_IDX_ETH_SERDES_LOOPBACK_CONTROL1 = 0x2a24,
- NES_IDX_ETH_SERDES_RX_EQ_CONTROL0 = 0x2828,
- NES_IDX_ETH_SERDES_RX_EQ_CONTROL1 = 0x2a28,
- NES_IDX_ETH_SERDES_RX_EQ_STATUS0 = 0x282c,
- NES_IDX_ETH_SERDES_RX_EQ_STATUS1 = 0x2a2c,
- NES_IDX_ETH_SERDES_CDR_RESET0 = 0x2830,
- NES_IDX_ETH_SERDES_CDR_RESET1 = 0x2a30,
- NES_IDX_ETH_SERDES_CDR_CONTROL0 = 0x2834,
- NES_IDX_ETH_SERDES_CDR_CONTROL1 = 0x2a34,
- NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0 = 0x2838,
- NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1 = 0x2a38,
- NES_IDX_ENDNODE0_NSTAT_RX_DISCARD = 0x3080,
- NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO = 0x3000,
- NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI = 0x3004,
- NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO = 0x3008,
- NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI = 0x300c,
- NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO = 0x7000,
- NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI = 0x7004,
- NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO = 0x7008,
- NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI = 0x700c,
- NES_IDX_WQM_CONFIG0 = 0x5000,
- NES_IDX_WQM_CONFIG1 = 0x5004,
- NES_IDX_CM_CONFIG = 0x5100,
- NES_IDX_NIC_LOGPORT_TO_PHYPORT = 0x6000,
- NES_IDX_NIC_PHYPORT_TO_USW = 0x6008,
- NES_IDX_NIC_ACTIVE = 0x6010,
- NES_IDX_NIC_UNICAST_ALL = 0x6018,
- NES_IDX_NIC_MULTICAST_ALL = 0x6020,
- NES_IDX_NIC_MULTICAST_ENABLE = 0x6028,
- NES_IDX_NIC_BROADCAST_ON = 0x6030,
- NES_IDX_USED_CHUNKS_TX = 0x60b0,
- NES_IDX_TX_POOL_SIZE = 0x60b8,
- NES_IDX_QUAD_HASH_TABLE_SIZE = 0x6148,
- NES_IDX_PERFECT_FILTER_LOW = 0x6200,
- NES_IDX_PERFECT_FILTER_HIGH = 0x6204,
- NES_IDX_IPV4_TCP_REXMITS = 0x7080,
- NES_IDX_DEBUG_ERROR_CONTROL_STATUS = 0x913c,
- NES_IDX_DEBUG_ERROR_MASKS0 = 0x9140,
- NES_IDX_DEBUG_ERROR_MASKS1 = 0x9144,
- NES_IDX_DEBUG_ERROR_MASKS2 = 0x9148,
- NES_IDX_DEBUG_ERROR_MASKS3 = 0x914c,
- NES_IDX_DEBUG_ERROR_MASKS4 = 0x9150,
- NES_IDX_DEBUG_ERROR_MASKS5 = 0x9154,
-};
-
-#define NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE 1
-#define NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE (1 << 17)
-
-enum nes_cqp_opcodes {
- NES_CQP_CREATE_QP = 0x00,
- NES_CQP_MODIFY_QP = 0x01,
- NES_CQP_DESTROY_QP = 0x02,
- NES_CQP_CREATE_CQ = 0x03,
- NES_CQP_MODIFY_CQ = 0x04,
- NES_CQP_DESTROY_CQ = 0x05,
- NES_CQP_ALLOCATE_STAG = 0x09,
- NES_CQP_REGISTER_STAG = 0x0a,
- NES_CQP_QUERY_STAG = 0x0b,
- NES_CQP_REGISTER_SHARED_STAG = 0x0c,
- NES_CQP_DEALLOCATE_STAG = 0x0d,
- NES_CQP_MANAGE_ARP_CACHE = 0x0f,
- NES_CQP_DOWNLOAD_SEGMENT = 0x10,
- NES_CQP_SUSPEND_QPS = 0x11,
- NES_CQP_UPLOAD_CONTEXT = 0x13,
- NES_CQP_CREATE_CEQ = 0x16,
- NES_CQP_DESTROY_CEQ = 0x18,
- NES_CQP_CREATE_AEQ = 0x19,
- NES_CQP_DESTROY_AEQ = 0x1b,
- NES_CQP_LMI_ACCESS = 0x20,
- NES_CQP_FLUSH_WQES = 0x22,
- NES_CQP_MANAGE_APBVT = 0x23,
- NES_CQP_MANAGE_QUAD_HASH = 0x25
-};
-
-enum nes_cqp_wqe_word_idx {
- NES_CQP_WQE_OPCODE_IDX = 0,
- NES_CQP_WQE_ID_IDX = 1,
- NES_CQP_WQE_COMP_CTX_LOW_IDX = 2,
- NES_CQP_WQE_COMP_CTX_HIGH_IDX = 3,
- NES_CQP_WQE_COMP_SCRATCH_LOW_IDX = 4,
- NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX = 5,
-};
-
-enum nes_cqp_wqe_word_download_idx { /* format differs from other cqp ops */
- NES_CQP_WQE_DL_OPCODE_IDX = 0,
- NES_CQP_WQE_DL_COMP_CTX_LOW_IDX = 1,
- NES_CQP_WQE_DL_COMP_CTX_HIGH_IDX = 2,
- NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX = 3
- /* For index values 4-15 use NES_NIC_SQ_WQE_ values */
-};
-
-enum nes_cqp_cq_wqeword_idx {
- NES_CQP_CQ_WQE_PBL_LOW_IDX = 6,
- NES_CQP_CQ_WQE_PBL_HIGH_IDX = 7,
- NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX = 8,
- NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX = 9,
- NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX = 10,
-};
-
-enum nes_cqp_stag_wqeword_idx {
- NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX = 1,
- NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX = 6,
- NES_CQP_STAG_WQE_LEN_LOW_IDX = 7,
- NES_CQP_STAG_WQE_STAG_IDX = 8,
- NES_CQP_STAG_WQE_VA_LOW_IDX = 10,
- NES_CQP_STAG_WQE_VA_HIGH_IDX = 11,
- NES_CQP_STAG_WQE_PA_LOW_IDX = 12,
- NES_CQP_STAG_WQE_PA_HIGH_IDX = 13,
- NES_CQP_STAG_WQE_PBL_LEN_IDX = 14
-};
-
-#define NES_CQP_OP_LOGICAL_PORT_SHIFT 26
-#define NES_CQP_OP_IWARP_STATE_SHIFT 28
-#define NES_CQP_OP_TERMLEN_SHIFT 28
-
-enum nes_cqp_qp_bits {
- NES_CQP_QP_ARP_VALID = (1<<8),
- NES_CQP_QP_WINBUF_VALID = (1<<9),
- NES_CQP_QP_CONTEXT_VALID = (1<<10),
- NES_CQP_QP_ORD_VALID = (1<<11),
- NES_CQP_QP_WINBUF_DATAIND_EN = (1<<12),
- NES_CQP_QP_VIRT_WQS = (1<<13),
- NES_CQP_QP_DEL_HTE = (1<<14),
- NES_CQP_QP_CQS_VALID = (1<<15),
- NES_CQP_QP_TYPE_TSA = 0,
- NES_CQP_QP_TYPE_IWARP = (1<<16),
- NES_CQP_QP_TYPE_CQP = (4<<16),
- NES_CQP_QP_TYPE_NIC = (5<<16),
- NES_CQP_QP_MSS_CHG = (1<<20),
- NES_CQP_QP_STATIC_RESOURCES = (1<<21),
- NES_CQP_QP_IGNORE_MW_BOUND = (1<<22),
- NES_CQP_QP_VWQ_USE_LMI = (1<<23),
- NES_CQP_QP_IWARP_STATE_IDLE = (1<<NES_CQP_OP_IWARP_STATE_SHIFT),
- NES_CQP_QP_IWARP_STATE_RTS = (2<<NES_CQP_OP_IWARP_STATE_SHIFT),
- NES_CQP_QP_IWARP_STATE_CLOSING = (3<<NES_CQP_OP_IWARP_STATE_SHIFT),
- NES_CQP_QP_IWARP_STATE_TERMINATE = (5<<NES_CQP_OP_IWARP_STATE_SHIFT),
- NES_CQP_QP_IWARP_STATE_ERROR = (6<<NES_CQP_OP_IWARP_STATE_SHIFT),
- NES_CQP_QP_IWARP_STATE_MASK = (7<<NES_CQP_OP_IWARP_STATE_SHIFT),
- NES_CQP_QP_TERM_DONT_SEND_FIN = (1<<24),
- NES_CQP_QP_TERM_DONT_SEND_TERM_MSG = (1<<25),
- NES_CQP_QP_RESET = (1<<31),
-};
-
-enum nes_cqp_qp_wqe_word_idx {
- NES_CQP_QP_WQE_CONTEXT_LOW_IDX = 6,
- NES_CQP_QP_WQE_CONTEXT_HIGH_IDX = 7,
- NES_CQP_QP_WQE_FLUSH_SQ_CODE = 8,
- NES_CQP_QP_WQE_FLUSH_RQ_CODE = 9,
- NES_CQP_QP_WQE_NEW_MSS_IDX = 15,
-};
-
-enum nes_nic_ctx_bits {
- NES_NIC_CTX_RQ_SIZE_32 = (3<<8),
- NES_NIC_CTX_RQ_SIZE_512 = (3<<8),
- NES_NIC_CTX_SQ_SIZE_32 = (1<<10),
- NES_NIC_CTX_SQ_SIZE_512 = (3<<10),
-};
-
-enum nes_nic_qp_ctx_word_idx {
- NES_NIC_CTX_MISC_IDX = 0,
- NES_NIC_CTX_SQ_LOW_IDX = 2,
- NES_NIC_CTX_SQ_HIGH_IDX = 3,
- NES_NIC_CTX_RQ_LOW_IDX = 4,
- NES_NIC_CTX_RQ_HIGH_IDX = 5,
-};
-
-enum nes_cqp_cq_bits {
- NES_CQP_CQ_CEQE_MASK = (1<<9),
- NES_CQP_CQ_CEQ_VALID = (1<<10),
- NES_CQP_CQ_RESIZE = (1<<11),
- NES_CQP_CQ_CHK_OVERFLOW = (1<<12),
- NES_CQP_CQ_4KB_CHUNK = (1<<14),
- NES_CQP_CQ_VIRT = (1<<15),
-};
-
-enum nes_cqp_stag_bits {
- NES_CQP_STAG_VA_TO = (1<<9),
- NES_CQP_STAG_DEALLOC_PBLS = (1<<10),
- NES_CQP_STAG_PBL_BLK_SIZE = (1<<11),
- NES_CQP_STAG_MR = (1<<13),
- NES_CQP_STAG_RIGHTS_LOCAL_READ = (1<<16),
- NES_CQP_STAG_RIGHTS_LOCAL_WRITE = (1<<17),
- NES_CQP_STAG_RIGHTS_REMOTE_READ = (1<<18),
- NES_CQP_STAG_RIGHTS_REMOTE_WRITE = (1<<19),
- NES_CQP_STAG_RIGHTS_WINDOW_BIND = (1<<20),
- NES_CQP_STAG_REM_ACC_EN = (1<<21),
- NES_CQP_STAG_LEAVE_PENDING = (1<<31),
-};
-
-enum nes_cqp_ceq_wqeword_idx {
- NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX = 1,
- NES_CQP_CEQ_WQE_PBL_LOW_IDX = 6,
- NES_CQP_CEQ_WQE_PBL_HIGH_IDX = 7,
-};
-
-enum nes_cqp_ceq_bits {
- NES_CQP_CEQ_4KB_CHUNK = (1<<14),
- NES_CQP_CEQ_VIRT = (1<<15),
-};
-
-enum nes_cqp_aeq_wqeword_idx {
- NES_CQP_AEQ_WQE_ELEMENT_COUNT_IDX = 1,
- NES_CQP_AEQ_WQE_PBL_LOW_IDX = 6,
- NES_CQP_AEQ_WQE_PBL_HIGH_IDX = 7,
-};
-
-enum nes_cqp_aeq_bits {
- NES_CQP_AEQ_4KB_CHUNK = (1<<14),
- NES_CQP_AEQ_VIRT = (1<<15),
-};
-
-enum nes_cqp_lmi_wqeword_idx {
- NES_CQP_LMI_WQE_LMI_OFFSET_IDX = 1,
- NES_CQP_LMI_WQE_FRAG_LOW_IDX = 8,
- NES_CQP_LMI_WQE_FRAG_HIGH_IDX = 9,
- NES_CQP_LMI_WQE_FRAG_LEN_IDX = 10,
-};
-
-enum nes_cqp_arp_wqeword_idx {
- NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX = 6,
- NES_CQP_ARP_WQE_MAC_HIGH_IDX = 7,
- NES_CQP_ARP_WQE_REACHABILITY_MAX_IDX = 1,
-};
-
-enum nes_cqp_upload_wqeword_idx {
- NES_CQP_UPLOAD_WQE_CTXT_LOW_IDX = 6,
- NES_CQP_UPLOAD_WQE_CTXT_HIGH_IDX = 7,
- NES_CQP_UPLOAD_WQE_HTE_IDX = 8,
-};
-
-enum nes_cqp_arp_bits {
- NES_CQP_ARP_VALID = (1<<8),
- NES_CQP_ARP_PERM = (1<<9),
-};
-
-enum nes_cqp_flush_bits {
- NES_CQP_FLUSH_SQ = (1<<30),
- NES_CQP_FLUSH_RQ = (1<<31),
- NES_CQP_FLUSH_MAJ_MIN = (1<<28),
-};
-
-enum nes_cqe_opcode_bits {
- NES_CQE_STAG_VALID = (1<<6),
- NES_CQE_ERROR = (1<<7),
- NES_CQE_SQ = (1<<8),
- NES_CQE_SE = (1<<9),
- NES_CQE_PSH = (1<<29),
- NES_CQE_FIN = (1<<30),
- NES_CQE_VALID = (1<<31),
-};
-
-
-enum nes_cqe_word_idx {
- NES_CQE_PAYLOAD_LENGTH_IDX = 0,
- NES_CQE_COMP_COMP_CTX_LOW_IDX = 2,
- NES_CQE_COMP_COMP_CTX_HIGH_IDX = 3,
- NES_CQE_INV_STAG_IDX = 4,
- NES_CQE_QP_ID_IDX = 5,
- NES_CQE_ERROR_CODE_IDX = 6,
- NES_CQE_OPCODE_IDX = 7,
-};
-
-enum nes_ceqe_word_idx {
- NES_CEQE_CQ_CTX_LOW_IDX = 0,
- NES_CEQE_CQ_CTX_HIGH_IDX = 1,
-};
-
-enum nes_ceqe_status_bit {
- NES_CEQE_VALID = (1<<31),
-};
-
-enum nes_int_bits {
- NES_INT_CEQ0 = (1<<0),
- NES_INT_CEQ1 = (1<<1),
- NES_INT_CEQ2 = (1<<2),
- NES_INT_CEQ3 = (1<<3),
- NES_INT_CEQ4 = (1<<4),
- NES_INT_CEQ5 = (1<<5),
- NES_INT_CEQ6 = (1<<6),
- NES_INT_CEQ7 = (1<<7),
- NES_INT_CEQ8 = (1<<8),
- NES_INT_CEQ9 = (1<<9),
- NES_INT_CEQ10 = (1<<10),
- NES_INT_CEQ11 = (1<<11),
- NES_INT_CEQ12 = (1<<12),
- NES_INT_CEQ13 = (1<<13),
- NES_INT_CEQ14 = (1<<14),
- NES_INT_CEQ15 = (1<<15),
- NES_INT_AEQ0 = (1<<16),
- NES_INT_AEQ1 = (1<<17),
- NES_INT_AEQ2 = (1<<18),
- NES_INT_AEQ3 = (1<<19),
- NES_INT_AEQ4 = (1<<20),
- NES_INT_AEQ5 = (1<<21),
- NES_INT_AEQ6 = (1<<22),
- NES_INT_AEQ7 = (1<<23),
- NES_INT_MAC0 = (1<<24),
- NES_INT_MAC1 = (1<<25),
- NES_INT_MAC2 = (1<<26),
- NES_INT_MAC3 = (1<<27),
- NES_INT_TSW = (1<<28),
- NES_INT_TIMER = (1<<29),
- NES_INT_INTF = (1<<30),
-};
-
-enum nes_intf_int_bits {
- NES_INTF_INT_PCIERR = (1<<0),
- NES_INTF_PERIODIC_TIMER = (1<<2),
- NES_INTF_ONE_SHOT_TIMER = (1<<3),
- NES_INTF_INT_CRITERR = (1<<14),
- NES_INTF_INT_AEQ0_OFLOW = (1<<16),
- NES_INTF_INT_AEQ1_OFLOW = (1<<17),
- NES_INTF_INT_AEQ2_OFLOW = (1<<18),
- NES_INTF_INT_AEQ3_OFLOW = (1<<19),
- NES_INTF_INT_AEQ4_OFLOW = (1<<20),
- NES_INTF_INT_AEQ5_OFLOW = (1<<21),
- NES_INTF_INT_AEQ6_OFLOW = (1<<22),
- NES_INTF_INT_AEQ7_OFLOW = (1<<23),
- NES_INTF_INT_AEQ_OFLOW = (0xff<<16),
-};
-
-enum nes_mac_int_bits {
- NES_MAC_INT_LINK_STAT_CHG = (1<<1),
- NES_MAC_INT_XGMII_EXT = (1<<2),
- NES_MAC_INT_TX_UNDERFLOW = (1<<6),
- NES_MAC_INT_TX_ERROR = (1<<7),
-};
-
-enum nes_cqe_allocate_bits {
- NES_CQE_ALLOC_INC_SELECT = (1<<28),
- NES_CQE_ALLOC_NOTIFY_NEXT = (1<<29),
- NES_CQE_ALLOC_NOTIFY_SE = (1<<30),
- NES_CQE_ALLOC_RESET = (1<<31),
-};
-
-enum nes_nic_rq_wqe_word_idx {
- NES_NIC_RQ_WQE_LENGTH_1_0_IDX = 0,
- NES_NIC_RQ_WQE_LENGTH_3_2_IDX = 1,
- NES_NIC_RQ_WQE_FRAG0_LOW_IDX = 2,
- NES_NIC_RQ_WQE_FRAG0_HIGH_IDX = 3,
- NES_NIC_RQ_WQE_FRAG1_LOW_IDX = 4,
- NES_NIC_RQ_WQE_FRAG1_HIGH_IDX = 5,
- NES_NIC_RQ_WQE_FRAG2_LOW_IDX = 6,
- NES_NIC_RQ_WQE_FRAG2_HIGH_IDX = 7,
- NES_NIC_RQ_WQE_FRAG3_LOW_IDX = 8,
- NES_NIC_RQ_WQE_FRAG3_HIGH_IDX = 9,
-};
-
-enum nes_nic_sq_wqe_word_idx {
- NES_NIC_SQ_WQE_MISC_IDX = 0,
- NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX = 1,
- NES_NIC_SQ_WQE_LSO_INFO_IDX = 2,
- NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX = 3,
- NES_NIC_SQ_WQE_LENGTH_2_1_IDX = 4,
- NES_NIC_SQ_WQE_LENGTH_4_3_IDX = 5,
- NES_NIC_SQ_WQE_FRAG0_LOW_IDX = 6,
- NES_NIC_SQ_WQE_FRAG0_HIGH_IDX = 7,
- NES_NIC_SQ_WQE_FRAG1_LOW_IDX = 8,
- NES_NIC_SQ_WQE_FRAG1_HIGH_IDX = 9,
- NES_NIC_SQ_WQE_FRAG2_LOW_IDX = 10,
- NES_NIC_SQ_WQE_FRAG2_HIGH_IDX = 11,
- NES_NIC_SQ_WQE_FRAG3_LOW_IDX = 12,
- NES_NIC_SQ_WQE_FRAG3_HIGH_IDX = 13,
- NES_NIC_SQ_WQE_FRAG4_LOW_IDX = 14,
- NES_NIC_SQ_WQE_FRAG4_HIGH_IDX = 15,
-};
-
-enum nes_iwarp_sq_wqe_word_idx {
- NES_IWARP_SQ_WQE_MISC_IDX = 0,
- NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX = 1,
- NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX = 2,
- NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX = 3,
- NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX = 4,
- NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX = 5,
- NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX = 7,
- NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX = 8,
- NES_IWARP_SQ_WQE_RDMA_TO_HIGH_IDX = 9,
- NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX = 10,
- NES_IWARP_SQ_WQE_RDMA_STAG_IDX = 11,
- NES_IWARP_SQ_WQE_IMM_DATA_START_IDX = 12,
- NES_IWARP_SQ_WQE_FRAG0_LOW_IDX = 16,
- NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX = 17,
- NES_IWARP_SQ_WQE_LENGTH0_IDX = 18,
- NES_IWARP_SQ_WQE_STAG0_IDX = 19,
- NES_IWARP_SQ_WQE_FRAG1_LOW_IDX = 20,
- NES_IWARP_SQ_WQE_FRAG1_HIGH_IDX = 21,
- NES_IWARP_SQ_WQE_LENGTH1_IDX = 22,
- NES_IWARP_SQ_WQE_STAG1_IDX = 23,
- NES_IWARP_SQ_WQE_FRAG2_LOW_IDX = 24,
- NES_IWARP_SQ_WQE_FRAG2_HIGH_IDX = 25,
- NES_IWARP_SQ_WQE_LENGTH2_IDX = 26,
- NES_IWARP_SQ_WQE_STAG2_IDX = 27,
- NES_IWARP_SQ_WQE_FRAG3_LOW_IDX = 28,
- NES_IWARP_SQ_WQE_FRAG3_HIGH_IDX = 29,
- NES_IWARP_SQ_WQE_LENGTH3_IDX = 30,
- NES_IWARP_SQ_WQE_STAG3_IDX = 31,
-};
-
-enum nes_iwarp_sq_bind_wqe_word_idx {
- NES_IWARP_SQ_BIND_WQE_MR_IDX = 6,
- NES_IWARP_SQ_BIND_WQE_MW_IDX = 7,
- NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX = 8,
- NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX = 9,
- NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX = 10,
- NES_IWARP_SQ_BIND_WQE_VA_FBO_HIGH_IDX = 11,
-};
-
-enum nes_iwarp_sq_fmr_wqe_word_idx {
- NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX = 7,
- NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX = 8,
- NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX = 9,
- NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX = 10,
- NES_IWARP_SQ_FMR_WQE_VA_FBO_HIGH_IDX = 11,
- NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX = 12,
- NES_IWARP_SQ_FMR_WQE_PBL_ADDR_HIGH_IDX = 13,
- NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX = 14,
-};
-
-enum nes_iwarp_sq_fmr_opcodes {
- NES_IWARP_SQ_FMR_WQE_ZERO_BASED = (1<<6),
- NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K = (0<<7),
- NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M = (1<<7),
- NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ = (1<<16),
- NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE = (1<<17),
- NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ = (1<<18),
- NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE = (1<<19),
- NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND = (1<<20),
-};
-
-#define NES_IWARP_SQ_FMR_WQE_MR_LENGTH_HIGH_MASK 0xFF;
-
-enum nes_iwarp_sq_locinv_wqe_word_idx {
- NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX = 6,
-};
-
-enum nes_iwarp_rq_wqe_word_idx {
- NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX = 1,
- NES_IWARP_RQ_WQE_COMP_CTX_LOW_IDX = 2,
- NES_IWARP_RQ_WQE_COMP_CTX_HIGH_IDX = 3,
- NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX = 4,
- NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX = 5,
- NES_IWARP_RQ_WQE_FRAG0_LOW_IDX = 8,
- NES_IWARP_RQ_WQE_FRAG0_HIGH_IDX = 9,
- NES_IWARP_RQ_WQE_LENGTH0_IDX = 10,
- NES_IWARP_RQ_WQE_STAG0_IDX = 11,
- NES_IWARP_RQ_WQE_FRAG1_LOW_IDX = 12,
- NES_IWARP_RQ_WQE_FRAG1_HIGH_IDX = 13,
- NES_IWARP_RQ_WQE_LENGTH1_IDX = 14,
- NES_IWARP_RQ_WQE_STAG1_IDX = 15,
- NES_IWARP_RQ_WQE_FRAG2_LOW_IDX = 16,
- NES_IWARP_RQ_WQE_FRAG2_HIGH_IDX = 17,
- NES_IWARP_RQ_WQE_LENGTH2_IDX = 18,
- NES_IWARP_RQ_WQE_STAG2_IDX = 19,
- NES_IWARP_RQ_WQE_FRAG3_LOW_IDX = 20,
- NES_IWARP_RQ_WQE_FRAG3_HIGH_IDX = 21,
- NES_IWARP_RQ_WQE_LENGTH3_IDX = 22,
- NES_IWARP_RQ_WQE_STAG3_IDX = 23,
-};
-
-enum nes_nic_sq_wqe_bits {
- NES_NIC_SQ_WQE_PHDR_CS_READY = (1<<21),
- NES_NIC_SQ_WQE_LSO_ENABLE = (1<<22),
- NES_NIC_SQ_WQE_TAGVALUE_ENABLE = (1<<23),
- NES_NIC_SQ_WQE_DISABLE_CHKSUM = (1<<30),
- NES_NIC_SQ_WQE_COMPLETION = (1<<31),
-};
-
-enum nes_nic_cqe_word_idx {
- NES_NIC_CQE_ACCQP_ID_IDX = 0,
- NES_NIC_CQE_HASH_RCVNXT = 1,
- NES_NIC_CQE_TAG_PKT_TYPE_IDX = 2,
- NES_NIC_CQE_MISC_IDX = 3,
-};
-
-#define NES_PKT_TYPE_APBVT_BITS 0xC112
-#define NES_PKT_TYPE_APBVT_MASK 0xff3e
-
-#define NES_PKT_TYPE_PVALID_BITS 0x10000000
-#define NES_PKT_TYPE_PVALID_MASK 0x30000000
-
-#define NES_PKT_TYPE_TCPV4_BITS 0x0110
-#define NES_PKT_TYPE_TCPV4_MASK 0x3f30
-
-#define NES_PKT_TYPE_UDPV4_BITS 0x0210
-#define NES_PKT_TYPE_UDPV4_MASK 0x3f30
-
-#define NES_PKT_TYPE_IPV4_BITS 0x0010
-#define NES_PKT_TYPE_IPV4_MASK 0x3f30
-
-#define NES_PKT_TYPE_OTHER_BITS 0x0000
-#define NES_PKT_TYPE_OTHER_MASK 0x0030
-
-#define NES_NIC_CQE_ERRV_SHIFT 16
-enum nes_nic_ev_bits {
- NES_NIC_ERRV_BITS_MODE = (1<<0),
- NES_NIC_ERRV_BITS_IPV4_CSUM_ERR = (1<<1),
- NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR = (1<<2),
- NES_NIC_ERRV_BITS_WQE_OVERRUN = (1<<3),
- NES_NIC_ERRV_BITS_IPH_ERR = (1<<4),
-};
-
-enum nes_nic_cqe_bits {
- NES_NIC_CQE_ERRV_MASK = (0xff<<NES_NIC_CQE_ERRV_SHIFT),
- NES_NIC_CQE_SQ = (1<<24),
- NES_NIC_CQE_ACCQP_PORT = (1<<28),
- NES_NIC_CQE_ACCQP_VALID = (1<<29),
- NES_NIC_CQE_TAG_VALID = (1<<30),
- NES_NIC_CQE_VALID = (1<<31),
-};
-
-enum nes_aeqe_word_idx {
- NES_AEQE_COMP_CTXT_LOW_IDX = 0,
- NES_AEQE_COMP_CTXT_HIGH_IDX = 1,
- NES_AEQE_COMP_QP_CQ_ID_IDX = 2,
- NES_AEQE_MISC_IDX = 3,
-};
-
-enum nes_aeqe_bits {
- NES_AEQE_QP = (1<<16),
- NES_AEQE_CQ = (1<<17),
- NES_AEQE_SQ = (1<<18),
- NES_AEQE_INBOUND_RDMA = (1<<19),
- NES_AEQE_IWARP_STATE_MASK = (7<<20),
- NES_AEQE_TCP_STATE_MASK = (0xf<<24),
- NES_AEQE_Q2_DATA_WRITTEN = (0x3<<28),
- NES_AEQE_VALID = (1<<31),
-};
-
-#define NES_AEQE_IWARP_STATE_SHIFT 20
-#define NES_AEQE_TCP_STATE_SHIFT 24
-#define NES_AEQE_Q2_DATA_ETHERNET (1<<28)
-#define NES_AEQE_Q2_DATA_MPA (1<<29)
-
-enum nes_aeqe_iwarp_state {
- NES_AEQE_IWARP_STATE_NON_EXISTANT = 0,
- NES_AEQE_IWARP_STATE_IDLE = 1,
- NES_AEQE_IWARP_STATE_RTS = 2,
- NES_AEQE_IWARP_STATE_CLOSING = 3,
- NES_AEQE_IWARP_STATE_TERMINATE = 5,
- NES_AEQE_IWARP_STATE_ERROR = 6
-};
-
-enum nes_aeqe_tcp_state {
- NES_AEQE_TCP_STATE_NON_EXISTANT = 0,
- NES_AEQE_TCP_STATE_CLOSED = 1,
- NES_AEQE_TCP_STATE_LISTEN = 2,
- NES_AEQE_TCP_STATE_SYN_SENT = 3,
- NES_AEQE_TCP_STATE_SYN_RCVD = 4,
- NES_AEQE_TCP_STATE_ESTABLISHED = 5,
- NES_AEQE_TCP_STATE_CLOSE_WAIT = 6,
- NES_AEQE_TCP_STATE_FIN_WAIT_1 = 7,
- NES_AEQE_TCP_STATE_CLOSING = 8,
- NES_AEQE_TCP_STATE_LAST_ACK = 9,
- NES_AEQE_TCP_STATE_FIN_WAIT_2 = 10,
- NES_AEQE_TCP_STATE_TIME_WAIT = 11
-};
-
-enum nes_aeqe_aeid {
- NES_AEQE_AEID_AMP_UNALLOCATED_STAG = 0x0102,
- NES_AEQE_AEID_AMP_INVALID_STAG = 0x0103,
- NES_AEQE_AEID_AMP_BAD_QP = 0x0104,
- NES_AEQE_AEID_AMP_BAD_PD = 0x0105,
- NES_AEQE_AEID_AMP_BAD_STAG_KEY = 0x0106,
- NES_AEQE_AEID_AMP_BAD_STAG_INDEX = 0x0107,
- NES_AEQE_AEID_AMP_BOUNDS_VIOLATION = 0x0108,
- NES_AEQE_AEID_AMP_RIGHTS_VIOLATION = 0x0109,
- NES_AEQE_AEID_AMP_TO_WRAP = 0x010a,
- NES_AEQE_AEID_AMP_FASTREG_SHARED = 0x010b,
- NES_AEQE_AEID_AMP_FASTREG_VALID_STAG = 0x010c,
- NES_AEQE_AEID_AMP_FASTREG_MW_STAG = 0x010d,
- NES_AEQE_AEID_AMP_FASTREG_INVALID_RIGHTS = 0x010e,
- NES_AEQE_AEID_AMP_FASTREG_PBL_TABLE_OVERFLOW = 0x010f,
- NES_AEQE_AEID_AMP_FASTREG_INVALID_LENGTH = 0x0110,
- NES_AEQE_AEID_AMP_INVALIDATE_SHARED = 0x0111,
- NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS = 0x0112,
- NES_AEQE_AEID_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS = 0x0113,
- NES_AEQE_AEID_AMP_MWBIND_VALID_STAG = 0x0114,
- NES_AEQE_AEID_AMP_MWBIND_OF_MR_STAG = 0x0115,
- NES_AEQE_AEID_AMP_MWBIND_TO_ZERO_BASED_STAG = 0x0116,
- NES_AEQE_AEID_AMP_MWBIND_TO_MW_STAG = 0x0117,
- NES_AEQE_AEID_AMP_MWBIND_INVALID_RIGHTS = 0x0118,
- NES_AEQE_AEID_AMP_MWBIND_INVALID_BOUNDS = 0x0119,
- NES_AEQE_AEID_AMP_MWBIND_TO_INVALID_PARENT = 0x011a,
- NES_AEQE_AEID_AMP_MWBIND_BIND_DISABLED = 0x011b,
- NES_AEQE_AEID_BAD_CLOSE = 0x0201,
- NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE = 0x0202,
- NES_AEQE_AEID_CQ_OPERATION_ERROR = 0x0203,
- NES_AEQE_AEID_PRIV_OPERATION_DENIED = 0x0204,
- NES_AEQE_AEID_RDMA_READ_WHILE_ORD_ZERO = 0x0205,
- NES_AEQE_AEID_STAG_ZERO_INVALID = 0x0206,
- NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN = 0x0301,
- NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID = 0x0302,
- NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER = 0x0303,
- NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION = 0x0304,
- NES_AEQE_AEID_DDP_UBE_INVALID_MO = 0x0305,
- NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE = 0x0306,
- NES_AEQE_AEID_DDP_UBE_INVALID_QN = 0x0307,
- NES_AEQE_AEID_DDP_NO_L_BIT = 0x0308,
- NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION = 0x0311,
- NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE = 0x0312,
- NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST = 0x0313,
- NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP = 0x0314,
- NES_AEQE_AEID_INVALID_ARP_ENTRY = 0x0401,
- NES_AEQE_AEID_INVALID_TCP_OPTION_RCVD = 0x0402,
- NES_AEQE_AEID_STALE_ARP_ENTRY = 0x0403,
- NES_AEQE_AEID_LLP_CLOSE_COMPLETE = 0x0501,
- NES_AEQE_AEID_LLP_CONNECTION_RESET = 0x0502,
- NES_AEQE_AEID_LLP_FIN_RECEIVED = 0x0503,
- NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH = 0x0504,
- NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR = 0x0505,
- NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE = 0x0506,
- NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL = 0x0507,
- NES_AEQE_AEID_LLP_SYN_RECEIVED = 0x0508,
- NES_AEQE_AEID_LLP_TERMINATE_RECEIVED = 0x0509,
- NES_AEQE_AEID_LLP_TOO_MANY_RETRIES = 0x050a,
- NES_AEQE_AEID_LLP_TOO_MANY_KEEPALIVE_RETRIES = 0x050b,
- NES_AEQE_AEID_RESET_SENT = 0x0601,
- NES_AEQE_AEID_TERMINATE_SENT = 0x0602,
- NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC = 0x0700
-};
-
-enum nes_iwarp_sq_opcodes {
- NES_IWARP_SQ_WQE_WRPDU = (1<<15),
- NES_IWARP_SQ_WQE_PSH = (1<<21),
- NES_IWARP_SQ_WQE_STREAMING = (1<<23),
- NES_IWARP_SQ_WQE_IMM_DATA = (1<<28),
- NES_IWARP_SQ_WQE_READ_FENCE = (1<<29),
- NES_IWARP_SQ_WQE_LOCAL_FENCE = (1<<30),
- NES_IWARP_SQ_WQE_SIGNALED_COMPL = (1<<31),
-};
-
-enum nes_iwarp_sq_wqe_bits {
- NES_IWARP_SQ_OP_RDMAW = 0,
- NES_IWARP_SQ_OP_RDMAR = 1,
- NES_IWARP_SQ_OP_SEND = 3,
- NES_IWARP_SQ_OP_SENDINV = 4,
- NES_IWARP_SQ_OP_SENDSE = 5,
- NES_IWARP_SQ_OP_SENDSEINV = 6,
- NES_IWARP_SQ_OP_BIND = 8,
- NES_IWARP_SQ_OP_FAST_REG = 9,
- NES_IWARP_SQ_OP_LOCINV = 10,
- NES_IWARP_SQ_OP_RDMAR_LOCINV = 11,
- NES_IWARP_SQ_OP_NOP = 12,
-};
-
-enum nes_iwarp_cqe_major_code {
- NES_IWARP_CQE_MAJOR_FLUSH = 1,
- NES_IWARP_CQE_MAJOR_DRV = 0x8000
-};
-
-enum nes_iwarp_cqe_minor_code {
- NES_IWARP_CQE_MINOR_FLUSH = 1
-};
-
-#define NES_EEPROM_READ_REQUEST (1<<16)
-#define NES_MAC_ADDR_VALID (1<<20)
-
-/*
- * NES index registers init values.
- */
-struct nes_init_values {
- u32 index;
- u32 data;
- u8 wrt;
-};
-
-/*
- * NES registers in BAR0.
- */
-struct nes_pci_regs {
- u32 int_status;
- u32 int_mask;
- u32 int_pending;
- u32 intf_int_status;
- u32 intf_int_mask;
- u32 other_regs[59]; /* pad out to 256 bytes for now */
-};
-
-#define NES_CQP_SQ_SIZE 128
-#define NES_CCQ_SIZE 128
-#define NES_NIC_WQ_SIZE 512
-#define NES_NIC_CTX_SIZE ((NES_NIC_CTX_RQ_SIZE_512) | (NES_NIC_CTX_SQ_SIZE_512))
-#define NES_NIC_BACK_STORE 0x00038000
-
-struct nes_device;
-
-struct nes_hw_nic_qp_context {
- __le32 context_words[6];
-};
-
-struct nes_hw_nic_sq_wqe {
- __le32 wqe_words[16];
-};
-
-struct nes_hw_nic_rq_wqe {
- __le32 wqe_words[16];
-};
-
-struct nes_hw_nic_cqe {
- __le32 cqe_words[4];
-};
-
-struct nes_hw_cqp_qp_context {
- __le32 context_words[4];
-};
-
-struct nes_hw_cqp_wqe {
- __le32 wqe_words[16];
-};
-
-struct nes_hw_qp_wqe {
- __le32 wqe_words[32];
-};
-
-struct nes_hw_cqe {
- __le32 cqe_words[8];
-};
-
-struct nes_hw_ceqe {
- __le32 ceqe_words[2];
-};
-
-struct nes_hw_aeqe {
- __le32 aeqe_words[4];
-};
-
-struct nes_cqp_request {
- union {
- u64 cqp_callback_context;
- void *cqp_callback_pointer;
- };
- wait_queue_head_t waitq;
- struct nes_hw_cqp_wqe cqp_wqe;
- struct list_head list;
- atomic_t refcount;
- void (*cqp_callback)(struct nes_device *nesdev, struct nes_cqp_request *cqp_request);
- u16 major_code;
- u16 minor_code;
- u8 waiting;
- u8 request_done;
- u8 dynamic;
- u8 callback;
-};
-
-struct nes_hw_cqp {
- struct nes_hw_cqp_wqe *sq_vbase;
- dma_addr_t sq_pbase;
- spinlock_t lock;
- wait_queue_head_t waitq;
- u16 qp_id;
- u16 sq_head;
- u16 sq_tail;
- u16 sq_size;
-};
-
-#define NES_FIRST_FRAG_SIZE 128
-struct nes_first_frag {
- u8 buffer[NES_FIRST_FRAG_SIZE];
-};
-
-struct nes_hw_nic {
- struct nes_first_frag *first_frag_vbase; /* virtual address of first frags */
- struct nes_hw_nic_sq_wqe *sq_vbase; /* virtual address of sq */
- struct nes_hw_nic_rq_wqe *rq_vbase; /* virtual address of rq */
- struct sk_buff *tx_skb[NES_NIC_WQ_SIZE];
- struct sk_buff *rx_skb[NES_NIC_WQ_SIZE];
- dma_addr_t frag_paddr[NES_NIC_WQ_SIZE];
- unsigned long first_frag_overflow[BITS_TO_LONGS(NES_NIC_WQ_SIZE)];
- dma_addr_t sq_pbase; /* PCI memory for host rings */
- dma_addr_t rq_pbase; /* PCI memory for host rings */
-
- u16 qp_id;
- u16 sq_head;
- u16 sq_tail;
- u16 sq_size;
- u16 rq_head;
- u16 rq_tail;
- u16 rq_size;
- u8 replenishing_rq;
- u8 reserved;
-
- spinlock_t rq_lock;
-};
-
-struct nes_hw_nic_cq {
- struct nes_hw_nic_cqe volatile *cq_vbase; /* PCI memory for host rings */
- void (*ce_handler)(struct nes_device *nesdev, struct nes_hw_nic_cq *cq);
- dma_addr_t cq_pbase; /* PCI memory for host rings */
- int rx_cqes_completed;
- int cqe_allocs_pending;
- int rx_pkts_indicated;
- u16 cq_head;
- u16 cq_size;
- u16 cq_number;
- u8 cqes_pending;
-};
-
-struct nes_hw_qp {
- struct nes_hw_qp_wqe *sq_vbase; /* PCI memory for host rings */
- struct nes_hw_qp_wqe *rq_vbase; /* PCI memory for host rings */
- void *q2_vbase; /* PCI memory for host rings */
- dma_addr_t sq_pbase; /* PCI memory for host rings */
- dma_addr_t rq_pbase; /* PCI memory for host rings */
- dma_addr_t q2_pbase; /* PCI memory for host rings */
- u32 qp_id;
- u16 sq_head;
- u16 sq_tail;
- u16 sq_size;
- u16 rq_head;
- u16 rq_tail;
- u16 rq_size;
- u8 rq_encoded_size;
- u8 sq_encoded_size;
-};
-
-struct nes_hw_cq {
- struct nes_hw_cqe *cq_vbase; /* PCI memory for host rings */
- void (*ce_handler)(struct nes_device *nesdev, struct nes_hw_cq *cq);
- dma_addr_t cq_pbase; /* PCI memory for host rings */
- u16 cq_head;
- u16 cq_size;
- u16 cq_number;
-};
-
-struct nes_hw_ceq {
- struct nes_hw_ceqe volatile *ceq_vbase; /* PCI memory for host rings */
- dma_addr_t ceq_pbase; /* PCI memory for host rings */
- u16 ceq_head;
- u16 ceq_size;
-};
-
-struct nes_hw_aeq {
- struct nes_hw_aeqe volatile *aeq_vbase; /* PCI memory for host rings */
- dma_addr_t aeq_pbase; /* PCI memory for host rings */
- u16 aeq_head;
- u16 aeq_size;
-};
-
-struct nic_qp_map {
- u8 qpid;
- u8 nic_index;
- u8 logical_port;
- u8 is_hnic;
-};
-
-#define NES_CQP_ARP_AEQ_INDEX_MASK 0x000f0000
-#define NES_CQP_ARP_AEQ_INDEX_SHIFT 16
-
-#define NES_CQP_APBVT_ADD 0x00008000
-#define NES_CQP_APBVT_NIC_SHIFT 16
-
-#define NES_ARP_ADD 1
-#define NES_ARP_DELETE 2
-#define NES_ARP_RESOLVE 3
-
-#define NES_MAC_SW_IDLE 0
-#define NES_MAC_SW_INTERRUPT 1
-#define NES_MAC_SW_MH 2
-
-struct nes_arp_entry {
- u32 ip_addr;
- u8 mac_addr[ETH_ALEN];
-};
-
-#define NES_NIC_FAST_TIMER 96
-#define NES_NIC_FAST_TIMER_LOW 40
-#define NES_NIC_FAST_TIMER_HIGH 1000
-#define DEFAULT_NES_QL_HIGH 256
-#define DEFAULT_NES_QL_LOW 16
-#define DEFAULT_NES_QL_TARGET 64
-#define DEFAULT_JUMBO_NES_QL_LOW 12
-#define DEFAULT_JUMBO_NES_QL_TARGET 40
-#define DEFAULT_JUMBO_NES_QL_HIGH 128
-#define NES_NIC_CQ_DOWNWARD_TREND 16
-#define NES_PFT_SIZE 48
-
-#define NES_MGT_WQ_COUNT 32
-#define NES_MGT_CTX_SIZE ((NES_NIC_CTX_RQ_SIZE_32) | (NES_NIC_CTX_SQ_SIZE_32))
-#define NES_MGT_QP_OFFSET 36
-#define NES_MGT_QP_COUNT 4
-
-struct nes_hw_tune_timer {
- /* u16 cq_count; */
- u16 threshold_low;
- u16 threshold_target;
- u16 threshold_high;
- u16 timer_in_use;
- u16 timer_in_use_old;
- u16 timer_in_use_min;
- u16 timer_in_use_max;
- u8 timer_direction_upward;
- u8 timer_direction_downward;
- u16 cq_count_old;
- u8 cq_direction_downward;
-};
-
-#define NES_TIMER_INT_LIMIT 2
-#define NES_TIMER_INT_LIMIT_DYNAMIC 10
-#define NES_TIMER_ENABLE_LIMIT 4
-#define NES_MAX_LINK_INTERRUPTS 128
-#define NES_MAX_LINK_CHECK 200
-
-struct nes_adapter {
- u64 fw_ver;
- unsigned long *allocated_qps;
- unsigned long *allocated_cqs;
- unsigned long *allocated_mrs;
- unsigned long *allocated_pds;
- unsigned long *allocated_arps;
- struct nes_qp **qp_table;
- struct workqueue_struct *work_q;
-
- struct list_head list;
- struct list_head active_listeners;
- /* list of the netdev's associated with each logical port */
- struct list_head nesvnic_list[4];
-
- struct timer_list mh_timer;
- struct timer_list lc_timer;
- struct work_struct work;
- spinlock_t resource_lock;
- spinlock_t phy_lock;
- spinlock_t pbl_lock;
- spinlock_t periodic_timer_lock;
-
- struct nes_arp_entry arp_table[NES_MAX_ARP_TABLE_SIZE];
-
- /* Adapter CEQ and AEQs */
- struct nes_hw_ceq ceq[16];
- struct nes_hw_aeq aeq[8];
-
- struct nes_hw_tune_timer tune_timer;
-
- unsigned long doorbell_start;
-
- u32 hw_rev;
- u32 vendor_id;
- u32 vendor_part_id;
- u32 device_cap_flags;
- u32 tick_delta;
- u32 timer_int_req;
- u32 arp_table_size;
- u32 next_arp_index;
-
- u32 max_mr;
- u32 max_256pbl;
- u32 max_4kpbl;
- u32 free_256pbl;
- u32 free_4kpbl;
- u32 max_mr_size;
- u32 max_qp;
- u32 next_qp;
- u32 max_irrq;
- u32 max_qp_wr;
- u32 max_sge;
- u32 max_cq;
- u32 next_cq;
- u32 max_cqe;
- u32 max_pd;
- u32 base_pd;
- u32 next_pd;
- u32 hte_index_mask;
-
- /* EEPROM information */
- u32 rx_pool_size;
- u32 tx_pool_size;
- u32 rx_threshold;
- u32 tcp_timer_core_clk_divisor;
- u32 iwarp_config;
- u32 cm_config;
- u32 sws_timer_config;
- u32 tcp_config1;
- u32 wqm_wat;
- u32 core_clock;
- u32 firmware_version;
- u32 eeprom_version;
-
- u32 nic_rx_eth_route_err;
-
- u32 et_rx_coalesce_usecs;
- u32 et_rx_max_coalesced_frames;
- u32 et_rx_coalesce_usecs_irq;
- u32 et_rx_max_coalesced_frames_irq;
- u32 et_pkt_rate_low;
- u32 et_rx_coalesce_usecs_low;
- u32 et_rx_max_coalesced_frames_low;
- u32 et_pkt_rate_high;
- u32 et_rx_coalesce_usecs_high;
- u32 et_rx_max_coalesced_frames_high;
- u32 et_rate_sample_interval;
- u32 timer_int_limit;
- u32 wqm_quanta;
- u8 allow_unaligned_fpdus;
-
- /* Adapter base MAC address */
- u32 mac_addr_low;
- u16 mac_addr_high;
-
- u16 firmware_eeprom_offset;
- u16 software_eeprom_offset;
-
- u16 max_irrq_wr;
-
- /* pd config for each port */
- u16 pd_config_size[4];
- u16 pd_config_base[4];
-
- u16 link_interrupt_count[4];
- u8 crit_error_count[32];
-
- /* the phy index for each port */
- u8 phy_index[4];
- u8 mac_sw_state[4];
- u8 mac_link_down[4];
- u8 phy_type[4];
- u8 log_port;
-
- /* PCI information */
- struct nes_device *nesdev;
- unsigned int devfn;
- unsigned char bus_number;
- unsigned char OneG_Mode;
-
- unsigned char ref_count;
- u8 netdev_count;
- u8 netdev_max; /* from host nic address count in EEPROM */
- u8 port_count;
- u8 virtwq;
- u8 send_term_ok;
- u8 et_use_adaptive_rx_coalesce;
- u8 adapter_fcn_count;
- u8 pft_mcast_map[NES_PFT_SIZE];
-};
-
-struct nes_pbl {
- u64 *pbl_vbase;
- dma_addr_t pbl_pbase;
- struct page *page;
- unsigned long user_base;
- u32 pbl_size;
- struct list_head list;
- /* TODO: need to add list for two level tables */
-};
-
-#define NES_4K_PBL_CHUNK_SIZE 4096
-
-struct nes_fast_mr_wqe_pbl {
- u64 *kva;
- dma_addr_t paddr;
-};
-
-struct nes_listener {
- struct work_struct work;
- struct workqueue_struct *wq;
- struct nes_vnic *nesvnic;
- struct iw_cm_id *cm_id;
- struct list_head list;
- unsigned long socket;
- u8 accept_failed;
-};
-
-struct nes_ib_device;
-
-#define NES_EVENT_DELAY msecs_to_jiffies(100)
-
-struct nes_vnic {
- struct nes_ib_device *nesibdev;
- u64 sq_full;
- u64 tso_requests;
- u64 segmented_tso_requests;
- u64 linearized_skbs;
- u64 tx_sw_dropped;
- u64 endnode_nstat_rx_discard;
- u64 endnode_nstat_rx_octets;
- u64 endnode_nstat_rx_frames;
- u64 endnode_nstat_tx_octets;
- u64 endnode_nstat_tx_frames;
- u64 endnode_ipv4_tcp_retransmits;
- /* void *mem; */
- struct nes_device *nesdev;
- struct net_device *netdev;
- atomic_t rx_skbs_needed;
- atomic_t rx_skb_timer_running;
- int budget;
- u32 msg_enable;
- /* u32 tx_avail; */
- __be32 local_ipaddr;
- struct napi_struct napi;
- spinlock_t tx_lock; /* could use netdev tx lock? */
- struct timer_list rq_wqes_timer;
- u32 nic_mem_size;
- void *nic_vbase;
- dma_addr_t nic_pbase;
- struct nes_hw_nic nic;
- struct nes_hw_nic_cq nic_cq;
- u32 mcrq_qp_id;
- struct nes_ucontext *mcrq_ucontext;
- struct nes_cqp_request* (*get_cqp_request)(struct nes_device *nesdev);
- void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *);
- int (*mcrq_mcast_filter)( struct nes_vnic* nesvnic, __u8* dmi_addr );
- struct net_device_stats netstats;
- /* used to put the netdev on the adapters logical port list */
- struct list_head list;
- u16 max_frame_size;
- u8 netdev_open;
- u8 linkup;
- u8 logical_port;
- u8 netdev_index; /* might not be needed, indexes nesdev->netdev */
- u8 perfect_filter_index;
- u8 nic_index;
- u8 qp_nic_index[4];
- u8 next_qp_nic_index;
- u8 of_device_registered;
- u8 rdma_enabled;
- struct timer_list event_timer;
- enum ib_event_type delayed_event;
- enum ib_event_type last_dispatched_event;
- spinlock_t port_ibevent_lock;
- u32 mgt_mem_size;
- void *mgt_vbase;
- dma_addr_t mgt_pbase;
- struct nes_vnic_mgt *mgtvnic[NES_MGT_QP_COUNT];
- struct task_struct *mgt_thread;
- wait_queue_head_t mgt_wait_queue;
- struct sk_buff_head mgt_skb_list;
-
-};
-
-struct nes_ib_device {
- struct ib_device ibdev;
- struct nes_vnic *nesvnic;
-
- /* Virtual RNIC Limits */
- u32 max_mr;
- u32 max_qp;
- u32 max_cq;
- u32 max_pd;
- u32 num_mr;
- u32 num_qp;
- u32 num_cq;
- u32 num_pd;
-};
-
-enum nes_hdrct_flags {
- DDP_LEN_FLAG = 0x80,
- DDP_HDR_FLAG = 0x40,
- RDMA_HDR_FLAG = 0x20
-};
-
-enum nes_term_layers {
- LAYER_RDMA = 0,
- LAYER_DDP = 1,
- LAYER_MPA = 2
-};
-
-enum nes_term_error_types {
- RDMAP_CATASTROPHIC = 0,
- RDMAP_REMOTE_PROT = 1,
- RDMAP_REMOTE_OP = 2,
- DDP_CATASTROPHIC = 0,
- DDP_TAGGED_BUFFER = 1,
- DDP_UNTAGGED_BUFFER = 2,
- DDP_LLP = 3
-};
-
-enum nes_term_rdma_errors {
- RDMAP_INV_STAG = 0x00,
- RDMAP_INV_BOUNDS = 0x01,
- RDMAP_ACCESS = 0x02,
- RDMAP_UNASSOC_STAG = 0x03,
- RDMAP_TO_WRAP = 0x04,
- RDMAP_INV_RDMAP_VER = 0x05,
- RDMAP_UNEXPECTED_OP = 0x06,
- RDMAP_CATASTROPHIC_LOCAL = 0x07,
- RDMAP_CATASTROPHIC_GLOBAL = 0x08,
- RDMAP_CANT_INV_STAG = 0x09,
- RDMAP_UNSPECIFIED = 0xff
-};
-
-enum nes_term_ddp_errors {
- DDP_CATASTROPHIC_LOCAL = 0x00,
- DDP_TAGGED_INV_STAG = 0x00,
- DDP_TAGGED_BOUNDS = 0x01,
- DDP_TAGGED_UNASSOC_STAG = 0x02,
- DDP_TAGGED_TO_WRAP = 0x03,
- DDP_TAGGED_INV_DDP_VER = 0x04,
- DDP_UNTAGGED_INV_QN = 0x01,
- DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02,
- DDP_UNTAGGED_INV_MSN_RANGE = 0x03,
- DDP_UNTAGGED_INV_MO = 0x04,
- DDP_UNTAGGED_INV_TOO_LONG = 0x05,
- DDP_UNTAGGED_INV_DDP_VER = 0x06
-};
-
-enum nes_term_mpa_errors {
- MPA_CLOSED = 0x01,
- MPA_CRC = 0x02,
- MPA_MARKER = 0x03,
- MPA_REQ_RSP = 0x04,
-};
-
-struct nes_terminate_hdr {
- u8 layer_etype;
- u8 error_code;
- u8 hdrct;
- u8 rsvd;
-};
-
-/* Used to determine how to fill in terminate error codes */
-#define IWARP_OPCODE_WRITE 0
-#define IWARP_OPCODE_READREQ 1
-#define IWARP_OPCODE_READRSP 2
-#define IWARP_OPCODE_SEND 3
-#define IWARP_OPCODE_SEND_INV 4
-#define IWARP_OPCODE_SEND_SE 5
-#define IWARP_OPCODE_SEND_SE_INV 6
-#define IWARP_OPCODE_TERM 7
-
-/* These values are used only during terminate processing */
-#define TERM_DDP_LEN_TAGGED 14
-#define TERM_DDP_LEN_UNTAGGED 18
-#define TERM_RDMA_LEN 28
-#define RDMA_OPCODE_MASK 0x0f
-#define RDMA_READ_REQ_OPCODE 1
-#define BAD_FRAME_OFFSET 64
-#define CQE_MAJOR_DRV 0x8000
-
-/* Used for link status recheck after interrupt processing */
-#define NES_LINK_RECHECK_DELAY msecs_to_jiffies(50)
-#define NES_LINK_RECHECK_MAX 60
-
-#endif /* __NES_HW_H */
diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c
deleted file mode 100644
index cc4dce5c3e5f..000000000000
--- a/drivers/infiniband/hw/nes/nes_mgt.c
+++ /dev/null
@@ -1,1155 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/skbuff.h>
-#include <linux/etherdevice.h>
-#include <linux/kthread.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <net/tcp.h>
-#include "nes.h"
-#include "nes_mgt.h"
-
-atomic_t pau_qps_created;
-atomic_t pau_qps_destroyed;
-
-static void nes_replenish_mgt_rq(struct nes_vnic_mgt *mgtvnic)
-{
- unsigned long flags;
- dma_addr_t bus_address;
- struct sk_buff *skb;
- struct nes_hw_nic_rq_wqe *nic_rqe;
- struct nes_hw_mgt *nesmgt;
- struct nes_device *nesdev;
- struct nes_rskb_cb *cb;
- u32 rx_wqes_posted = 0;
-
- nesmgt = &mgtvnic->mgt;
- nesdev = mgtvnic->nesvnic->nesdev;
- spin_lock_irqsave(&nesmgt->rq_lock, flags);
- if (nesmgt->replenishing_rq != 0) {
- if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) &&
- (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) {
- atomic_set(&mgtvnic->rx_skb_timer_running, 1);
- spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
- mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2); /* 1/2 second */
- add_timer(&mgtvnic->rq_wqes_timer);
- } else {
- spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
- }
- return;
- }
- nesmgt->replenishing_rq = 1;
- spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
- do {
- skb = dev_alloc_skb(mgtvnic->nesvnic->max_frame_size);
- if (skb) {
- skb->dev = mgtvnic->nesvnic->netdev;
-
- bus_address = pci_map_single(nesdev->pcidev,
- skb->data, mgtvnic->nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- cb->busaddr = bus_address;
- cb->maplen = mgtvnic->nesvnic->max_frame_size;
-
- nic_rqe = &nesmgt->rq_vbase[mgtvnic->mgt.rq_head];
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] =
- cpu_to_le32(mgtvnic->nesvnic->max_frame_size);
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] =
- cpu_to_le32((u32)bus_address);
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] =
- cpu_to_le32((u32)((u64)bus_address >> 32));
- nesmgt->rx_skb[nesmgt->rq_head] = skb;
- nesmgt->rq_head++;
- nesmgt->rq_head &= nesmgt->rq_size - 1;
- atomic_dec(&mgtvnic->rx_skbs_needed);
- barrier();
- if (++rx_wqes_posted == 255) {
- nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id);
- rx_wqes_posted = 0;
- }
- } else {
- spin_lock_irqsave(&nesmgt->rq_lock, flags);
- if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) &&
- (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) {
- atomic_set(&mgtvnic->rx_skb_timer_running, 1);
- spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
- mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2); /* 1/2 second */
- add_timer(&mgtvnic->rq_wqes_timer);
- } else {
- spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
- }
- break;
- }
- } while (atomic_read(&mgtvnic->rx_skbs_needed));
- barrier();
- if (rx_wqes_posted)
- nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id);
- nesmgt->replenishing_rq = 0;
-}
-
-/**
- * nes_mgt_rq_wqes_timeout
- */
-static void nes_mgt_rq_wqes_timeout(struct timer_list *t)
-{
- struct nes_vnic_mgt *mgtvnic = from_timer(mgtvnic, t,
- rq_wqes_timer);
-
- atomic_set(&mgtvnic->rx_skb_timer_running, 0);
- if (atomic_read(&mgtvnic->rx_skbs_needed))
- nes_replenish_mgt_rq(mgtvnic);
-}
-
-/**
- * nes_mgt_free_skb - unmap and free skb
- */
-static void nes_mgt_free_skb(struct nes_device *nesdev, struct sk_buff *skb, u32 dir)
-{
- struct nes_rskb_cb *cb;
-
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, dir);
- cb->busaddr = 0;
- dev_kfree_skb_any(skb);
-}
-
-/**
- * nes_download_callback - handle download completions
- */
-static void nes_download_callback(struct nes_device *nesdev, struct nes_cqp_request *cqp_request)
-{
- struct pau_fpdu_info *fpdu_info = cqp_request->cqp_callback_pointer;
- struct nes_qp *nesqp = fpdu_info->nesqp;
- struct sk_buff *skb;
- int i;
-
- for (i = 0; i < fpdu_info->frag_cnt; i++) {
- skb = fpdu_info->frags[i].skb;
- if (fpdu_info->frags[i].cmplt) {
- nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE);
- nes_rem_ref_cm_node(nesqp->cm_node);
- }
- }
-
- if (fpdu_info->hdr_vbase)
- pci_free_consistent(nesdev->pcidev, fpdu_info->hdr_len,
- fpdu_info->hdr_vbase, fpdu_info->hdr_pbase);
- kfree(fpdu_info);
-}
-
-/**
- * nes_get_seq - Get the seq, ack_seq and window from the packet
- */
-static u32 nes_get_seq(struct sk_buff *skb, u32 *ack, u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd)
-{
- struct nes_rskb_cb *cb = (struct nes_rskb_cb *)&skb->cb[0];
- struct iphdr *iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
- struct tcphdr *tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
-
- *ack = be32_to_cpu(tcph->ack_seq);
- *wnd = be16_to_cpu(tcph->window);
- *fin_rcvd = tcph->fin;
- *rst_rcvd = tcph->rst;
- return be32_to_cpu(tcph->seq);
-}
-
-/**
- * nes_get_next_skb - Get the next skb based on where current skb is in the queue
- */
-static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp *nesqp,
- struct sk_buff *skb, u32 nextseq, u32 *ack,
- u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd)
-{
- u32 seq;
- bool processacks;
- struct sk_buff *old_skb;
-
- if (skb) {
- /* Continue processing fpdu */
- skb = skb_peek_next(skb, &nesqp->pau_list);
- if (!skb)
- goto out;
- processacks = false;
- } else {
- /* Starting a new one */
- if (skb_queue_empty(&nesqp->pau_list))
- goto out;
- skb = skb_peek(&nesqp->pau_list);
- processacks = true;
- }
-
- while (1) {
- if (skb_queue_empty(&nesqp->pau_list))
- goto out;
-
- seq = nes_get_seq(skb, ack, wnd, fin_rcvd, rst_rcvd);
- if (seq == nextseq) {
- if (skb->len || processacks)
- break;
- } else if (after(seq, nextseq)) {
- goto out;
- }
-
- old_skb = skb;
- skb = skb_peek_next(skb, &nesqp->pau_list);
- skb_unlink(old_skb, &nesqp->pau_list);
- nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE);
- nes_rem_ref_cm_node(nesqp->cm_node);
- if (!skb)
- goto out;
- }
- return skb;
-
-out:
- return NULL;
-}
-
-/**
- * get_fpdu_info - Find the next complete fpdu and return its fragments.
- */
-static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
- struct pau_fpdu_info **pau_fpdu_info)
-{
- struct sk_buff *skb;
- struct iphdr *iph;
- struct tcphdr *tcph;
- struct nes_rskb_cb *cb;
- struct pau_fpdu_info *fpdu_info = NULL;
- struct pau_fpdu_frag frags[MAX_FPDU_FRAGS];
- u32 fpdu_len = 0;
- u32 tmp_len;
- int frag_cnt = 0;
- u32 tot_len;
- u32 frag_tot;
- u32 ack;
- u32 fin_rcvd;
- u32 rst_rcvd;
- u16 wnd;
- int i;
- int rc = 0;
-
- *pau_fpdu_info = NULL;
-
- skb = nes_get_next_skb(nesdev, nesqp, NULL, nesqp->pau_rcv_nxt, &ack, &wnd, &fin_rcvd, &rst_rcvd);
- if (!skb)
- goto out;
-
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- if (skb->len) {
- fpdu_len = be16_to_cpu(*(__be16 *) skb->data) + MPA_FRAMING;
- fpdu_len = (fpdu_len + 3) & 0xfffffffc;
- tmp_len = fpdu_len;
-
- /* See if we have all of the fpdu */
- frag_tot = 0;
- memset(&frags, 0, sizeof frags);
- for (i = 0; i < MAX_FPDU_FRAGS; i++) {
- frags[i].physaddr = cb->busaddr;
- frags[i].physaddr += skb->data - cb->data_start;
- frags[i].frag_len = min(tmp_len, skb->len);
- frags[i].skb = skb;
- frags[i].cmplt = (skb->len == frags[i].frag_len);
- frag_tot += frags[i].frag_len;
- frag_cnt++;
-
- tmp_len -= frags[i].frag_len;
- if (tmp_len == 0)
- break;
-
- skb = nes_get_next_skb(nesdev, nesqp, skb,
- nesqp->pau_rcv_nxt + frag_tot, &ack, &wnd, &fin_rcvd, &rst_rcvd);
- if (!skb)
- goto out;
- if (rst_rcvd) {
- /* rst received in the middle of fpdu */
- for (; i >= 0; i--) {
- skb_unlink(frags[i].skb, &nesqp->pau_list);
- nes_mgt_free_skb(nesdev, frags[i].skb, PCI_DMA_TODEVICE);
- }
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- frags[0].physaddr = cb->busaddr;
- frags[0].physaddr += skb->data - cb->data_start;
- frags[0].frag_len = skb->len;
- frags[0].skb = skb;
- frags[0].cmplt = true;
- frag_cnt = 1;
- break;
- }
-
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- }
- } else {
- /* no data */
- frags[0].physaddr = cb->busaddr;
- frags[0].frag_len = 0;
- frags[0].skb = skb;
- frags[0].cmplt = true;
- frag_cnt = 1;
- }
-
- /* Found one */
- fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC);
- if (!fpdu_info) {
- rc = -ENOMEM;
- goto out;
- }
-
- fpdu_info->cqp_request = nes_get_cqp_request(nesdev);
- if (fpdu_info->cqp_request == NULL) {
- nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n");
- rc = -ENOMEM;
- goto out;
- }
-
- cb = (struct nes_rskb_cb *)&frags[0].skb->cb[0];
- iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
- tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
- fpdu_info->hdr_len = (((unsigned char *)tcph) + 4 * (tcph->doff)) - cb->data_start;
- fpdu_info->data_len = fpdu_len;
- tot_len = fpdu_info->hdr_len + fpdu_len - ETH_HLEN;
-
- if (frags[0].cmplt) {
- fpdu_info->hdr_pbase = cb->busaddr;
- fpdu_info->hdr_vbase = NULL;
- } else {
- fpdu_info->hdr_vbase = pci_alloc_consistent(nesdev->pcidev,
- fpdu_info->hdr_len, &fpdu_info->hdr_pbase);
- if (!fpdu_info->hdr_vbase) {
- nes_debug(NES_DBG_PAU, "Unable to allocate memory for pau first frag\n");
- rc = -ENOMEM;
- goto out;
- }
-
- /* Copy hdrs, adjusting len and seqnum */
- memcpy(fpdu_info->hdr_vbase, cb->data_start, fpdu_info->hdr_len);
- iph = (struct iphdr *)(fpdu_info->hdr_vbase + ETH_HLEN);
- tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
- }
-
- iph->tot_len = cpu_to_be16(tot_len);
- iph->saddr = cpu_to_be32(0x7f000001);
-
- tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt);
- tcph->ack_seq = cpu_to_be32(ack);
- tcph->window = cpu_to_be16(wnd);
-
- nesqp->pau_rcv_nxt += fpdu_len + fin_rcvd;
-
- memcpy(fpdu_info->frags, frags, sizeof(fpdu_info->frags));
- fpdu_info->frag_cnt = frag_cnt;
- fpdu_info->nesqp = nesqp;
- *pau_fpdu_info = fpdu_info;
-
- /* Update skb's for next pass */
- for (i = 0; i < frag_cnt; i++) {
- cb = (struct nes_rskb_cb *)&frags[i].skb->cb[0];
- skb_pull(frags[i].skb, frags[i].frag_len);
-
- if (frags[i].skb->len == 0) {
- /* Pull skb off the list - it will be freed in the callback */
- if (!skb_queue_empty(&nesqp->pau_list))
- skb_unlink(frags[i].skb, &nesqp->pau_list);
- } else {
- /* Last skb still has data so update the seq */
- iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
- tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
- tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt);
- }
- }
-
-out:
- if (rc) {
- if (fpdu_info) {
- if (fpdu_info->cqp_request)
- nes_put_cqp_request(nesdev, fpdu_info->cqp_request);
- kfree(fpdu_info);
- }
- }
- return rc;
-}
-
-/**
- * forward_fpdu - send complete fpdus, one at a time
- */
-static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
-{
- struct nes_device *nesdev = nesvnic->nesdev;
- struct pau_fpdu_info *fpdu_info;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- unsigned long flags;
- u64 u64tmp;
- u32 u32tmp;
- int rc;
-
- while (1) {
- spin_lock_irqsave(&nesqp->pau_lock, flags);
- rc = get_fpdu_info(nesdev, nesqp, &fpdu_info);
- if (rc || (fpdu_info == NULL)) {
- spin_unlock_irqrestore(&nesqp->pau_lock, flags);
- return rc;
- }
-
- cqp_request = fpdu_info->cqp_request;
- cqp_wqe = &cqp_request->cqp_wqe;
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_OPCODE_IDX,
- NES_CQP_DOWNLOAD_SEGMENT |
- (((u32)nesvnic->logical_port) << NES_CQP_OP_LOGICAL_PORT_SHIFT));
-
- u32tmp = fpdu_info->hdr_len << 16;
- u32tmp |= fpdu_info->hdr_len + (u32)fpdu_info->data_len;
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX,
- u32tmp);
-
- u32tmp = (fpdu_info->frags[1].frag_len << 16) | fpdu_info->frags[0].frag_len;
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_2_1_IDX,
- u32tmp);
-
- u32tmp = (fpdu_info->frags[3].frag_len << 16) | fpdu_info->frags[2].frag_len;
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_4_3_IDX,
- u32tmp);
-
- u64tmp = (u64)fpdu_info->hdr_pbase;
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX,
- lower_32_bits(u64tmp));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_HIGH_IDX,
- upper_32_bits(u64tmp));
-
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
- lower_32_bits(fpdu_info->frags[0].physaddr));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_HIGH_IDX,
- upper_32_bits(fpdu_info->frags[0].physaddr));
-
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_LOW_IDX,
- lower_32_bits(fpdu_info->frags[1].physaddr));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_HIGH_IDX,
- upper_32_bits(fpdu_info->frags[1].physaddr));
-
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_LOW_IDX,
- lower_32_bits(fpdu_info->frags[2].physaddr));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_HIGH_IDX,
- upper_32_bits(fpdu_info->frags[2].physaddr));
-
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_LOW_IDX,
- lower_32_bits(fpdu_info->frags[3].physaddr));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_HIGH_IDX,
- upper_32_bits(fpdu_info->frags[3].physaddr));
-
- cqp_request->cqp_callback_pointer = fpdu_info;
- cqp_request->callback = 1;
- cqp_request->cqp_callback = nes_download_callback;
-
- atomic_set(&cqp_request->refcount, 1);
- nes_post_cqp_request(nesdev, cqp_request);
- spin_unlock_irqrestore(&nesqp->pau_lock, flags);
- }
-
- return 0;
-}
-
-static void process_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
-{
- int again = 1;
- unsigned long flags;
-
- do {
- /* Ignore rc - if it failed, tcp retries will cause it to try again */
- forward_fpdus(nesvnic, nesqp);
-
- spin_lock_irqsave(&nesqp->pau_lock, flags);
- if (nesqp->pau_pending) {
- nesqp->pau_pending = 0;
- } else {
- nesqp->pau_busy = 0;
- again = 0;
- }
-
- spin_unlock_irqrestore(&nesqp->pau_lock, flags);
- } while (again);
-}
-
-/**
- * queue_fpdus - Handle fpdu's that hw passed up to sw
- */
-static void queue_fpdus(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp)
-{
- struct sk_buff *tmpskb;
- struct nes_rskb_cb *cb;
- struct iphdr *iph;
- struct tcphdr *tcph;
- unsigned char *tcph_end;
- u32 rcv_nxt;
- u32 rcv_wnd;
- u32 seqnum;
- u32 len;
- bool process_it = false;
- unsigned long flags;
-
- /* Move data ptr to after tcp header */
- iph = (struct iphdr *)skb->data;
- tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
- seqnum = be32_to_cpu(tcph->seq);
- tcph_end = (((char *)tcph) + (4 * tcph->doff));
-
- len = be16_to_cpu(iph->tot_len);
- if (skb->len > len)
- skb_trim(skb, len);
- skb_pull(skb, tcph_end - skb->data);
-
- /* Initialize tracking values */
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- cb->seqnum = seqnum;
-
- /* Make sure data is in the receive window */
- rcv_nxt = nesqp->pau_rcv_nxt;
- rcv_wnd = le32_to_cpu(nesqp->nesqp_context->rcv_wnd);
- if (!between(seqnum, rcv_nxt, (rcv_nxt + rcv_wnd))) {
- nes_mgt_free_skb(nesvnic->nesdev, skb, PCI_DMA_TODEVICE);
- nes_rem_ref_cm_node(nesqp->cm_node);
- return;
- }
-
- spin_lock_irqsave(&nesqp->pau_lock, flags);
-
- if (nesqp->pau_busy)
- nesqp->pau_pending = 1;
- else
- nesqp->pau_busy = 1;
-
- /* Queue skb by sequence number */
- if (skb_queue_len(&nesqp->pau_list) == 0) {
- __skb_queue_head(&nesqp->pau_list, skb);
- } else {
- skb_queue_walk(&nesqp->pau_list, tmpskb) {
- cb = (struct nes_rskb_cb *)&tmpskb->cb[0];
- if (before(seqnum, cb->seqnum))
- break;
- }
- __skb_insert(skb, tmpskb->prev, tmpskb, &nesqp->pau_list);
- }
- if (nesqp->pau_state == PAU_READY)
- process_it = true;
- spin_unlock_irqrestore(&nesqp->pau_lock, flags);
-
- if (process_it)
- process_fpdus(nesvnic, nesqp);
-
- return;
-}
-
-/**
- * mgt_thread - Handle mgt skbs in a safe context
- */
-static int mgt_thread(void *context)
-{
- struct nes_vnic *nesvnic = context;
- struct sk_buff *skb;
- struct nes_rskb_cb *cb;
-
- while (!kthread_should_stop()) {
- wait_event_interruptible(nesvnic->mgt_wait_queue,
- skb_queue_len(&nesvnic->mgt_skb_list) || kthread_should_stop());
- while ((skb_queue_len(&nesvnic->mgt_skb_list)) && !kthread_should_stop()) {
- skb = skb_dequeue(&nesvnic->mgt_skb_list);
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- cb->data_start = skb->data - ETH_HLEN;
- cb->busaddr = pci_map_single(nesvnic->nesdev->pcidev, cb->data_start,
- nesvnic->max_frame_size, PCI_DMA_TODEVICE);
- queue_fpdus(skb, nesvnic, cb->nesqp);
- }
- }
-
- /* Closing down so delete any entries on the queue */
- while (skb_queue_len(&nesvnic->mgt_skb_list)) {
- skb = skb_dequeue(&nesvnic->mgt_skb_list);
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- nes_rem_ref_cm_node(cb->nesqp->cm_node);
- dev_kfree_skb_any(skb);
- }
- return 0;
-}
-
-/**
- * nes_queue_skbs - Queue skb so it can be handled in a thread context
- */
-void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp)
-{
- struct nes_rskb_cb *cb;
-
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- cb->nesqp = nesqp;
- skb_queue_tail(&nesvnic->mgt_skb_list, skb);
- wake_up_interruptible(&nesvnic->mgt_wait_queue);
-}
-
-void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp)
-{
- struct sk_buff *skb;
- unsigned long flags;
- atomic_inc(&pau_qps_destroyed);
-
- /* Free packets that have not yet been forwarded */
- /* Lock is acquired by skb_dequeue when removing the skb */
- spin_lock_irqsave(&nesqp->pau_lock, flags);
- while (skb_queue_len(&nesqp->pau_list)) {
- skb = skb_dequeue(&nesqp->pau_list);
- nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE);
- nes_rem_ref_cm_node(nesqp->cm_node);
- }
- spin_unlock_irqrestore(&nesqp->pau_lock, flags);
-}
-
-static void nes_chg_qh_handler(struct nes_device *nesdev, struct nes_cqp_request *cqp_request)
-{
- struct pau_qh_chg *qh_chg = cqp_request->cqp_callback_pointer;
- struct nes_cqp_request *new_request;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_adapter *nesadapter;
- struct nes_qp *nesqp;
- struct nes_v4_quad nes_quad;
- u32 crc_value;
- u64 u64temp;
-
- nesadapter = nesdev->nesadapter;
- nesqp = qh_chg->nesqp;
-
- /* Should we handle the bad completion */
- if (cqp_request->major_code)
- WARN(1, PFX "Invalid cqp_request major_code=0x%x\n",
- cqp_request->major_code);
-
- switch (nesqp->pau_state) {
- case PAU_DEL_QH:
- /* Old hash code deleted, now set the new one */
- nesqp->pau_state = PAU_ADD_LB_QH;
- new_request = nes_get_cqp_request(nesdev);
- if (new_request == NULL) {
- nes_debug(NES_DBG_PAU, "Failed to get a new_request.\n");
- WARN_ON(1);
- return;
- }
-
- memset(&nes_quad, 0, sizeof(nes_quad));
- nes_quad.DstIpAdrIndex =
- cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
- nes_quad.SrcIpadr = cpu_to_be32(0x7f000001);
- nes_quad.TcpPorts[0] = swab16(nesqp->nesqp_context->tcpPorts[1]);
- nes_quad.TcpPorts[1] = swab16(nesqp->nesqp_context->tcpPorts[0]);
-
- /* Produce hash key */
- crc_value = get_crc_value(&nes_quad);
- nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
- nes_debug(NES_DBG_PAU, "new HTE Index = 0x%08X, CRC = 0x%08X\n",
- nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask);
-
- nesqp->hte_index &= nesadapter->hte_index_mask;
- nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
- nesqp->nesqp_context->ip0 = cpu_to_le32(0x7f000001);
- nesqp->nesqp_context->rcv_nxt = cpu_to_le32(nesqp->pau_rcv_nxt);
-
- cqp_wqe = &new_request->cqp_wqe;
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words,
- NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH |
- NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
- u64temp = (u64)nesqp->nesqp_context_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
-
- nes_debug(NES_DBG_PAU, "Waiting for CQP completion for adding the quad hash.\n");
-
- new_request->cqp_callback_pointer = qh_chg;
- new_request->callback = 1;
- new_request->cqp_callback = nes_chg_qh_handler;
- atomic_set(&new_request->refcount, 1);
- nes_post_cqp_request(nesdev, new_request);
- break;
-
- case PAU_ADD_LB_QH:
- /* Start processing the queued fpdu's */
- nesqp->pau_state = PAU_READY;
- process_fpdus(qh_chg->nesvnic, qh_chg->nesqp);
- kfree(qh_chg);
- break;
- }
-}
-
-/**
- * nes_change_quad_hash
- */
-static int nes_change_quad_hash(struct nes_device *nesdev,
- struct nes_vnic *nesvnic, struct nes_qp *nesqp)
-{
- struct nes_cqp_request *cqp_request = NULL;
- struct pau_qh_chg *qh_chg = NULL;
- u64 u64temp;
- struct nes_hw_cqp_wqe *cqp_wqe;
- int ret = 0;
-
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n");
- ret = -ENOMEM;
- goto chg_qh_err;
- }
-
- qh_chg = kmalloc(sizeof *qh_chg, GFP_ATOMIC);
- if (!qh_chg) {
- ret = -ENOMEM;
- goto chg_qh_err;
- }
- qh_chg->nesdev = nesdev;
- qh_chg->nesvnic = nesvnic;
- qh_chg->nesqp = nesqp;
- nesqp->pau_state = PAU_DEL_QH;
-
- cqp_wqe = &cqp_request->cqp_wqe;
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words,
- NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH | NES_CQP_QP_DEL_HTE |
- NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
- u64temp = (u64)nesqp->nesqp_context_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
-
- nes_debug(NES_DBG_PAU, "Waiting for CQP completion for deleting the quad hash.\n");
-
- cqp_request->cqp_callback_pointer = qh_chg;
- cqp_request->callback = 1;
- cqp_request->cqp_callback = nes_chg_qh_handler;
- atomic_set(&cqp_request->refcount, 1);
- nes_post_cqp_request(nesdev, cqp_request);
-
- return ret;
-
-chg_qh_err:
- kfree(qh_chg);
- if (cqp_request)
- nes_put_cqp_request(nesdev, cqp_request);
- return ret;
-}
-
-/**
- * nes_mgt_ce_handler
- * This management code deals with any packed and unaligned (pau) fpdu's
- * that the hardware cannot handle.
- */
-static void nes_mgt_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
-{
- struct nes_vnic_mgt *mgtvnic = container_of(cq, struct nes_vnic_mgt, mgt_cq);
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 head;
- u32 cq_size;
- u32 cqe_count = 0;
- u32 cqe_misc;
- u32 qp_id = 0;
- u32 skbs_needed;
- unsigned long context;
- struct nes_qp *nesqp;
- struct sk_buff *rx_skb;
- struct nes_rskb_cb *cb;
-
- head = cq->cq_head;
- cq_size = cq->cq_size;
-
- while (1) {
- cqe_misc = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]);
- if (!(cqe_misc & NES_NIC_CQE_VALID))
- break;
-
- nesqp = NULL;
- if (cqe_misc & NES_NIC_CQE_ACCQP_VALID) {
- qp_id = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_ACCQP_ID_IDX]);
- qp_id &= 0x001fffff;
- if (qp_id < nesadapter->max_qp) {
- context = (unsigned long)nesadapter->qp_table[qp_id - NES_FIRST_QPN];
- nesqp = (struct nes_qp *)context;
- }
- }
-
- if (nesqp) {
- if (nesqp->pau_mode == false) {
- nesqp->pau_mode = true; /* First time for this qp */
- nesqp->pau_rcv_nxt = le32_to_cpu(
- cq->cq_vbase[head].cqe_words[NES_NIC_CQE_HASH_RCVNXT]);
- skb_queue_head_init(&nesqp->pau_list);
- spin_lock_init(&nesqp->pau_lock);
- atomic_inc(&pau_qps_created);
- nes_change_quad_hash(nesdev, mgtvnic->nesvnic, nesqp);
- }
-
- rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail];
- rx_skb->len = 0;
- skb_put(rx_skb, cqe_misc & 0x0000ffff);
- rx_skb->protocol = eth_type_trans(rx_skb, mgtvnic->nesvnic->netdev);
- cb = (struct nes_rskb_cb *)&rx_skb->cb[0];
- pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, PCI_DMA_FROMDEVICE);
- cb->busaddr = 0;
- mgtvnic->mgt.rq_tail++;
- mgtvnic->mgt.rq_tail &= mgtvnic->mgt.rq_size - 1;
-
- nes_add_ref_cm_node(nesqp->cm_node);
- nes_queue_mgt_skbs(rx_skb, mgtvnic->nesvnic, nesqp);
- } else {
- printk(KERN_ERR PFX "Invalid QP %d for packed/unaligned handling\n", qp_id);
- }
-
- cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX] = 0;
- cqe_count++;
- if (++head >= cq_size)
- head = 0;
-
- if (cqe_count == 255) {
- /* Replenish mgt CQ */
- nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (cqe_count << 16));
- nesdev->currcq_count += cqe_count;
- cqe_count = 0;
- }
-
- skbs_needed = atomic_inc_return(&mgtvnic->rx_skbs_needed);
- if (skbs_needed > (mgtvnic->mgt.rq_size >> 1))
- nes_replenish_mgt_rq(mgtvnic);
- }
-
- cq->cq_head = head;
- nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
- cq->cq_number | (cqe_count << 16));
- nes_read32(nesdev->regs + NES_CQE_ALLOC);
- nesdev->currcq_count += cqe_count;
-}
-
-/**
- * nes_init_mgt_qp
- */
-int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic)
-{
- struct nes_vnic_mgt *mgtvnic;
- u32 counter;
- void *vmem;
- dma_addr_t pmem;
- struct nes_hw_cqp_wqe *cqp_wqe;
- u32 cqp_head;
- unsigned long flags;
- struct nes_hw_nic_qp_context *mgt_context;
- u64 u64temp;
- struct nes_hw_nic_rq_wqe *mgt_rqe;
- struct sk_buff *skb;
- u32 wqe_count;
- struct nes_rskb_cb *cb;
- u32 mgt_mem_size;
- void *mgt_vbase;
- dma_addr_t mgt_pbase;
- int i;
- int ret;
-
- /* Allocate space the all mgt QPs once */
- mgtvnic = kcalloc(NES_MGT_QP_COUNT, sizeof(struct nes_vnic_mgt),
- GFP_KERNEL);
- if (!mgtvnic)
- return -ENOMEM;
-
- /* Allocate fragment, RQ, and CQ; Reuse CEQ based on the PCI function */
- /* We are not sending from this NIC so sq is not allocated */
- mgt_mem_size = 256 +
- (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe)) +
- (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_cqe)) +
- sizeof(struct nes_hw_nic_qp_context);
- mgt_mem_size = (mgt_mem_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
- mgt_vbase = pci_alloc_consistent(nesdev->pcidev, NES_MGT_QP_COUNT * mgt_mem_size, &mgt_pbase);
- if (!mgt_vbase) {
- kfree(mgtvnic);
- nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt host descriptor rings\n");
- return -ENOMEM;
- }
-
- nesvnic->mgt_mem_size = NES_MGT_QP_COUNT * mgt_mem_size;
- nesvnic->mgt_vbase = mgt_vbase;
- nesvnic->mgt_pbase = mgt_pbase;
-
- skb_queue_head_init(&nesvnic->mgt_skb_list);
- init_waitqueue_head(&nesvnic->mgt_wait_queue);
- nesvnic->mgt_thread = kthread_run(mgt_thread, nesvnic, "nes_mgt_thread");
-
- for (i = 0; i < NES_MGT_QP_COUNT; i++) {
- mgtvnic->nesvnic = nesvnic;
- mgtvnic->mgt.qp_id = nesdev->mac_index + NES_MGT_QP_OFFSET + i;
- memset(mgt_vbase, 0, mgt_mem_size);
- nes_debug(NES_DBG_INIT, "Allocated mgt QP structures at %p (phys = %016lX), size = %u.\n",
- mgt_vbase, (unsigned long)mgt_pbase, mgt_mem_size);
-
- vmem = (void *)(((unsigned long)mgt_vbase + (256 - 1)) &
- ~(unsigned long)(256 - 1));
- pmem = (dma_addr_t)(((unsigned long long)mgt_pbase + (256 - 1)) &
- ~(unsigned long long)(256 - 1));
-
- spin_lock_init(&mgtvnic->mgt.rq_lock);
-
- /* setup the RQ */
- mgtvnic->mgt.rq_vbase = vmem;
- mgtvnic->mgt.rq_pbase = pmem;
- mgtvnic->mgt.rq_head = 0;
- mgtvnic->mgt.rq_tail = 0;
- mgtvnic->mgt.rq_size = NES_MGT_WQ_COUNT;
-
- /* setup the CQ */
- vmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe));
- pmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe));
-
- mgtvnic->mgt_cq.cq_number = mgtvnic->mgt.qp_id;
- mgtvnic->mgt_cq.cq_vbase = vmem;
- mgtvnic->mgt_cq.cq_pbase = pmem;
- mgtvnic->mgt_cq.cq_head = 0;
- mgtvnic->mgt_cq.cq_size = NES_MGT_WQ_COUNT;
-
- mgtvnic->mgt_cq.ce_handler = nes_mgt_ce_handler;
-
- /* Send CreateCQ request to CQP */
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- cqp_head = nesdev->cqp.sq_head;
-
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
-
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(
- NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
- ((u32)mgtvnic->mgt_cq.cq_size << 16));
- cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(
- mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16));
- u64temp = (u64)mgtvnic->mgt_cq.cq_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0;
- u64temp = (unsigned long)&mgtvnic->mgt_cq;
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp >> 1));
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
- cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
-
- if (++cqp_head >= nesdev->cqp.sq_size)
- cqp_head = 0;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
-
- /* Send CreateQP request to CQP */
- mgt_context = (void *)(&mgtvnic->mgt_cq.cq_vbase[mgtvnic->mgt_cq.cq_size]);
- mgt_context->context_words[NES_NIC_CTX_MISC_IDX] =
- cpu_to_le32((u32)NES_MGT_CTX_SIZE |
- ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 12));
- nes_debug(NES_DBG_INIT, "RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n",
- nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE),
- nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE));
- if (nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0)
- mgt_context->context_words[NES_NIC_CTX_MISC_IDX] |= cpu_to_le32(NES_NIC_BACK_STORE);
-
- u64temp = (u64)mgtvnic->mgt.rq_pbase;
- mgt_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
- mgt_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
- u64temp = (u64)mgtvnic->mgt.rq_pbase;
- mgt_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
- mgt_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
-
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP |
- NES_CQP_QP_TYPE_NIC);
- cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(mgtvnic->mgt.qp_id);
- u64temp = (u64)mgtvnic->mgt_cq.cq_pbase +
- (mgtvnic->mgt_cq.cq_size * sizeof(struct nes_hw_nic_cqe));
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
-
- if (++cqp_head >= nesdev->cqp.sq_size)
- cqp_head = 0;
- nesdev->cqp.sq_head = cqp_head;
-
- barrier();
-
- /* Ring doorbell (2 WQEs) */
- nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
-
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- nes_debug(NES_DBG_INIT, "Waiting for create MGT QP%u to complete.\n",
- mgtvnic->mgt.qp_id);
-
- ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_INIT, "Create MGT QP%u completed, wait_event_timeout ret = %u.\n",
- mgtvnic->mgt.qp_id, ret);
- if (!ret) {
- nes_debug(NES_DBG_INIT, "MGT QP%u create timeout expired\n", mgtvnic->mgt.qp_id);
- if (i == 0) {
- pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase,
- nesvnic->mgt_pbase);
- kfree(mgtvnic);
- } else {
- nes_destroy_mgt(nesvnic);
- }
- return -EIO;
- }
-
- /* Populate the RQ */
- for (counter = 0; counter < (NES_MGT_WQ_COUNT - 1); counter++) {
- skb = dev_alloc_skb(nesvnic->max_frame_size);
- if (!skb) {
- nes_debug(NES_DBG_INIT, "%s: out of memory for receive skb\n", netdev->name);
- return -ENOMEM;
- }
-
- skb->dev = netdev;
-
- pmem = pci_map_single(nesdev->pcidev, skb->data,
- nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- cb->busaddr = pmem;
- cb->maplen = nesvnic->max_frame_size;
-
- mgt_rqe = &mgtvnic->mgt.rq_vbase[counter];
- mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32((u32)nesvnic->max_frame_size);
- mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
- mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem);
- mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32));
- mgtvnic->mgt.rx_skb[counter] = skb;
- }
-
- timer_setup(&mgtvnic->rq_wqes_timer, nes_mgt_rq_wqes_timeout,
- 0);
-
- wqe_count = NES_MGT_WQ_COUNT - 1;
- mgtvnic->mgt.rq_head = wqe_count;
- barrier();
- do {
- counter = min(wqe_count, ((u32)255));
- wqe_count -= counter;
- nes_write32(nesdev->regs + NES_WQE_ALLOC, (counter << 24) | mgtvnic->mgt.qp_id);
- } while (wqe_count);
-
- nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
- mgtvnic->mgt_cq.cq_number);
- nes_read32(nesdev->regs + NES_CQE_ALLOC);
-
- mgt_vbase += mgt_mem_size;
- mgt_pbase += mgt_mem_size;
- nesvnic->mgtvnic[i] = mgtvnic++;
- }
- return 0;
-}
-
-
-void nes_destroy_mgt(struct nes_vnic *nesvnic)
-{
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_vnic_mgt *mgtvnic;
- struct nes_vnic_mgt *first_mgtvnic;
- unsigned long flags;
- struct nes_hw_cqp_wqe *cqp_wqe;
- u32 cqp_head;
- struct sk_buff *rx_skb;
- int i;
- int ret;
-
- kthread_stop(nesvnic->mgt_thread);
-
- /* Free remaining NIC receive buffers */
- first_mgtvnic = nesvnic->mgtvnic[0];
- for (i = 0; i < NES_MGT_QP_COUNT; i++) {
- mgtvnic = nesvnic->mgtvnic[i];
- if (mgtvnic == NULL)
- continue;
-
- while (mgtvnic->mgt.rq_head != mgtvnic->mgt.rq_tail) {
- rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail];
- nes_mgt_free_skb(nesdev, rx_skb, PCI_DMA_FROMDEVICE);
- mgtvnic->mgt.rq_tail++;
- mgtvnic->mgt.rq_tail &= (mgtvnic->mgt.rq_size - 1);
- }
-
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
-
- /* Destroy NIC QP */
- cqp_head = nesdev->cqp.sq_head;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
-
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- (NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
- mgtvnic->mgt.qp_id);
-
- if (++cqp_head >= nesdev->cqp.sq_size)
- cqp_head = 0;
-
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
-
- /* Destroy NIC CQ */
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- (NES_CQP_DESTROY_CQ | ((u32)mgtvnic->mgt_cq.cq_size << 16)));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
- (mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16)));
-
- if (++cqp_head >= nesdev->cqp.sq_size)
- cqp_head = 0;
-
- nesdev->cqp.sq_head = cqp_head;
- barrier();
-
- /* Ring doorbell (2 WQEs) */
- nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
-
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u,"
- " cqp.sq_tail=%u, cqp.sq_size=%u\n",
- cqp_head, nesdev->cqp.sq_head,
- nesdev->cqp.sq_tail, nesdev->cqp.sq_size);
-
- ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
- NES_EVENT_TIMEOUT);
-
- nes_debug(NES_DBG_SHUTDOWN, "Destroy MGT QP returned, wait_event_timeout ret = %u, cqp_head=%u,"
- " cqp.sq_head=%u, cqp.sq_tail=%u\n",
- ret, cqp_head, nesdev->cqp.sq_head, nesdev->cqp.sq_tail);
- if (!ret)
- nes_debug(NES_DBG_SHUTDOWN, "MGT QP%u destroy timeout expired\n",
- mgtvnic->mgt.qp_id);
-
- nesvnic->mgtvnic[i] = NULL;
- }
-
- if (nesvnic->mgt_vbase) {
- pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase,
- nesvnic->mgt_pbase);
- nesvnic->mgt_vbase = NULL;
- nesvnic->mgt_pbase = 0;
- }
-
- kfree(first_mgtvnic);
-}
diff --git a/drivers/infiniband/hw/nes/nes_mgt.h b/drivers/infiniband/hw/nes/nes_mgt.h
deleted file mode 100644
index 4f7f701c4a81..000000000000
--- a/drivers/infiniband/hw/nes/nes_mgt.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
-* Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenIB.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*/
-
-#ifndef __NES_MGT_H
-#define __NES_MGT_H
-
-#define MPA_FRAMING 6 /* length is 2 bytes, crc is 4 bytes */
-
-int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic);
-void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp);
-void nes_destroy_mgt(struct nes_vnic *nesvnic);
-void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp);
-
-struct nes_hw_mgt {
- struct nes_hw_nic_rq_wqe *rq_vbase; /* virtual address of rq */
- dma_addr_t rq_pbase; /* PCI memory for host rings */
- struct sk_buff *rx_skb[NES_NIC_WQ_SIZE];
- u16 qp_id;
- u16 sq_head;
- u16 rq_head;
- u16 rq_tail;
- u16 rq_size;
- u8 replenishing_rq;
- u8 reserved;
- spinlock_t rq_lock;
-};
-
-struct nes_vnic_mgt {
- struct nes_vnic *nesvnic;
- struct nes_hw_mgt mgt;
- struct nes_hw_nic_cq mgt_cq;
- atomic_t rx_skbs_needed;
- struct timer_list rq_wqes_timer;
- atomic_t rx_skb_timer_running;
-};
-
-#define MAX_FPDU_FRAGS 4
-struct pau_fpdu_frag {
- struct sk_buff *skb;
- u64 physaddr;
- u32 frag_len;
- bool cmplt;
-};
-
-struct pau_fpdu_info {
- struct nes_qp *nesqp;
- struct nes_cqp_request *cqp_request;
- void *hdr_vbase;
- dma_addr_t hdr_pbase;
- int hdr_len;
- u16 data_len;
- u16 frag_cnt;
- struct pau_fpdu_frag frags[MAX_FPDU_FRAGS];
-};
-
-enum pau_qh_state {
- PAU_DEL_QH,
- PAU_ADD_LB_QH,
- PAU_READY
-};
-
-struct pau_qh_chg {
- struct nes_device *nesdev;
- struct nes_vnic *nesvnic;
- struct nes_qp *nesqp;
-};
-
-#endif /* __NES_MGT_H */
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
deleted file mode 100644
index 16f33454c198..000000000000
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ /dev/null
@@ -1,1870 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/if_arp.h>
-#include <linux/if_vlan.h>
-#include <linux/ethtool.h>
-#include <linux/slab.h>
-#include <net/tcp.h>
-
-#include <net/inet_common.h>
-#include <linux/inet.h>
-
-#include "nes.h"
-
-static struct nic_qp_map nic_qp_mapping_0[] = {
- {16,0,0,1},{24,4,0,0},{28,8,0,0},{32,12,0,0},
- {20,2,2,1},{26,6,2,0},{30,10,2,0},{34,14,2,0},
- {18,1,1,1},{25,5,1,0},{29,9,1,0},{33,13,1,0},
- {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0}
-};
-
-static struct nic_qp_map nic_qp_mapping_1[] = {
- {18,1,1,1},{25,5,1,0},{29,9,1,0},{33,13,1,0},
- {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0}
-};
-
-static struct nic_qp_map nic_qp_mapping_2[] = {
- {20,2,2,1},{26,6,2,0},{30,10,2,0},{34,14,2,0}
-};
-
-static struct nic_qp_map nic_qp_mapping_3[] = {
- {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0}
-};
-
-static struct nic_qp_map nic_qp_mapping_4[] = {
- {28,8,0,0},{32,12,0,0}
-};
-
-static struct nic_qp_map nic_qp_mapping_5[] = {
- {29,9,1,0},{33,13,1,0}
-};
-
-static struct nic_qp_map nic_qp_mapping_6[] = {
- {30,10,2,0},{34,14,2,0}
-};
-
-static struct nic_qp_map nic_qp_mapping_7[] = {
- {31,11,3,0},{35,15,3,0}
-};
-
-static struct nic_qp_map *nic_qp_mapping_per_function[] = {
- nic_qp_mapping_0, nic_qp_mapping_1, nic_qp_mapping_2, nic_qp_mapping_3,
- nic_qp_mapping_4, nic_qp_mapping_5, nic_qp_mapping_6, nic_qp_mapping_7
-};
-
-static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
- | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
-static int debug = -1;
-static int nics_per_function = 1;
-
-/**
- * nes_netdev_poll
- */
-static int nes_netdev_poll(struct napi_struct *napi, int budget)
-{
- struct nes_vnic *nesvnic = container_of(napi, struct nes_vnic, napi);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_hw_nic_cq *nescq = &nesvnic->nic_cq;
-
- nesvnic->budget = budget;
- nescq->cqes_pending = 0;
- nescq->rx_cqes_completed = 0;
- nescq->cqe_allocs_pending = 0;
- nescq->rx_pkts_indicated = 0;
-
- nes_nic_ce_handler(nesdev, nescq);
-
- if (nescq->cqes_pending == 0) {
- napi_complete(napi);
- /* clear out completed cqes and arm */
- nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
- nescq->cq_number | (nescq->cqe_allocs_pending << 16));
- nes_read32(nesdev->regs+NES_CQE_ALLOC);
- } else {
- /* clear out completed cqes but don't arm */
- nes_write32(nesdev->regs+NES_CQE_ALLOC,
- nescq->cq_number | (nescq->cqe_allocs_pending << 16));
- nes_debug(NES_DBG_NETDEV, "%s: exiting with work pending\n",
- nesvnic->netdev->name);
- }
- return nescq->rx_pkts_indicated;
-}
-
-
-/**
- * nes_netdev_open - Activate the network interface; ifconfig
- * ethx up.
- */
-static int nes_netdev_open(struct net_device *netdev)
-{
- u32 macaddr_low;
- u16 macaddr_high;
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- int ret;
- int i;
- struct nes_vnic *first_nesvnic = NULL;
- u32 nic_active_bit;
- u32 nic_active;
- struct list_head *list_pos, *list_temp;
- unsigned long flags;
-
- if (nesvnic->netdev_open == 1)
- return 0;
-
- if (netif_msg_ifup(nesvnic))
- printk(KERN_INFO PFX "%s: enabling interface\n", netdev->name);
-
- ret = nes_init_nic_qp(nesdev, netdev);
- if (ret) {
- return ret;
- }
-
- netif_carrier_off(netdev);
- netif_stop_queue(netdev);
-
- if ((!nesvnic->of_device_registered) && (nesvnic->rdma_enabled)) {
- nesvnic->nesibdev = nes_init_ofa_device(netdev);
- if (nesvnic->nesibdev == NULL) {
- printk(KERN_ERR PFX "%s: nesvnic->nesibdev alloc failed", netdev->name);
- } else {
- nesvnic->nesibdev->nesvnic = nesvnic;
- ret = nes_register_ofa_device(nesvnic->nesibdev);
- if (ret) {
- printk(KERN_ERR PFX "%s: Unable to register RDMA device, ret = %d\n",
- netdev->name, ret);
- }
- }
- }
- /* Set packet filters */
- nic_active_bit = 1 << nesvnic->nic_index;
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_ACTIVE);
- nic_active |= nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_ACTIVE, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE);
- nic_active |= nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON);
- nic_active |= nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active);
-
- macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
- macaddr_high += (u16)netdev->dev_addr[1];
-
- macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
- macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
- macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
- macaddr_low += (u32)netdev->dev_addr[5];
-
- /* Program the various MAC regs */
- for (i = 0; i < NES_MAX_PORT_COUNT; i++) {
- if (nesvnic->qp_nic_index[i] == 0xf) {
- break;
- }
- nes_debug(NES_DBG_NETDEV, "i=%d, perfect filter table index= %d, PERF FILTER LOW"
- " (Addr:%08X) = %08X, HIGH = %08X.\n",
- i, nesvnic->qp_nic_index[i],
- NES_IDX_PERFECT_FILTER_LOW+
- (nesvnic->qp_nic_index[i] * 8),
- macaddr_low,
- (u32)macaddr_high | NES_MAC_ADDR_VALID |
- ((((u32)nesvnic->nic_index) << 16)));
- nes_write_indexed(nesdev,
- NES_IDX_PERFECT_FILTER_LOW + (nesvnic->qp_nic_index[i] * 8),
- macaddr_low);
- nes_write_indexed(nesdev,
- NES_IDX_PERFECT_FILTER_HIGH + (nesvnic->qp_nic_index[i] * 8),
- (u32)macaddr_high | NES_MAC_ADDR_VALID |
- ((((u32)nesvnic->nic_index) << 16)));
- }
-
-
- nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
- nesvnic->nic_cq.cq_number);
- nes_read32(nesdev->regs+NES_CQE_ALLOC);
- list_for_each_safe(list_pos, list_temp, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]) {
- first_nesvnic = container_of(list_pos, struct nes_vnic, list);
- if (first_nesvnic->netdev_open == 1)
- break;
- }
- if (first_nesvnic->netdev_open == 0) {
- nes_debug(NES_DBG_INIT, "Setting up MAC interrupt mask.\n");
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK + (0x200 * nesdev->mac_index),
- ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT |
- NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR));
- first_nesvnic = nesvnic;
- }
-
- if (first_nesvnic->linkup) {
- /* Enable network packets */
- nesvnic->linkup = 1;
- netif_start_queue(netdev);
- netif_carrier_on(netdev);
- }
-
- spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
- if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_SFP_D) {
- nesdev->link_recheck = 1;
- mod_delayed_work(system_wq, &nesdev->work,
- NES_LINK_RECHECK_DELAY);
- }
- spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
-
- spin_lock_irqsave(&nesvnic->port_ibevent_lock, flags);
- if (nesvnic->of_device_registered) {
- nesdev->nesadapter->send_term_ok = 1;
- if (nesvnic->linkup == 1) {
- if (nesdev->iw_status == 0) {
- nesdev->iw_status = 1;
- nes_port_ibevent(nesvnic);
- }
- } else {
- nesdev->iw_status = 0;
- }
- }
- spin_unlock_irqrestore(&nesvnic->port_ibevent_lock, flags);
-
- napi_enable(&nesvnic->napi);
- nesvnic->netdev_open = 1;
-
- return 0;
-}
-
-
-/**
- * nes_netdev_stop
- */
-static int nes_netdev_stop(struct net_device *netdev)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- u32 nic_active_mask;
- u32 nic_active;
- struct nes_vnic *first_nesvnic = NULL;
- struct list_head *list_pos, *list_temp;
- unsigned long flags;
-
- nes_debug(NES_DBG_SHUTDOWN, "nesvnic=%p, nesdev=%p, netdev=%p %s\n",
- nesvnic, nesdev, netdev, netdev->name);
- if (nesvnic->netdev_open == 0)
- return 0;
-
- if (netif_msg_ifdown(nesvnic))
- printk(KERN_INFO PFX "%s: disabling interface\n", netdev->name);
- netif_carrier_off(netdev);
-
- /* Disable network packets */
- napi_disable(&nesvnic->napi);
- netif_stop_queue(netdev);
- list_for_each_safe(list_pos, list_temp, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]) {
- first_nesvnic = container_of(list_pos, struct nes_vnic, list);
- if ((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic))
- break;
- }
-
- if ((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic) &&
- (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) !=
- PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) {
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+
- (0x200*nesdev->mac_index), 0xffffffff);
- nes_write_indexed(first_nesvnic->nesdev,
- NES_IDX_MAC_INT_MASK+
- (0x200*first_nesvnic->nesdev->mac_index),
- ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT |
- NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR));
- } else {
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff);
- }
-
- nic_active_mask = ~((u32)(1 << nesvnic->nic_index));
- nes_write_indexed(nesdev, NES_IDX_PERFECT_FILTER_HIGH+
- (nesvnic->perfect_filter_index*8), 0);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_ACTIVE);
- nic_active &= nic_active_mask;
- nes_write_indexed(nesdev, NES_IDX_NIC_ACTIVE, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
- nic_active &= nic_active_mask;
- nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE);
- nic_active &= nic_active_mask;
- nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
- nic_active &= nic_active_mask;
- nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON);
- nic_active &= nic_active_mask;
- nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active);
-
- spin_lock_irqsave(&nesvnic->port_ibevent_lock, flags);
- if (nesvnic->of_device_registered) {
- nesdev->nesadapter->send_term_ok = 0;
- nesdev->iw_status = 0;
- if (nesvnic->linkup == 1)
- nes_port_ibevent(nesvnic);
- }
- del_timer_sync(&nesvnic->event_timer);
- nesvnic->event_timer.function = NULL;
- spin_unlock_irqrestore(&nesvnic->port_ibevent_lock, flags);
-
- nes_destroy_nic_qp(nesvnic);
-
- nesvnic->netdev_open = 0;
-
- return 0;
-}
-
-
-/**
- * nes_nic_send
- */
-static bool nes_nic_send(struct sk_buff *skb, struct net_device *netdev)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_hw_nic *nesnic = &nesvnic->nic;
- struct nes_hw_nic_sq_wqe *nic_sqe;
- struct tcphdr *tcph;
- __le16 *wqe_fragment_length;
- u32 wqe_misc;
- u16 wqe_fragment_index = 1; /* first fragment (0) is used by copy buffer */
- u16 skb_fragment_index;
- dma_addr_t bus_address;
-
- nic_sqe = &nesnic->sq_vbase[nesnic->sq_head];
- wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
-
- /* setup the VLAN tag if present */
- if (skb_vlan_tag_present(skb)) {
- nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... VLAN = %08X\n",
- netdev->name, skb_vlan_tag_get(skb));
- wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE;
- wqe_fragment_length[0] = (__force __le16) skb_vlan_tag_get(skb);
- } else
- wqe_misc = 0;
-
- /* bump past the vlan tag */
- wqe_fragment_length++;
- /* wqe_fragment_address = (u64 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX]; */
- wqe_misc |= NES_NIC_SQ_WQE_COMPLETION;
-
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- if (skb_is_gso(skb)) {
- tcph = tcp_hdr(skb);
- /* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... is_gso = %u seg size = %u\n",
- netdev->name, skb_is_gso(skb), skb_shinfo(skb)->gso_size); */
- wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE | (u16)skb_shinfo(skb)->gso_size;
- set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX,
- ((u32)tcph->doff) |
- (((u32)(((unsigned char *)tcph) - skb->data)) << 4));
- }
- } else { /* CHECKSUM_HW */
- wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM;
- }
-
- set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX,
- skb->len);
- memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer,
- skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE), skb_headlen(skb)));
- wqe_fragment_length[0] = cpu_to_le16(min(((unsigned int)NES_FIRST_FRAG_SIZE),
- skb_headlen(skb)));
- wqe_fragment_length[1] = 0;
- if (skb_headlen(skb) > NES_FIRST_FRAG_SIZE) {
- if ((skb_shinfo(skb)->nr_frags + 1) > 4) {
- nes_debug(NES_DBG_NIC_TX, "%s: Packet with %u fragments not sent, skb_headlen=%u\n",
- netdev->name, skb_shinfo(skb)->nr_frags + 2, skb_headlen(skb));
- kfree_skb(skb);
- nesvnic->tx_sw_dropped++;
- return false;
- }
- set_bit(nesnic->sq_head, nesnic->first_frag_overflow);
- bus_address = pci_map_single(nesdev->pcidev, skb->data + NES_FIRST_FRAG_SIZE,
- skb_headlen(skb) - NES_FIRST_FRAG_SIZE, PCI_DMA_TODEVICE);
- wqe_fragment_length[wqe_fragment_index++] =
- cpu_to_le16(skb_headlen(skb) - NES_FIRST_FRAG_SIZE);
- wqe_fragment_length[wqe_fragment_index] = 0;
- set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
- ((u64)(bus_address)));
- nesnic->tx_skb[nesnic->sq_head] = skb;
- }
-
- if (skb_headlen(skb) == skb->len) {
- if (skb_headlen(skb) <= NES_FIRST_FRAG_SIZE) {
- nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_2_1_IDX] = 0;
- nesnic->tx_skb[nesnic->sq_head] = skb;
- }
- } else {
- /* Deal with Fragments */
- nesnic->tx_skb[nesnic->sq_head] = skb;
- for (skb_fragment_index = 0; skb_fragment_index < skb_shinfo(skb)->nr_frags;
- skb_fragment_index++) {
- skb_frag_t *frag =
- &skb_shinfo(skb)->frags[skb_fragment_index];
- bus_address = skb_frag_dma_map(&nesdev->pcidev->dev,
- frag, 0, skb_frag_size(frag),
- DMA_TO_DEVICE);
- wqe_fragment_length[wqe_fragment_index] =
- cpu_to_le16(skb_frag_size(&skb_shinfo(skb)->frags[skb_fragment_index]));
- set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index),
- bus_address);
- wqe_fragment_index++;
- if (wqe_fragment_index < 5)
- wqe_fragment_length[wqe_fragment_index] = 0;
- }
- }
-
- set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_MISC_IDX, wqe_misc);
- nesnic->sq_head++;
- nesnic->sq_head &= nesnic->sq_size - 1;
- return true;
-}
-
-
-/**
- * nes_netdev_start_xmit
- */
-static netdev_tx_t nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_hw_nic *nesnic = &nesvnic->nic;
- struct nes_hw_nic_sq_wqe *nic_sqe;
- struct tcphdr *tcph;
- /* struct udphdr *udph; */
-#define NES_MAX_TSO_FRAGS MAX_SKB_FRAGS
- /* 64K segment plus overflow on each side */
- dma_addr_t tso_bus_address[NES_MAX_TSO_FRAGS];
- dma_addr_t bus_address;
- u32 tso_frag_index;
- u32 tso_frag_count;
- u32 tso_wqe_length;
- u32 curr_tcp_seq;
- u32 wqe_count=1;
- struct iphdr *iph;
- __le16 *wqe_fragment_length;
- u32 nr_frags;
- u32 original_first_length;
- /* u64 *wqe_fragment_address; */
- /* first fragment (0) is used by copy buffer */
- u16 wqe_fragment_index=1;
- u16 hoffset;
- u16 nhoffset;
- u16 wqes_needed;
- u16 wqes_available;
- u32 wqe_misc;
-
- /*
- * nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
- * " (%u frags), tso_size=%u\n",
- * netdev->name, skb->len, skb_headlen(skb),
- * skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
- */
-
- if (netif_queue_stopped(netdev))
- return NETDEV_TX_BUSY;
-
- /* Check if SQ is full */
- if ((((nesnic->sq_tail+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) == 1) {
- if (!netif_queue_stopped(netdev)) {
- netif_stop_queue(netdev);
- barrier();
- if ((((((volatile u16)nesnic->sq_tail)+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) != 1) {
- netif_start_queue(netdev);
- goto sq_no_longer_full;
- }
- }
- nesvnic->sq_full++;
- return NETDEV_TX_BUSY;
- }
-
-sq_no_longer_full:
- nr_frags = skb_shinfo(skb)->nr_frags;
- if (skb_headlen(skb) > NES_FIRST_FRAG_SIZE) {
- nr_frags++;
- }
- /* Check if too many fragments */
- if (unlikely((nr_frags > 4))) {
- if (skb_is_gso(skb)) {
- nesvnic->segmented_tso_requests++;
- nesvnic->tso_requests++;
- /* Basically 4 fragments available per WQE with extended fragments */
- wqes_needed = nr_frags >> 2;
- wqes_needed += (nr_frags&3)?1:0;
- wqes_available = (((nesnic->sq_tail+nesnic->sq_size)-nesnic->sq_head) - 1) &
- (nesnic->sq_size - 1);
-
- if (unlikely(wqes_needed > wqes_available)) {
- if (!netif_queue_stopped(netdev)) {
- netif_stop_queue(netdev);
- barrier();
- wqes_available = (((((volatile u16)nesnic->sq_tail)+nesnic->sq_size)-nesnic->sq_head) - 1) &
- (nesnic->sq_size - 1);
- if (wqes_needed <= wqes_available) {
- netif_start_queue(netdev);
- goto tso_sq_no_longer_full;
- }
- }
- nesvnic->sq_full++;
- nes_debug(NES_DBG_NIC_TX, "%s: HNIC SQ full- TSO request has too many frags!\n",
- netdev->name);
- return NETDEV_TX_BUSY;
- }
-tso_sq_no_longer_full:
- /* Map all the buffers */
- for (tso_frag_count=0; tso_frag_count < skb_shinfo(skb)->nr_frags;
- tso_frag_count++) {
- skb_frag_t *frag =
- &skb_shinfo(skb)->frags[tso_frag_count];
- tso_bus_address[tso_frag_count] =
- skb_frag_dma_map(&nesdev->pcidev->dev,
- frag, 0, skb_frag_size(frag),
- DMA_TO_DEVICE);
- }
-
- tso_frag_index = 0;
- curr_tcp_seq = ntohl(tcp_hdr(skb)->seq);
- hoffset = skb_transport_header(skb) - skb->data;
- nhoffset = skb_network_header(skb) - skb->data;
- original_first_length = hoffset + ((((struct tcphdr *)skb_transport_header(skb))->doff)<<2);
-
- for (wqe_count=0; wqe_count<((u32)wqes_needed); wqe_count++) {
- tso_wqe_length = 0;
- nic_sqe = &nesnic->sq_vbase[nesnic->sq_head];
- wqe_fragment_length =
- (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
- /* setup the VLAN tag if present */
- if (skb_vlan_tag_present(skb)) {
- nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... VLAN = %08X\n",
- netdev->name,
- skb_vlan_tag_get(skb));
- wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE;
- wqe_fragment_length[0] = (__force __le16) skb_vlan_tag_get(skb);
- } else
- wqe_misc = 0;
-
- /* bump past the vlan tag */
- wqe_fragment_length++;
-
- /* Assumes header totally fits in allocated buffer and is in first fragment */
- if (original_first_length > NES_FIRST_FRAG_SIZE) {
- nes_debug(NES_DBG_NIC_TX, "ERROR: SKB header too big, headlen=%u, FIRST_FRAG_SIZE=%u\n",
- original_first_length, NES_FIRST_FRAG_SIZE);
- nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
- " (%u frags), is_gso = %u tso_size=%u\n",
- netdev->name,
- skb->len, skb_headlen(skb),
- skb_shinfo(skb)->nr_frags, skb_is_gso(skb), skb_shinfo(skb)->gso_size);
- }
- memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer,
- skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE),
- original_first_length));
- iph = (struct iphdr *)
- (&nesnic->first_frag_vbase[nesnic->sq_head].buffer[nhoffset]);
- tcph = (struct tcphdr *)
- (&nesnic->first_frag_vbase[nesnic->sq_head].buffer[hoffset]);
- if ((wqe_count+1)!=(u32)wqes_needed) {
- tcph->fin = 0;
- tcph->psh = 0;
- tcph->rst = 0;
- tcph->urg = 0;
- }
- if (wqe_count) {
- tcph->syn = 0;
- }
- tcph->seq = htonl(curr_tcp_seq);
- wqe_fragment_length[0] = cpu_to_le16(min(((unsigned int)NES_FIRST_FRAG_SIZE),
- original_first_length));
-
- wqe_fragment_index = 1;
- if ((wqe_count==0) && (skb_headlen(skb) > original_first_length)) {
- set_bit(nesnic->sq_head, nesnic->first_frag_overflow);
- bus_address = pci_map_single(nesdev->pcidev, skb->data + original_first_length,
- skb_headlen(skb) - original_first_length, PCI_DMA_TODEVICE);
- wqe_fragment_length[wqe_fragment_index++] =
- cpu_to_le16(skb_headlen(skb) - original_first_length);
- wqe_fragment_length[wqe_fragment_index] = 0;
- set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
- bus_address);
- tso_wqe_length += skb_headlen(skb) -
- original_first_length;
- }
- while (wqe_fragment_index < 5) {
- wqe_fragment_length[wqe_fragment_index] =
- cpu_to_le16(skb_frag_size(&skb_shinfo(skb)->frags[tso_frag_index]));
- set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index),
- (u64)tso_bus_address[tso_frag_index]);
- wqe_fragment_index++;
- tso_wqe_length += skb_frag_size(&skb_shinfo(skb)->frags[tso_frag_index++]);
- if (wqe_fragment_index < 5)
- wqe_fragment_length[wqe_fragment_index] = 0;
- if (tso_frag_index == tso_frag_count)
- break;
- }
- if ((wqe_count+1) == (u32)wqes_needed) {
- nesnic->tx_skb[nesnic->sq_head] = skb;
- } else {
- nesnic->tx_skb[nesnic->sq_head] = NULL;
- }
- wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_shinfo(skb)->gso_size;
- if ((tso_wqe_length + original_first_length) > skb_shinfo(skb)->gso_size) {
- wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE;
- } else {
- iph->tot_len = htons(tso_wqe_length + original_first_length - nhoffset);
- }
-
- set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_MISC_IDX,
- wqe_misc);
- set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX,
- ((u32)tcph->doff) | (((u32)hoffset) << 4));
-
- set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX,
- tso_wqe_length + original_first_length);
- curr_tcp_seq += tso_wqe_length;
- nesnic->sq_head++;
- nesnic->sq_head &= nesnic->sq_size-1;
- }
- } else {
- hoffset = skb_transport_header(skb) - skb->data;
- nhoffset = skb_network_header(skb) - skb->data;
- if (skb_linearize(skb)) {
- nesvnic->tx_sw_dropped++;
- kfree_skb(skb);
- return NETDEV_TX_OK;
- }
- nesvnic->linearized_skbs++;
- skb_set_transport_header(skb, hoffset);
- skb_set_network_header(skb, nhoffset);
- if (!nes_nic_send(skb, netdev))
- return NETDEV_TX_OK;
- }
- } else {
- if (!nes_nic_send(skb, netdev))
- return NETDEV_TX_OK;
- }
-
- barrier();
-
- if (wqe_count)
- nes_write32(nesdev->regs+NES_WQE_ALLOC,
- (wqe_count << 24) | (1 << 23) | nesvnic->nic.qp_id);
-
- netif_trans_update(netdev);
-
- return NETDEV_TX_OK;
-}
-
-
-/**
- * nes_netdev_get_stats
- */
-static struct net_device_stats *nes_netdev_get_stats(struct net_device *netdev)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- u64 u64temp;
- u32 u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_DISCARD + (nesvnic->nic_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->endnode_nstat_rx_discard += u32temp;
-
- u64temp = (u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO + (nesvnic->nic_index*0x200));
- u64temp += ((u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI + (nesvnic->nic_index*0x200))) << 32;
-
- nesvnic->endnode_nstat_rx_octets += u64temp;
- nesvnic->netstats.rx_bytes += u64temp;
-
- u64temp = (u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO + (nesvnic->nic_index*0x200));
- u64temp += ((u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI + (nesvnic->nic_index*0x200))) << 32;
-
- nesvnic->endnode_nstat_rx_frames += u64temp;
- nesvnic->netstats.rx_packets += u64temp;
-
- u64temp = (u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO + (nesvnic->nic_index*0x200));
- u64temp += ((u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI + (nesvnic->nic_index*0x200))) << 32;
-
- nesvnic->endnode_nstat_tx_octets += u64temp;
- nesvnic->netstats.tx_bytes += u64temp;
-
- u64temp = (u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO + (nesvnic->nic_index*0x200));
- u64temp += ((u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI + (nesvnic->nic_index*0x200))) << 32;
-
- nesvnic->endnode_nstat_tx_frames += u64temp;
- nesvnic->netstats.tx_packets += u64temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_SHORT_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_short_frames += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_OVERSIZED_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_oversized_frames += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_JABBER_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_jabber_frames += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_symbol_err_frames += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_LENGTH_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_length_errors += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_CRC_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_crc_errors += u32temp;
- nesvnic->netstats.rx_crc_errors += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_TX_ERRORS + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->nesdev->mac_tx_errors += u32temp;
- nesvnic->netstats.tx_errors += u32temp;
-
- return &nesvnic->netstats;
-}
-
-
-/**
- * nes_netdev_tx_timeout
- */
-static void nes_netdev_tx_timeout(struct net_device *netdev)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
-
- if (netif_msg_timer(nesvnic))
- nes_debug(NES_DBG_NIC_TX, "%s: tx timeout\n", netdev->name);
-}
-
-
-/**
- * nes_netdev_set_mac_address
- */
-static int nes_netdev_set_mac_address(struct net_device *netdev, void *p)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct sockaddr *mac_addr = p;
- int i;
- u32 macaddr_low;
- u16 macaddr_high;
-
- if (!is_valid_ether_addr(mac_addr->sa_data))
- return -EADDRNOTAVAIL;
-
- memcpy(netdev->dev_addr, mac_addr->sa_data, netdev->addr_len);
- printk(PFX "%s: Address length = %d, Address = %pM\n",
- __func__, netdev->addr_len, mac_addr->sa_data);
- macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
- macaddr_high += (u16)netdev->dev_addr[1];
- macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
- macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
- macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
- macaddr_low += (u32)netdev->dev_addr[5];
-
- for (i = 0; i < NES_MAX_PORT_COUNT; i++) {
- if (nesvnic->qp_nic_index[i] == 0xf) {
- break;
- }
- nes_write_indexed(nesdev,
- NES_IDX_PERFECT_FILTER_LOW + (nesvnic->qp_nic_index[i] * 8),
- macaddr_low);
- nes_write_indexed(nesdev,
- NES_IDX_PERFECT_FILTER_HIGH + (nesvnic->qp_nic_index[i] * 8),
- (u32)macaddr_high | NES_MAC_ADDR_VALID |
- ((((u32)nesvnic->nic_index) << 16)));
- }
- return 0;
-}
-
-
-static void set_allmulti(struct nes_device *nesdev, u32 nic_active_bit)
-{
- u32 nic_active;
-
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
- nic_active |= nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
- nic_active &= ~nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
-}
-
-#define get_addr(addrs, index) ((addrs) + (index) * ETH_ALEN)
-
-/**
- * nes_netdev_set_multicast_list
- */
-static void nes_netdev_set_multicast_list(struct net_device *netdev)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
- u32 nic_active_bit;
- u32 nic_active;
- u32 perfect_filter_register_address;
- u32 macaddr_low;
- u16 macaddr_high;
- u8 mc_all_on = 0;
- u8 mc_index;
- int mc_nic_index = -1;
- u8 pft_entries_preallocated = max(nesadapter->adapter_fcn_count *
- nics_per_function, 4);
- u8 max_pft_entries_avaiable = NES_PFT_SIZE - pft_entries_preallocated;
- unsigned long flags;
- int mc_count = netdev_mc_count(netdev);
-
- spin_lock_irqsave(&nesadapter->resource_lock, flags);
- nic_active_bit = 1 << nesvnic->nic_index;
-
- if (netdev->flags & IFF_PROMISC) {
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
- nic_active |= nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
- nic_active |= nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
- mc_all_on = 1;
- } else if ((netdev->flags & IFF_ALLMULTI) ||
- (nesvnic->nic_index > 3)) {
- set_allmulti(nesdev, nic_active_bit);
- mc_all_on = 1;
- } else {
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
- nic_active &= ~nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
- nic_active &= ~nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
- }
-
- nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscuous = %d, All Multicast = %d.\n",
- mc_count, !!(netdev->flags & IFF_PROMISC),
- !!(netdev->flags & IFF_ALLMULTI));
- if (!mc_all_on) {
- char *addrs;
- int i;
- struct netdev_hw_addr *ha;
-
- addrs = kmalloc_array(mc_count, ETH_ALEN, GFP_ATOMIC);
- if (!addrs) {
- set_allmulti(nesdev, nic_active_bit);
- goto unlock;
- }
- i = 0;
- netdev_for_each_mc_addr(ha, netdev)
- memcpy(get_addr(addrs, i++), ha->addr, ETH_ALEN);
-
- perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW +
- pft_entries_preallocated * 0x8;
- for (i = 0, mc_index = 0; mc_index < max_pft_entries_avaiable;
- mc_index++) {
- while (i < mc_count && nesvnic->mcrq_mcast_filter &&
- ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic,
- get_addr(addrs, i++))) == 0));
- if (mc_nic_index < 0)
- mc_nic_index = nesvnic->nic_index;
- while (nesadapter->pft_mcast_map[mc_index] < 16 &&
- nesadapter->pft_mcast_map[mc_index] !=
- nesvnic->nic_index &&
- mc_index < max_pft_entries_avaiable) {
- nes_debug(NES_DBG_NIC_RX,
- "mc_index=%d skipping nic_index=%d, used for=%d\n",
- mc_index, nesvnic->nic_index,
- nesadapter->pft_mcast_map[mc_index]);
- mc_index++;
- }
- if (mc_index >= max_pft_entries_avaiable)
- break;
- if (i < mc_count) {
- char *addr = get_addr(addrs, i++);
-
- nes_debug(NES_DBG_NIC_RX, "Assigning MC Address %pM to register 0x%04X nic_idx=%d\n",
- addr,
- perfect_filter_register_address+(mc_index * 8),
- mc_nic_index);
- macaddr_high = ((u8) addr[0]) << 8;
- macaddr_high += (u8) addr[1];
- macaddr_low = ((u8) addr[2]) << 24;
- macaddr_low += ((u8) addr[3]) << 16;
- macaddr_low += ((u8) addr[4]) << 8;
- macaddr_low += (u8) addr[5];
-
- nes_write_indexed(nesdev,
- perfect_filter_register_address+(mc_index * 8),
- macaddr_low);
- nes_write_indexed(nesdev,
- perfect_filter_register_address+4+(mc_index * 8),
- (u32)macaddr_high | NES_MAC_ADDR_VALID |
- ((((u32)(1<<mc_nic_index)) << 16)));
- nesadapter->pft_mcast_map[mc_index] =
- nesvnic->nic_index;
- } else {
- nes_debug(NES_DBG_NIC_RX, "Clearing MC Address at register 0x%04X\n",
- perfect_filter_register_address+(mc_index * 8));
- nes_write_indexed(nesdev,
- perfect_filter_register_address+4+(mc_index * 8),
- 0);
- nesadapter->pft_mcast_map[mc_index] = 255;
- }
- }
- kfree(addrs);
- /* PFT is not large enough */
- if (i < mc_count)
- set_allmulti(nesdev, nic_active_bit);
- }
-
-unlock:
- spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
-}
-
-
-/**
- * nes_netdev_change_mtu
- */
-static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- u8 jumbomode = 0;
- u32 nic_active;
- u32 nic_active_bit;
- u32 uc_all_active;
- u32 mc_all_active;
-
- netdev->mtu = new_mtu;
- nesvnic->max_frame_size = new_mtu + VLAN_ETH_HLEN;
-
- if (netdev->mtu > ETH_DATA_LEN) {
- jumbomode=1;
- }
- nes_nic_init_timer_defaults(nesdev, jumbomode);
-
- if (netif_running(netdev)) {
- nic_active_bit = 1 << nesvnic->nic_index;
- mc_all_active = nes_read_indexed(nesdev,
- NES_IDX_NIC_MULTICAST_ALL) & nic_active_bit;
- uc_all_active = nes_read_indexed(nesdev,
- NES_IDX_NIC_UNICAST_ALL) & nic_active_bit;
-
- nes_netdev_stop(netdev);
- nes_netdev_open(netdev);
-
- nic_active = nes_read_indexed(nesdev,
- NES_IDX_NIC_MULTICAST_ALL);
- nic_active |= mc_all_active;
- nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL,
- nic_active);
-
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
- nic_active |= uc_all_active;
- nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
- }
-
- return 0;
-}
-
-
-static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = {
- "Link Change Interrupts",
- "Linearized SKBs",
- "T/GSO Requests",
- "Pause Frames Sent",
- "Pause Frames Received",
- "Internal Routing Errors",
- "SQ SW Dropped SKBs",
- "SQ Full",
- "Segmented TSO Requests",
- "Rx Symbol Errors",
- "Rx Jabber Errors",
- "Rx Oversized Frames",
- "Rx Short Frames",
- "Rx Length Errors",
- "Rx CRC Errors",
- "Rx Port Discard",
- "Endnode Rx Discards",
- "Endnode Rx Octets",
- "Endnode Rx Frames",
- "Endnode Tx Octets",
- "Endnode Tx Frames",
- "Tx Errors",
- "mh detected",
- "mh pauses",
- "Retransmission Count",
- "CM Connects",
- "CM Accepts",
- "Disconnects",
- "Connected Events",
- "Connect Requests",
- "CM Rejects",
- "ModifyQP Timeouts",
- "CreateQPs",
- "SW DestroyQPs",
- "DestroyQPs",
- "CM Closes",
- "CM Packets Sent",
- "CM Packets Bounced",
- "CM Packets Created",
- "CM Packets Rcvd",
- "CM Packets Dropped",
- "CM Packets Retrans",
- "CM Listens Created",
- "CM Listens Destroyed",
- "CM Backlog Drops",
- "CM Loopbacks",
- "CM Nodes Created",
- "CM Nodes Destroyed",
- "CM Accel Drops",
- "CM Resets Received",
- "Free 4Kpbls",
- "Free 256pbls",
- "Timer Inits",
- "PAU CreateQPs",
- "PAU DestroyQPs",
-};
-#define NES_ETHTOOL_STAT_COUNT ARRAY_SIZE(nes_ethtool_stringset)
-
-
-/**
- * nes_netdev_get_sset_count
- */
-static int nes_netdev_get_sset_count(struct net_device *netdev, int stringset)
-{
- if (stringset == ETH_SS_STATS)
- return NES_ETHTOOL_STAT_COUNT;
- else
- return -EINVAL;
-}
-
-
-/**
- * nes_netdev_get_strings
- */
-static void nes_netdev_get_strings(struct net_device *netdev, u32 stringset,
- u8 *ethtool_strings)
-{
- if (stringset == ETH_SS_STATS)
- memcpy(ethtool_strings,
- &nes_ethtool_stringset,
- sizeof(nes_ethtool_stringset));
-}
-
-
-/**
- * nes_netdev_get_ethtool_stats
- */
-
-static void nes_netdev_get_ethtool_stats(struct net_device *netdev,
- struct ethtool_stats *target_ethtool_stats, u64 *target_stat_values)
-{
- u64 u64temp;
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 nic_count;
- u32 u32temp;
- u32 index = 0;
-
- target_ethtool_stats->n_stats = NES_ETHTOOL_STAT_COUNT;
- target_stat_values[index] = nesvnic->nesdev->link_status_interrupts;
- target_stat_values[++index] = nesvnic->linearized_skbs;
- target_stat_values[++index] = nesvnic->tso_requests;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_TX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->nesdev->mac_pause_frames_sent += u32temp;
- target_stat_values[++index] = nesvnic->nesdev->mac_pause_frames_sent;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->nesdev->mac_pause_frames_received += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_PORT_RX_DISCARDS + (nesvnic->nesdev->mac_index*0x40));
- nesvnic->nesdev->port_rx_discards += u32temp;
- nesvnic->netstats.rx_dropped += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_PORT_TX_DISCARDS + (nesvnic->nesdev->mac_index*0x40));
- nesvnic->nesdev->port_tx_discards += u32temp;
- nesvnic->netstats.tx_dropped += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_SHORT_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_short_frames += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_OVERSIZED_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_oversized_frames += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_JABBER_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_jabber_frames += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_symbol_err_frames += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_LENGTH_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_length_errors += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_CRC_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_crc_errors += u32temp;
- nesvnic->netstats.rx_crc_errors += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_TX_ERRORS + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->nesdev->mac_tx_errors += u32temp;
- nesvnic->netstats.tx_errors += u32temp;
-
- for (nic_count = 0; nic_count < NES_MAX_PORT_COUNT; nic_count++) {
- if (nesvnic->qp_nic_index[nic_count] == 0xf)
- break;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_DISCARD +
- (nesvnic->qp_nic_index[nic_count]*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->endnode_nstat_rx_discard += u32temp;
-
- u64temp = (u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO +
- (nesvnic->qp_nic_index[nic_count]*0x200));
- u64temp += ((u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI +
- (nesvnic->qp_nic_index[nic_count]*0x200))) << 32;
-
- nesvnic->endnode_nstat_rx_octets += u64temp;
- nesvnic->netstats.rx_bytes += u64temp;
-
- u64temp = (u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO +
- (nesvnic->qp_nic_index[nic_count]*0x200));
- u64temp += ((u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI +
- (nesvnic->qp_nic_index[nic_count]*0x200))) << 32;
-
- nesvnic->endnode_nstat_rx_frames += u64temp;
- nesvnic->netstats.rx_packets += u64temp;
-
- u64temp = (u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO +
- (nesvnic->qp_nic_index[nic_count]*0x200));
- u64temp += ((u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI +
- (nesvnic->qp_nic_index[nic_count]*0x200))) << 32;
-
- nesvnic->endnode_nstat_tx_octets += u64temp;
- nesvnic->netstats.tx_bytes += u64temp;
-
- u64temp = (u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO +
- (nesvnic->qp_nic_index[nic_count]*0x200));
- u64temp += ((u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI +
- (nesvnic->qp_nic_index[nic_count]*0x200))) << 32;
-
- nesvnic->endnode_nstat_tx_frames += u64temp;
- nesvnic->netstats.tx_packets += u64temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_IPV4_TCP_REXMITS + (nesvnic->qp_nic_index[nic_count]*0x200));
- nesvnic->endnode_ipv4_tcp_retransmits += u32temp;
- }
-
- target_stat_values[++index] = nesvnic->nesdev->mac_pause_frames_received;
- target_stat_values[++index] = nesdev->nesadapter->nic_rx_eth_route_err;
- target_stat_values[++index] = nesvnic->tx_sw_dropped;
- target_stat_values[++index] = nesvnic->sq_full;
- target_stat_values[++index] = nesvnic->segmented_tso_requests;
- target_stat_values[++index] = nesvnic->nesdev->mac_rx_symbol_err_frames;
- target_stat_values[++index] = nesvnic->nesdev->mac_rx_jabber_frames;
- target_stat_values[++index] = nesvnic->nesdev->mac_rx_oversized_frames;
- target_stat_values[++index] = nesvnic->nesdev->mac_rx_short_frames;
- target_stat_values[++index] = nesvnic->netstats.rx_length_errors;
- target_stat_values[++index] = nesvnic->nesdev->mac_rx_crc_errors;
- target_stat_values[++index] = nesvnic->nesdev->port_rx_discards;
- target_stat_values[++index] = nesvnic->endnode_nstat_rx_discard;
- target_stat_values[++index] = nesvnic->endnode_nstat_rx_octets;
- target_stat_values[++index] = nesvnic->endnode_nstat_rx_frames;
- target_stat_values[++index] = nesvnic->endnode_nstat_tx_octets;
- target_stat_values[++index] = nesvnic->endnode_nstat_tx_frames;
- target_stat_values[++index] = nesvnic->nesdev->mac_tx_errors;
- target_stat_values[++index] = mh_detected;
- target_stat_values[++index] = mh_pauses_sent;
- target_stat_values[++index] = nesvnic->endnode_ipv4_tcp_retransmits;
- target_stat_values[++index] = atomic_read(&cm_connects);
- target_stat_values[++index] = atomic_read(&cm_accepts);
- target_stat_values[++index] = atomic_read(&cm_disconnects);
- target_stat_values[++index] = atomic_read(&cm_connecteds);
- target_stat_values[++index] = atomic_read(&cm_connect_reqs);
- target_stat_values[++index] = atomic_read(&cm_rejects);
- target_stat_values[++index] = atomic_read(&mod_qp_timouts);
- target_stat_values[++index] = atomic_read(&qps_created);
- target_stat_values[++index] = atomic_read(&sw_qps_destroyed);
- target_stat_values[++index] = atomic_read(&qps_destroyed);
- target_stat_values[++index] = atomic_read(&cm_closes);
- target_stat_values[++index] = cm_packets_sent;
- target_stat_values[++index] = cm_packets_bounced;
- target_stat_values[++index] = cm_packets_created;
- target_stat_values[++index] = cm_packets_received;
- target_stat_values[++index] = cm_packets_dropped;
- target_stat_values[++index] = cm_packets_retrans;
- target_stat_values[++index] = atomic_read(&cm_listens_created);
- target_stat_values[++index] = atomic_read(&cm_listens_destroyed);
- target_stat_values[++index] = cm_backlog_drops;
- target_stat_values[++index] = atomic_read(&cm_loopbacks);
- target_stat_values[++index] = atomic_read(&cm_nodes_created);
- target_stat_values[++index] = atomic_read(&cm_nodes_destroyed);
- target_stat_values[++index] = atomic_read(&cm_accel_dropped_pkts);
- target_stat_values[++index] = atomic_read(&cm_resets_recvd);
- target_stat_values[++index] = nesadapter->free_4kpbl;
- target_stat_values[++index] = nesadapter->free_256pbl;
- target_stat_values[++index] = int_mod_timer_init;
- target_stat_values[++index] = atomic_read(&pau_qps_created);
- target_stat_values[++index] = atomic_read(&pau_qps_destroyed);
-}
-
-/**
- * nes_netdev_get_drvinfo
- */
-static void nes_netdev_get_drvinfo(struct net_device *netdev,
- struct ethtool_drvinfo *drvinfo)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
-
- strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
- strlcpy(drvinfo->bus_info, pci_name(nesvnic->nesdev->pcidev),
- sizeof(drvinfo->bus_info));
- snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
- "%u.%u", nesadapter->firmware_version >> 16,
- nesadapter->firmware_version & 0x000000ff);
- strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
-}
-
-
-/**
- * nes_netdev_set_coalesce
- */
-static int nes_netdev_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *et_coalesce)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
- unsigned long flags;
-
- spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
- if (et_coalesce->rx_max_coalesced_frames_low) {
- shared_timer->threshold_low = et_coalesce->rx_max_coalesced_frames_low;
- }
- if (et_coalesce->rx_max_coalesced_frames_irq) {
- shared_timer->threshold_target = et_coalesce->rx_max_coalesced_frames_irq;
- }
- if (et_coalesce->rx_max_coalesced_frames_high) {
- shared_timer->threshold_high = et_coalesce->rx_max_coalesced_frames_high;
- }
- if (et_coalesce->rx_coalesce_usecs_low) {
- shared_timer->timer_in_use_min = et_coalesce->rx_coalesce_usecs_low;
- }
- if (et_coalesce->rx_coalesce_usecs_high) {
- shared_timer->timer_in_use_max = et_coalesce->rx_coalesce_usecs_high;
- }
- spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
-
- /* using this to drive total interrupt moderation */
- nesadapter->et_rx_coalesce_usecs_irq = et_coalesce->rx_coalesce_usecs_irq;
- if (et_coalesce->use_adaptive_rx_coalesce) {
- nesadapter->et_use_adaptive_rx_coalesce = 1;
- nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC;
- nesadapter->et_rx_coalesce_usecs_irq = 0;
- if (et_coalesce->pkt_rate_low) {
- nesadapter->et_pkt_rate_low = et_coalesce->pkt_rate_low;
- }
- } else {
- nesadapter->et_use_adaptive_rx_coalesce = 0;
- nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT;
- if (nesadapter->et_rx_coalesce_usecs_irq) {
- nes_write32(nesdev->regs+NES_PERIODIC_CONTROL,
- 0x80000000 | ((u32)(nesadapter->et_rx_coalesce_usecs_irq*8)));
- }
- }
- return 0;
-}
-
-
-/**
- * nes_netdev_get_coalesce
- */
-static int nes_netdev_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *et_coalesce)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct ethtool_coalesce temp_et_coalesce;
- struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
- unsigned long flags;
-
- memset(&temp_et_coalesce, 0, sizeof(temp_et_coalesce));
- temp_et_coalesce.rx_coalesce_usecs_irq = nesadapter->et_rx_coalesce_usecs_irq;
- temp_et_coalesce.use_adaptive_rx_coalesce = nesadapter->et_use_adaptive_rx_coalesce;
- temp_et_coalesce.rate_sample_interval = nesadapter->et_rate_sample_interval;
- temp_et_coalesce.pkt_rate_low = nesadapter->et_pkt_rate_low;
- spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
- temp_et_coalesce.rx_max_coalesced_frames_low = shared_timer->threshold_low;
- temp_et_coalesce.rx_max_coalesced_frames_irq = shared_timer->threshold_target;
- temp_et_coalesce.rx_max_coalesced_frames_high = shared_timer->threshold_high;
- temp_et_coalesce.rx_coalesce_usecs_low = shared_timer->timer_in_use_min;
- temp_et_coalesce.rx_coalesce_usecs_high = shared_timer->timer_in_use_max;
- if (nesadapter->et_use_adaptive_rx_coalesce) {
- temp_et_coalesce.rx_coalesce_usecs_irq = shared_timer->timer_in_use;
- }
- spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
- memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce));
- return 0;
-}
-
-
-/**
- * nes_netdev_get_pauseparam
- */
-static void nes_netdev_get_pauseparam(struct net_device *netdev,
- struct ethtool_pauseparam *et_pauseparam)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
-
- et_pauseparam->autoneg = 0;
- et_pauseparam->rx_pause = (nesvnic->nesdev->disable_rx_flow_control == 0) ? 1:0;
- et_pauseparam->tx_pause = (nesvnic->nesdev->disable_tx_flow_control == 0) ? 1:0;
-}
-
-
-/**
- * nes_netdev_set_pauseparam
- */
-static int nes_netdev_set_pauseparam(struct net_device *netdev,
- struct ethtool_pauseparam *et_pauseparam)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- u32 u32temp;
-
- if (et_pauseparam->autoneg) {
- /* TODO: should return unsupported */
- return 0;
- }
- if ((et_pauseparam->tx_pause == 1) && (nesdev->disable_tx_flow_control == 1)) {
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200));
- u32temp |= NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE;
- nes_write_indexed(nesdev,
- NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp);
- nesdev->disable_tx_flow_control = 0;
- } else if ((et_pauseparam->tx_pause == 0) && (nesdev->disable_tx_flow_control == 0)) {
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200));
- u32temp &= ~NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE;
- nes_write_indexed(nesdev,
- NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp);
- nesdev->disable_tx_flow_control = 1;
- }
- if ((et_pauseparam->rx_pause == 1) && (nesdev->disable_rx_flow_control == 1)) {
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40));
- u32temp &= ~NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE;
- nes_write_indexed(nesdev,
- NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40), u32temp);
- nesdev->disable_rx_flow_control = 0;
- } else if ((et_pauseparam->rx_pause == 0) && (nesdev->disable_rx_flow_control == 0)) {
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40));
- u32temp |= NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE;
- nes_write_indexed(nesdev,
- NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40), u32temp);
- nesdev->disable_rx_flow_control = 1;
- }
-
- return 0;
-}
-
-
-/**
- * nes_netdev_get_settings
- */
-static int nes_netdev_get_link_ksettings(struct net_device *netdev,
- struct ethtool_link_ksettings *cmd)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 mac_index = nesdev->mac_index;
- u8 phy_type = nesadapter->phy_type[mac_index];
- u8 phy_index = nesadapter->phy_index[mac_index];
- u16 phy_data;
- u32 supported, advertising;
-
- cmd->base.duplex = DUPLEX_FULL;
- cmd->base.port = PORT_MII;
-
- if (nesadapter->OneG_Mode) {
- cmd->base.speed = SPEED_1000;
- if (phy_type == NES_PHY_TYPE_PUMA_1G) {
- supported = SUPPORTED_1000baseT_Full;
- advertising = ADVERTISED_1000baseT_Full;
- cmd->base.autoneg = AUTONEG_DISABLE;
- cmd->base.phy_address = mac_index;
- } else {
- unsigned long flags;
-
- supported = SUPPORTED_1000baseT_Full
- | SUPPORTED_Autoneg;
- advertising = ADVERTISED_1000baseT_Full
- | ADVERTISED_Autoneg;
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
- nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
- if (phy_data & 0x1000)
- cmd->base.autoneg = AUTONEG_ENABLE;
- else
- cmd->base.autoneg = AUTONEG_DISABLE;
- cmd->base.phy_address = phy_index;
- }
- ethtool_convert_legacy_u32_to_link_mode(
- cmd->link_modes.supported, supported);
- ethtool_convert_legacy_u32_to_link_mode(
- cmd->link_modes.advertising, advertising);
- return 0;
- }
- if ((phy_type == NES_PHY_TYPE_ARGUS) ||
- (phy_type == NES_PHY_TYPE_SFP_D) ||
- (phy_type == NES_PHY_TYPE_KR)) {
- cmd->base.port = PORT_FIBRE;
- supported = SUPPORTED_FIBRE;
- advertising = ADVERTISED_FIBRE;
- cmd->base.phy_address = phy_index;
- } else {
- supported = SUPPORTED_10000baseT_Full;
- advertising = ADVERTISED_10000baseT_Full;
- cmd->base.phy_address = mac_index;
- }
- cmd->base.speed = SPEED_10000;
- cmd->base.autoneg = AUTONEG_DISABLE;
- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
- supported);
- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
- advertising);
-
- return 0;
-}
-
-
-/**
- * nes_netdev_set_settings
- */
-static int
-nes_netdev_set_link_ksettings(struct net_device *netdev,
- const struct ethtool_link_ksettings *cmd)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
-
- if ((nesadapter->OneG_Mode) &&
- (nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G)) {
- unsigned long flags;
- u16 phy_data;
- u8 phy_index = nesadapter->phy_index[nesdev->mac_index];
-
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
- nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
- if (cmd->base.autoneg) {
- /* Turn on Full duplex, Autoneg, and restart autonegotiation */
- phy_data |= 0x1300;
- } else {
- /* Turn off autoneg */
- phy_data &= ~0x1000;
- }
- nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data);
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
- }
-
- return 0;
-}
-
-
-static const struct ethtool_ops nes_ethtool_ops = {
- .get_link = ethtool_op_get_link,
- .get_strings = nes_netdev_get_strings,
- .get_sset_count = nes_netdev_get_sset_count,
- .get_ethtool_stats = nes_netdev_get_ethtool_stats,
- .get_drvinfo = nes_netdev_get_drvinfo,
- .get_coalesce = nes_netdev_get_coalesce,
- .set_coalesce = nes_netdev_set_coalesce,
- .get_pauseparam = nes_netdev_get_pauseparam,
- .set_pauseparam = nes_netdev_set_pauseparam,
- .get_link_ksettings = nes_netdev_get_link_ksettings,
- .set_link_ksettings = nes_netdev_set_link_ksettings,
-};
-
-static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, netdev_features_t features)
-{
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 u32temp;
- unsigned long flags;
-
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
-
- nes_debug(NES_DBG_NETDEV, "%s: %s\n", __func__, netdev->name);
-
- /* Enable/Disable VLAN Stripping */
- u32temp = nes_read_indexed(nesdev, NES_IDX_PCIX_DIAG);
- if (features & NETIF_F_HW_VLAN_CTAG_RX)
- u32temp &= 0xfdffffff;
- else
- u32temp |= 0x02000000;
-
- nes_write_indexed(nesdev, NES_IDX_PCIX_DIAG, u32temp);
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
-}
-
-static netdev_features_t nes_fix_features(struct net_device *netdev, netdev_features_t features)
-{
- /*
- * Since there is no support for separate rx/tx vlan accel
- * enable/disable make sure tx flag is always in same state as rx.
- */
- if (features & NETIF_F_HW_VLAN_CTAG_RX)
- features |= NETIF_F_HW_VLAN_CTAG_TX;
- else
- features &= ~NETIF_F_HW_VLAN_CTAG_TX;
-
- return features;
-}
-
-static int nes_set_features(struct net_device *netdev, netdev_features_t features)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- u32 changed = netdev->features ^ features;
-
- if (changed & NETIF_F_HW_VLAN_CTAG_RX)
- nes_vlan_mode(netdev, nesdev, features);
-
- return 0;
-}
-
-static const struct net_device_ops nes_netdev_ops = {
- .ndo_open = nes_netdev_open,
- .ndo_stop = nes_netdev_stop,
- .ndo_start_xmit = nes_netdev_start_xmit,
- .ndo_get_stats = nes_netdev_get_stats,
- .ndo_tx_timeout = nes_netdev_tx_timeout,
- .ndo_set_mac_address = nes_netdev_set_mac_address,
- .ndo_set_rx_mode = nes_netdev_set_multicast_list,
- .ndo_change_mtu = nes_netdev_change_mtu,
- .ndo_validate_addr = eth_validate_addr,
- .ndo_fix_features = nes_fix_features,
- .ndo_set_features = nes_set_features,
-};
-
-/**
- * nes_netdev_init - initialize network device
- */
-struct net_device *nes_netdev_init(struct nes_device *nesdev,
- void __iomem *mmio_addr)
-{
- u64 u64temp;
- struct nes_vnic *nesvnic;
- struct net_device *netdev;
- struct nic_qp_map *curr_qp_map;
- u8 phy_type = nesdev->nesadapter->phy_type[nesdev->mac_index];
-
- netdev = alloc_etherdev(sizeof(struct nes_vnic));
- if (!netdev) {
- printk(KERN_ERR PFX "nesvnic etherdev alloc failed");
- return NULL;
- }
- nesvnic = netdev_priv(netdev);
-
- nes_debug(NES_DBG_INIT, "netdev = %p, %s\n", netdev, netdev->name);
-
- SET_NETDEV_DEV(netdev, &nesdev->pcidev->dev);
-
- netdev->watchdog_timeo = NES_TX_TIMEOUT;
- netdev->irq = nesdev->pcidev->irq;
- netdev->max_mtu = NES_MAX_MTU;
- netdev->hard_header_len = ETH_HLEN;
- netdev->addr_len = ETH_ALEN;
- netdev->type = ARPHRD_ETHER;
- netdev->netdev_ops = &nes_netdev_ops;
- netdev->ethtool_ops = &nes_ethtool_ops;
- netif_napi_add(netdev, &nesvnic->napi, nes_netdev_poll, 128);
- nes_debug(NES_DBG_INIT, "Enabling VLAN Insert/Delete.\n");
-
- /* Fill in the port structure */
- nesvnic->netdev = netdev;
- nesvnic->nesdev = nesdev;
- nesvnic->msg_enable = netif_msg_init(debug, default_msg);
- nesvnic->netdev_index = nesdev->netdev_count;
- nesvnic->perfect_filter_index = nesdev->nesadapter->netdev_count;
- nesvnic->max_frame_size = netdev->mtu + netdev->hard_header_len + VLAN_HLEN;
-
- curr_qp_map = nic_qp_mapping_per_function[PCI_FUNC(nesdev->pcidev->devfn)];
- nesvnic->nic.qp_id = curr_qp_map[nesdev->netdev_count].qpid;
- nesvnic->nic_index = curr_qp_map[nesdev->netdev_count].nic_index;
- nesvnic->logical_port = curr_qp_map[nesdev->netdev_count].logical_port;
-
- /* Setup the burned in MAC address */
- u64temp = (u64)nesdev->nesadapter->mac_addr_low;
- u64temp += ((u64)nesdev->nesadapter->mac_addr_high) << 32;
- u64temp += nesvnic->nic_index;
- netdev->dev_addr[0] = (u8)(u64temp>>40);
- netdev->dev_addr[1] = (u8)(u64temp>>32);
- netdev->dev_addr[2] = (u8)(u64temp>>24);
- netdev->dev_addr[3] = (u8)(u64temp>>16);
- netdev->dev_addr[4] = (u8)(u64temp>>8);
- netdev->dev_addr[5] = (u8)u64temp;
-
- netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX;
- if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV))
- netdev->hw_features |= NETIF_F_TSO;
-
- netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX;
-
- nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d,"
- " nic_index = %d, logical_port = %d, mac_index = %d.\n",
- nesvnic, (unsigned long)netdev->features, nesvnic->nic.qp_id,
- nesvnic->nic_index, nesvnic->logical_port, nesdev->mac_index);
-
- if (nesvnic->nesdev->nesadapter->port_count == 1 &&
- nesvnic->nesdev->nesadapter->adapter_fcn_count == 1) {
-
- nesvnic->qp_nic_index[0] = nesvnic->nic_index;
- nesvnic->qp_nic_index[1] = nesvnic->nic_index + 1;
- if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) {
- nesvnic->qp_nic_index[2] = 0xf;
- nesvnic->qp_nic_index[3] = 0xf;
- } else {
- nesvnic->qp_nic_index[2] = nesvnic->nic_index + 2;
- nesvnic->qp_nic_index[3] = nesvnic->nic_index + 3;
- }
- } else {
- if (nesvnic->nesdev->nesadapter->port_count == 2 ||
- (nesvnic->nesdev->nesadapter->port_count == 1 &&
- nesvnic->nesdev->nesadapter->adapter_fcn_count == 2)) {
- nesvnic->qp_nic_index[0] = nesvnic->nic_index;
- nesvnic->qp_nic_index[1] = nesvnic->nic_index
- + 2;
- nesvnic->qp_nic_index[2] = 0xf;
- nesvnic->qp_nic_index[3] = 0xf;
- } else {
- nesvnic->qp_nic_index[0] = nesvnic->nic_index;
- nesvnic->qp_nic_index[1] = 0xf;
- nesvnic->qp_nic_index[2] = 0xf;
- nesvnic->qp_nic_index[3] = 0xf;
- }
- }
- nesvnic->next_qp_nic_index = 0;
-
- if (nesdev->netdev_count == 0) {
- nesvnic->rdma_enabled = 1;
- } else {
- nesvnic->rdma_enabled = 0;
- }
- nesvnic->nic_cq.cq_number = nesvnic->nic.qp_id;
- timer_setup(&nesvnic->event_timer, NULL, 0);
- spin_lock_init(&nesvnic->tx_lock);
- spin_lock_init(&nesvnic->port_ibevent_lock);
- nesdev->netdev[nesdev->netdev_count] = netdev;
-
- nes_debug(NES_DBG_INIT, "Adding nesvnic (%p) to the adapters nesvnic_list for MAC%d.\n",
- nesvnic, nesdev->mac_index);
- list_add_tail(&nesvnic->list, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]);
-
- if ((nesdev->netdev_count == 0) &&
- ((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) ||
- ((phy_type == NES_PHY_TYPE_PUMA_1G) &&
- (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) ||
- ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) {
- u32 u32temp;
- u32 link_mask = 0;
- u32 link_val = 0;
- u16 temp_phy_data;
- u16 phy_data = 0;
- unsigned long flags;
-
- u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
- (0x200 * (nesdev->mac_index & 1)));
- if (phy_type != NES_PHY_TYPE_PUMA_1G) {
- u32temp |= 0x00200000;
- nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
- (0x200 * (nesdev->mac_index & 1)), u32temp);
- }
-
- /* Check and set linkup here. This is for back to back */
- /* configuration where second port won't get link interrupt */
- switch (phy_type) {
- case NES_PHY_TYPE_PUMA_1G:
- if (nesdev->mac_index < 2) {
- link_mask = 0x01010000;
- link_val = 0x01010000;
- } else {
- link_mask = 0x02020000;
- link_val = 0x02020000;
- }
- break;
- case NES_PHY_TYPE_SFP_D:
- spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
- nes_read_10G_phy_reg(nesdev,
- nesdev->nesadapter->phy_index[nesdev->mac_index],
- 1, 0x9003);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- nes_read_10G_phy_reg(nesdev,
- nesdev->nesadapter->phy_index[nesdev->mac_index],
- 3, 0x0021);
- nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- nes_read_10G_phy_reg(nesdev,
- nesdev->nesadapter->phy_index[nesdev->mac_index],
- 3, 0x0021);
- phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
- phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0;
- break;
- default:
- link_mask = 0x0f1f0000;
- link_val = 0x0f0f0000;
- break;
- }
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 +
- (0x200 * (nesdev->mac_index & 1)));
-
- if (phy_type == NES_PHY_TYPE_SFP_D) {
- if (phy_data & 0x0004)
- nesvnic->linkup = 1;
- } else {
- if ((u32temp & link_mask) == link_val)
- nesvnic->linkup = 1;
- }
-
- /* clear the MAC interrupt status, assumes direct logical to physical mapping */
- u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index));
- nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp);
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index), u32temp);
-
- nes_init_phy(nesdev);
- }
-
- nes_vlan_mode(netdev, nesdev, netdev->features);
-
- return netdev;
-}
-
-
-/**
- * nes_netdev_destroy - destroy network device structure
- */
-void nes_netdev_destroy(struct net_device *netdev)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
-
- /* make sure 'stop' method is called by Linux stack */
- /* nes_netdev_stop(netdev); */
-
- list_del(&nesvnic->list);
-
- if (nesvnic->of_device_registered) {
- nes_destroy_ofa_device(nesvnic->nesibdev);
- }
-
- free_netdev(netdev);
-}
-
-
-/**
- * nes_nic_cm_xmit -- CM calls this to send out pkts
- */
-int nes_nic_cm_xmit(struct sk_buff *skb, struct net_device *netdev)
-{
- int ret;
-
- skb->dev = netdev;
- ret = dev_queue_xmit(skb);
- if (ret) {
- nes_debug(NES_DBG_CM, "Bad return code from dev_queue_xmit %d\n", ret);
- }
-
- return ret;
-}
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
deleted file mode 100644
index 21b4a8373acf..000000000000
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ /dev/null
@@ -1,916 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/slab.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-
-#include "nes.h"
-
-static u16 nes_read16_eeprom(void __iomem *addr, u16 offset);
-
-u32 mh_detected;
-u32 mh_pauses_sent;
-
-static u32 nes_set_pau(struct nes_device *nesdev)
-{
- u32 ret = 0;
- u32 counter;
-
- nes_write_indexed(nesdev, NES_IDX_GPR2, NES_ENABLE_PAU);
- nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1);
-
- for (counter = 0; counter < NES_PAU_COUNTER; counter++) {
- udelay(30);
- if (!nes_read_indexed(nesdev, NES_IDX_GPR2)) {
- printk(KERN_INFO PFX "PAU is supported.\n");
- break;
- }
- nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1);
- }
- if (counter == NES_PAU_COUNTER) {
- printk(KERN_INFO PFX "PAU is not supported.\n");
- return -EPERM;
- }
- return ret;
-}
-
-/**
- * nes_read_eeprom_values -
- */
-int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesadapter)
-{
- u32 mac_addr_low;
- u16 mac_addr_high;
- u16 eeprom_data;
- u16 eeprom_offset;
- u16 next_section_address;
- u16 sw_section_ver;
- u8 major_ver = 0;
- u8 minor_ver = 0;
-
- /* TODO: deal with EEPROM endian issues */
- if (nesadapter->firmware_eeprom_offset == 0) {
- /* Read the EEPROM Parameters */
- eeprom_data = nes_read16_eeprom(nesdev->regs, 0);
- nes_debug(NES_DBG_HW, "EEPROM Offset 0 = 0x%04X\n", eeprom_data);
- eeprom_offset = 2 + (((eeprom_data & 0x007f) << 3) <<
- ((eeprom_data & 0x0080) >> 7));
- nes_debug(NES_DBG_HW, "Firmware Offset = 0x%04X\n", eeprom_offset);
- nesadapter->firmware_eeprom_offset = eeprom_offset;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 4);
- if (eeprom_data != 0x5746) {
- nes_debug(NES_DBG_HW, "Not a valid Firmware Image = 0x%04X\n", eeprom_data);
- return -1;
- }
-
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
- nes_debug(NES_DBG_HW, "EEPROM Offset %u = 0x%04X\n",
- eeprom_offset + 2, eeprom_data);
- eeprom_offset += ((eeprom_data & 0x00ff) << 3) << ((eeprom_data & 0x0100) >> 8);
- nes_debug(NES_DBG_HW, "Software Offset = 0x%04X\n", eeprom_offset);
- nesadapter->software_eeprom_offset = eeprom_offset;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 4);
- if (eeprom_data != 0x5753) {
- printk("Not a valid Software Image = 0x%04X\n", eeprom_data);
- return -1;
- }
- sw_section_ver = nes_read16_eeprom(nesdev->regs, nesadapter->software_eeprom_offset + 6);
- nes_debug(NES_DBG_HW, "Software section version number = 0x%04X\n",
- sw_section_ver);
-
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
- nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
- eeprom_offset + 2, eeprom_data);
- next_section_address = eeprom_offset + (((eeprom_data & 0x00ff) << 3) <<
- ((eeprom_data & 0x0100) >> 8));
- eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
- if (eeprom_data != 0x414d) {
- nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x414d but was 0x%04X\n",
- eeprom_data);
- goto no_fw_rev;
- }
- eeprom_offset = next_section_address;
-
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
- nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
- eeprom_offset + 2, eeprom_data);
- next_section_address = eeprom_offset + (((eeprom_data & 0x00ff) << 3) <<
- ((eeprom_data & 0x0100) >> 8));
- eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
- if (eeprom_data != 0x4f52) {
- nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x4f52 but was 0x%04X\n",
- eeprom_data);
- goto no_fw_rev;
- }
- eeprom_offset = next_section_address;
-
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
- nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
- eeprom_offset + 2, eeprom_data);
- next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3);
- eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
- if (eeprom_data != 0x5746) {
- nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x5746 but was 0x%04X\n",
- eeprom_data);
- goto no_fw_rev;
- }
- eeprom_offset = next_section_address;
-
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
- nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
- eeprom_offset + 2, eeprom_data);
- next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3);
- eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
- if (eeprom_data != 0x5753) {
- nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x5753 but was 0x%04X\n",
- eeprom_data);
- goto no_fw_rev;
- }
- eeprom_offset = next_section_address;
-
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
- nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
- eeprom_offset + 2, eeprom_data);
- next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3);
- eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
- if (eeprom_data != 0x414d) {
- nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x414d but was 0x%04X\n",
- eeprom_data);
- goto no_fw_rev;
- }
- eeprom_offset = next_section_address;
-
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
- nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
- eeprom_offset + 2, eeprom_data);
- next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3);
- eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
- if (eeprom_data != 0x464e) {
- nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x464e but was 0x%04X\n",
- eeprom_data);
- goto no_fw_rev;
- }
- eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 8);
- printk(PFX "Firmware version %u.%u\n", (u8)(eeprom_data>>8), (u8)eeprom_data);
- major_ver = (u8)(eeprom_data >> 8);
- minor_ver = (u8)(eeprom_data);
-
- if (nes_drv_opt & NES_DRV_OPT_DISABLE_VIRT_WQ) {
- nes_debug(NES_DBG_HW, "Virtual WQs have been disabled\n");
- } else if (((major_ver == 2) && (minor_ver > 21)) || ((major_ver > 2) && (major_ver != 255))) {
- nesadapter->virtwq = 1;
- }
- if (((major_ver == 3) && (minor_ver >= 16)) || (major_ver > 3))
- nesadapter->send_term_ok = 1;
-
- if (nes_drv_opt & NES_DRV_OPT_ENABLE_PAU) {
- if (!nes_set_pau(nesdev))
- nesadapter->allow_unaligned_fpdus = 1;
- }
-
- nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8)) << 16) +
- (u32)((u8)eeprom_data);
-
- eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 10);
- printk(PFX "EEPROM version %u.%u\n", (u8)(eeprom_data>>8), (u8)eeprom_data);
- nesadapter->eeprom_version = (((u32)(u8)(eeprom_data>>8)) << 16) +
- (u32)((u8)eeprom_data);
-
-no_fw_rev:
- /* eeprom is valid */
- eeprom_offset = nesadapter->software_eeprom_offset;
- eeprom_offset += 8;
- nesadapter->netdev_max = (u8)nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- mac_addr_high = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- mac_addr_low = (u32)nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- mac_addr_low <<= 16;
- mac_addr_low += (u32)nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "Base MAC Address = 0x%04X%08X\n",
- mac_addr_high, mac_addr_low);
- nes_debug(NES_DBG_HW, "MAC Address count = %u\n", nesadapter->netdev_max);
-
- nesadapter->mac_addr_low = mac_addr_low;
- nesadapter->mac_addr_high = mac_addr_high;
-
- /* Read the Phy Type array */
- eeprom_offset += 10;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->phy_type[0] = (u8)(eeprom_data >> 8);
- nesadapter->phy_type[1] = (u8)eeprom_data;
-
- /* Read the port array */
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->phy_type[2] = (u8)(eeprom_data >> 8);
- nesadapter->phy_type[3] = (u8)eeprom_data;
- /* port_count is set by soft reset reg */
- nes_debug(NES_DBG_HW, "port_count = %u, port 0 -> %u, port 1 -> %u,"
- " port 2 -> %u, port 3 -> %u\n",
- nesadapter->port_count,
- nesadapter->phy_type[0], nesadapter->phy_type[1],
- nesadapter->phy_type[2], nesadapter->phy_type[3]);
-
- /* Read PD config array */
- eeprom_offset += 10;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->pd_config_size[0] = eeprom_data;
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->pd_config_base[0] = eeprom_data;
- nes_debug(NES_DBG_HW, "PD0 config, size=0x%04x, base=0x%04x\n",
- nesadapter->pd_config_size[0], nesadapter->pd_config_base[0]);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->pd_config_size[1] = eeprom_data;
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->pd_config_base[1] = eeprom_data;
- nes_debug(NES_DBG_HW, "PD1 config, size=0x%04x, base=0x%04x\n",
- nesadapter->pd_config_size[1], nesadapter->pd_config_base[1]);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->pd_config_size[2] = eeprom_data;
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->pd_config_base[2] = eeprom_data;
- nes_debug(NES_DBG_HW, "PD2 config, size=0x%04x, base=0x%04x\n",
- nesadapter->pd_config_size[2], nesadapter->pd_config_base[2]);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->pd_config_size[3] = eeprom_data;
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->pd_config_base[3] = eeprom_data;
- nes_debug(NES_DBG_HW, "PD3 config, size=0x%04x, base=0x%04x\n",
- nesadapter->pd_config_size[3], nesadapter->pd_config_base[3]);
-
- /* Read Rx Pool Size */
- eeprom_offset += 22; /* 46 */
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->rx_pool_size = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "rx_pool_size = 0x%08X\n", nesadapter->rx_pool_size);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->tx_pool_size = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "tx_pool_size = 0x%08X\n", nesadapter->tx_pool_size);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->rx_threshold = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "rx_threshold = 0x%08X\n", nesadapter->rx_threshold);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->tcp_timer_core_clk_divisor = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "tcp_timer_core_clk_divisor = 0x%08X\n",
- nesadapter->tcp_timer_core_clk_divisor);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->iwarp_config = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "iwarp_config = 0x%08X\n", nesadapter->iwarp_config);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->cm_config = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "cm_config = 0x%08X\n", nesadapter->cm_config);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->sws_timer_config = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "sws_timer_config = 0x%08X\n", nesadapter->sws_timer_config);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->tcp_config1 = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "tcp_config1 = 0x%08X\n", nesadapter->tcp_config1);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->wqm_wat = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "wqm_wat = 0x%08X\n", nesadapter->wqm_wat);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->core_clock = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "core_clock = 0x%08X\n", nesadapter->core_clock);
-
- if ((sw_section_ver) && (nesadapter->hw_rev != NE020_REV)) {
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->phy_index[0] = (eeprom_data & 0xff00)>>8;
- nesadapter->phy_index[1] = eeprom_data & 0x00ff;
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->phy_index[2] = (eeprom_data & 0xff00)>>8;
- nesadapter->phy_index[3] = eeprom_data & 0x00ff;
- } else {
- nesadapter->phy_index[0] = 4;
- nesadapter->phy_index[1] = 5;
- nesadapter->phy_index[2] = 6;
- nesadapter->phy_index[3] = 7;
- }
- nes_debug(NES_DBG_HW, "Phy address map = 0 > %u, 1 > %u, 2 > %u, 3 > %u\n",
- nesadapter->phy_index[0],nesadapter->phy_index[1],
- nesadapter->phy_index[2],nesadapter->phy_index[3]);
- }
-
- return 0;
-}
-
-
-/**
- * nes_read16_eeprom
- */
-static u16 nes_read16_eeprom(void __iomem *addr, u16 offset)
-{
- writel(NES_EEPROM_READ_REQUEST + (offset >> 1),
- (void __iomem *)addr + NES_EEPROM_COMMAND);
-
- do {
- } while (readl((void __iomem *)addr + NES_EEPROM_COMMAND) &
- NES_EEPROM_READ_REQUEST);
-
- return readw((void __iomem *)addr + NES_EEPROM_DATA);
-}
-
-
-/**
- * nes_write_1G_phy_reg
- */
-void nes_write_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 data)
-{
- u32 u32temp;
- u32 counter;
-
- nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
- 0x50020000 | data | ((u32)phy_reg << 18) | ((u32)phy_addr << 23));
- for (counter = 0; counter < 100 ; counter++) {
- udelay(30);
- u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
- if (u32temp & 1) {
- /* nes_debug(NES_DBG_PHY, "Phy interrupt status = 0x%X.\n", u32temp); */
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
- break;
- }
- }
- if (!(u32temp & 1))
- nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
- u32temp);
-}
-
-
-/**
- * nes_read_1G_phy_reg
- * This routine only issues the read, the data must be read
- * separately.
- */
-void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 *data)
-{
- u32 u32temp;
- u32 counter;
-
- /* nes_debug(NES_DBG_PHY, "phy addr = %d, mac_index = %d\n",
- phy_addr, nesdev->mac_index); */
-
- nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
- 0x60020000 | ((u32)phy_reg << 18) | ((u32)phy_addr << 23));
- for (counter = 0; counter < 100 ; counter++) {
- udelay(30);
- u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
- if (u32temp & 1) {
- /* nes_debug(NES_DBG_PHY, "Phy interrupt status = 0x%X.\n", u32temp); */
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
- break;
- }
- }
- if (!(u32temp & 1)) {
- nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
- u32temp);
- *data = 0xffff;
- } else {
- *data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- }
-}
-
-
-/**
- * nes_write_10G_phy_reg
- */
-void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_addr, u8 dev_addr, u16 phy_reg,
- u16 data)
-{
- u32 port_addr;
- u32 u32temp;
- u32 counter;
-
- port_addr = phy_addr;
-
- /* set address */
- nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
- 0x00020000 | (u32)phy_reg | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23));
- for (counter = 0; counter < 100 ; counter++) {
- udelay(30);
- u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
- if (u32temp & 1) {
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
- break;
- }
- }
- if (!(u32temp & 1))
- nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
- u32temp);
-
- /* set data */
- nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
- 0x10020000 | (u32)data | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23));
- for (counter = 0; counter < 100 ; counter++) {
- udelay(30);
- u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
- if (u32temp & 1) {
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
- break;
- }
- }
- if (!(u32temp & 1))
- nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
- u32temp);
-}
-
-
-/**
- * nes_read_10G_phy_reg
- * This routine only issues the read, the data must be read
- * separately.
- */
-void nes_read_10G_phy_reg(struct nes_device *nesdev, u8 phy_addr, u8 dev_addr, u16 phy_reg)
-{
- u32 port_addr;
- u32 u32temp;
- u32 counter;
-
- port_addr = phy_addr;
-
- /* set address */
- nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
- 0x00020000 | (u32)phy_reg | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23));
- for (counter = 0; counter < 100 ; counter++) {
- udelay(30);
- u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
- if (u32temp & 1) {
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
- break;
- }
- }
- if (!(u32temp & 1))
- nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
- u32temp);
-
- /* issue read */
- nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
- 0x30020000 | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23));
- for (counter = 0; counter < 100 ; counter++) {
- udelay(30);
- u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
- if (u32temp & 1) {
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
- break;
- }
- }
- if (!(u32temp & 1))
- nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
- u32temp);
-}
-
-
-/**
- * nes_get_cqp_request
- */
-struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev)
-{
- unsigned long flags;
- struct nes_cqp_request *cqp_request = NULL;
-
- if (!list_empty(&nesdev->cqp_avail_reqs)) {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- if (!list_empty(&nesdev->cqp_avail_reqs)) {
- cqp_request = list_entry(nesdev->cqp_avail_reqs.next,
- struct nes_cqp_request, list);
- list_del_init(&cqp_request->list);
- }
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- if (cqp_request == NULL) {
- cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_ATOMIC);
- if (cqp_request) {
- cqp_request->dynamic = 1;
- INIT_LIST_HEAD(&cqp_request->list);
- }
- }
-
- if (cqp_request) {
- init_waitqueue_head(&cqp_request->waitq);
- cqp_request->waiting = 0;
- cqp_request->request_done = 0;
- cqp_request->callback = 0;
- init_waitqueue_head(&cqp_request->waitq);
- nes_debug(NES_DBG_CQP, "Got cqp request %p from the available list \n",
- cqp_request);
- } else
- printk(KERN_ERR PFX "%s: Could not allocated a CQP request.\n",
- __func__);
-
- return cqp_request;
-}
-
-void nes_free_cqp_request(struct nes_device *nesdev,
- struct nes_cqp_request *cqp_request)
-{
- unsigned long flags;
-
- nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n",
- cqp_request,
- le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f);
-
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
-}
-
-void nes_put_cqp_request(struct nes_device *nesdev,
- struct nes_cqp_request *cqp_request)
-{
- if (atomic_dec_and_test(&cqp_request->refcount))
- nes_free_cqp_request(nesdev, cqp_request);
-}
-
-
-/**
- * nes_post_cqp_request
- */
-void nes_post_cqp_request(struct nes_device *nesdev,
- struct nes_cqp_request *cqp_request)
-{
- struct nes_hw_cqp_wqe *cqp_wqe;
- unsigned long flags;
- u32 cqp_head;
- u64 u64temp;
- u32 opcode;
- int ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX;
-
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
-
- if (((((nesdev->cqp.sq_tail+(nesdev->cqp.sq_size*2))-nesdev->cqp.sq_head) &
- (nesdev->cqp.sq_size - 1)) != 1)
- && (list_empty(&nesdev->cqp_pending_reqs))) {
- cqp_head = nesdev->cqp.sq_head++;
- nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
- opcode = le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]);
- if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT)
- ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX;
- barrier();
- u64temp = (unsigned long)cqp_request;
- set_wqe_64bit_value(cqp_wqe->wqe_words, ctx_index, u64temp);
- nes_debug(NES_DBG_CQP, "CQP request (opcode 0x%02X), line 1 = 0x%08X put on CQPs SQ,"
- " request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u,"
- " waiting = %d, refcount = %d.\n",
- opcode & NES_CQP_OPCODE_MASK,
- le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request,
- nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size,
- cqp_request->waiting, atomic_read(&cqp_request->refcount));
-
- barrier();
-
- /* Ring doorbell (1 WQEs) */
- nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id);
-
- barrier();
- } else {
- nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X), line 1 = 0x%08X"
- " put on the pending queue.\n",
- cqp_request,
- le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f,
- le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_ID_IDX]));
- list_add_tail(&cqp_request->list, &nesdev->cqp_pending_reqs);
- }
-
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-
- return;
-}
-
-/**
- * nes_arp_table
- */
-int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 action)
-{
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- int arp_index;
- int err = 0;
- __be32 tmp_addr;
-
- for (arp_index = 0; (u32) arp_index < nesadapter->arp_table_size; arp_index++) {
- if (nesadapter->arp_table[arp_index].ip_addr == ip_addr)
- break;
- }
-
- if (action == NES_ARP_ADD) {
- if (arp_index != nesadapter->arp_table_size) {
- return -1;
- }
-
- arp_index = 0;
- err = nes_alloc_resource(nesadapter, nesadapter->allocated_arps,
- nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index, NES_RESOURCE_ARP);
- if (err) {
- nes_debug(NES_DBG_NETDEV, "nes_alloc_resource returned error = %u\n", err);
- return err;
- }
- nes_debug(NES_DBG_NETDEV, "ADD, arp_index=%d\n", arp_index);
-
- nesadapter->arp_table[arp_index].ip_addr = ip_addr;
- memcpy(nesadapter->arp_table[arp_index].mac_addr, mac_addr, ETH_ALEN);
- return arp_index;
- }
-
- /* DELETE or RESOLVE */
- if (arp_index == nesadapter->arp_table_size) {
- tmp_addr = cpu_to_be32(ip_addr);
- nes_debug(NES_DBG_NETDEV, "MAC for %pI4 not in ARP table - cannot %s\n",
- &tmp_addr, action == NES_ARP_RESOLVE ? "resolve" : "delete");
- return -1;
- }
-
- if (action == NES_ARP_RESOLVE) {
- nes_debug(NES_DBG_NETDEV, "RESOLVE, arp_index=%d\n", arp_index);
- return arp_index;
- }
-
- if (action == NES_ARP_DELETE) {
- nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", arp_index);
- nesadapter->arp_table[arp_index].ip_addr = 0;
- eth_zero_addr(nesadapter->arp_table[arp_index].mac_addr);
- nes_free_resource(nesadapter, nesadapter->allocated_arps, arp_index);
- return arp_index;
- }
-
- return -1;
-}
-
-
-/**
- * nes_mh_fix
- */
-void nes_mh_fix(struct timer_list *t)
-{
- struct nes_adapter *nesadapter = from_timer(nesadapter, t, mh_timer);
- struct nes_device *nesdev = nesadapter->nesdev;
- unsigned long flags;
- struct nes_vnic *nesvnic;
- u32 used_chunks_tx;
- u32 temp_used_chunks_tx;
- u32 temp_last_used_chunks_tx;
- u32 used_chunks_mask;
- u32 mac_tx_frames_low;
- u32 mac_tx_frames_high;
- u32 mac_tx_pauses;
- u32 reset_value;
- u32 tx_control;
- u32 tx_config;
- u32 tx_pause_quanta;
- u32 rx_control;
- u32 rx_config;
- u32 mac_exact_match;
- u32 mpp_debug;
- u32 i=0;
- u32 chunks_tx_progress = 0;
-
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
- if ((nesadapter->mac_sw_state[0] != NES_MAC_SW_IDLE) || (nesadapter->mac_link_down[0])) {
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
- goto no_mh_work;
- }
- nesadapter->mac_sw_state[0] = NES_MAC_SW_MH;
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
- do {
- mac_tx_frames_low = nes_read_indexed(nesdev, NES_IDX_MAC_TX_FRAMES_LOW);
- mac_tx_frames_high = nes_read_indexed(nesdev, NES_IDX_MAC_TX_FRAMES_HIGH);
- mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES);
- used_chunks_tx = nes_read_indexed(nesdev, NES_IDX_USED_CHUNKS_TX);
- nesdev->mac_pause_frames_sent += mac_tx_pauses;
- used_chunks_mask = 0;
- temp_used_chunks_tx = used_chunks_tx;
- temp_last_used_chunks_tx = nesdev->last_used_chunks_tx;
-
- if (nesdev->netdev[0]) {
- nesvnic = netdev_priv(nesdev->netdev[0]);
- } else {
- break;
- }
-
- for (i=0; i<4; i++) {
- used_chunks_mask <<= 8;
- if (nesvnic->qp_nic_index[i] != 0xff) {
- used_chunks_mask |= 0xff;
- if ((temp_used_chunks_tx&0xff)<(temp_last_used_chunks_tx&0xff)) {
- chunks_tx_progress = 1;
- }
- }
- temp_used_chunks_tx >>= 8;
- temp_last_used_chunks_tx >>= 8;
- }
- if ((mac_tx_frames_low) || (mac_tx_frames_high) ||
- (!(used_chunks_tx&used_chunks_mask)) ||
- (!(nesdev->last_used_chunks_tx&used_chunks_mask)) ||
- (chunks_tx_progress) ) {
- nesdev->last_used_chunks_tx = used_chunks_tx;
- break;
- }
- nesdev->last_used_chunks_tx = used_chunks_tx;
- barrier();
-
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, 0x00000005);
- mh_pauses_sent++;
- mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES);
- if (mac_tx_pauses) {
- nesdev->mac_pause_frames_sent += mac_tx_pauses;
- break;
- }
-
- tx_control = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONTROL);
- tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
- tx_pause_quanta = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_QUANTA);
- rx_control = nes_read_indexed(nesdev, NES_IDX_MAC_RX_CONTROL);
- rx_config = nes_read_indexed(nesdev, NES_IDX_MAC_RX_CONFIG);
- mac_exact_match = nes_read_indexed(nesdev, NES_IDX_MAC_EXACT_MATCH_BOTTOM);
- mpp_debug = nes_read_indexed(nesdev, NES_IDX_MPP_DEBUG);
-
- /* one last ditch effort to avoid a false positive */
- mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES);
- if (mac_tx_pauses) {
- nesdev->last_mac_tx_pauses = nesdev->mac_pause_frames_sent;
- nes_debug(NES_DBG_HW, "failsafe caught slow outbound pause\n");
- break;
- }
- mh_detected++;
-
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, 0x00000000);
- reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
-
- nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value | 0x0000001d);
-
- while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET)
- & 0x00000040) != 0x00000040) && (i++ < 5000)) {
- /* mdelay(1); */
- }
-
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008);
- nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0);
-
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x000bdef7);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE0, 0x9ce73000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE0, 0x0ff00000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET0, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS0, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0, 0x00000000);
- if (nesadapter->OneG_Mode) {
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0182222);
- } else {
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0042222);
- }
- nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_STATUS0);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000ff);
-
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, tx_control);
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_QUANTA, tx_pause_quanta);
- nes_write_indexed(nesdev, NES_IDX_MAC_RX_CONTROL, rx_control);
- nes_write_indexed(nesdev, NES_IDX_MAC_RX_CONFIG, rx_config);
- nes_write_indexed(nesdev, NES_IDX_MAC_EXACT_MATCH_BOTTOM, mac_exact_match);
- nes_write_indexed(nesdev, NES_IDX_MPP_DEBUG, mpp_debug);
-
- } while (0);
-
- nesadapter->mac_sw_state[0] = NES_MAC_SW_IDLE;
-no_mh_work:
- nesdev->nesadapter->mh_timer.expires = jiffies + (HZ/5);
- add_timer(&nesdev->nesadapter->mh_timer);
-}
-
-/**
- * nes_clc
- */
-void nes_clc(struct timer_list *t)
-{
- struct nes_adapter *nesadapter = from_timer(nesadapter, t, lc_timer);
- unsigned long flags;
-
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
- nesadapter->link_interrupt_count[0] = 0;
- nesadapter->link_interrupt_count[1] = 0;
- nesadapter->link_interrupt_count[2] = 0;
- nesadapter->link_interrupt_count[3] = 0;
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
-
- nesadapter->lc_timer.expires = jiffies + 3600 * HZ; /* 1 hour */
- add_timer(&nesadapter->lc_timer);
-}
-
-
-/**
- * nes_dump_mem
- */
-void nes_dump_mem(unsigned int dump_debug_level, void *addr, int length)
-{
- if (!(nes_debug_level & dump_debug_level)) {
- return;
- }
-
- if (length > 0x100) {
- nes_debug(dump_debug_level, "Length truncated from %x to %x\n", length, 0x100);
- length = 0x100;
- }
- nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", addr, length, length);
-
- print_hex_dump(KERN_ERR, PFX, DUMP_PREFIX_NONE, 16, 1, addr, length, true);
-}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
deleted file mode 100644
index 49024326a518..000000000000
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ /dev/null
@@ -1,3759 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/random.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <asm/byteorder.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/iw_cm.h>
-#include <rdma/ib_user_verbs.h>
-#include <rdma/uverbs_ioctl.h>
-
-#include "nes.h"
-
-#include <rdma/ib_umem.h>
-
-atomic_t mod_qp_timouts;
-atomic_t qps_created;
-atomic_t sw_qps_destroyed;
-
-static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev);
-static int nes_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
-
-/**
- * nes_alloc_mw
- */
-static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
- struct ib_udata *udata)
-{
- struct nes_pd *nespd = to_nespd(ibpd);
- struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_cqp_request *cqp_request;
- struct nes_mr *nesmr;
- struct ib_mw *ibmw;
- struct nes_hw_cqp_wqe *cqp_wqe;
- int ret;
- u32 stag;
- u32 stag_index = 0;
- u32 next_stag_index = 0;
- u32 driver_key = 0;
- u8 stag_key = 0;
-
- if (type != IB_MW_TYPE_1)
- return ERR_PTR(-EINVAL);
-
- get_random_bytes(&next_stag_index, sizeof(next_stag_index));
- stag_key = (u8)next_stag_index;
-
- driver_key = 0;
-
- next_stag_index >>= 8;
- next_stag_index %= nesadapter->max_mr;
-
- ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
- nesadapter->max_mr, &stag_index, &next_stag_index, NES_RESOURCE_MW);
- if (ret) {
- return ERR_PTR(ret);
- }
-
- nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
- if (!nesmr) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- return ERR_PTR(-ENOMEM);
- }
-
- stag = stag_index << 8;
- stag |= driver_key;
- stag += (u32)stag_key;
-
- nes_debug(NES_DBG_MR, "Registering STag 0x%08X, index = 0x%08X\n",
- stag, stag_index);
-
- /* Register the region with the adapter */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- kfree(nesmr);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- return ERR_PTR(-ENOMEM);
- }
-
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
-
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] =
- cpu_to_le32( NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_RIGHTS_REMOTE_READ |
- NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_VA_TO |
- NES_CQP_STAG_REM_ACC_EN);
-
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX, (nespd->pd_id & 0x00007fff));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u,"
- " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
- stag, ret, cqp_request->major_code, cqp_request->minor_code);
- if ((!ret) || (cqp_request->major_code)) {
- nes_put_cqp_request(nesdev, cqp_request);
- kfree(nesmr);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- if (!ret) {
- return ERR_PTR(-ETIME);
- } else {
- return ERR_PTR(-ENOMEM);
- }
- }
- nes_put_cqp_request(nesdev, cqp_request);
-
- nesmr->ibmw.rkey = stag;
- nesmr->mode = IWNES_MEMREG_TYPE_MW;
- ibmw = &nesmr->ibmw;
- nesmr->pbl_4k = 0;
- nesmr->pbls_used = 0;
-
- return ibmw;
-}
-
-
-/**
- * nes_dealloc_mw
- */
-static int nes_dealloc_mw(struct ib_mw *ibmw)
-{
- struct nes_mr *nesmr = to_nesmw(ibmw);
- struct nes_vnic *nesvnic = to_nesvnic(ibmw->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- int err = 0;
- int ret;
-
- /* Deallocate the window with the adapter */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
- return -ENOMEM;
- }
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, NES_CQP_DEALLOCATE_STAG);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ibmw->rkey);
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X to complete.\n",
- ibmw->rkey);
- ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_MR, "Deallocate STag completed, wait_event_timeout ret = %u,"
- " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
- ret, cqp_request->major_code, cqp_request->minor_code);
- if (!ret)
- err = -ETIME;
- else if (cqp_request->major_code)
- err = -EIO;
-
- nes_put_cqp_request(nesdev, cqp_request);
-
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- (ibmw->rkey & 0x0fffff00) >> 8);
- kfree(nesmr);
-
- return err;
-}
-
-
-/*
- * nes_alloc_fast_mr
- */
-static int alloc_fast_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
- u32 stag, u32 page_count)
-{
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- unsigned long flags;
- int ret;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 opcode = 0;
- u16 major_code;
- u64 region_length = page_count * PAGE_SIZE;
-
-
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
- return -ENOMEM;
- }
- nes_debug(NES_DBG_MR, "alloc_fast_reg_mr: page_count = %d, "
- "region_length = %llu\n",
- page_count, region_length);
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
-
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- if (nesadapter->free_4kpbl > 0) {
- nesadapter->free_4kpbl--;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- } else {
- /* No 4kpbl's available: */
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- nes_debug(NES_DBG_MR, "Out of Pbls\n");
- nes_free_cqp_request(nesdev, cqp_request);
- return -ENOMEM;
- }
-
- opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_MR |
- NES_CQP_STAG_PBL_BLK_SIZE | NES_CQP_STAG_VA_TO |
- NES_CQP_STAG_REM_ACC_EN;
- /*
- * The current OFED API does not support the zero based TO option.
- * If added then need to changed the NES_CQP_STAG_VA* option. Also,
- * the API does not support that ability to have the MR set for local
- * access only when created and not allow the SQ op to override. Given
- * this the remote enable must be set here.
- */
-
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, 1);
-
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] =
- cpu_to_le32((u32)(region_length >> 8) & 0xff000000);
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |=
- cpu_to_le32(nespd->pd_id & 0x00007fff);
-
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, 0);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, 0);
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, 0);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (page_count * 8));
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE);
- barrier();
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- ret = wait_event_timeout(cqp_request->waitq,
- (0 != cqp_request->request_done),
- NES_EVENT_TIMEOUT);
-
- nes_debug(NES_DBG_MR, "Allocate STag 0x%08X completed, "
- "wait_event_timeout ret = %u, CQP Major:Minor codes = "
- "0x%04X:0x%04X.\n", stag, ret, cqp_request->major_code,
- cqp_request->minor_code);
- major_code = cqp_request->major_code;
- nes_put_cqp_request(nesdev, cqp_request);
-
- if (!ret || major_code) {
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- nesadapter->free_4kpbl++;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- }
-
- if (!ret)
- return -ETIME;
- else if (major_code)
- return -EIO;
- return 0;
-}
-
-/*
- * nes_alloc_mr
- */
-static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
-{
- struct nes_pd *nespd = to_nespd(ibpd);
- struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
-
- u32 next_stag_index;
- u8 stag_key = 0;
- u32 driver_key = 0;
- int err = 0;
- u32 stag_index = 0;
- struct nes_mr *nesmr;
- u32 stag;
- int ret;
- struct ib_mr *ibmr;
-
- if (mr_type != IB_MR_TYPE_MEM_REG)
- return ERR_PTR(-EINVAL);
-
- if (max_num_sg > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
- return ERR_PTR(-E2BIG);
-
-/*
- * Note: Set to always use a fixed length single page entry PBL. This is to allow
- * for the fast_reg_mr operation to always know the size of the PBL.
- */
- if (max_num_sg > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
- return ERR_PTR(-E2BIG);
-
- get_random_bytes(&next_stag_index, sizeof(next_stag_index));
- stag_key = (u8)next_stag_index;
- next_stag_index >>= 8;
- next_stag_index %= nesadapter->max_mr;
-
- err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
- nesadapter->max_mr, &stag_index,
- &next_stag_index, NES_RESOURCE_FAST_MR);
- if (err)
- return ERR_PTR(err);
-
- nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
- if (!nesmr) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- return ERR_PTR(-ENOMEM);
- }
-
- stag = stag_index << 8;
- stag |= driver_key;
- stag += (u32)stag_key;
-
- nes_debug(NES_DBG_MR, "Allocating STag 0x%08X index = 0x%08X\n",
- stag, stag_index);
-
- ret = alloc_fast_reg_mr(nesdev, nespd, stag, max_num_sg);
-
- if (ret == 0) {
- nesmr->ibmr.rkey = stag;
- nesmr->ibmr.lkey = stag;
- nesmr->mode = IWNES_MEMREG_TYPE_FMEM;
- ibmr = &nesmr->ibmr;
- } else {
- kfree(nesmr);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- return ERR_PTR(-ENOMEM);
- }
-
- nesmr->pages = pci_alloc_consistent(nesdev->pcidev,
- max_num_sg * sizeof(u64),
- &nesmr->paddr);
- if (!nesmr->paddr)
- goto err;
-
- nesmr->max_pages = max_num_sg;
-
- return ibmr;
-
-err:
- nes_dereg_mr(ibmr, udata);
-
- return ERR_PTR(-ENOMEM);
-}
-
-static int nes_set_page(struct ib_mr *ibmr, u64 addr)
-{
- struct nes_mr *nesmr = to_nesmr(ibmr);
-
- if (unlikely(nesmr->npages == nesmr->max_pages))
- return -ENOMEM;
-
- nesmr->pages[nesmr->npages++] = cpu_to_le64(addr);
-
- return 0;
-}
-
-static int nes_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
- int sg_nents, unsigned int *sg_offset)
-{
- struct nes_mr *nesmr = to_nesmr(ibmr);
-
- nesmr->npages = 0;
-
- return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, nes_set_page);
-}
-
-/**
- * nes_query_device
- */
-static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
- struct ib_udata *uhw)
-{
- struct nes_vnic *nesvnic = to_nesvnic(ibdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_ib_device *nesibdev = nesvnic->nesibdev;
-
- if (uhw->inlen || uhw->outlen)
- return -EINVAL;
-
- memset(props, 0, sizeof(*props));
- memcpy(&props->sys_image_guid, nesvnic->netdev->dev_addr, 6);
-
- props->fw_ver = nesdev->nesadapter->firmware_version;
- props->device_cap_flags = nesdev->nesadapter->device_cap_flags;
- props->vendor_id = nesdev->nesadapter->vendor_id;
- props->vendor_part_id = nesdev->nesadapter->vendor_part_id;
- props->hw_ver = nesdev->nesadapter->hw_rev;
- props->max_mr_size = 0x80000000;
- props->max_qp = nesibdev->max_qp;
- props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2;
- props->max_send_sge = nesdev->nesadapter->max_sge;
- props->max_recv_sge = nesdev->nesadapter->max_sge;
- props->max_cq = nesibdev->max_cq;
- props->max_cqe = nesdev->nesadapter->max_cqe;
- props->max_mr = nesibdev->max_mr;
- props->max_mw = nesibdev->max_mr;
- props->max_pd = nesibdev->max_pd;
- props->max_sge_rd = 1;
- switch (nesdev->nesadapter->max_irrq_wr) {
- case 0:
- props->max_qp_rd_atom = 2;
- break;
- case 1:
- props->max_qp_rd_atom = 8;
- break;
- case 2:
- props->max_qp_rd_atom = 32;
- break;
- case 3:
- props->max_qp_rd_atom = 64;
- break;
- default:
- props->max_qp_rd_atom = 0;
- }
- props->max_qp_init_rd_atom = props->max_qp_rd_atom;
- props->atomic_cap = IB_ATOMIC_NONE;
- props->max_map_per_fmr = 1;
-
- return 0;
-}
-
-
-/**
- * nes_query_port
- */
-static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props)
-{
- struct nes_vnic *nesvnic = to_nesvnic(ibdev);
- struct net_device *netdev = nesvnic->netdev;
-
- /* props being zeroed by the caller, avoid zeroing it here */
-
- props->max_mtu = IB_MTU_4096;
- props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
-
- props->lid = 1;
- if (netif_queue_stopped(netdev))
- props->state = IB_PORT_DOWN;
- else if (nesvnic->linkup)
- props->state = IB_PORT_ACTIVE;
- else
- props->state = IB_PORT_DOWN;
- props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
- IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
- props->gid_tbl_len = 1;
- props->pkey_tbl_len = 1;
- props->active_width = IB_WIDTH_4X;
- props->active_speed = IB_SPEED_SDR;
- props->max_msg_sz = 0x80000000;
-
- return 0;
-}
-
-/**
- * nes_query_pkey
- */
-static int nes_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
-{
- *pkey = 0;
- return 0;
-}
-
-
-/**
- * nes_query_gid
- */
-static int nes_query_gid(struct ib_device *ibdev, u8 port,
- int index, union ib_gid *gid)
-{
- struct nes_vnic *nesvnic = to_nesvnic(ibdev);
-
- memset(&(gid->raw[0]), 0, sizeof(gid->raw));
- memcpy(&(gid->raw[0]), nesvnic->netdev->dev_addr, 6);
-
- return 0;
-}
-
-
-/**
- * nes_alloc_ucontext - Allocate the user context data structure. This keeps track
- * of all objects associated with a particular user-mode client.
- */
-static int nes_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
-{
- struct ib_device *ibdev = uctx->device;
- struct nes_vnic *nesvnic = to_nesvnic(ibdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_alloc_ucontext_req req;
- struct nes_alloc_ucontext_resp uresp = {};
- struct nes_ucontext *nes_ucontext = to_nesucontext(uctx);
- struct nes_ib_device *nesibdev = nesvnic->nesibdev;
-
-
- if (ib_copy_from_udata(&req, udata, sizeof(struct nes_alloc_ucontext_req))) {
- printk(KERN_ERR PFX "Invalid structure size on allocate user context.\n");
- return -EINVAL;
- }
-
- if (req.userspace_ver != NES_ABI_USERSPACE_VER) {
- printk(KERN_ERR PFX "Invalid userspace driver version detected. Detected version %d, should be %d\n",
- req.userspace_ver, NES_ABI_USERSPACE_VER);
- return -EINVAL;
- }
-
-
- uresp.max_qps = nesibdev->max_qp;
- uresp.max_pds = nesibdev->max_pd;
- uresp.wq_size = nesdev->nesadapter->max_qp_wr * 2;
- uresp.virtwq = nesadapter->virtwq;
- uresp.kernel_ver = NES_ABI_KERNEL_VER;
-
- nes_ucontext->nesdev = nesdev;
- nes_ucontext->mmap_wq_offset = uresp.max_pds;
- nes_ucontext->mmap_cq_offset = nes_ucontext->mmap_wq_offset +
- ((sizeof(struct nes_hw_qp_wqe) * uresp.max_qps * 2) + PAGE_SIZE-1) /
- PAGE_SIZE;
-
-
- if (ib_copy_to_udata(udata, &uresp, sizeof(uresp)))
- return -EFAULT;
-
- INIT_LIST_HEAD(&nes_ucontext->cq_reg_mem_list);
- INIT_LIST_HEAD(&nes_ucontext->qp_reg_mem_list);
- return 0;
-}
-
-/**
- * nes_dealloc_ucontext
- */
-static void nes_dealloc_ucontext(struct ib_ucontext *context)
-{
- return;
-}
-
-/**
- * nes_mmap
- */
-static int nes_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
- unsigned long index;
- struct nes_vnic *nesvnic = to_nesvnic(context->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- /* struct nes_adapter *nesadapter = nesdev->nesadapter; */
- struct nes_ucontext *nes_ucontext;
- struct nes_qp *nesqp;
-
- nes_ucontext = to_nesucontext(context);
-
-
- if (vma->vm_pgoff >= nes_ucontext->mmap_wq_offset) {
- index = (vma->vm_pgoff - nes_ucontext->mmap_wq_offset) * PAGE_SIZE;
- index /= ((sizeof(struct nes_hw_qp_wqe) * nesdev->nesadapter->max_qp_wr * 2) +
- PAGE_SIZE-1) & (~(PAGE_SIZE-1));
- if (!test_bit(index, nes_ucontext->allocated_wqs)) {
- nes_debug(NES_DBG_MMAP, "wq %lu not allocated\n", index);
- return -EFAULT;
- }
- nesqp = nes_ucontext->mmap_nesqp[index];
- if (nesqp == NULL) {
- nes_debug(NES_DBG_MMAP, "wq %lu has a NULL QP base.\n", index);
- return -EFAULT;
- }
- if (remap_pfn_range(vma, vma->vm_start,
- virt_to_phys(nesqp->hwqp.sq_vbase) >> PAGE_SHIFT,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot)) {
- nes_debug(NES_DBG_MMAP, "remap_pfn_range failed.\n");
- return -EAGAIN;
- }
- vma->vm_private_data = nesqp;
- return 0;
- } else {
- index = vma->vm_pgoff;
- if (!test_bit(index, nes_ucontext->allocated_doorbells))
- return -EFAULT;
-
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- if (io_remap_pfn_range(vma, vma->vm_start,
- (nesdev->doorbell_start +
- ((nes_ucontext->mmap_db_index[index] - nesdev->base_doorbell_index) * 4096))
- >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
- vma->vm_private_data = nes_ucontext;
- return 0;
- }
-
- return -ENOSYS;
-}
-
-
-/**
- * nes_alloc_pd
- */
-static int nes_alloc_pd(struct ib_pd *pd, struct ib_udata *udata)
-{
- struct ib_device *ibdev = pd->device;
- struct nes_pd *nespd = to_nespd(pd);
- struct nes_vnic *nesvnic = to_nesvnic(ibdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_alloc_pd_resp uresp;
- u32 pd_num = 0;
- int err;
- struct nes_ucontext *nesucontext = rdma_udata_to_drv_context(
- udata, struct nes_ucontext, ibucontext);
-
- nes_debug(
- NES_DBG_PD,
- "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n",
- nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev,
- &nesucontext->ibucontext, netdev_refcnt_read(nesvnic->netdev));
-
- err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds,
- nesadapter->max_pd, &pd_num, &nesadapter->next_pd, NES_RESOURCE_PD);
- if (err)
- return err;
-
- nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n",
- nespd, dev_name(&nesvnic->nesibdev->ibdev.dev));
-
- nespd->pd_id = (pd_num << (PAGE_SHIFT-12)) + nesadapter->base_pd;
-
- if (udata) {
- nespd->mmap_db_index = find_next_zero_bit(nesucontext->allocated_doorbells,
- NES_MAX_USER_DB_REGIONS, nesucontext->first_free_db);
- nes_debug(NES_DBG_PD, "find_first_zero_biton doorbells returned %u, mapping pd_id %u.\n",
- nespd->mmap_db_index, nespd->pd_id);
- if (nespd->mmap_db_index >= NES_MAX_USER_DB_REGIONS) {
- nes_debug(NES_DBG_PD, "mmap_db_index > MAX\n");
- nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num);
- return -ENOMEM;
- }
-
- uresp.pd_id = nespd->pd_id;
- uresp.mmap_db_index = nespd->mmap_db_index;
- if (ib_copy_to_udata(udata, &uresp, sizeof (struct nes_alloc_pd_resp))) {
- nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num);
- return -EFAULT;
- }
-
- set_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells);
- nesucontext->mmap_db_index[nespd->mmap_db_index] = nespd->pd_id;
- nesucontext->first_free_db = nespd->mmap_db_index + 1;
- }
-
- nes_debug(NES_DBG_PD, "PD%u structure located @%p.\n", nespd->pd_id, nespd);
- return 0;
-}
-
-
-/**
- * nes_dealloc_pd
- */
-static void nes_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
-{
- struct nes_ucontext *nesucontext;
- struct nes_pd *nespd = to_nespd(ibpd);
- struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
-
- if (udata) {
- nesucontext =
- rdma_udata_to_drv_context(
- udata,
- struct nes_ucontext,
- ibucontext);
- nes_debug(NES_DBG_PD, "Clearing bit %u from allocated doorbells\n",
- nespd->mmap_db_index);
- clear_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells);
- nesucontext->mmap_db_index[nespd->mmap_db_index] = 0;
- if (nesucontext->first_free_db > nespd->mmap_db_index) {
- nesucontext->first_free_db = nespd->mmap_db_index;
- }
- }
-
- nes_debug(NES_DBG_PD, "Deallocating PD%u structure located @%p.\n",
- nespd->pd_id, nespd);
- nes_free_resource(nesadapter, nesadapter->allocated_pds,
- (nespd->pd_id-nesadapter->base_pd)>>(PAGE_SHIFT-12));
-}
-
-
-/**
- * nes_get_encoded_size
- */
-static inline u8 nes_get_encoded_size(int *size)
-{
- u8 encoded_size = 0;
- if (*size <= 32) {
- *size = 32;
- encoded_size = 1;
- } else if (*size <= 128) {
- *size = 128;
- encoded_size = 2;
- } else if (*size <= 512) {
- *size = 512;
- encoded_size = 3;
- }
- return (encoded_size);
-}
-
-
-
-/**
- * nes_setup_virt_qp
- */
-static int nes_setup_virt_qp(struct nes_qp *nesqp, struct nes_pbl *nespbl,
- struct nes_vnic *nesvnic, int sq_size, int rq_size)
-{
- unsigned long flags;
- void *mem;
- __le64 *pbl = NULL;
- __le64 *tpbl;
- __le64 *pblbuffer;
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 pbl_entries;
- u8 rq_pbl_entries;
- u8 sq_pbl_entries;
-
- pbl_entries = nespbl->pbl_size >> 3;
- nes_debug(NES_DBG_QP, "Userspace PBL, pbl_size=%u, pbl_entries = %d pbl_vbase=%p, pbl_pbase=%lx\n",
- nespbl->pbl_size, pbl_entries,
- (void *)nespbl->pbl_vbase,
- (unsigned long) nespbl->pbl_pbase);
- pbl = (__le64 *) nespbl->pbl_vbase; /* points to first pbl entry */
- /* now lets set the sq_vbase as well as rq_vbase addrs we will assign */
- /* the first pbl to be fro the rq_vbase... */
- rq_pbl_entries = (rq_size * sizeof(struct nes_hw_qp_wqe)) >> 12;
- sq_pbl_entries = (sq_size * sizeof(struct nes_hw_qp_wqe)) >> 12;
- nesqp->hwqp.sq_pbase = (le32_to_cpu(((__le32 *)pbl)[0])) | ((u64)((le32_to_cpu(((__le32 *)pbl)[1]))) << 32);
- if (!nespbl->page) {
- nes_debug(NES_DBG_QP, "QP nespbl->page is NULL \n");
- kfree(nespbl);
- return -ENOMEM;
- }
-
- nesqp->hwqp.sq_vbase = kmap(nespbl->page);
- nesqp->page = nespbl->page;
- if (!nesqp->hwqp.sq_vbase) {
- nes_debug(NES_DBG_QP, "QP sq_vbase kmap failed\n");
- kfree(nespbl);
- return -ENOMEM;
- }
-
- /* Now to get to sq.. we need to calculate how many */
- /* PBL entries were used by the rq.. */
- pbl += sq_pbl_entries;
- nesqp->hwqp.rq_pbase = (le32_to_cpu(((__le32 *)pbl)[0])) | ((u64)((le32_to_cpu(((__le32 *)pbl)[1]))) << 32);
- /* nesqp->hwqp.rq_vbase = bus_to_virt(*pbl); */
- /*nesqp->hwqp.rq_vbase = phys_to_virt(*pbl); */
-
- nes_debug(NES_DBG_QP, "QP sq_vbase= %p sq_pbase=%lx rq_vbase=%p rq_pbase=%lx\n",
- nesqp->hwqp.sq_vbase, (unsigned long) nesqp->hwqp.sq_pbase,
- nesqp->hwqp.rq_vbase, (unsigned long) nesqp->hwqp.rq_pbase);
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- if (!nesadapter->free_256pbl) {
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
- nespbl->pbl_pbase);
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- kunmap(nesqp->page);
- kfree(nespbl);
- return -ENOMEM;
- }
- nesadapter->free_256pbl--;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-
- nesqp->pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 256, &nesqp->pbl_pbase);
- pblbuffer = nesqp->pbl_vbase;
- if (!nesqp->pbl_vbase) {
- /* memory allocated during nes_reg_user_mr() */
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
- nespbl->pbl_pbase);
- kfree(nespbl);
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- nesadapter->free_256pbl++;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- kunmap(nesqp->page);
- return -ENOMEM;
- }
- memset(nesqp->pbl_vbase, 0, 256);
- /* fill in the page address in the pbl buffer.. */
- tpbl = pblbuffer + 16;
- pbl = (__le64 *)nespbl->pbl_vbase;
- while (sq_pbl_entries--)
- *tpbl++ = *pbl++;
- tpbl = pblbuffer;
- while (rq_pbl_entries--)
- *tpbl++ = *pbl++;
-
- /* done with memory allocated during nes_reg_user_mr() */
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
- nespbl->pbl_pbase);
- kfree(nespbl);
-
- nesqp->qp_mem_size =
- max((u32)sizeof(struct nes_qp_context), ((u32)256)) + 256; /* this is Q2 */
- /* Round up to a multiple of a page */
- nesqp->qp_mem_size += PAGE_SIZE - 1;
- nesqp->qp_mem_size &= ~(PAGE_SIZE - 1);
-
- mem = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size,
- &nesqp->hwqp.q2_pbase);
-
- if (!mem) {
- pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase);
- nesqp->pbl_vbase = NULL;
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- nesadapter->free_256pbl++;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- kunmap(nesqp->page);
- return -ENOMEM;
- }
- nesqp->sq_kmapped = 1;
- nesqp->hwqp.q2_vbase = mem;
- mem += 256;
- memset(nesqp->hwqp.q2_vbase, 0, 256);
- nesqp->nesqp_context = mem;
- memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context));
- nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256;
-
- return 0;
-}
-
-
-/**
- * nes_setup_mmap_qp
- */
-static int nes_setup_mmap_qp(struct nes_qp *nesqp, struct nes_vnic *nesvnic,
- int sq_size, int rq_size)
-{
- void *mem;
- struct nes_device *nesdev = nesvnic->nesdev;
-
- nesqp->qp_mem_size = (sizeof(struct nes_hw_qp_wqe) * sq_size) +
- (sizeof(struct nes_hw_qp_wqe) * rq_size) +
- max((u32)sizeof(struct nes_qp_context), ((u32)256)) +
- 256; /* this is Q2 */
- /* Round up to a multiple of a page */
- nesqp->qp_mem_size += PAGE_SIZE - 1;
- nesqp->qp_mem_size &= ~(PAGE_SIZE - 1);
-
- mem = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size,
- &nesqp->hwqp.sq_pbase);
- if (!mem)
- return -ENOMEM;
- nes_debug(NES_DBG_QP, "PCI consistent memory for "
- "host descriptor rings located @ %p (pa = 0x%08lX.) size = %u.\n",
- mem, (unsigned long)nesqp->hwqp.sq_pbase, nesqp->qp_mem_size);
-
- memset(mem, 0, nesqp->qp_mem_size);
-
- nesqp->hwqp.sq_vbase = mem;
- mem += sizeof(struct nes_hw_qp_wqe) * sq_size;
-
- nesqp->hwqp.rq_vbase = mem;
- nesqp->hwqp.rq_pbase = nesqp->hwqp.sq_pbase +
- sizeof(struct nes_hw_qp_wqe) * sq_size;
- mem += sizeof(struct nes_hw_qp_wqe) * rq_size;
-
- nesqp->hwqp.q2_vbase = mem;
- nesqp->hwqp.q2_pbase = nesqp->hwqp.rq_pbase +
- sizeof(struct nes_hw_qp_wqe) * rq_size;
- mem += 256;
- memset(nesqp->hwqp.q2_vbase, 0, 256);
-
- nesqp->nesqp_context = mem;
- nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256;
- memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context));
- return 0;
-}
-
-
-/**
- * nes_free_qp_mem() is to free up the qp's pci_alloc_consistent() memory.
- */
-static void nes_free_qp_mem(struct nes_device *nesdev,
- struct nes_qp *nesqp, int virt_wqs)
-{
- unsigned long flags;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- if (!virt_wqs) {
- pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size,
- nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase);
- }else {
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- nesadapter->free_256pbl++;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase);
- pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase );
- nesqp->pbl_vbase = NULL;
- if (nesqp->sq_kmapped) {
- nesqp->sq_kmapped = 0;
- kunmap(nesqp->page);
- }
- }
-}
-
-
-/**
- * nes_create_qp
- */
-static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *init_attr, struct ib_udata *udata)
-{
- u64 u64temp= 0;
- u64 u64nesqp = 0;
- struct nes_pd *nespd = to_nespd(ibpd);
- struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_qp *nesqp;
- struct nes_cq *nescq;
- struct nes_ucontext *nes_ucontext = rdma_udata_to_drv_context(
- udata, struct nes_ucontext, ibucontext);
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- struct nes_create_qp_req req;
- struct nes_create_qp_resp uresp;
- struct nes_pbl *nespbl = NULL;
- u32 qp_num = 0;
- u32 opcode = 0;
- /* u32 counter = 0; */
- void *mem;
- unsigned long flags;
- int ret;
- int err;
- int virt_wqs = 0;
- int sq_size;
- int rq_size;
- u8 sq_encoded_size;
- u8 rq_encoded_size;
- /* int counter; */
-
- if (init_attr->create_flags)
- return ERR_PTR(-EINVAL);
-
- atomic_inc(&qps_created);
- switch (init_attr->qp_type) {
- case IB_QPT_RC:
- if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) {
- init_attr->cap.max_inline_data = 0;
- } else {
- init_attr->cap.max_inline_data = 64;
- }
- sq_size = init_attr->cap.max_send_wr;
- rq_size = init_attr->cap.max_recv_wr;
-
- /* check if the encoded sizes are OK or not... */
- sq_encoded_size = nes_get_encoded_size(&sq_size);
- rq_encoded_size = nes_get_encoded_size(&rq_size);
-
- if ((!sq_encoded_size) || (!rq_encoded_size)) {
- nes_debug(NES_DBG_QP, "ERROR bad rq (%u) or sq (%u) size\n",
- rq_size, sq_size);
- return ERR_PTR(-EINVAL);
- }
-
- init_attr->cap.max_send_wr = sq_size -2;
- init_attr->cap.max_recv_wr = rq_size -1;
- nes_debug(NES_DBG_QP, "RQ size=%u, SQ Size=%u\n", rq_size, sq_size);
-
- ret = nes_alloc_resource(nesadapter, nesadapter->allocated_qps,
- nesadapter->max_qp, &qp_num, &nesadapter->next_qp, NES_RESOURCE_QP);
- if (ret) {
- return ERR_PTR(ret);
- }
-
- /* Need 512 (actually now 1024) byte alignment on this structure */
- mem = kzalloc(sizeof(*nesqp)+NES_SW_CONTEXT_ALIGN-1, GFP_KERNEL);
- if (!mem) {
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- return ERR_PTR(-ENOMEM);
- }
- u64nesqp = (unsigned long)mem;
- u64nesqp += ((u64)NES_SW_CONTEXT_ALIGN) - 1;
- u64temp = ((u64)NES_SW_CONTEXT_ALIGN) - 1;
- u64nesqp &= ~u64temp;
- nesqp = (struct nes_qp *)(unsigned long)u64nesqp;
- /* nes_debug(NES_DBG_QP, "nesqp=%p, allocated buffer=%p. Rounded to closest %u\n",
- nesqp, mem, NES_SW_CONTEXT_ALIGN); */
- nesqp->allocated_buffer = mem;
-
- if (udata) {
- if (ib_copy_from_udata(&req, udata, sizeof(struct nes_create_qp_req))) {
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- kfree(nesqp->allocated_buffer);
- nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n");
- return ERR_PTR(-EFAULT);
- }
- if (req.user_wqe_buffers) {
- virt_wqs = 1;
- }
- if (req.user_qp_buffer)
- nesqp->nesuqp_addr = req.user_qp_buffer;
-
- nesqp->user_mode = 1;
- if (virt_wqs) {
- err = 1;
- list_for_each_entry(nespbl, &nes_ucontext->qp_reg_mem_list, list) {
- if (nespbl->user_base == (unsigned long )req.user_wqe_buffers) {
- list_del(&nespbl->list);
- err = 0;
- nes_debug(NES_DBG_QP, "Found PBL for virtual QP. nespbl=%p. user_base=0x%lx\n",
- nespbl, nespbl->user_base);
- break;
- }
- }
- if (err) {
- nes_debug(NES_DBG_QP, "Didn't Find PBL for virtual QP. address = %llx.\n",
- (long long unsigned int)req.user_wqe_buffers);
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- kfree(nesqp->allocated_buffer);
- return ERR_PTR(-EFAULT);
- }
- }
-
- nesqp->mmap_sq_db_index =
- find_next_zero_bit(nes_ucontext->allocated_wqs,
- NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq);
- /* nes_debug(NES_DBG_QP, "find_first_zero_biton wqs returned %u\n",
- nespd->mmap_db_index); */
- if (nesqp->mmap_sq_db_index >= NES_MAX_USER_WQ_REGIONS) {
- nes_debug(NES_DBG_QP,
- "db index > max user regions, failing create QP\n");
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- if (virt_wqs) {
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
- nespbl->pbl_pbase);
- kfree(nespbl);
- }
- kfree(nesqp->allocated_buffer);
- return ERR_PTR(-ENOMEM);
- }
- set_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs);
- nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = nesqp;
- nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index + 1;
- }
- err = (!virt_wqs) ? nes_setup_mmap_qp(nesqp, nesvnic, sq_size, rq_size) :
- nes_setup_virt_qp(nesqp, nespbl, nesvnic, sq_size, rq_size);
- if (err) {
- nes_debug(NES_DBG_QP,
- "error geting qp mem code = %d\n", err);
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- kfree(nesqp->allocated_buffer);
- return ERR_PTR(-ENOMEM);
- }
-
- nesqp->hwqp.sq_size = sq_size;
- nesqp->hwqp.sq_encoded_size = sq_encoded_size;
- nesqp->hwqp.sq_head = 1;
- nesqp->hwqp.rq_size = rq_size;
- nesqp->hwqp.rq_encoded_size = rq_encoded_size;
- /* nes_debug(NES_DBG_QP, "nesqp->nesqp_context_pbase = %p\n",
- (void *)nesqp->nesqp_context_pbase);
- */
- nesqp->hwqp.qp_id = qp_num;
- nesqp->ibqp.qp_num = nesqp->hwqp.qp_id;
- nesqp->nespd = nespd;
-
- nescq = to_nescq(init_attr->send_cq);
- nesqp->nesscq = nescq;
- nescq = to_nescq(init_attr->recv_cq);
- nesqp->nesrcq = nescq;
-
- nesqp->nesqp_context->misc |= cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) <<
- NES_QPCONTEXT_MISC_PCI_FCN_SHIFT);
- nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.rq_encoded_size <<
- NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT);
- nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.sq_encoded_size <<
- NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT);
- if (!udata) {
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_PRIV_EN);
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_FAST_REGISTER_EN);
- }
- nesqp->nesqp_context->cqs = cpu_to_le32(nesqp->nesscq->hw_cq.cq_number +
- ((u32)nesqp->nesrcq->hw_cq.cq_number << 16));
- u64temp = (u64)nesqp->hwqp.sq_pbase;
- nesqp->nesqp_context->sq_addr_low = cpu_to_le32((u32)u64temp);
- nesqp->nesqp_context->sq_addr_high = cpu_to_le32((u32)(u64temp >> 32));
-
-
- if (!virt_wqs) {
- u64temp = (u64)nesqp->hwqp.sq_pbase;
- nesqp->nesqp_context->sq_addr_low = cpu_to_le32((u32)u64temp);
- nesqp->nesqp_context->sq_addr_high = cpu_to_le32((u32)(u64temp >> 32));
- u64temp = (u64)nesqp->hwqp.rq_pbase;
- nesqp->nesqp_context->rq_addr_low = cpu_to_le32((u32)u64temp);
- nesqp->nesqp_context->rq_addr_high = cpu_to_le32((u32)(u64temp >> 32));
- } else {
- u64temp = (u64)nesqp->pbl_pbase;
- nesqp->nesqp_context->rq_addr_low = cpu_to_le32((u32)u64temp);
- nesqp->nesqp_context->rq_addr_high = cpu_to_le32((u32)(u64temp >> 32));
- }
-
- /* nes_debug(NES_DBG_QP, "next_qp_nic_index=%u, using nic_index=%d\n",
- nesvnic->next_qp_nic_index,
- nesvnic->qp_nic_index[nesvnic->next_qp_nic_index]); */
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- nesqp->nesqp_context->misc2 |= cpu_to_le32(
- (u32)nesvnic->qp_nic_index[nesvnic->next_qp_nic_index] <<
- NES_QPCONTEXT_MISC2_NIC_INDEX_SHIFT);
- nesvnic->next_qp_nic_index++;
- if ((nesvnic->next_qp_nic_index > 3) ||
- (nesvnic->qp_nic_index[nesvnic->next_qp_nic_index] == 0xf)) {
- nesvnic->next_qp_nic_index = 0;
- }
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-
- nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32((u32)nesqp->nespd->pd_id << 16);
- u64temp = (u64)nesqp->hwqp.q2_pbase;
- nesqp->nesqp_context->q2_addr_low = cpu_to_le32((u32)u64temp);
- nesqp->nesqp_context->q2_addr_high = cpu_to_le32((u32)(u64temp >> 32));
- nesqp->nesqp_context->aeq_token_low = cpu_to_le32((u32)((unsigned long)(nesqp)));
- nesqp->nesqp_context->aeq_token_high = cpu_to_le32((u32)(upper_32_bits((unsigned long)(nesqp))));
- nesqp->nesqp_context->ird_ord_sizes = cpu_to_le32(NES_QPCONTEXT_ORDIRD_ALSMM |
- NES_QPCONTEXT_ORDIRD_AAH |
- ((((u32)nesadapter->max_irrq_wr) <<
- NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT) & NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK));
- if (disable_mpa_crc) {
- nes_debug(NES_DBG_QP, "Disabling MPA crc checking due to module option.\n");
- nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(NES_QPCONTEXT_ORDIRD_RNMC);
- }
-
-
- /* Create the QP */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_QP, "Failed to get a cqp_request\n");
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- nes_free_qp_mem(nesdev, nesqp,virt_wqs);
- kfree(nesqp->allocated_buffer);
- return ERR_PTR(-ENOMEM);
- }
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
-
- if (!virt_wqs) {
- opcode = NES_CQP_CREATE_QP | NES_CQP_QP_TYPE_IWARP |
- NES_CQP_QP_IWARP_STATE_IDLE;
- } else {
- opcode = NES_CQP_CREATE_QP | NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_VIRT_WQS |
- NES_CQP_QP_IWARP_STATE_IDLE;
- }
- opcode |= NES_CQP_QP_CQS_VALID;
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
-
- u64temp = (u64)nesqp->nesqp_context_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- nes_debug(NES_DBG_QP, "Waiting for create iWARP QP%u to complete.\n",
- nesqp->hwqp.qp_id);
- ret = wait_event_timeout(cqp_request->waitq,
- (cqp_request->request_done != 0), NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_QP, "Create iwarp QP%u completed, wait_event_timeout ret=%u,"
- " nesdev->cqp_head = %u, nesdev->cqp.sq_tail = %u,"
- " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
- nesqp->hwqp.qp_id, ret, nesdev->cqp.sq_head, nesdev->cqp.sq_tail,
- cqp_request->major_code, cqp_request->minor_code);
- if ((!ret) || (cqp_request->major_code)) {
- nes_put_cqp_request(nesdev, cqp_request);
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- nes_free_qp_mem(nesdev, nesqp,virt_wqs);
- kfree(nesqp->allocated_buffer);
- if (!ret) {
- return ERR_PTR(-ETIME);
- } else {
- return ERR_PTR(-EIO);
- }
- }
-
- nes_put_cqp_request(nesdev, cqp_request);
-
- if (udata) {
- uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index;
- uresp.mmap_rq_db_index = 0;
- uresp.actual_sq_size = sq_size;
- uresp.actual_rq_size = rq_size;
- uresp.qp_id = nesqp->hwqp.qp_id;
- uresp.nes_drv_opt = nes_drv_opt;
- if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- nes_free_qp_mem(nesdev, nesqp,virt_wqs);
- kfree(nesqp->allocated_buffer);
- return ERR_PTR(-EFAULT);
- }
- }
-
- nes_debug(NES_DBG_QP, "QP%u structure located @%p.Size = %u.\n",
- nesqp->hwqp.qp_id, nesqp, (u32)sizeof(*nesqp));
- spin_lock_init(&nesqp->lock);
- nes_add_ref(&nesqp->ibqp);
- break;
- default:
- nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type);
- return ERR_PTR(-EINVAL);
- }
- init_completion(&nesqp->sq_drained);
- init_completion(&nesqp->rq_drained);
-
- nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
- timer_setup(&nesqp->terminate_timer, nes_terminate_timeout, 0);
-
- /* update the QP table */
- nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
- nes_debug(NES_DBG_QP, "netdev refcnt=%u\n",
- netdev_refcnt_read(nesvnic->netdev));
-
- return &nesqp->ibqp;
-}
-
-/**
- * nes_clean_cq
- */
-static void nes_clean_cq(struct nes_qp *nesqp, struct nes_cq *nescq)
-{
- u32 cq_head;
- u32 lo;
- u32 hi;
- u64 u64temp;
- unsigned long flags = 0;
-
- spin_lock_irqsave(&nescq->lock, flags);
-
- cq_head = nescq->hw_cq.cq_head;
- while (le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) {
- rmb();
- lo = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
- hi = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]);
- u64temp = (((u64)hi) << 32) | ((u64)lo);
- u64temp &= ~(NES_SW_CONTEXT_ALIGN-1);
- if (u64temp == (u64)(unsigned long)nesqp) {
- /* Zero the context value so cqe will be ignored */
- nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] = 0;
- nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX] = 0;
- }
-
- if (++cq_head >= nescq->hw_cq.cq_size)
- cq_head = 0;
- }
-
- spin_unlock_irqrestore(&nescq->lock, flags);
-}
-
-
-/**
- * nes_destroy_qp
- */
-static int nes_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
-{
- struct nes_qp *nesqp = to_nesqp(ibqp);
- struct nes_ucontext *nes_ucontext;
- struct ib_qp_attr attr;
- struct iw_cm_id *cm_id;
- struct iw_cm_event cm_event;
- int ret = 0;
-
- atomic_inc(&sw_qps_destroyed);
- nesqp->destroyed = 1;
-
- /* Blow away the connection if it exists. */
- if (nesqp->ibqp_state >= IB_QPS_INIT && nesqp->ibqp_state <= IB_QPS_RTS) {
- /* if (nesqp->ibqp_state == IB_QPS_RTS) { */
- attr.qp_state = IB_QPS_ERR;
- nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
- }
-
- if (((nesqp->ibqp_state == IB_QPS_INIT) ||
- (nesqp->ibqp_state == IB_QPS_RTR)) && (nesqp->cm_id)) {
- cm_id = nesqp->cm_id;
- cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
- cm_event.status = -ETIMEDOUT;
- cm_event.local_addr = cm_id->local_addr;
- cm_event.remote_addr = cm_id->remote_addr;
- cm_event.private_data = NULL;
- cm_event.private_data_len = 0;
-
- nes_debug(NES_DBG_QP, "Generating a CM Timeout Event for "
- "QP%u. cm_id = %p, refcount = %u. \n",
- nesqp->hwqp.qp_id, cm_id, atomic_read(&nesqp->refcount));
-
- cm_id->rem_ref(cm_id);
- ret = cm_id->event_handler(cm_id, &cm_event);
- if (ret)
- nes_debug(NES_DBG_QP, "OFA CM event_handler returned, ret=%d\n", ret);
- }
-
- if (nesqp->user_mode) {
- if (udata) {
- nes_ucontext =
- rdma_udata_to_drv_context(
- udata,
- struct nes_ucontext,
- ibucontext);
- clear_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs);
- nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = NULL;
- if (nes_ucontext->first_free_wq > nesqp->mmap_sq_db_index) {
- nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index;
- }
- }
- if (nesqp->pbl_pbase && nesqp->sq_kmapped) {
- nesqp->sq_kmapped = 0;
- kunmap(nesqp->page);
- }
- } else {
- /* Clean any pending completions from the cq(s) */
- if (nesqp->nesscq)
- nes_clean_cq(nesqp, nesqp->nesscq);
-
- if ((nesqp->nesrcq) && (nesqp->nesrcq != nesqp->nesscq))
- nes_clean_cq(nesqp, nesqp->nesrcq);
- }
- nes_rem_ref(&nesqp->ibqp);
- return 0;
-}
-
-
-/**
- * nes_create_cq
- */
-static struct ib_cq *nes_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
-{
- int entries = attr->cqe;
- u64 u64temp;
- struct nes_vnic *nesvnic = to_nesvnic(ibdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_cq *nescq;
- struct nes_ucontext *nes_ucontext = NULL;
- struct nes_cqp_request *cqp_request;
- void *mem = NULL;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_pbl *nespbl = NULL;
- struct nes_create_cq_req req;
- struct nes_create_cq_resp resp;
- u32 cq_num = 0;
- u32 opcode = 0;
- u32 pbl_entries = 1;
- int err;
- unsigned long flags;
- int ret;
-
- if (attr->flags)
- return ERR_PTR(-EINVAL);
-
- if (entries > nesadapter->max_cqe)
- return ERR_PTR(-EINVAL);
-
- err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs,
- nesadapter->max_cq, &cq_num, &nesadapter->next_cq, NES_RESOURCE_CQ);
- if (err) {
- return ERR_PTR(err);
- }
-
- nescq = kzalloc(sizeof(struct nes_cq), GFP_KERNEL);
- if (!nescq) {
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- return ERR_PTR(-ENOMEM);
- }
-
- nescq->hw_cq.cq_size = max(entries + 1, 5);
- nescq->hw_cq.cq_number = cq_num;
- nescq->ibcq.cqe = nescq->hw_cq.cq_size - 1;
-
- if (udata) {
- struct nes_ucontext *nes_ucontext = rdma_udata_to_drv_context(
- udata, struct nes_ucontext, ibucontext);
-
- if (ib_copy_from_udata(&req, udata, sizeof (struct nes_create_cq_req))) {
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- kfree(nescq);
- return ERR_PTR(-EFAULT);
- }
- nesvnic->mcrq_ucontext = nes_ucontext;
- nes_ucontext->mcrqf = req.mcrqf;
- if (nes_ucontext->mcrqf) {
- if (nes_ucontext->mcrqf & 0x80000000)
- nescq->hw_cq.cq_number = nesvnic->nic.qp_id + 28 + 2 * ((nes_ucontext->mcrqf & 0xf) - 1);
- else if (nes_ucontext->mcrqf & 0x40000000)
- nescq->hw_cq.cq_number = nes_ucontext->mcrqf & 0xffff;
- else
- nescq->hw_cq.cq_number = nesvnic->mcrq_qp_id + nes_ucontext->mcrqf-1;
- nescq->mcrqf = nes_ucontext->mcrqf;
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- }
- nes_debug(NES_DBG_CQ, "CQ Virtual Address = %08lX, size = %u.\n",
- (unsigned long)req.user_cq_buffer, entries);
- err = 1;
- list_for_each_entry(nespbl, &nes_ucontext->cq_reg_mem_list, list) {
- if (nespbl->user_base == (unsigned long )req.user_cq_buffer) {
- list_del(&nespbl->list);
- err = 0;
- nes_debug(NES_DBG_CQ, "Found PBL for virtual CQ. nespbl=%p.\n",
- nespbl);
- break;
- }
- }
- if (err) {
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- kfree(nescq);
- return ERR_PTR(-EFAULT);
- }
-
- pbl_entries = nespbl->pbl_size >> 3;
- nescq->cq_mem_size = 0;
- } else {
- nescq->cq_mem_size = nescq->hw_cq.cq_size * sizeof(struct nes_hw_cqe);
- nes_debug(NES_DBG_CQ, "Attempting to allocate pci memory (%u entries, %u bytes) for CQ%u.\n",
- entries, nescq->cq_mem_size, nescq->hw_cq.cq_number);
-
- /* allocate the physical buffer space */
- mem = pci_zalloc_consistent(nesdev->pcidev, nescq->cq_mem_size,
- &nescq->hw_cq.cq_pbase);
- if (!mem) {
- printk(KERN_ERR PFX "Unable to allocate pci memory for cq\n");
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- kfree(nescq);
- return ERR_PTR(-ENOMEM);
- }
-
- nescq->hw_cq.cq_vbase = mem;
- nescq->hw_cq.cq_head = 0;
- nes_debug(NES_DBG_CQ, "CQ%u virtual address @ %p, phys = 0x%08X\n",
- nescq->hw_cq.cq_number, nescq->hw_cq.cq_vbase,
- (u32)nescq->hw_cq.cq_pbase);
- }
-
- nescq->hw_cq.ce_handler = nes_iwarp_ce_handler;
- spin_lock_init(&nescq->lock);
-
- /* send CreateCQ request to CQP */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n");
- if (!udata)
- pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
- nescq->hw_cq.cq_pbase);
- else {
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size,
- nespbl->pbl_vbase, nespbl->pbl_pbase);
- kfree(nespbl);
- }
-
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- kfree(nescq);
- return ERR_PTR(-ENOMEM);
- }
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
-
- opcode = NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
- NES_CQP_CQ_CHK_OVERFLOW |
- NES_CQP_CQ_CEQE_MASK | ((u32)nescq->hw_cq.cq_size << 16);
-
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
-
- if (pbl_entries != 1) {
- if (pbl_entries > 32) {
- /* use 4k pbl */
- nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 4k PBL\n", pbl_entries);
- if (nesadapter->free_4kpbl == 0) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- nes_free_cqp_request(nesdev, cqp_request);
- if (!udata)
- pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
- nescq->hw_cq.cq_pbase);
- else {
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size,
- nespbl->pbl_vbase, nespbl->pbl_pbase);
- kfree(nespbl);
- }
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- kfree(nescq);
- return ERR_PTR(-ENOMEM);
- } else {
- opcode |= (NES_CQP_CQ_VIRT | NES_CQP_CQ_4KB_CHUNK);
- nescq->virtual_cq = 2;
- nesadapter->free_4kpbl--;
- }
- } else {
- /* use 256 byte pbl */
- nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 256 byte PBL\n", pbl_entries);
- if (nesadapter->free_256pbl == 0) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- nes_free_cqp_request(nesdev, cqp_request);
- if (!udata)
- pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
- nescq->hw_cq.cq_pbase);
- else {
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size,
- nespbl->pbl_vbase, nespbl->pbl_pbase);
- kfree(nespbl);
- }
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- kfree(nescq);
- return ERR_PTR(-ENOMEM);
- } else {
- opcode |= NES_CQP_CQ_VIRT;
- nescq->virtual_cq = 1;
- nesadapter->free_256pbl--;
- }
- }
- }
-
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
- (nescq->hw_cq.cq_number | ((u32)nesdev->ceq_index << 16)));
-
- if (udata) {
- if (pbl_entries != 1)
- u64temp = (u64)nespbl->pbl_pbase;
- else
- u64temp = le64_to_cpu(nespbl->pbl_vbase[0]);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX,
- nes_ucontext->mmap_db_index[0]);
- } else {
- u64temp = (u64)nescq->hw_cq.cq_pbase;
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
- }
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0;
- u64temp = (u64)(unsigned long)&nescq->hw_cq;
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] =
- cpu_to_le32((u32)(u64temp >> 1));
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
- cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- nes_debug(NES_DBG_CQ, "Waiting for create iWARP CQ%u to complete.\n",
- nescq->hw_cq.cq_number);
- ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
- NES_EVENT_TIMEOUT * 2);
- nes_debug(NES_DBG_CQ, "Create iWARP CQ%u completed, wait_event_timeout ret = %d.\n",
- nescq->hw_cq.cq_number, ret);
- if ((!ret) || (cqp_request->major_code)) {
- nes_put_cqp_request(nesdev, cqp_request);
- if (!udata)
- pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
- nescq->hw_cq.cq_pbase);
- else {
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size,
- nespbl->pbl_vbase, nespbl->pbl_pbase);
- kfree(nespbl);
- }
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- kfree(nescq);
- return ERR_PTR(-EIO);
- }
- nes_put_cqp_request(nesdev, cqp_request);
-
- if (udata) {
- /* free the nespbl */
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
- nespbl->pbl_pbase);
- kfree(nespbl);
- resp.cq_id = nescq->hw_cq.cq_number;
- resp.cq_size = nescq->hw_cq.cq_size;
- resp.mmap_db_index = 0;
- if (ib_copy_to_udata(udata, &resp, sizeof resp - sizeof resp.reserved)) {
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- kfree(nescq);
- return ERR_PTR(-EFAULT);
- }
- }
-
- return &nescq->ibcq;
-}
-
-
-/**
- * nes_destroy_cq
- */
-static int nes_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
-{
- struct nes_cq *nescq;
- struct nes_device *nesdev;
- struct nes_vnic *nesvnic;
- struct nes_adapter *nesadapter;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- unsigned long flags;
- u32 opcode = 0;
- int ret;
-
- if (ib_cq == NULL)
- return 0;
-
- nescq = to_nescq(ib_cq);
- nesvnic = to_nesvnic(ib_cq->device);
- nesdev = nesvnic->nesdev;
- nesadapter = nesdev->nesadapter;
-
- nes_debug(NES_DBG_CQ, "Destroy CQ%u\n", nescq->hw_cq.cq_number);
-
- /* Send DestroyCQ request to CQP */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n");
- return -ENOMEM;
- }
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
- opcode = NES_CQP_DESTROY_CQ | (nescq->hw_cq.cq_size << 16);
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- if (nescq->virtual_cq == 1) {
- nesadapter->free_256pbl++;
- if (nesadapter->free_256pbl > nesadapter->max_256pbl) {
- printk(KERN_ERR PFX "%s: free 256B PBLs(%u) has exceeded the max(%u)\n",
- __func__, nesadapter->free_256pbl, nesadapter->max_256pbl);
- }
- } else if (nescq->virtual_cq == 2) {
- nesadapter->free_4kpbl++;
- if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) {
- printk(KERN_ERR PFX "%s: free 4K PBLs(%u) has exceeded the max(%u)\n",
- __func__, nesadapter->free_4kpbl, nesadapter->max_4kpbl);
- }
- opcode |= NES_CQP_CQ_4KB_CHUNK;
- }
-
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
- (nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16)));
- if (!nescq->mcrqf)
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number);
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- nes_debug(NES_DBG_CQ, "Waiting for destroy iWARP CQ%u to complete.\n",
- nescq->hw_cq.cq_number);
- ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_CQ, "Destroy iWARP CQ%u completed, wait_event_timeout ret = %u,"
- " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
- nescq->hw_cq.cq_number, ret, cqp_request->major_code,
- cqp_request->minor_code);
- if (!ret) {
- nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n",
- nescq->hw_cq.cq_number);
- ret = -ETIME;
- } else if (cqp_request->major_code) {
- nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n",
- nescq->hw_cq.cq_number);
- ret = -EIO;
- } else {
- ret = 0;
- }
- nes_put_cqp_request(nesdev, cqp_request);
-
- if (nescq->cq_mem_size)
- pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size,
- nescq->hw_cq.cq_vbase, nescq->hw_cq.cq_pbase);
- kfree(nescq);
-
- return ret;
-}
-
-/**
- * root_256
- */
-static u32 root_256(struct nes_device *nesdev,
- struct nes_root_vpbl *root_vpbl,
- struct nes_root_vpbl *new_root,
- u16 pbl_count_4k)
-{
- u64 leaf_pbl;
- int i, j, k;
-
- if (pbl_count_4k == 1) {
- new_root->pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
- 512, &new_root->pbl_pbase);
-
- if (new_root->pbl_vbase == NULL)
- return 0;
-
- leaf_pbl = (u64)root_vpbl->pbl_pbase;
- for (i = 0; i < 16; i++) {
- new_root->pbl_vbase[i].pa_low =
- cpu_to_le32((u32)leaf_pbl);
- new_root->pbl_vbase[i].pa_high =
- cpu_to_le32((u32)((((u64)leaf_pbl) >> 32)));
- leaf_pbl += 256;
- }
- } else {
- for (i = 3; i >= 0; i--) {
- j = i * 16;
- root_vpbl->pbl_vbase[j] = root_vpbl->pbl_vbase[i];
- leaf_pbl = le32_to_cpu(root_vpbl->pbl_vbase[j].pa_low) +
- (((u64)le32_to_cpu(root_vpbl->pbl_vbase[j].pa_high))
- << 32);
- for (k = 1; k < 16; k++) {
- leaf_pbl += 256;
- root_vpbl->pbl_vbase[j + k].pa_low =
- cpu_to_le32((u32)leaf_pbl);
- root_vpbl->pbl_vbase[j + k].pa_high =
- cpu_to_le32((u32)((((u64)leaf_pbl) >> 32)));
- }
- }
- }
-
- return 1;
-}
-
-
-/**
- * nes_reg_mr
- */
-static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
- u32 stag, u64 region_length, struct nes_root_vpbl *root_vpbl,
- dma_addr_t single_buffer, u16 pbl_count_4k,
- u16 residual_page_count_4k, int acc, u64 *iova_start,
- u16 *actual_pbl_cnt, u8 *used_4k_pbls)
-{
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- unsigned long flags;
- int ret;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- uint pg_cnt = 0;
- u16 pbl_count_256 = 0;
- u16 pbl_count = 0;
- u8 use_256_pbls = 0;
- u8 use_4k_pbls = 0;
- u16 use_two_level = (pbl_count_4k > 1) ? 1 : 0;
- struct nes_root_vpbl new_root = { 0, NULL, NULL };
- u32 opcode = 0;
- u16 major_code;
-
- /* Register the region with the adapter */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
- return -ENOMEM;
- }
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
-
- if (pbl_count_4k) {
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
-
- pg_cnt = ((pbl_count_4k - 1) * 512) + residual_page_count_4k;
- pbl_count_256 = (pg_cnt + 31) / 32;
- if (pg_cnt <= 32) {
- if (pbl_count_256 <= nesadapter->free_256pbl)
- use_256_pbls = 1;
- else if (pbl_count_4k <= nesadapter->free_4kpbl)
- use_4k_pbls = 1;
- } else if (pg_cnt <= 2048) {
- if (((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) &&
- (nesadapter->free_4kpbl > (nesadapter->max_4kpbl >> 1))) {
- use_4k_pbls = 1;
- } else if ((pbl_count_256 + 1) <= nesadapter->free_256pbl) {
- use_256_pbls = 1;
- use_two_level = 1;
- } else if ((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) {
- use_4k_pbls = 1;
- }
- } else {
- if ((pbl_count_4k + 1) <= nesadapter->free_4kpbl)
- use_4k_pbls = 1;
- }
-
- if (use_256_pbls) {
- pbl_count = pbl_count_256;
- nesadapter->free_256pbl -= pbl_count + use_two_level;
- } else if (use_4k_pbls) {
- pbl_count = pbl_count_4k;
- nesadapter->free_4kpbl -= pbl_count + use_two_level;
- } else {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- nes_debug(NES_DBG_MR, "Out of Pbls\n");
- nes_free_cqp_request(nesdev, cqp_request);
- return -ENOMEM;
- }
-
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- }
-
- if (use_256_pbls && use_two_level) {
- if (root_256(nesdev, root_vpbl, &new_root, pbl_count_4k) == 1) {
- if (new_root.pbl_pbase != 0)
- root_vpbl = &new_root;
- } else {
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- nesadapter->free_256pbl += pbl_count_256 + use_two_level;
- use_256_pbls = 0;
-
- if (pbl_count_4k == 1)
- use_two_level = 0;
- pbl_count = pbl_count_4k;
-
- if ((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) {
- nesadapter->free_4kpbl -= pbl_count + use_two_level;
- use_4k_pbls = 1;
- }
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-
- if (use_4k_pbls == 0)
- return -ENOMEM;
- }
- }
-
- opcode = NES_CQP_REGISTER_STAG | NES_CQP_STAG_RIGHTS_LOCAL_READ |
- NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR;
- if (acc & IB_ACCESS_LOCAL_WRITE)
- opcode |= NES_CQP_STAG_RIGHTS_LOCAL_WRITE;
- if (acc & IB_ACCESS_REMOTE_WRITE)
- opcode |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_REM_ACC_EN;
- if (acc & IB_ACCESS_REMOTE_READ)
- opcode |= NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_REM_ACC_EN;
- if (acc & IB_ACCESS_MW_BIND)
- opcode |= NES_CQP_STAG_RIGHTS_WINDOW_BIND | NES_CQP_STAG_REM_ACC_EN;
-
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, *iova_start);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, region_length);
-
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] =
- cpu_to_le32((u32)(region_length >> 8) & 0xff000000);
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |=
- cpu_to_le32(nespd->pd_id & 0x00007fff);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
-
- if (pbl_count == 0) {
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, single_buffer);
- } else {
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, root_vpbl->pbl_pbase);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, pbl_count);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (pg_cnt * 8));
-
- if (use_4k_pbls)
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE);
- }
- barrier();
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u,"
- " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
- stag, ret, cqp_request->major_code, cqp_request->minor_code);
- major_code = cqp_request->major_code;
- nes_put_cqp_request(nesdev, cqp_request);
-
- if ((!ret || major_code) && pbl_count != 0) {
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- if (use_256_pbls)
- nesadapter->free_256pbl += pbl_count + use_two_level;
- else if (use_4k_pbls)
- nesadapter->free_4kpbl += pbl_count + use_two_level;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- }
- if (new_root.pbl_pbase)
- pci_free_consistent(nesdev->pcidev, 512, new_root.pbl_vbase,
- new_root.pbl_pbase);
-
- if (!ret)
- return -ETIME;
- else if (major_code)
- return -EIO;
-
- *actual_pbl_cnt = pbl_count + use_two_level;
- *used_4k_pbls = use_4k_pbls;
- return 0;
-}
-
-
-/**
- * nes_reg_phys_mr
- */
-struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size,
- int acc, u64 *iova_start)
-{
- u64 region_length;
- struct nes_pd *nespd = to_nespd(ib_pd);
- struct nes_vnic *nesvnic = to_nesvnic(ib_pd->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_mr *nesmr;
- struct ib_mr *ibmr;
- struct nes_vpbl vpbl;
- struct nes_root_vpbl root_vpbl;
- u32 stag;
- unsigned long mask;
- u32 stag_index = 0;
- u32 next_stag_index = 0;
- u32 driver_key = 0;
- int err = 0;
- int ret = 0;
- u16 pbl_count = 0;
- u8 single_page = 1;
- u8 stag_key = 0;
-
- region_length = 0;
- vpbl.pbl_vbase = NULL;
- root_vpbl.pbl_vbase = NULL;
- root_vpbl.pbl_pbase = 0;
-
- get_random_bytes(&next_stag_index, sizeof(next_stag_index));
- stag_key = (u8)next_stag_index;
-
- driver_key = 0;
-
- next_stag_index >>= 8;
- next_stag_index %= nesadapter->max_mr;
-
- if ((addr ^ *iova_start) & ~PAGE_MASK)
- return ERR_PTR(-EINVAL);
-
- err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr,
- &stag_index, &next_stag_index, NES_RESOURCE_PHYS_MR);
- if (err) {
- return ERR_PTR(err);
- }
-
- nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
- if (!nesmr) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- return ERR_PTR(-ENOMEM);
- }
-
- /* Allocate a 4K buffer for the PBL */
- vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
- &vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
- vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
- if (!vpbl.pbl_vbase) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- ibmr = ERR_PTR(-ENOMEM);
- kfree(nesmr);
- goto reg_phys_err;
- }
-
-
- mask = !size;
-
- if (mask & ~PAGE_MASK) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
- ibmr = ERR_PTR(-EINVAL);
- kfree(nesmr);
- goto reg_phys_err;
- }
-
- region_length += size;
- vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)addr & PAGE_MASK);
- vpbl.pbl_vbase[0].pa_high = cpu_to_le32((u32)((((u64)addr) >> 32)));
-
- stag = stag_index << 8;
- stag |= driver_key;
- stag += (u32)stag_key;
-
- nes_debug(NES_DBG_MR, "Registering STag 0x%08X, VA = 0x%016lX,"
- " length = 0x%016lX, index = 0x%08X\n",
- stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index);
-
- /* Make the leaf PBL the root if only one PBL */
- root_vpbl.pbl_pbase = vpbl.pbl_pbase;
-
- if (single_page) {
- pbl_count = 0;
- } else {
- pbl_count = 1;
- }
- ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl,
- addr, pbl_count, 1, acc, iova_start,
- &nesmr->pbls_used, &nesmr->pbl_4k);
-
- if (ret == 0) {
- nesmr->ibmr.rkey = stag;
- nesmr->ibmr.lkey = stag;
- nesmr->mode = IWNES_MEMREG_TYPE_MEM;
- ibmr = &nesmr->ibmr;
- } else {
- kfree(nesmr);
- ibmr = ERR_PTR(-ENOMEM);
- }
-
-reg_phys_err:
- /* single PBL case */
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
- return ibmr;
-}
-
-
-/**
- * nes_get_dma_mr
- */
-static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc)
-{
- u64 kva = 0;
-
- nes_debug(NES_DBG_MR, "\n");
-
- return nes_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva);
-}
-
-/**
- * nes_reg_user_mr
- */
-static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *udata)
-{
- u64 iova_start;
- __le64 *pbl;
- u64 region_length;
- dma_addr_t last_dma_addr = 0;
- dma_addr_t first_dma_addr = 0;
- struct nes_pd *nespd = to_nespd(pd);
- struct nes_vnic *nesvnic = to_nesvnic(pd->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct ib_mr *ibmr = ERR_PTR(-EINVAL);
- struct sg_dma_page_iter dma_iter;
- struct nes_ucontext *nes_ucontext = rdma_udata_to_drv_context(
- udata, struct nes_ucontext, ibucontext);
- struct nes_pbl *nespbl;
- struct nes_mr *nesmr;
- struct ib_umem *region;
- struct nes_mem_reg_req req;
- struct nes_vpbl vpbl;
- struct nes_root_vpbl root_vpbl;
- int page_index;
- int page_count = 0;
- int err, pbl_depth = 0;
- int ret;
- u32 stag;
- u32 stag_index = 0;
- u32 next_stag_index;
- u32 driver_key;
- u32 root_pbl_index = 0;
- u32 cur_pbl_index = 0;
- u32 skip_pages;
- u16 pbl_count;
- u8 single_page = 1;
- u8 stag_key;
-
- region = ib_umem_get(udata, start, length, acc, 0);
- if (IS_ERR(region)) {
- return (struct ib_mr *)region;
- }
-
- nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u,"
- " offset = %u, page size = %lu.\n",
- (unsigned long int)start, (unsigned long int)virt, (u32)length,
- ib_umem_offset(region), BIT(region->page_shift));
-
- skip_pages = ((u32)ib_umem_offset(region)) >> 12;
-
- if (ib_copy_from_udata(&req, udata, sizeof(req))) {
- ib_umem_release(region);
- return ERR_PTR(-EFAULT);
- }
- nes_debug(NES_DBG_MR, "Memory Registration type = %08X.\n", req.reg_type);
-
- switch (req.reg_type) {
- case IWNES_MEMREG_TYPE_MEM:
- pbl_depth = 0;
- region_length = 0;
- vpbl.pbl_vbase = NULL;
- root_vpbl.pbl_vbase = NULL;
- root_vpbl.pbl_pbase = 0;
-
- get_random_bytes(&next_stag_index, sizeof(next_stag_index));
- stag_key = (u8)next_stag_index;
-
- driver_key = next_stag_index & 0x70000000;
-
- next_stag_index >>= 8;
- next_stag_index %= nesadapter->max_mr;
-
- err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
- nesadapter->max_mr, &stag_index, &next_stag_index, NES_RESOURCE_USER_MR);
- if (err) {
- ib_umem_release(region);
- return ERR_PTR(err);
- }
-
- nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
- if (!nesmr) {
- ib_umem_release(region);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- return ERR_PTR(-ENOMEM);
- }
- nesmr->region = region;
-
- for_each_sg_dma_page (region->sg_head.sgl, &dma_iter, region->nmap, 0) {
-
- region_length += PAGE_SIZE;
- region_length -= skip_pages << 12;
- skip_pages = 0;
- if ((page_count != 0) && (page_count << 12) - (ib_umem_offset(region) & (4096 - 1)) >= region->length)
- goto enough_pages;
- if ((page_count & 0x01FF) == 0) {
- if (page_count >= 1024 * 512) {
- ib_umem_release(region);
- nes_free_resource(nesadapter,
- nesadapter->allocated_mrs, stag_index);
- kfree(nesmr);
- ibmr = ERR_PTR(-E2BIG);
- goto reg_user_mr_err;
- }
- if (root_pbl_index == 1) {
- root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
- 8192, &root_vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
- root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
- if (!root_vpbl.pbl_vbase) {
- ib_umem_release(region);
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- stag_index);
- kfree(nesmr);
- ibmr = ERR_PTR(-ENOMEM);
- goto reg_user_mr_err;
- }
- root_vpbl.leaf_vpbl = kcalloc(1024,
- sizeof(*root_vpbl.leaf_vpbl),
- GFP_KERNEL);
- if (!root_vpbl.leaf_vpbl) {
- ib_umem_release(region);
- pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
- root_vpbl.pbl_pbase);
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- stag_index);
- kfree(nesmr);
- ibmr = ERR_PTR(-ENOMEM);
- goto reg_user_mr_err;
- }
- root_vpbl.pbl_vbase[0].pa_low =
- cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[0].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
- root_vpbl.leaf_vpbl[0] = vpbl;
- }
- vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
- &vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n",
- vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase);
- if (!vpbl.pbl_vbase) {
- ib_umem_release(region);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- ibmr = ERR_PTR(-ENOMEM);
- kfree(nesmr);
- goto reg_user_mr_err;
- }
- if (1 <= root_pbl_index) {
- root_vpbl.pbl_vbase[root_pbl_index].pa_low =
- cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[root_pbl_index].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
- root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
- }
- root_pbl_index++;
- cur_pbl_index = 0;
- }
- if (single_page) {
- if (page_count != 0) {
- if ((last_dma_addr + 4096) != sg_page_iter_dma_address(&dma_iter))
- single_page = 0;
- last_dma_addr = sg_page_iter_dma_address(&dma_iter);
- } else {
- first_dma_addr = sg_page_iter_dma_address(&dma_iter);
- last_dma_addr = first_dma_addr;
- }
- }
-
- vpbl.pbl_vbase[cur_pbl_index].pa_low =
- cpu_to_le32((u32)(sg_page_iter_dma_address(&dma_iter)));
- vpbl.pbl_vbase[cur_pbl_index].pa_high =
- cpu_to_le32((u32)((u64)(sg_page_iter_dma_address(&dma_iter))));
- cur_pbl_index++;
- page_count++;
- }
-
-enough_pages:
- nes_debug(NES_DBG_MR, "calculating stag, stag_index=0x%08x, driver_key=0x%08x,"
- " stag_key=0x%08x\n",
- stag_index, driver_key, stag_key);
- stag = stag_index << 8;
- stag |= driver_key;
- stag += (u32)stag_key;
-
- iova_start = virt;
- /* Make the leaf PBL the root if only one PBL */
- if (root_pbl_index == 1) {
- root_vpbl.pbl_pbase = vpbl.pbl_pbase;
- }
-
- if (single_page) {
- pbl_count = 0;
- } else {
- pbl_count = root_pbl_index;
- first_dma_addr = 0;
- }
- nes_debug(NES_DBG_MR, "Registering STag 0x%08X, VA = 0x%08X, length = 0x%08X,"
- " index = 0x%08X, region->length=0x%08llx, pbl_count = %u\n",
- stag, (unsigned int)iova_start,
- (unsigned int)region_length, stag_index,
- (unsigned long long)region->length, pbl_count);
- ret = nes_reg_mr(nesdev, nespd, stag, region->length, &root_vpbl,
- first_dma_addr, pbl_count, (u16)cur_pbl_index, acc,
- &iova_start, &nesmr->pbls_used, &nesmr->pbl_4k);
-
- nes_debug(NES_DBG_MR, "ret=%d\n", ret);
-
- if (ret == 0) {
- nesmr->ibmr.rkey = stag;
- nesmr->ibmr.lkey = stag;
- nesmr->mode = IWNES_MEMREG_TYPE_MEM;
- ibmr = &nesmr->ibmr;
- } else {
- ib_umem_release(region);
- kfree(nesmr);
- ibmr = ERR_PTR(-ENOMEM);
- }
-
-reg_user_mr_err:
- /* free the resources */
- if (root_pbl_index == 1) {
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- } else {
- for (page_index=0; page_index<root_pbl_index; page_index++) {
- pci_free_consistent(nesdev->pcidev, 4096,
- root_vpbl.leaf_vpbl[page_index].pbl_vbase,
- root_vpbl.leaf_vpbl[page_index].pbl_pbase);
- }
- kfree(root_vpbl.leaf_vpbl);
- pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
- root_vpbl.pbl_pbase);
- }
-
- nes_debug(NES_DBG_MR, "Leaving, ibmr=%p", ibmr);
-
- return ibmr;
- case IWNES_MEMREG_TYPE_QP:
- case IWNES_MEMREG_TYPE_CQ:
- if (!region->length) {
- nes_debug(NES_DBG_MR, "Unable to register zero length region for CQ\n");
- ib_umem_release(region);
- return ERR_PTR(-EINVAL);
- }
- nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL);
- if (!nespbl) {
- ib_umem_release(region);
- return ERR_PTR(-ENOMEM);
- }
- nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
- if (!nesmr) {
- ib_umem_release(region);
- kfree(nespbl);
- return ERR_PTR(-ENOMEM);
- }
- nesmr->region = region;
- pbl_depth = region->length >> 12;
- pbl_depth += (region->length & (4096-1)) ? 1 : 0;
- nespbl->pbl_size = pbl_depth*sizeof(u64);
- if (req.reg_type == IWNES_MEMREG_TYPE_QP) {
- nes_debug(NES_DBG_MR, "Attempting to allocate QP PBL memory");
- } else {
- nes_debug(NES_DBG_MR, "Attempting to allocate CP PBL memory");
- }
-
- nes_debug(NES_DBG_MR, " %u bytes, %u entries.\n",
- nespbl->pbl_size, pbl_depth);
- pbl = pci_alloc_consistent(nesdev->pcidev, nespbl->pbl_size,
- &nespbl->pbl_pbase);
- if (!pbl) {
- ib_umem_release(region);
- kfree(nesmr);
- kfree(nespbl);
- nes_debug(NES_DBG_MR, "Unable to allocate PBL memory\n");
- return ERR_PTR(-ENOMEM);
- }
-
- nespbl->pbl_vbase = (u64 *)pbl;
- nespbl->user_base = start;
- nes_debug(NES_DBG_MR, "Allocated PBL memory, %u bytes, pbl_pbase=%lx,"
- " pbl_vbase=%p user_base=0x%lx\n",
- nespbl->pbl_size, (unsigned long) nespbl->pbl_pbase,
- (void *) nespbl->pbl_vbase, nespbl->user_base);
-
- nespbl->page = sg_page(region->sg_head.sgl);
- for_each_sg_dma_page(region->sg_head.sgl, &dma_iter, region->nmap, 0) {
- ((__le32 *)pbl)[0] = cpu_to_le32((u32)(sg_page_iter_dma_address(&dma_iter)));
- ((__le32 *)pbl)[1] = cpu_to_le32(((u64)(sg_page_iter_dma_address(&dma_iter)))>>32);
- nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl,
- (unsigned long long)*pbl,
- le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0]));
- pbl++;
- }
-
- if (req.reg_type == IWNES_MEMREG_TYPE_QP) {
- list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list);
- } else {
- list_add_tail(&nespbl->list, &nes_ucontext->cq_reg_mem_list);
- }
- nesmr->ibmr.rkey = -1;
- nesmr->ibmr.lkey = -1;
- nesmr->mode = req.reg_type;
- return &nesmr->ibmr;
- }
-
- ib_umem_release(region);
- return ERR_PTR(-ENOSYS);
-}
-
-
-/**
- * nes_dereg_mr
- */
-static int nes_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
-{
- struct nes_mr *nesmr = to_nesmr(ib_mr);
- struct nes_vnic *nesvnic = to_nesvnic(ib_mr->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- unsigned long flags;
- int ret;
- u16 major_code;
- u16 minor_code;
-
-
- if (nesmr->pages)
- pci_free_consistent(nesdev->pcidev,
- nesmr->max_pages * sizeof(u64),
- nesmr->pages,
- nesmr->paddr);
-
- if (nesmr->region) {
- ib_umem_release(nesmr->region);
- }
- if (nesmr->mode != IWNES_MEMREG_TYPE_MEM) {
- kfree(nesmr);
- return 0;
- }
-
- /* Deallocate the region with the adapter */
-
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
- return -ENOMEM;
- }
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
-
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- NES_CQP_DEALLOCATE_STAG | NES_CQP_STAG_VA_TO |
- NES_CQP_STAG_DEALLOC_PBLS | NES_CQP_STAG_MR);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ib_mr->rkey);
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X completed\n", ib_mr->rkey);
- ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_MR, "Deallocate STag 0x%08X completed, wait_event_timeout ret = %u,"
- " CQP Major:Minor codes = 0x%04X:0x%04X\n",
- ib_mr->rkey, ret, cqp_request->major_code, cqp_request->minor_code);
-
- major_code = cqp_request->major_code;
- minor_code = cqp_request->minor_code;
-
- nes_put_cqp_request(nesdev, cqp_request);
-
- if (!ret) {
- nes_debug(NES_DBG_MR, "Timeout waiting to destroy STag,"
- " ib_mr=%p, rkey = 0x%08X\n",
- ib_mr, ib_mr->rkey);
- return -ETIME;
- } else if (major_code) {
- nes_debug(NES_DBG_MR, "Error (0x%04X:0x%04X) while attempting"
- " to destroy STag, ib_mr=%p, rkey = 0x%08X\n",
- major_code, minor_code, ib_mr, ib_mr->rkey);
- return -EIO;
- }
-
- if (nesmr->pbls_used != 0) {
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- if (nesmr->pbl_4k) {
- nesadapter->free_4kpbl += nesmr->pbls_used;
- if (nesadapter->free_4kpbl > nesadapter->max_4kpbl)
- printk(KERN_ERR PFX "free 4KB PBLs(%u) has "
- "exceeded the max(%u)\n",
- nesadapter->free_4kpbl,
- nesadapter->max_4kpbl);
- } else {
- nesadapter->free_256pbl += nesmr->pbls_used;
- if (nesadapter->free_256pbl > nesadapter->max_256pbl)
- printk(KERN_ERR PFX "free 256B PBLs(%u) has "
- "exceeded the max(%u)\n",
- nesadapter->free_256pbl,
- nesadapter->max_256pbl);
- }
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- }
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- (ib_mr->rkey & 0x0fffff00) >> 8);
-
- kfree(nesmr);
-
- return 0;
-}
-
-
-/**
- * show_rev
- */
-static ssize_t hw_rev_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct nes_ib_device *nesibdev =
- rdma_device_to_drv_device(dev, struct nes_ib_device, ibdev);
- struct nes_vnic *nesvnic = nesibdev->nesvnic;
-
- nes_debug(NES_DBG_INIT, "\n");
- return sprintf(buf, "%x\n", nesvnic->nesdev->nesadapter->hw_rev);
-}
-static DEVICE_ATTR_RO(hw_rev);
-
-/**
- * show_hca
- */
-static ssize_t hca_type_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- nes_debug(NES_DBG_INIT, "\n");
- return sprintf(buf, "NES020\n");
-}
-static DEVICE_ATTR_RO(hca_type);
-
-/**
- * show_board
- */
-static ssize_t board_id_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- nes_debug(NES_DBG_INIT, "\n");
- return sprintf(buf, "%.*s\n", 32, "NES020 Board ID");
-}
-static DEVICE_ATTR_RO(board_id);
-
-static struct attribute *nes_dev_attributes[] = {
- &dev_attr_hw_rev.attr,
- &dev_attr_hca_type.attr,
- &dev_attr_board_id.attr,
- NULL
-};
-
-static const struct attribute_group nes_attr_group = {
- .attrs = nes_dev_attributes,
-};
-
-/**
- * nes_query_qp
- */
-static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_qp_init_attr *init_attr)
-{
- struct nes_qp *nesqp = to_nesqp(ibqp);
-
- nes_debug(NES_DBG_QP, "\n");
-
- attr->qp_access_flags = 0;
- attr->cap.max_send_wr = nesqp->hwqp.sq_size;
- attr->cap.max_recv_wr = nesqp->hwqp.rq_size;
- attr->cap.max_recv_sge = 1;
- if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA)
- attr->cap.max_inline_data = 0;
- else
- attr->cap.max_inline_data = 64;
-
- init_attr->event_handler = nesqp->ibqp.event_handler;
- init_attr->qp_context = nesqp->ibqp.qp_context;
- init_attr->send_cq = nesqp->ibqp.send_cq;
- init_attr->recv_cq = nesqp->ibqp.recv_cq;
- init_attr->srq = nesqp->ibqp.srq;
- init_attr->cap = attr->cap;
-
- return 0;
-}
-
-
-/**
- * nes_hw_modify_qp
- */
-int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
- u32 next_iwarp_state, u32 termlen, u32 wait_completion)
-{
- struct nes_hw_cqp_wqe *cqp_wqe;
- /* struct iw_cm_id *cm_id = nesqp->cm_id; */
- /* struct iw_cm_event cm_event; */
- struct nes_cqp_request *cqp_request;
- int ret;
- u16 major_code;
-
- nes_debug(NES_DBG_MOD_QP, "QP%u, refcount=%d\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount));
-
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_MOD_QP, "Failed to get a cqp_request.\n");
- return -ENOMEM;
- }
- if (wait_completion) {
- cqp_request->waiting = 1;
- } else {
- cqp_request->waiting = 0;
- }
- cqp_wqe = &cqp_request->cqp_wqe;
-
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- NES_CQP_MODIFY_QP | NES_CQP_QP_TYPE_IWARP | next_iwarp_state);
- nes_debug(NES_DBG_MOD_QP, "using next_iwarp_state=%08x, wqe_words=%08x\n",
- next_iwarp_state, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]));
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase);
-
- /* If sending a terminate message, fill in the length (in words) */
- if (((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) == NES_CQP_QP_IWARP_STATE_TERMINATE) &&
- !(next_iwarp_state & NES_CQP_QP_TERM_DONT_SEND_TERM_MSG)) {
- termlen = ((termlen + 3) >> 2) << NES_CQP_OP_TERMLEN_SHIFT;
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_NEW_MSS_IDX, termlen);
- }
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- if (wait_completion) {
- /* nes_debug(NES_DBG_MOD_QP, "Waiting for modify iWARP QP%u to complete.\n",
- nesqp->hwqp.qp_id); */
- ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_MOD_QP, "Modify iwarp QP%u completed, wait_event_timeout ret=%u, "
- "CQP Major:Minor codes = 0x%04X:0x%04X.\n",
- nesqp->hwqp.qp_id, ret, cqp_request->major_code, cqp_request->minor_code);
- major_code = cqp_request->major_code;
- if (major_code) {
- nes_debug(NES_DBG_MOD_QP, "Modify iwarp QP%u failed"
- "CQP Major:Minor codes = 0x%04X:0x%04X, intended next state = 0x%08X.\n",
- nesqp->hwqp.qp_id, cqp_request->major_code,
- cqp_request->minor_code, next_iwarp_state);
- }
-
- nes_put_cqp_request(nesdev, cqp_request);
-
- if (!ret)
- return -ETIME;
- else if (major_code)
- return -EIO;
- else
- return 0;
- } else {
- return 0;
- }
-}
-
-
-/**
- * nes_modify_qp
- */
-int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
-{
- struct nes_qp *nesqp = to_nesqp(ibqp);
- struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- /* u32 cqp_head; */
- /* u32 counter; */
- u32 next_iwarp_state = 0;
- int err;
- unsigned long qplockflags;
- int ret;
- u16 original_last_aeq;
- u8 issue_modify_qp = 0;
- u8 dont_wait = 0;
-
- nes_debug(NES_DBG_MOD_QP, "QP%u: QP State=%u, cur QP State=%u,"
- " iwarp_state=0x%X, refcount=%d\n",
- nesqp->hwqp.qp_id, attr->qp_state, nesqp->ibqp_state,
- nesqp->iwarp_state, atomic_read(&nesqp->refcount));
-
- spin_lock_irqsave(&nesqp->lock, qplockflags);
-
- nes_debug(NES_DBG_MOD_QP, "QP%u: hw_iwarp_state=0x%X, hw_tcp_state=0x%X,"
- " QP Access Flags=0x%X, attr_mask = 0x%0x\n",
- nesqp->hwqp.qp_id, nesqp->hw_iwarp_state,
- nesqp->hw_tcp_state, attr->qp_access_flags, attr_mask);
-
- if (attr_mask & IB_QP_STATE) {
- switch (attr->qp_state) {
- case IB_QPS_INIT:
- nes_debug(NES_DBG_MOD_QP, "QP%u: new state = init\n",
- nesqp->hwqp.qp_id);
- if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_IDLE) {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return -EINVAL;
- }
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE;
- issue_modify_qp = 1;
- break;
- case IB_QPS_RTR:
- nes_debug(NES_DBG_MOD_QP, "QP%u: new state = rtr\n",
- nesqp->hwqp.qp_id);
- if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_IDLE) {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return -EINVAL;
- }
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE;
- issue_modify_qp = 1;
- break;
- case IB_QPS_RTS:
- nes_debug(NES_DBG_MOD_QP, "QP%u: new state = rts\n",
- nesqp->hwqp.qp_id);
- if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_RTS) {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return -EINVAL;
- }
- if (nesqp->cm_id == NULL) {
- nes_debug(NES_DBG_MOD_QP, "QP%u: Failing attempt to move QP to RTS without a CM_ID. \n",
- nesqp->hwqp.qp_id );
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return -EINVAL;
- }
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_RTS;
- if (nesqp->iwarp_state != NES_CQP_QP_IWARP_STATE_RTS)
- next_iwarp_state |= NES_CQP_QP_CONTEXT_VALID |
- NES_CQP_QP_ARP_VALID | NES_CQP_QP_ORD_VALID;
- issue_modify_qp = 1;
- nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_ESTABLISHED;
- nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_RTS;
- nesqp->hte_added = 1;
- break;
- case IB_QPS_SQD:
- issue_modify_qp = 1;
- nes_debug(NES_DBG_MOD_QP, "QP%u: new state=closing. SQ head=%u, SQ tail=%u\n",
- nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail);
- if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_CLOSING) {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return 0;
- } else {
- if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_CLOSING) {
- nes_debug(NES_DBG_MOD_QP, "QP%u: State change to closing"
- " ignored due to current iWARP state\n",
- nesqp->hwqp.qp_id);
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return -EINVAL;
- }
- if (nesqp->hw_iwarp_state != NES_AEQE_IWARP_STATE_RTS) {
- nes_debug(NES_DBG_MOD_QP, "QP%u: State change to closing"
- " already done based on hw state.\n",
- nesqp->hwqp.qp_id);
- issue_modify_qp = 0;
- }
- switch (nesqp->hw_iwarp_state) {
- case NES_AEQE_IWARP_STATE_CLOSING:
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
- break;
- case NES_AEQE_IWARP_STATE_TERMINATE:
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
- break;
- case NES_AEQE_IWARP_STATE_ERROR:
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
- break;
- default:
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
- nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
- break;
- }
- }
- break;
- case IB_QPS_SQE:
- nes_debug(NES_DBG_MOD_QP, "QP%u: new state = terminate\n",
- nesqp->hwqp.qp_id);
- if (nesqp->iwarp_state>=(u32)NES_CQP_QP_IWARP_STATE_TERMINATE) {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return -EINVAL;
- }
- /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
- nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE;
- issue_modify_qp = 1;
- break;
- case IB_QPS_ERR:
- case IB_QPS_RESET:
- if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_ERROR) {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return -EINVAL;
- }
- nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n",
- nesqp->hwqp.qp_id);
- if (nesqp->term_flags)
- del_timer(&nesqp->terminate_timer);
-
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
- /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */
- if (nesqp->hte_added) {
- nes_debug(NES_DBG_MOD_QP, "set CQP_QP_DEL_HTE\n");
- next_iwarp_state |= NES_CQP_QP_DEL_HTE;
- nesqp->hte_added = 0;
- }
- if ((nesqp->hw_tcp_state > NES_AEQE_TCP_STATE_CLOSED) &&
- (nesdev->iw_status) &&
- (nesqp->hw_tcp_state != NES_AEQE_TCP_STATE_TIME_WAIT)) {
- next_iwarp_state |= NES_CQP_QP_RESET;
- } else {
- nes_debug(NES_DBG_MOD_QP, "QP%u NOT setting NES_CQP_QP_RESET since TCP state = %u\n",
- nesqp->hwqp.qp_id, nesqp->hw_tcp_state);
- dont_wait = 1;
- }
- issue_modify_qp = 1;
- nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR;
- break;
- default:
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return -EINVAL;
- break;
- }
-
- nesqp->ibqp_state = attr->qp_state;
- nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK;
- nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n",
- nesqp->iwarp_state);
- }
-
- if (attr_mask & IB_QP_ACCESS_FLAGS) {
- if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE) {
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN |
- NES_QPCONTEXT_MISC_RDMA_READ_EN);
- issue_modify_qp = 1;
- }
- if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) {
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN);
- issue_modify_qp = 1;
- }
- if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) {
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_READ_EN);
- issue_modify_qp = 1;
- }
- if (attr->qp_access_flags & IB_ACCESS_MW_BIND) {
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WBIND_EN);
- issue_modify_qp = 1;
- }
-
- if (nesqp->user_mode) {
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN |
- NES_QPCONTEXT_MISC_RDMA_READ_EN);
- issue_modify_qp = 1;
- }
- }
-
- original_last_aeq = nesqp->last_aeq;
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
-
- nes_debug(NES_DBG_MOD_QP, "issue_modify_qp=%u\n", issue_modify_qp);
-
- ret = 0;
-
-
- if (issue_modify_qp) {
- nes_debug(NES_DBG_MOD_QP, "call nes_hw_modify_qp\n");
- ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 1);
- if (ret)
- nes_debug(NES_DBG_MOD_QP, "nes_hw_modify_qp (next_iwarp_state = 0x%08X)"
- " failed for QP%u.\n",
- next_iwarp_state, nesqp->hwqp.qp_id);
-
- }
-
- if ((issue_modify_qp) && (nesqp->ibqp_state > IB_QPS_RTS)) {
- nes_debug(NES_DBG_MOD_QP, "QP%u Issued ModifyQP refcount (%d),"
- " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
- original_last_aeq, nesqp->last_aeq);
- if (!ret || original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
- if (dont_wait) {
- if (nesqp->cm_id && nesqp->hw_tcp_state != 0) {
- nes_debug(NES_DBG_MOD_QP, "QP%u Queuing fake disconnect for QP refcount (%d),"
- " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
- original_last_aeq, nesqp->last_aeq);
- /* this one is for the cm_disconnect thread */
- spin_lock_irqsave(&nesqp->lock, qplockflags);
- nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_cm_disconn(nesqp);
- } else {
- nes_debug(NES_DBG_MOD_QP, "QP%u No fake disconnect, QP refcount=%d\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount));
- }
- } else {
- spin_lock_irqsave(&nesqp->lock, qplockflags);
- if (nesqp->cm_id) {
- /* These two are for the timer thread */
- if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
- nesqp->cm_id->add_ref(nesqp->cm_id);
- nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d),"
- " need ae to finish up, original_last_aeq = 0x%04X."
- " last_aeq = 0x%04X, scheduling timer.\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
- original_last_aeq, nesqp->last_aeq);
- schedule_nes_timer(nesqp->cm_node, (struct sk_buff *) nesqp, NES_TIMER_TYPE_CLOSE, 1, 0);
- }
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- } else {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d),"
- " need ae to finish up, original_last_aeq = 0x%04X."
- " last_aeq = 0x%04X.\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
- original_last_aeq, nesqp->last_aeq);
- }
- }
- } else {
- nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up,"
- " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
- original_last_aeq, nesqp->last_aeq);
- }
- } else {
- nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up,"
- " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
- original_last_aeq, nesqp->last_aeq);
- }
-
- err = 0;
-
- nes_debug(NES_DBG_MOD_QP, "QP%u Leaving, refcount=%d\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount));
-
- return err;
-}
-
-static inline void
-fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, const struct ib_send_wr *ib_wr,
- u32 uselkey)
-{
- int sge_index;
- int total_payload_length = 0;
- for (sge_index = 0; sge_index < ib_wr->num_sge; sge_index++) {
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX+(sge_index*4),
- ib_wr->sg_list[sge_index].addr);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_LENGTH0_IDX + (sge_index*4),
- ib_wr->sg_list[sge_index].length);
- if (uselkey)
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX + (sge_index*4),
- (ib_wr->sg_list[sge_index].lkey));
- else
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX + (sge_index*4), 0);
-
- total_payload_length += ib_wr->sg_list[sge_index].length;
- }
- nes_debug(NES_DBG_IW_TX, "UC UC UC, sending total_payload_length=%u \n",
- total_payload_length);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
- total_payload_length);
-}
-
-/**
- * nes_post_send
- */
-static int nes_post_send(struct ib_qp *ibqp, const struct ib_send_wr *ib_wr,
- const struct ib_send_wr **bad_wr)
-{
- u64 u64temp;
- unsigned long flags = 0;
- struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_qp *nesqp = to_nesqp(ibqp);
- struct nes_hw_qp_wqe *wqe;
- int err = 0;
- u32 qsize = nesqp->hwqp.sq_size;
- u32 head;
- u32 wqe_misc = 0;
- u32 wqe_count = 0;
- u32 counter;
-
- if (nesqp->ibqp_state > IB_QPS_RTS) {
- err = -EINVAL;
- goto out;
- }
-
- spin_lock_irqsave(&nesqp->lock, flags);
-
- head = nesqp->hwqp.sq_head;
-
- while (ib_wr) {
- /* Check for QP error */
- if (nesqp->term_flags) {
- err = -EINVAL;
- break;
- }
-
- /* Check for SQ overflow */
- if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
- err = -ENOMEM;
- break;
- }
-
- wqe = &nesqp->hwqp.sq_vbase[head];
- /* nes_debug(NES_DBG_IW_TX, "processing sq wqe for QP%u at %p, head = %u.\n",
- nesqp->hwqp.qp_id, wqe, head); */
- nes_fill_init_qp_wqe(wqe, nesqp, head);
- u64temp = (u64)(ib_wr->wr_id);
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX,
- u64temp);
- switch (ib_wr->opcode) {
- case IB_WR_SEND:
- case IB_WR_SEND_WITH_INV:
- if (IB_WR_SEND == ib_wr->opcode) {
- if (ib_wr->send_flags & IB_SEND_SOLICITED)
- wqe_misc = NES_IWARP_SQ_OP_SENDSE;
- else
- wqe_misc = NES_IWARP_SQ_OP_SEND;
- } else {
- if (ib_wr->send_flags & IB_SEND_SOLICITED)
- wqe_misc = NES_IWARP_SQ_OP_SENDSEINV;
- else
- wqe_misc = NES_IWARP_SQ_OP_SENDINV;
-
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX,
- ib_wr->ex.invalidate_rkey);
- }
-
- if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
- err = -EINVAL;
- break;
- }
-
- if (ib_wr->send_flags & IB_SEND_FENCE)
- wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
-
- if ((ib_wr->send_flags & IB_SEND_INLINE) &&
- ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
- (ib_wr->sg_list[0].length <= 64)) {
- memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
- (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
- ib_wr->sg_list[0].length);
- wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
- } else {
- fill_wqe_sg_send(wqe, ib_wr, 1);
- }
-
- break;
- case IB_WR_RDMA_WRITE:
- wqe_misc = NES_IWARP_SQ_OP_RDMAW;
- if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
- nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n",
- ib_wr->num_sge, nesdev->nesadapter->max_sge);
- err = -EINVAL;
- break;
- }
-
- if (ib_wr->send_flags & IB_SEND_FENCE)
- wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
-
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
- rdma_wr(ib_wr)->rkey);
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
- rdma_wr(ib_wr)->remote_addr);
-
- if ((ib_wr->send_flags & IB_SEND_INLINE) &&
- ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
- (ib_wr->sg_list[0].length <= 64)) {
- memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
- (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
- ib_wr->sg_list[0].length);
- wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
- } else {
- fill_wqe_sg_send(wqe, ib_wr, 1);
- }
-
- wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] =
- wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX];
- break;
- case IB_WR_RDMA_READ:
- case IB_WR_RDMA_READ_WITH_INV:
- /* iWARP only supports 1 sge for RDMA reads */
- if (ib_wr->num_sge > 1) {
- nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n",
- ib_wr->num_sge);
- err = -EINVAL;
- break;
- }
- if (ib_wr->opcode == IB_WR_RDMA_READ) {
- wqe_misc = NES_IWARP_SQ_OP_RDMAR;
- } else {
- wqe_misc = NES_IWARP_SQ_OP_RDMAR_LOCINV;
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX,
- ib_wr->ex.invalidate_rkey);
- }
-
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
- rdma_wr(ib_wr)->remote_addr);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
- rdma_wr(ib_wr)->rkey);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
- ib_wr->sg_list->length);
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
- ib_wr->sg_list->addr);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX,
- ib_wr->sg_list->lkey);
- break;
- case IB_WR_LOCAL_INV:
- wqe_misc = NES_IWARP_SQ_OP_LOCINV;
- set_wqe_32bit_value(wqe->wqe_words,
- NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX,
- ib_wr->ex.invalidate_rkey);
- break;
- case IB_WR_REG_MR:
- {
- struct nes_mr *mr = to_nesmr(reg_wr(ib_wr)->mr);
- int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size);
- int flags = reg_wr(ib_wr)->access;
-
- if (mr->npages > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) {
- nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n");
- err = -EINVAL;
- break;
- }
- wqe_misc = NES_IWARP_SQ_OP_FAST_REG;
- set_wqe_64bit_value(wqe->wqe_words,
- NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX,
- mr->ibmr.iova);
- set_wqe_32bit_value(wqe->wqe_words,
- NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
- lower_32_bits(mr->ibmr.length));
- set_wqe_32bit_value(wqe->wqe_words,
- NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0);
- set_wqe_32bit_value(wqe->wqe_words,
- NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX,
- reg_wr(ib_wr)->key);
-
- if (page_shift == 12) {
- wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K;
- } else if (page_shift == 21) {
- wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M;
- } else {
- nes_debug(NES_DBG_IW_TX, "Invalid page shift,"
- " ib_wr=%u, max=1\n", ib_wr->num_sge);
- err = -EINVAL;
- break;
- }
-
- /* Set access_flags */
- wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ;
- if (flags & IB_ACCESS_LOCAL_WRITE)
- wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE;
-
- if (flags & IB_ACCESS_REMOTE_WRITE)
- wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE;
-
- if (flags & IB_ACCESS_REMOTE_READ)
- wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ;
-
- if (flags & IB_ACCESS_MW_BIND)
- wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND;
-
- /* Fill in PBL info: */
- set_wqe_64bit_value(wqe->wqe_words,
- NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX,
- mr->paddr);
-
- set_wqe_32bit_value(wqe->wqe_words,
- NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX,
- mr->npages * 8);
-
- nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, "
- "length: %lld, rkey: %0x, pgl_paddr: %llx, "
- "page_list_len: %u, wqe_misc: %x\n",
- (unsigned long long) mr->ibmr.iova,
- mr->ibmr.length,
- reg_wr(ib_wr)->key,
- (unsigned long long) mr->paddr,
- mr->npages,
- wqe_misc);
- break;
- }
- default:
- /* error */
- err = -EINVAL;
- break;
- }
-
- if (err)
- break;
-
- if ((ib_wr->send_flags & IB_SEND_SIGNALED) || nesqp->sig_all)
- wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
-
- wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(wqe_misc);
-
- ib_wr = ib_wr->next;
- head++;
- wqe_count++;
- if (head >= qsize)
- head = 0;
-
- }
-
- nesqp->hwqp.sq_head = head;
- barrier();
- while (wqe_count) {
- counter = min(wqe_count, ((u32)255));
- wqe_count -= counter;
- nes_write32(nesdev->regs + NES_WQE_ALLOC,
- (counter << 24) | 0x00800000 | nesqp->hwqp.qp_id);
- }
-
- spin_unlock_irqrestore(&nesqp->lock, flags);
-
-out:
- if (err)
- *bad_wr = ib_wr;
- return err;
-}
-
-
-/**
- * nes_post_recv
- */
-static int nes_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr,
- const struct ib_recv_wr **bad_wr)
-{
- u64 u64temp;
- unsigned long flags = 0;
- struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_qp *nesqp = to_nesqp(ibqp);
- struct nes_hw_qp_wqe *wqe;
- int err = 0;
- int sge_index;
- u32 qsize = nesqp->hwqp.rq_size;
- u32 head;
- u32 wqe_count = 0;
- u32 counter;
- u32 total_payload_length;
-
- if (nesqp->ibqp_state > IB_QPS_RTS) {
- err = -EINVAL;
- goto out;
- }
-
- spin_lock_irqsave(&nesqp->lock, flags);
-
- head = nesqp->hwqp.rq_head;
-
- while (ib_wr) {
- /* Check for QP error */
- if (nesqp->term_flags) {
- err = -EINVAL;
- break;
- }
-
- if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
- err = -EINVAL;
- break;
- }
- /* Check for RQ overflow */
- if (((head + (2 * qsize) - nesqp->hwqp.rq_tail) % qsize) == (qsize - 1)) {
- err = -ENOMEM;
- break;
- }
-
- nes_debug(NES_DBG_IW_RX, "ibwr sge count = %u.\n", ib_wr->num_sge);
- wqe = &nesqp->hwqp.rq_vbase[head];
-
- /* nes_debug(NES_DBG_IW_RX, "QP%u:processing rq wqe at %p, head = %u.\n",
- nesqp->hwqp.qp_id, wqe, head); */
- nes_fill_init_qp_wqe(wqe, nesqp, head);
- u64temp = (u64)(ib_wr->wr_id);
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX,
- u64temp);
- total_payload_length = 0;
- for (sge_index=0; sge_index < ib_wr->num_sge; sge_index++) {
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_FRAG0_LOW_IDX+(sge_index*4),
- ib_wr->sg_list[sge_index].addr);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_LENGTH0_IDX+(sge_index*4),
- ib_wr->sg_list[sge_index].length);
- set_wqe_32bit_value(wqe->wqe_words,NES_IWARP_RQ_WQE_STAG0_IDX+(sge_index*4),
- ib_wr->sg_list[sge_index].lkey);
-
- total_payload_length += ib_wr->sg_list[sge_index].length;
- }
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX,
- total_payload_length);
-
- ib_wr = ib_wr->next;
- head++;
- wqe_count++;
- if (head >= qsize)
- head = 0;
- }
-
- nesqp->hwqp.rq_head = head;
- barrier();
- while (wqe_count) {
- counter = min(wqe_count, ((u32)255));
- wqe_count -= counter;
- nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter<<24) | nesqp->hwqp.qp_id);
- }
-
- spin_unlock_irqrestore(&nesqp->lock, flags);
-
-out:
- if (err)
- *bad_wr = ib_wr;
- return err;
-}
-
-/**
- * nes_drain_sq - drain sq
- * @ibqp: pointer to ibqp
- */
-static void nes_drain_sq(struct ib_qp *ibqp)
-{
- struct nes_qp *nesqp = to_nesqp(ibqp);
-
- if (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head)
- wait_for_completion(&nesqp->sq_drained);
-}
-
-/**
- * nes_drain_rq - drain rq
- * @ibqp: pointer to ibqp
- */
-static void nes_drain_rq(struct ib_qp *ibqp)
-{
- struct nes_qp *nesqp = to_nesqp(ibqp);
-
- if (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head)
- wait_for_completion(&nesqp->rq_drained);
-}
-
-/**
- * nes_poll_cq
- */
-static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
- u64 u64temp;
- u64 wrid;
- unsigned long flags = 0;
- struct nes_vnic *nesvnic = to_nesvnic(ibcq->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_cq *nescq = to_nescq(ibcq);
- struct nes_qp *nesqp;
- struct nes_hw_cqe cqe;
- u32 head;
- u32 wq_tail = 0;
- u32 cq_size;
- u32 cqe_count = 0;
- u32 wqe_index;
- u32 u32temp;
- u32 move_cq_head = 1;
- u32 err_code;
-
- nes_debug(NES_DBG_CQ, "\n");
-
- spin_lock_irqsave(&nescq->lock, flags);
-
- head = nescq->hw_cq.cq_head;
- cq_size = nescq->hw_cq.cq_size;
-
- while (cqe_count < num_entries) {
- if ((le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) &
- NES_CQE_VALID) == 0)
- break;
-
- /*
- * Make sure we read CQ entry contents *after*
- * we've checked the valid bit.
- */
- rmb();
-
- cqe = nescq->hw_cq.cq_vbase[head];
- u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
- wqe_index = u32temp & (nesdev->nesadapter->max_qp_wr - 1);
- u32temp &= ~(NES_SW_CONTEXT_ALIGN-1);
- /* parse CQE, get completion context from WQE (either rq or sq) */
- u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
- ((u64)u32temp);
-
- if (u64temp) {
- nesqp = (struct nes_qp *)(unsigned long)u64temp;
- memset(entry, 0, sizeof *entry);
- if (cqe.cqe_words[NES_CQE_ERROR_CODE_IDX] == 0) {
- entry->status = IB_WC_SUCCESS;
- } else {
- err_code = le32_to_cpu(cqe.cqe_words[NES_CQE_ERROR_CODE_IDX]);
- if (NES_IWARP_CQE_MAJOR_DRV == (err_code >> 16)) {
- entry->status = err_code & 0x0000ffff;
-
- /* The rest of the cqe's will be marked as flushed */
- nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX] =
- cpu_to_le32((NES_IWARP_CQE_MAJOR_FLUSH << 16) |
- NES_IWARP_CQE_MINOR_FLUSH);
- } else
- entry->status = IB_WC_WR_FLUSH_ERR;
- }
-
- entry->qp = &nesqp->ibqp;
- entry->src_qp = nesqp->hwqp.qp_id;
-
- if (le32_to_cpu(cqe.cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_SQ) {
- if (nesqp->skip_lsmm) {
- nesqp->skip_lsmm = 0;
- nesqp->hwqp.sq_tail++;
- }
-
- /* Working on a SQ Completion*/
- wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index].
- wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) |
- ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index].
- wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX])));
- entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
- wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]);
-
- switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
- wqe_words[NES_IWARP_SQ_WQE_MISC_IDX]) & 0x3f) {
- case NES_IWARP_SQ_OP_RDMAW:
- nes_debug(NES_DBG_CQ, "Operation = RDMA WRITE.\n");
- entry->opcode = IB_WC_RDMA_WRITE;
- break;
- case NES_IWARP_SQ_OP_RDMAR:
- nes_debug(NES_DBG_CQ, "Operation = RDMA READ.\n");
- entry->opcode = IB_WC_RDMA_READ;
- entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
- wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX]);
- break;
- case NES_IWARP_SQ_OP_SENDINV:
- case NES_IWARP_SQ_OP_SENDSEINV:
- case NES_IWARP_SQ_OP_SEND:
- case NES_IWARP_SQ_OP_SENDSE:
- nes_debug(NES_DBG_CQ, "Operation = Send.\n");
- entry->opcode = IB_WC_SEND;
- break;
- case NES_IWARP_SQ_OP_LOCINV:
- entry->opcode = IB_WC_LOCAL_INV;
- break;
- case NES_IWARP_SQ_OP_FAST_REG:
- entry->opcode = IB_WC_REG_MR;
- break;
- }
-
- nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
- if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head)) {
- move_cq_head = 0;
- wq_tail = nesqp->hwqp.sq_tail;
- }
- } else {
- /* Working on a RQ Completion*/
- entry->byte_len = le32_to_cpu(cqe.cqe_words[NES_CQE_PAYLOAD_LENGTH_IDX]);
- wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) |
- ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32);
- entry->opcode = IB_WC_RECV;
-
- nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1);
- if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head)) {
- move_cq_head = 0;
- wq_tail = nesqp->hwqp.rq_tail;
- }
- }
-
- if (nesqp->iwarp_state > NES_CQP_QP_IWARP_STATE_RTS) {
- if (nesqp->hwqp.sq_tail == nesqp->hwqp.sq_head)
- complete(&nesqp->sq_drained);
- if (nesqp->hwqp.rq_tail == nesqp->hwqp.rq_head)
- complete(&nesqp->rq_drained);
- }
-
- entry->wr_id = wrid;
- entry++;
- cqe_count++;
- }
-
- if (move_cq_head) {
- nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
- if (++head >= cq_size)
- head = 0;
- nescq->polled_completions++;
-
- if ((nescq->polled_completions > (cq_size / 2)) ||
- (nescq->polled_completions == 255)) {
- nes_debug(NES_DBG_CQ, "CQ%u Issuing CQE Allocate since more than half of cqes"
- " are pending %u of %u.\n",
- nescq->hw_cq.cq_number, nescq->polled_completions, cq_size);
- nes_write32(nesdev->regs+NES_CQE_ALLOC,
- nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
- nescq->polled_completions = 0;
- }
- } else {
- /* Update the wqe index and set status to flush */
- wqe_index = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
- wqe_index = (wqe_index & (~(nesdev->nesadapter->max_qp_wr - 1))) | wq_tail;
- nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] =
- cpu_to_le32(wqe_index);
- move_cq_head = 1; /* ready for next pass */
- }
- }
-
- if (nescq->polled_completions) {
- nes_write32(nesdev->regs+NES_CQE_ALLOC,
- nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
- nescq->polled_completions = 0;
- }
-
- nescq->hw_cq.cq_head = head;
- nes_debug(NES_DBG_CQ, "Reporting %u completions for CQ%u.\n",
- cqe_count, nescq->hw_cq.cq_number);
-
- spin_unlock_irqrestore(&nescq->lock, flags);
-
- return cqe_count;
-}
-
-
-/**
- * nes_req_notify_cq
- */
-static int nes_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
- {
- struct nes_vnic *nesvnic = to_nesvnic(ibcq->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_cq *nescq = to_nescq(ibcq);
- u32 cq_arm;
-
- nes_debug(NES_DBG_CQ, "Requesting notification for CQ%u.\n",
- nescq->hw_cq.cq_number);
-
- cq_arm = nescq->hw_cq.cq_number;
- if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP)
- cq_arm |= NES_CQE_ALLOC_NOTIFY_NEXT;
- else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
- cq_arm |= NES_CQE_ALLOC_NOTIFY_SE;
- else
- return -EINVAL;
-
- nes_write32(nesdev->regs+NES_CQE_ALLOC, cq_arm);
- nes_read32(nesdev->regs+NES_CQE_ALLOC);
-
- return 0;
-}
-
-static int nes_port_immutable(struct ib_device *ibdev, u8 port_num,
- struct ib_port_immutable *immutable)
-{
- struct ib_port_attr attr;
- int err;
-
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
-
- err = nes_query_port(ibdev, port_num, &attr);
- if (err)
- return err;
-
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
- immutable->gid_tbl_len = attr.gid_tbl_len;
-
- return 0;
-}
-
-static void get_dev_fw_str(struct ib_device *dev, char *str)
-{
- struct nes_ib_device *nesibdev =
- container_of(dev, struct nes_ib_device, ibdev);
- struct nes_vnic *nesvnic = nesibdev->nesvnic;
-
- nes_debug(NES_DBG_INIT, "\n");
- snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u",
- (nesvnic->nesdev->nesadapter->firmware_version >> 16),
- (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff));
-}
-
-static const struct ib_device_ops nes_dev_ops = {
- .alloc_mr = nes_alloc_mr,
- .alloc_mw = nes_alloc_mw,
- .alloc_pd = nes_alloc_pd,
- .alloc_ucontext = nes_alloc_ucontext,
- .create_cq = nes_create_cq,
- .create_qp = nes_create_qp,
- .dealloc_mw = nes_dealloc_mw,
- .dealloc_pd = nes_dealloc_pd,
- .dealloc_ucontext = nes_dealloc_ucontext,
- .dereg_mr = nes_dereg_mr,
- .destroy_cq = nes_destroy_cq,
- .destroy_qp = nes_destroy_qp,
- .drain_rq = nes_drain_rq,
- .drain_sq = nes_drain_sq,
- .get_dev_fw_str = get_dev_fw_str,
- .get_dma_mr = nes_get_dma_mr,
- .get_port_immutable = nes_port_immutable,
- .iw_accept = nes_accept,
- .iw_add_ref = nes_add_ref,
- .iw_connect = nes_connect,
- .iw_create_listen = nes_create_listen,
- .iw_destroy_listen = nes_destroy_listen,
- .iw_get_qp = nes_get_qp,
- .iw_reject = nes_reject,
- .iw_rem_ref = nes_rem_ref,
- .map_mr_sg = nes_map_mr_sg,
- .mmap = nes_mmap,
- .modify_qp = nes_modify_qp,
- .poll_cq = nes_poll_cq,
- .post_recv = nes_post_recv,
- .post_send = nes_post_send,
- .query_device = nes_query_device,
- .query_gid = nes_query_gid,
- .query_pkey = nes_query_pkey,
- .query_port = nes_query_port,
- .query_qp = nes_query_qp,
- .reg_user_mr = nes_reg_user_mr,
- .req_notify_cq = nes_req_notify_cq,
- INIT_RDMA_OBJ_SIZE(ib_pd, nes_pd, ibpd),
- INIT_RDMA_OBJ_SIZE(ib_ucontext, nes_ucontext, ibucontext),
-};
-
-/**
- * nes_init_ofa_device
- */
-struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
-{
- struct nes_ib_device *nesibdev;
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
-
- nesibdev = ib_alloc_device(nes_ib_device, ibdev);
- if (nesibdev == NULL) {
- return NULL;
- }
- nesibdev->ibdev.owner = THIS_MODULE;
-
- nesibdev->ibdev.node_type = RDMA_NODE_RNIC;
- memset(&nesibdev->ibdev.node_guid, 0, sizeof(nesibdev->ibdev.node_guid));
- memcpy(&nesibdev->ibdev.node_guid, netdev->dev_addr, 6);
-
- nesibdev->ibdev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
- (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
- (1ull << IB_USER_VERBS_CMD_BIND_MW) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_MW) |
- (1ull << IB_USER_VERBS_CMD_POST_RECV) |
- (1ull << IB_USER_VERBS_CMD_POST_SEND);
-
- nesibdev->ibdev.phys_port_cnt = 1;
- nesibdev->ibdev.num_comp_vectors = 1;
- nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev;
-
- ib_set_device_ops(&nesibdev->ibdev, &nes_dev_ops);
- memcpy(nesibdev->ibdev.iw_ifname, netdev->name,
- sizeof(nesibdev->ibdev.iw_ifname));
-
- return nesibdev;
-}
-
-
-/**
- * nes_handle_delayed_event
- */
-static void nes_handle_delayed_event(struct timer_list *t)
-{
- struct nes_vnic *nesvnic = from_timer(nesvnic, t, event_timer);
-
- if (nesvnic->delayed_event != nesvnic->last_dispatched_event) {
- struct ib_event event;
-
- event.device = &nesvnic->nesibdev->ibdev;
- if (!event.device)
- goto stop_timer;
- event.event = nesvnic->delayed_event;
- event.element.port_num = nesvnic->logical_port + 1;
- ib_dispatch_event(&event);
- }
-
-stop_timer:
- nesvnic->event_timer.function = NULL;
-}
-
-
-void nes_port_ibevent(struct nes_vnic *nesvnic)
-{
- struct nes_ib_device *nesibdev = nesvnic->nesibdev;
- struct nes_device *nesdev = nesvnic->nesdev;
- struct ib_event event;
- event.device = &nesibdev->ibdev;
- event.element.port_num = nesvnic->logical_port + 1;
- event.event = nesdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
-
- if (!nesvnic->event_timer.function) {
- ib_dispatch_event(&event);
- nesvnic->last_dispatched_event = event.event;
- nesvnic->event_timer.function = nes_handle_delayed_event;
- nesvnic->event_timer.expires = jiffies + NES_EVENT_DELAY;
- add_timer(&nesvnic->event_timer);
- } else {
- mod_timer(&nesvnic->event_timer, jiffies + NES_EVENT_DELAY);
- }
- nesvnic->delayed_event = event.event;
-}
-
-
-/**
- * nes_destroy_ofa_device
- */
-void nes_destroy_ofa_device(struct nes_ib_device *nesibdev)
-{
- if (nesibdev == NULL)
- return;
-
- nes_unregister_ofa_device(nesibdev);
-
- ib_dealloc_device(&nesibdev->ibdev);
-}
-
-
-/**
- * nes_register_ofa_device
- */
-int nes_register_ofa_device(struct nes_ib_device *nesibdev)
-{
- struct nes_vnic *nesvnic = nesibdev->nesvnic;
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- int ret;
-
- rdma_set_device_sysfs_group(&nesvnic->nesibdev->ibdev, &nes_attr_group);
- nesvnic->nesibdev->ibdev.driver_id = RDMA_DRIVER_NES;
- ret = ib_register_device(&nesvnic->nesibdev->ibdev, "nes%d");
- if (ret) {
- return ret;
- }
-
- /* Get the resources allocated to this device */
- nesibdev->max_cq = (nesadapter->max_cq-NES_FIRST_QPN) / nesadapter->port_count;
- nesibdev->max_mr = nesadapter->max_mr / nesadapter->port_count;
- nesibdev->max_qp = (nesadapter->max_qp-NES_FIRST_QPN) / nesadapter->port_count;
- nesibdev->max_pd = nesadapter->max_pd / nesadapter->port_count;
-
- nesvnic->of_device_registered = 1;
-
- return 0;
-}
-
-
-/**
- * nes_unregister_ofa_device
- */
-static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev)
-{
- struct nes_vnic *nesvnic = nesibdev->nesvnic;
-
- if (nesvnic->of_device_registered)
- ib_unregister_device(&nesibdev->ibdev);
-
- nesvnic->of_device_registered = 0;
-}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
deleted file mode 100644
index 114a9b59fefd..000000000000
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef NES_VERBS_H
-#define NES_VERBS_H
-
-struct nes_device;
-
-#define NES_MAX_USER_DB_REGIONS 4096
-#define NES_MAX_USER_WQ_REGIONS 4096
-
-#define NES_TERM_SENT 0x01
-#define NES_TERM_RCVD 0x02
-#define NES_TERM_DONE 0x04
-
-struct nes_ucontext {
- struct ib_ucontext ibucontext;
- struct nes_device *nesdev;
- unsigned long mmap_wq_offset;
- unsigned long mmap_cq_offset; /* to be removed */
- int index; /* rnic index (minor) */
- unsigned long allocated_doorbells[BITS_TO_LONGS(NES_MAX_USER_DB_REGIONS)];
- u16 mmap_db_index[NES_MAX_USER_DB_REGIONS];
- u16 first_free_db;
- unsigned long allocated_wqs[BITS_TO_LONGS(NES_MAX_USER_WQ_REGIONS)];
- struct nes_qp *mmap_nesqp[NES_MAX_USER_WQ_REGIONS];
- u16 first_free_wq;
- struct list_head cq_reg_mem_list;
- struct list_head qp_reg_mem_list;
- u32 mcrqf;
-};
-
-struct nes_pd {
- struct ib_pd ibpd;
- u16 pd_id;
- atomic_t sqp_count;
- u16 mmap_db_index;
-};
-
-struct nes_mr {
- union {
- struct ib_mr ibmr;
- struct ib_mw ibmw;
- struct ib_fmr ibfmr;
- };
- struct ib_umem *region;
- u16 pbls_used;
- u8 mode;
- u8 pbl_4k;
- __le64 *pages;
- dma_addr_t paddr;
- u32 max_pages;
- u32 npages;
-};
-
-struct nes_hw_pb {
- __le32 pa_low;
- __le32 pa_high;
-};
-
-struct nes_vpbl {
- dma_addr_t pbl_pbase;
- struct nes_hw_pb *pbl_vbase;
-};
-
-struct nes_root_vpbl {
- dma_addr_t pbl_pbase;
- struct nes_hw_pb *pbl_vbase;
- struct nes_vpbl *leaf_vpbl;
-};
-
-struct nes_fmr {
- struct nes_mr nesmr;
- u32 leaf_pbl_cnt;
- struct nes_root_vpbl root_vpbl;
- struct ib_qp *ib_qp;
- int access_rights;
- struct ib_fmr_attr attr;
-};
-
-struct nes_av;
-
-struct nes_cq {
- struct ib_cq ibcq;
- struct nes_hw_cq hw_cq;
- u32 polled_completions;
- u32 cq_mem_size;
- spinlock_t lock;
- u8 virtual_cq;
- u8 pad[3];
- u32 mcrqf;
-};
-
-struct nes_wq {
- spinlock_t lock;
-};
-
-struct disconn_work {
- struct work_struct work;
- struct nes_qp *nesqp;
-};
-
-struct iw_cm_id;
-struct ietf_mpa_frame;
-
-struct nes_qp {
- struct ib_qp ibqp;
- void *allocated_buffer;
- struct iw_cm_id *cm_id;
- struct nes_cq *nesscq;
- struct nes_cq *nesrcq;
- struct nes_pd *nespd;
- void *cm_node; /* handle of the node this QP is associated with */
- void *ietf_frame;
- u8 ietf_frame_size;
- dma_addr_t ietf_frame_pbase;
- struct ib_mr *lsmm_mr;
- struct nes_hw_qp hwqp;
- struct work_struct work;
- enum ib_qp_state ibqp_state;
- u32 iwarp_state;
- u32 hte_index;
- u32 last_aeq;
- u32 qp_mem_size;
- atomic_t refcount;
- atomic_t close_timer_started;
- u32 mmap_sq_db_index;
- u32 mmap_rq_db_index;
- spinlock_t lock;
- spinlock_t pau_lock;
- struct nes_qp_context *nesqp_context;
- dma_addr_t nesqp_context_pbase;
- void *pbl_vbase;
- dma_addr_t pbl_pbase;
- struct page *page;
- struct timer_list terminate_timer;
- enum ib_event_type terminate_eventtype;
- struct sk_buff_head pau_list;
- u32 pau_rcv_nxt;
- u16 active_conn:1;
- u16 skip_lsmm:1;
- u16 user_mode:1;
- u16 hte_added:1;
- u16 flush_issued:1;
- u16 destroyed:1;
- u16 sig_all:1;
- u16 pau_mode:1;
- u16 rsvd:8;
- u16 private_data_len;
- u16 term_sq_flush_code;
- u16 term_rq_flush_code;
- u8 hw_iwarp_state;
- u8 hw_tcp_state;
- u8 term_flags;
- u8 sq_kmapped;
- u8 pau_busy;
- u8 pau_pending;
- u8 pau_state;
- __u64 nesuqp_addr;
- struct completion sq_drained;
- struct completion rq_drained;
-};
-
-struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
- u64 addr, u64 size, int acc, u64 *iova_start);
-
-#endif /* NES_VERBS_H */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 5127e2ea4bdd..d82d3ec3649e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1351,7 +1351,6 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dma.pa);
mqe->u.nonemb_req.sge[0].len = dma.size;
- memset(dma.va, 0, dma.size);
ocrdma_init_mch((struct ocrdma_mbx_hdr *)dma.va,
OCRDMA_CMD_GET_CTRL_ATTRIBUTES,
OCRDMA_SUBSYS_COMMON,
@@ -1690,7 +1689,6 @@ static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
goto mem_err_ah;
dev->av_tbl.pa = pa;
dev->av_tbl.num_ah = max_ah;
- memset(dev->av_tbl.va, 0, dev->av_tbl.size);
pbes = (struct ocrdma_pbe *)dev->av_tbl.pbl.va;
for (i = 0; i < dev->av_tbl.size / OCRDMA_MIN_Q_PAGE_SIZE; i++) {
@@ -1888,14 +1886,13 @@ mem_err:
return status;
}
-int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq)
+void ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq)
{
- int status = -ENOMEM;
struct ocrdma_destroy_cq *cmd;
cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_CQ, sizeof(*cmd));
if (!cmd)
- return status;
+ return;
ocrdma_init_mch(&cmd->req, OCRDMA_CMD_DELETE_CQ,
OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
@@ -1903,11 +1900,10 @@ int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq)
(cq->id << OCRDMA_DESTROY_CQ_QID_SHIFT) &
OCRDMA_DESTROY_CQ_QID_MASK;
- status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
ocrdma_unbind_eq(dev, cq->eqn);
dma_free_coherent(&dev->nic_info.pdev->dev, cq->len, cq->va, cq->pa);
kfree(cmd);
- return status;
}
int ocrdma_mbx_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr,
@@ -2905,7 +2901,6 @@ static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype,
mqe_sge->pa_hi = (u32) upper_32_bits(pa);
mqe_sge->len = cmd.hdr.pyld_len;
- memset(req, 0, sizeof(struct ocrdma_get_dcbx_cfg_req));
ocrdma_init_mch(&req->hdr, OCRDMA_CMD_GET_DCBX_CONFIG,
OCRDMA_SUBSYS_DCBX, cmd.hdr.pyld_len);
req->param_type = ptype;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
index 06ec59326a90..12c23a7652b9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
@@ -122,7 +122,7 @@ int ocrdma_reg_mr(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr,
u32 pd_id, int acc);
int ocrdma_mbx_create_cq(struct ocrdma_dev *, struct ocrdma_cq *,
int entries, int dpp_cq, u16 pd_id);
-int ocrdma_mbx_destroy_cq(struct ocrdma_dev *, struct ocrdma_cq *);
+void ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq);
int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs,
u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index fc6c0962dea9..c15cfc6cef81 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -144,6 +144,10 @@ static const struct attribute_group ocrdma_attr_group = {
};
static const struct ib_device_ops ocrdma_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_OCRDMA,
+ .uverbs_abi_ver = OCRDMA_ABI_VERSION,
+
.alloc_mr = ocrdma_alloc_mr,
.alloc_pd = ocrdma_alloc_pd,
.alloc_ucontext = ocrdma_alloc_ucontext,
@@ -178,6 +182,7 @@ static const struct ib_device_ops ocrdma_dev_ops = {
.resize_cq = ocrdma_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, ocrdma_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, ocrdma_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, ocrdma_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, ocrdma_ucontext, ibucontext),
};
@@ -200,8 +205,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC,
sizeof(OCRDMA_NODE_DESC));
- dev->ibdev.owner = THIS_MODULE;
- dev->ibdev.uverbs_abi_ver = OCRDMA_ABI_VERSION;
dev->ibdev.uverbs_cmd_mask =
OCRDMA_UVERBS(GET_CONTEXT) |
OCRDMA_UVERBS(QUERY_DEVICE) |
@@ -249,7 +252,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops);
}
rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group);
- dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA;
ret = ib_device_set_netdev(&dev->ibdev, dev->nic_info.netdev, 1);
if (ret)
return ret;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 35ec87015792..bccc11378109 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -925,8 +925,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
/* it could be user registered memory. */
- if (mr->umem)
- ib_umem_release(mr->umem);
+ ib_umem_release(mr->umem);
kfree(mr);
/* Don't stop cleanup, in case FW is unresponsive */
@@ -977,12 +976,12 @@ err:
return status;
}
-struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
- struct ocrdma_cq *cq;
+ struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
udata, struct ocrdma_ucontext, ibucontext);
@@ -991,16 +990,13 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev,
struct ocrdma_create_cq_ureq ureq;
if (attr->flags)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (udata) {
if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
- return ERR_PTR(-EFAULT);
+ return -EFAULT;
} else
ureq.dpp_cq = 0;
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
spin_lock_init(&cq->cq_lock);
spin_lock_init(&cq->comp_handler_lock);
@@ -1011,10 +1007,9 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev,
pd_id = uctx->cntxt_pd->id;
status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id);
- if (status) {
- kfree(cq);
- return ERR_PTR(status);
- }
+ if (status)
+ return status;
+
if (udata) {
status = ocrdma_copy_cq_uresp(dev, cq, udata);
if (status)
@@ -1022,12 +1017,11 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev,
}
cq->phase = OCRDMA_CQE_VALID;
dev->cq_tbl[cq->id] = cq;
- return &cq->ibcq;
+ return 0;
ctx_err:
ocrdma_mbx_destroy_cq(dev, cq);
- kfree(cq);
- return ERR_PTR(status);
+ return status;
}
int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
@@ -1070,7 +1064,7 @@ static void ocrdma_flush_cq(struct ocrdma_cq *cq)
spin_unlock_irqrestore(&cq->cq_lock, flags);
}
-int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
struct ocrdma_eq *eq = NULL;
@@ -1080,14 +1074,13 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
dev->cq_tbl[cq->id] = NULL;
indx = ocrdma_get_eq_table_index(dev, cq->eqn);
- BUG_ON(indx == -EINVAL);
eq = &dev->eq_tbl[indx];
irq = ocrdma_get_irq(dev, eq);
synchronize_irq(irq);
ocrdma_flush_cq(cq);
- (void)ocrdma_mbx_destroy_cq(dev, cq);
+ ocrdma_mbx_destroy_cq(dev, cq);
if (cq->ucontext) {
pdid = cq->ucontext->cntxt_pd->id;
ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
@@ -1096,9 +1089,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
ocrdma_get_db_addr(dev, pdid),
dev->nic_info.db_page_size);
}
-
- kfree(cq);
- return 0;
}
static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index d76aae7ed0d3..32488da1b752 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -71,11 +71,10 @@ int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
void ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
-int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
struct ib_qp *ocrdma_create_qp(struct ib_pd *,
struct ib_qp_init_attr *attrs,
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 5ebf3c53b3fb..533157a2a3be 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -183,6 +183,10 @@ static void qedr_roce_register_device(struct qedr_dev *dev)
}
static const struct ib_device_ops qedr_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_QEDR,
+ .uverbs_abi_ver = QEDR_ABI_VERSION,
+
.alloc_mr = qedr_alloc_mr,
.alloc_pd = qedr_alloc_pd,
.alloc_ucontext = qedr_alloc_ucontext,
@@ -220,6 +224,7 @@ static const struct ib_device_ops qedr_dev_ops = {
.resize_cq = qedr_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, qedr_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, qedr_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_srq, qedr_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, qedr_ucontext, ibucontext),
@@ -231,8 +236,6 @@ static int qedr_register_device(struct qedr_dev *dev)
dev->ibdev.node_guid = dev->attr.node_guid;
memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC));
- dev->ibdev.owner = THIS_MODULE;
- dev->ibdev.uverbs_abi_ver = QEDR_ABI_VERSION;
dev->ibdev.uverbs_cmd_mask = QEDR_UVERBS(GET_CONTEXT) |
QEDR_UVERBS(QUERY_DEVICE) |
@@ -274,7 +277,6 @@ static int qedr_register_device(struct qedr_dev *dev)
rdma_set_device_sysfs_group(&dev->ibdev, &qedr_attr_group);
ib_set_device_ops(&dev->ibdev, &qedr_dev_ops);
- dev->ibdev.driver_id = RDMA_DRIVER_QEDR;
rc = ib_device_set_netdev(&dev->ibdev, dev->ndev, 1);
if (rc)
return rc;
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 3d7bde19838e..27d90a84ea01 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -159,54 +159,47 @@ int qedr_query_device(struct ib_device *ibdev,
return 0;
}
-#define QEDR_SPEED_SDR (1)
-#define QEDR_SPEED_DDR (2)
-#define QEDR_SPEED_QDR (4)
-#define QEDR_SPEED_FDR10 (8)
-#define QEDR_SPEED_FDR (16)
-#define QEDR_SPEED_EDR (32)
-
static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
u8 *ib_width)
{
switch (speed) {
case 1000:
- *ib_speed = QEDR_SPEED_SDR;
+ *ib_speed = IB_SPEED_SDR;
*ib_width = IB_WIDTH_1X;
break;
case 10000:
- *ib_speed = QEDR_SPEED_QDR;
+ *ib_speed = IB_SPEED_QDR;
*ib_width = IB_WIDTH_1X;
break;
case 20000:
- *ib_speed = QEDR_SPEED_DDR;
+ *ib_speed = IB_SPEED_DDR;
*ib_width = IB_WIDTH_4X;
break;
case 25000:
- *ib_speed = QEDR_SPEED_EDR;
+ *ib_speed = IB_SPEED_EDR;
*ib_width = IB_WIDTH_1X;
break;
case 40000:
- *ib_speed = QEDR_SPEED_QDR;
+ *ib_speed = IB_SPEED_QDR;
*ib_width = IB_WIDTH_4X;
break;
case 50000:
- *ib_speed = QEDR_SPEED_QDR;
- *ib_width = IB_WIDTH_4X;
+ *ib_speed = IB_SPEED_HDR;
+ *ib_width = IB_WIDTH_1X;
break;
case 100000:
- *ib_speed = QEDR_SPEED_EDR;
+ *ib_speed = IB_SPEED_EDR;
*ib_width = IB_WIDTH_4X;
break;
default:
/* Unsupported */
- *ib_speed = QEDR_SPEED_SDR;
+ *ib_speed = IB_SPEED_SDR;
*ib_width = IB_WIDTH_1X;
}
}
@@ -813,20 +806,20 @@ int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
return 0;
}
-struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
struct qedr_ucontext *ctx = rdma_udata_to_drv_context(
udata, struct qedr_ucontext, ibucontext);
struct qed_rdma_destroy_cq_out_params destroy_oparams;
struct qed_rdma_destroy_cq_in_params destroy_iparams;
struct qedr_dev *dev = get_qedr_dev(ibdev);
struct qed_rdma_create_cq_in_params params;
- struct qedr_create_cq_ureq ureq;
+ struct qedr_create_cq_ureq ureq = {};
int vector = attr->comp_vector;
int entries = attr->cqe;
- struct qedr_cq *cq;
+ struct qedr_cq *cq = get_qedr_cq(ibcq);
int chain_entries;
int page_cnt;
u64 pbl_ptr;
@@ -841,18 +834,13 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
DP_ERR(dev,
"create cq: the number of entries %d is too high. Must be equal or below %d.\n",
entries, QEDR_MAX_CQES);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
chain_entries = qedr_align_cq_entries(entries);
chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
-
if (udata) {
- memset(&ureq, 0, sizeof(ureq));
if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
DP_ERR(dev,
"create cq: problem copying data from user space\n");
@@ -930,7 +918,7 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
"create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
cq->icid, cq, params.cq_size);
- return &cq->ibcq;
+ return 0;
err3:
destroy_iparams.icid = cq->icid;
@@ -945,8 +933,7 @@ err1:
if (udata)
ib_umem_release(cq->q.umem);
err0:
- kfree(cq);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
@@ -962,14 +949,13 @@ int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
#define QEDR_DESTROY_CQ_MAX_ITERATIONS (10)
#define QEDR_DESTROY_CQ_ITER_DURATION (10)
-int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct qedr_dev *dev = get_qedr_dev(ibcq->device);
struct qed_rdma_destroy_cq_out_params oparams;
struct qed_rdma_destroy_cq_in_params iparams;
struct qedr_cq *cq = get_qedr_cq(ibcq);
int iter;
- int rc;
DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
@@ -977,13 +963,10 @@ int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
/* GSIs CQs are handled by driver, so they don't exist in the FW */
if (cq->cq_type == QEDR_CQ_TYPE_GSI)
- goto done;
+ return;
iparams.icid = cq->icid;
- rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
- if (rc)
- return rc;
-
+ dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
dev->ops->common->chain_free(dev->cdev, &cq->pbl);
if (udata) {
@@ -1014,27 +997,11 @@ int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
iter--;
}
- if (oparams.num_cq_notif != cq->cnq_notif)
- goto err;
-
/* Note that we don't need to have explicit code to wait for the
* completion of the event handler because it is invoked from the EQ.
* Since the destroy CQ ramrod has also been received on the EQ we can
* be certain that there's no event handler in process.
*/
-done:
- cq->sig = ~cq->sig;
-
- kfree(cq);
-
- return 0;
-
-err:
- DP_ERR(dev,
- "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
- cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
-
- return -EINVAL;
}
static inline int get_gid_info_from_table(struct ib_qp *ibqp,
@@ -1605,12 +1572,10 @@ qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
{
- if (qp->usq.umem)
- ib_umem_release(qp->usq.umem);
+ ib_umem_release(qp->usq.umem);
qp->usq.umem = NULL;
- if (qp->urq.umem)
- ib_umem_release(qp->urq.umem);
+ ib_umem_release(qp->urq.umem);
qp->urq.umem = NULL;
}
@@ -2713,8 +2678,7 @@ int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
/* it could be user registered memory. */
- if (mr->umem)
- ib_umem_release(mr->umem);
+ ib_umem_release(mr->umem);
kfree(mr);
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 9328c80375ef..9aaa90283d6e 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -50,11 +50,10 @@ int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
-int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
struct ib_qp *qedr_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs,
struct ib_udata *);
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index a81905df2d0f..8d0563ef5be1 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012 - 2019 Intel Corporation. All rights reserved.
* Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -398,7 +398,7 @@ int qib_check_send_wqe(struct rvt_qp *qp,
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_UD:
- ah = ibah_to_rvtah(wqe->ud_wr.ah);
+ ah = rvt_get_swqe_ah(wqe);
if (wqe->length > (1 << ah->log_pmtu))
return -EINVAL;
/* progress hint */
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 2ac4c67f5ba1..1d5e2d4ee257 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -921,20 +921,11 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
rvt_add_retry_timer(qp);
while (qp->s_last != qp->s_acked) {
- u32 s_last;
-
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 &&
qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
break;
- s_last = qp->s_last;
- if (++s_last >= qp->s_size)
- s_last = 0;
- qp->s_last = s_last;
- /* see post_send() */
- barrier();
- rvt_put_qp_swqe(qp, wqe);
- rvt_qp_swqe_complete(qp,
+ rvt_qp_complete_swqe(qp,
wqe,
ib_qib_wc_opcode[wqe->wr.opcode],
IB_WC_SUCCESS);
@@ -972,21 +963,12 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
* is finished.
*/
if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 ||
- qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
- u32 s_last;
-
- rvt_put_qp_swqe(qp, wqe);
- s_last = qp->s_last;
- if (++s_last >= qp->s_size)
- s_last = 0;
- qp->s_last = s_last;
- /* see post_send() */
- barrier();
- rvt_qp_swqe_complete(qp,
+ qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0)
+ rvt_qp_complete_swqe(qp,
wqe,
ib_qib_wc_opcode[wqe->wr.opcode],
IB_WC_SUCCESS);
- } else
+ else
this_cpu_inc(*ibp->rvp.rc_delayed_comp);
qp->s_retry = qp->s_retry_cnt;
@@ -1909,8 +1891,7 @@ send_last:
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- ib_bth_is_solicited(ohdr));
+ rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
break;
case OP(RDMA_WRITE_FIRST):
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 30c70ad0f4bf..e17b91e2c22a 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -400,8 +400,7 @@ last_imm:
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- ib_bth_is_solicited(ohdr));
+ rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
break;
case OP(RDMA_WRITE_FIRST):
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 5cdedba2d164..93ca21347959 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2012 - 2019 Intel Corporation. All rights reserved.
* Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -63,7 +64,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
enum ib_qp_type sqptype, dqptype;
rcu_read_lock();
- qp = rvt_lookup_qpn(rdi, &ibp->rvp, swqe->ud_wr.remote_qpn);
+ qp = rvt_lookup_qpn(rdi, &ibp->rvp, rvt_get_swqe_remote_qpn(swqe));
if (!qp) {
ibp->rvp.n_pkt_drops++;
goto drop;
@@ -80,7 +81,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
goto drop;
}
- ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr;
+ ah_attr = rvt_get_swqe_ah_attr(swqe);
ppd = ppd_from_ibp(ibp);
if (qp->ibqp.qp_num > 1) {
@@ -110,8 +111,8 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (qp->ibqp.qp_num) {
u32 qkey;
- qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
- sqp->qkey : swqe->ud_wr.remote_qkey;
+ qkey = (int)rvt_get_swqe_remote_qkey(swqe) < 0 ?
+ sqp->qkey : rvt_get_swqe_remote_qkey(swqe);
if (unlikely(qkey != qp->qkey))
goto drop;
}
@@ -203,15 +204,14 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
wc.qp = &qp->ibqp;
wc.src_qp = sqp->ibqp.qp_num;
wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ?
- swqe->ud_wr.pkey_index : 0;
+ rvt_get_swqe_pkey_index(swqe) : 0;
wc.slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
wc.sl = rdma_ah_get_sl(ah_attr);
wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- swqe->wr.send_flags & IB_SEND_SOLICITED);
+ rvt_recv_cq(qp, &wc, swqe->wr.send_flags & IB_SEND_SOLICITED);
ibp->rvp.n_loop_pkts++;
bail_unlock:
spin_unlock_irqrestore(&qp->r_lock, flags);
@@ -271,7 +271,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
/* Construct the header. */
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
- ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
+ ah_attr = rvt_get_swqe_ah_attr(wqe);
if (rdma_ah_get_dlid(ah_attr) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
if (rdma_ah_get_dlid(ah_attr) !=
be16_to_cpu(IB_LID_PERMISSIVE))
@@ -363,7 +363,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
bth0 |= extra_bytes << 20;
bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY :
qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ?
- wqe->ud_wr.pkey_index : qp->s_pkey_index);
+ rvt_get_swqe_pkey_index(wqe) : qp->s_pkey_index);
ohdr->bth[0] = cpu_to_be32(bth0);
/*
* Use the multicast QP if the destination LID is a multicast LID.
@@ -372,14 +372,15 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
be16_to_cpu(IB_MULTICAST_LID_BASE) &&
rdma_ah_get_dlid(ah_attr) != be16_to_cpu(IB_LID_PERMISSIVE) ?
cpu_to_be32(QIB_MULTICAST_QPN) :
- cpu_to_be32(wqe->ud_wr.remote_qpn);
+ cpu_to_be32(rvt_get_swqe_remote_qpn(wqe));
ohdr->bth[2] = cpu_to_be32(wqe->psn & QIB_PSN_MASK);
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
*/
- ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
- qp->qkey : wqe->ud_wr.remote_qkey);
+ ohdr->u.ud.deth[0] =
+ cpu_to_be32((int)rvt_get_swqe_remote_qkey(wqe) < 0 ? qp->qkey :
+ rvt_get_swqe_remote_qkey(wqe));
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
done:
@@ -573,8 +574,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- ib_bth_is_solicited(ohdr));
+ rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
return;
drop:
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index f712fb7fa82f..bfbfbb7e0ff4 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -40,13 +40,10 @@
static void __qib_release_user_pages(struct page **p, size_t num_pages,
int dirty)
{
- size_t i;
-
- for (i = 0; i < num_pages; i++) {
- if (dirty)
- set_page_dirty_lock(p[i]);
- put_page(p[i]);
- }
+ if (dirty)
+ put_user_pages_dirty_lock(p, num_pages);
+ else
+ put_user_pages(p, num_pages);
}
/**
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index 0c204776263f..05190edc2611 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -317,7 +317,7 @@ static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
* the caller can ignore this page.
*/
if (put) {
- put_page(page);
+ put_user_page(page);
} else {
/* coalesce case */
kunmap(page);
@@ -631,7 +631,7 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev,
kunmap(pkt->addr[i].page);
if (pkt->addr[i].put_page)
- put_page(pkt->addr[i].page);
+ put_user_page(pkt->addr[i].page);
else
__free_page(pkt->addr[i].page);
} else if (pkt->addr[i].kvaddr) {
@@ -706,7 +706,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
/* if error, return all pages not managed by pkt */
free_pages:
while (i < j)
- put_page(pages[i++]);
+ put_user_page(pages[i++]);
done:
return ret;
@@ -904,10 +904,11 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
}
if (frag_size) {
- int pktsize, tidsmsize, n;
+ int tidsmsize, n;
+ size_t pktsize;
n = npages*((2*PAGE_SIZE/frag_size)+1);
- pktsize = sizeof(*pkt) + sizeof(pkt->addr[0])*n;
+ pktsize = struct_size(pkt, addr, n);
/*
* Determine if this is tid-sdma or just sdma.
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 2c4e569ce438..33778d451b82 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1480,6 +1480,9 @@ static void qib_fill_device_attr(struct qib_devdata *dd)
}
static const struct ib_device_ops qib_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_QIB,
+
.init_port = qib_create_port_files,
.modify_device = qib_modify_device,
.process_mad = qib_process_mad,
@@ -1543,7 +1546,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
if (!ib_qib_sys_image_guid)
ib_qib_sys_image_guid = ppd->guid;
- ibdev->owner = THIS_MODULE;
ibdev->node_guid = ppd->guid;
ibdev->phys_port_cnt = dd->num_pports;
ibdev->dev.parent = &dd->pcidev->dev;
@@ -1614,7 +1616,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &qib_attr_group);
ib_set_device_ops(ibdev, &qib_dev_ops);
- ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB);
+ ret = rvt_register_device(&dd->verbs_dev.rdi);
if (ret)
goto err_tx;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib.h b/drivers/infiniband/hw/usnic/usnic_ib.h
index 525bf272671e..84dd682d2334 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib.h
@@ -61,6 +61,10 @@ struct usnic_ib_pd {
struct usnic_uiom_pd *umem_pd;
};
+struct usnic_ib_cq {
+ struct ib_cq ibcq;
+};
+
struct usnic_ib_mr {
struct ib_mr ibmr;
struct usnic_uiom_reg *umem;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 34c1f9d6c915..03f54eb9404b 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -329,6 +329,10 @@ static void usnic_get_dev_fw_str(struct ib_device *device, char *str)
}
static const struct ib_device_ops usnic_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_USNIC,
+ .uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION,
+
.alloc_pd = usnic_ib_alloc_pd,
.alloc_ucontext = usnic_ib_alloc_ucontext,
.create_cq = usnic_ib_create_cq,
@@ -350,6 +354,7 @@ static const struct ib_device_ops usnic_dev_ops = {
.query_qp = usnic_ib_query_qp,
.reg_user_mr = usnic_ib_reg_mr,
INIT_RDMA_OBJ_SIZE(ib_pd, usnic_ib_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_cq, usnic_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, usnic_ib_ucontext, ibucontext),
};
@@ -384,12 +389,10 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
us_ibdev->pdev = dev;
us_ibdev->netdev = pci_get_drvdata(dev);
- us_ibdev->ib_dev.owner = THIS_MODULE;
us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC_UDP;
us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT;
us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
us_ibdev->ib_dev.dev.parent = &dev->dev;
- us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION;
us_ibdev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -412,7 +415,6 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
ib_set_device_ops(&us_ibdev->ib_dev, &usnic_dev_ops);
- us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC;
rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group);
ret = ib_device_set_netdev(&us_ibdev->ib_dev, us_ibdev->netdev, 1);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index e9352750e029..eeb07b245ef9 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -587,28 +587,18 @@ out_unlock:
return status;
}
-struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
- struct ib_cq *cq;
-
- usnic_dbg("\n");
if (attr->flags)
- return ERR_PTR(-EINVAL);
-
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-EBUSY);
+ return -EINVAL;
- return cq;
+ return 0;
}
-int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+void usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
- usnic_dbg("\n");
- kfree(cq);
- return 0;
+ return;
}
struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 028f322f8e9b..2aedf78c13cf 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -58,10 +58,9 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
int usnic_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata);
int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
-struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
-int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+void usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index e312f522a66d..0b0237d41613 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -75,9 +75,10 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
for_each_sg(chunk->page_list, sg, chunk->nents, i) {
page = sg_page(sg);
pa = sg_phys(sg);
- if (!PageDirty(page) && dirty)
- set_page_dirty_lock(page);
- put_page(page);
+ if (dirty)
+ put_user_pages_dirty_lock(&page, 1);
+ else
+ put_user_page(page);
usnic_dbg("pa: %pa\n", &pa);
}
kfree(chunk);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 3c633ab58052..c142f5e7f25f 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -456,7 +456,7 @@ static inline enum pvrdma_wr_opcode ib_wr_opcode_to_pvrdma(enum ib_wr_opcode op)
return PVRDMA_WR_MASKED_ATOMIC_CMP_AND_SWP;
case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
return PVRDMA_WR_MASKED_ATOMIC_FETCH_AND_ADD;
- case IB_WR_REG_SIG_MR:
+ case IB_WR_REG_MR_INTEGRITY:
return PVRDMA_WR_REG_SIG_MR;
default:
return PVRDMA_WR_ERROR;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index d7deb19a2800..7800e6930502 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -92,20 +92,19 @@ int pvrdma_req_notify_cq(struct ib_cq *ibcq,
/**
* pvrdma_create_cq - create completion queue
- * @ibdev: the device
+ * @ibcq: Allocated CQ
* @attr: completion queue attributes
* @udata: user data
*
- * @return: ib_cq completion queue pointer on success,
- * otherwise returns negative errno.
+ * @return: 0 on success
*/
-struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
struct pvrdma_dev *dev = to_vdev(ibdev);
- struct pvrdma_cq *cq;
+ struct pvrdma_cq *cq = to_vcq(ibcq);
int ret;
int npages;
unsigned long flags;
@@ -113,7 +112,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
union pvrdma_cmd_resp rsp;
struct pvrdma_cmd_create_cq *cmd = &req.create_cq;
struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp;
- struct pvrdma_create_cq_resp cq_resp = {0};
+ struct pvrdma_create_cq_resp cq_resp = {};
struct pvrdma_create_cq ucmd;
struct pvrdma_ucontext *context = rdma_udata_to_drv_context(
udata, struct pvrdma_ucontext, ibucontext);
@@ -122,16 +121,10 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
entries = roundup_pow_of_two(entries);
if (entries < 1 || entries > dev->dsr->caps.max_cqe)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (!atomic_add_unless(&dev->num_cqs, 1, dev->dsr->caps.max_cq))
- return ERR_PTR(-ENOMEM);
-
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq) {
- atomic_dec(&dev->num_cqs);
- return ERR_PTR(-ENOMEM);
- }
+ return -ENOMEM;
cq->ibcq.cqe = entries;
cq->is_kernel = !udata;
@@ -211,22 +204,19 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
dev_warn(&dev->pdev->dev,
"failed to copy back udata\n");
pvrdma_destroy_cq(&cq->ibcq, udata);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
}
- return &cq->ibcq;
+ return 0;
err_page_dir:
pvrdma_page_dir_cleanup(dev, &cq->pdir);
err_umem:
- if (!cq->is_kernel)
- ib_umem_release(cq->umem);
+ ib_umem_release(cq->umem);
err_cq:
atomic_dec(&dev->num_cqs);
- kfree(cq);
-
- return ERR_PTR(ret);
+ return ret;
}
static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
@@ -235,21 +225,17 @@ static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
complete(&cq->free);
wait_for_completion(&cq->free);
- if (!cq->is_kernel)
- ib_umem_release(cq->umem);
+ ib_umem_release(cq->umem);
pvrdma_page_dir_cleanup(dev, &cq->pdir);
- kfree(cq);
}
/**
* pvrdma_destroy_cq - destroy completion queue
* @cq: the completion queue to destroy.
* @udata: user data or null for kernel object
- *
- * @return: 0 for success.
*/
-int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
struct pvrdma_cq *vcq = to_vcq(cq);
union pvrdma_cmd_req req;
@@ -275,8 +261,6 @@ int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
pvrdma_free_cq(dev, vcq);
atomic_dec(&dev->num_cqs);
-
- return ret;
}
static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i)
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 40182297f87f..e580ae9cc55a 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -144,6 +144,10 @@ static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
}
static const struct ib_device_ops pvrdma_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_VMW_PVRDMA,
+ .uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION,
+
.add_gid = pvrdma_add_gid,
.alloc_mr = pvrdma_alloc_mr,
.alloc_pd = pvrdma_alloc_pd,
@@ -178,6 +182,7 @@ static const struct ib_device_ops pvrdma_dev_ops = {
.req_notify_cq = pvrdma_req_notify_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, pvrdma_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, pvrdma_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, pvrdma_ucontext, ibucontext),
};
@@ -198,10 +203,8 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
dev->sys_image_guid = dev->dsr->caps.sys_image_guid;
dev->flags = 0;
- dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.num_comp_vectors = 1;
dev->ib_dev.dev.parent = &dev->pdev->dev;
- dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION;
dev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -261,7 +264,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
if (!dev->srq_tbl)
goto err_qp_free;
}
- dev->ib_dev.driver_id = RDMA_DRIVER_VMW_PVRDMA;
ret = ib_device_set_netdev(&dev->ib_dev, dev->netdev, 1);
if (ret)
return ret;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
index 65dc47ffb8f3..f3a3d22ee8d7 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
@@ -290,8 +290,7 @@ int pvrdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
"could not deregister mem region, error: %d\n", ret);
pvrdma_page_dir_cleanup(dev, &mr->pdir);
- if (mr->umem)
- ib_umem_release(mr->umem);
+ ib_umem_release(mr->umem);
kfree(mr->pages);
kfree(mr);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 0eaaead5baec..bca6a58a442e 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -391,12 +391,8 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
err_pdir:
pvrdma_page_dir_cleanup(dev, &qp->pdir);
err_umem:
- if (!qp->is_kernel) {
- if (qp->rumem)
- ib_umem_release(qp->rumem);
- if (qp->sumem)
- ib_umem_release(qp->sumem);
- }
+ ib_umem_release(qp->rumem);
+ ib_umem_release(qp->sumem);
err_qp:
kfree(qp);
atomic_dec(&dev->num_qps);
@@ -429,12 +425,8 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp)
complete(&qp->free);
wait_for_completion(&qp->free);
- if (!qp->is_kernel) {
- if (qp->rumem)
- ib_umem_release(qp->rumem);
- if (qp->sumem)
- ib_umem_release(qp->sumem);
- }
+ ib_umem_release(qp->rumem);
+ ib_umem_release(qp->sumem);
pvrdma_page_dir_cleanup(dev, &qp->pdir);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index 9d7b021e1c59..e4a48f5c0c85 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -409,10 +409,9 @@ struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg, struct ib_udata *udata);
int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset);
-struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
-int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags,
diff --git a/drivers/infiniband/sw/Makefile b/drivers/infiniband/sw/Makefile
index ab48a9b60844..68e0230f8f31 100644
--- a/drivers/infiniband/sw/Makefile
+++ b/drivers/infiniband/sw/Makefile
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt/
obj-$(CONFIG_RDMA_RXE) += rxe/
+obj-$(CONFIG_RDMA_SIW) += siw/
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index 0e147b32cbe9..fe99da0ff060 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2019 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -119,8 +119,6 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
rdma_copy_ah_attr(&ah->attr, ah_attr);
- atomic_set(&ah->refcount, 0);
-
if (dev->driver_f.notify_new_ah)
dev->driver_f.notify_new_ah(ibah->device, ah_attr, ah);
@@ -141,8 +139,6 @@ void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags)
struct rvt_ah *ah = ibah_to_rvtah(ibah);
unsigned long flags;
- WARN_ON_ONCE(atomic_read(&ah->refcount));
-
spin_lock_irqsave(&dev->n_ahs_lock, flags);
dev->n_ahs_allocated--;
spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index a06e6da7a026..a85571a4cf57 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -60,22 +60,39 @@ static struct workqueue_struct *comp_vector_wq;
* @solicited: true if @entry is solicited
*
* This may be called with qp->s_lock held.
+ *
+ * Return: return true on success, else return
+ * false if cq is full.
*/
-void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
+bool rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
{
- struct rvt_cq_wc *wc;
+ struct ib_uverbs_wc *uqueue = NULL;
+ struct ib_wc *kqueue = NULL;
+ struct rvt_cq_wc *u_wc = NULL;
+ struct rvt_k_cq_wc *k_wc = NULL;
unsigned long flags;
u32 head;
u32 next;
+ u32 tail;
spin_lock_irqsave(&cq->lock, flags);
+ if (cq->ip) {
+ u_wc = cq->queue;
+ uqueue = &u_wc->uqueue[0];
+ head = RDMA_READ_UAPI_ATOMIC(u_wc->head);
+ tail = RDMA_READ_UAPI_ATOMIC(u_wc->tail);
+ } else {
+ k_wc = cq->kqueue;
+ kqueue = &k_wc->kqueue[0];
+ head = k_wc->head;
+ tail = k_wc->tail;
+ }
+
/*
- * Note that the head pointer might be writable by user processes.
- * Take care to verify it is a sane value.
+ * Note that the head pointer might be writable by
+ * user processes.Take care to verify it is a sane value.
*/
- wc = cq->queue;
- head = wc->head;
if (head >= (unsigned)cq->ibcq.cqe) {
head = cq->ibcq.cqe;
next = 0;
@@ -83,7 +100,12 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
next = head + 1;
}
- if (unlikely(next == wc->tail)) {
+ if (unlikely(next == tail || cq->cq_full)) {
+ struct rvt_dev_info *rdi = cq->rdi;
+
+ if (!cq->cq_full)
+ rvt_pr_err_ratelimited(rdi, "CQ is full!\n");
+ cq->cq_full = true;
spin_unlock_irqrestore(&cq->lock, flags);
if (cq->ibcq.event_handler) {
struct ib_event ev;
@@ -93,30 +115,30 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
ev.event = IB_EVENT_CQ_ERR;
cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
}
- return;
+ return false;
}
trace_rvt_cq_enter(cq, entry, head);
- if (cq->ip) {
- wc->uqueue[head].wr_id = entry->wr_id;
- wc->uqueue[head].status = entry->status;
- wc->uqueue[head].opcode = entry->opcode;
- wc->uqueue[head].vendor_err = entry->vendor_err;
- wc->uqueue[head].byte_len = entry->byte_len;
- wc->uqueue[head].ex.imm_data = entry->ex.imm_data;
- wc->uqueue[head].qp_num = entry->qp->qp_num;
- wc->uqueue[head].src_qp = entry->src_qp;
- wc->uqueue[head].wc_flags = entry->wc_flags;
- wc->uqueue[head].pkey_index = entry->pkey_index;
- wc->uqueue[head].slid = ib_lid_cpu16(entry->slid);
- wc->uqueue[head].sl = entry->sl;
- wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
- wc->uqueue[head].port_num = entry->port_num;
+ if (uqueue) {
+ uqueue[head].wr_id = entry->wr_id;
+ uqueue[head].status = entry->status;
+ uqueue[head].opcode = entry->opcode;
+ uqueue[head].vendor_err = entry->vendor_err;
+ uqueue[head].byte_len = entry->byte_len;
+ uqueue[head].ex.imm_data = entry->ex.imm_data;
+ uqueue[head].qp_num = entry->qp->qp_num;
+ uqueue[head].src_qp = entry->src_qp;
+ uqueue[head].wc_flags = entry->wc_flags;
+ uqueue[head].pkey_index = entry->pkey_index;
+ uqueue[head].slid = ib_lid_cpu16(entry->slid);
+ uqueue[head].sl = entry->sl;
+ uqueue[head].dlid_path_bits = entry->dlid_path_bits;
+ uqueue[head].port_num = entry->port_num;
/* Make sure entry is written before the head index. */
- smp_wmb();
+ RDMA_WRITE_UAPI_ATOMIC(u_wc->head, next);
} else {
- wc->kqueue[head] = *entry;
+ kqueue[head] = *entry;
+ k_wc->head = next;
}
- wc->head = next;
if (cq->notify == IB_CQ_NEXT_COMP ||
(cq->notify == IB_CQ_SOLICITED &&
@@ -132,6 +154,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
}
spin_unlock_irqrestore(&cq->lock, flags);
+ return true;
}
EXPORT_SYMBOL(rvt_cq_enter);
@@ -166,43 +189,38 @@ static void send_complete(struct work_struct *work)
/**
* rvt_create_cq - create a completion queue
- * @ibdev: the device this completion queue is attached to
+ * @ibcq: Allocated CQ
* @attr: creation attributes
* @udata: user data for libibverbs.so
*
* Called by ib_create_cq() in the generic verbs code.
*
- * Return: pointer to the completion queue or negative errno values
- * for failure.
+ * Return: 0 on success
*/
-struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
- struct rvt_cq *cq;
- struct rvt_cq_wc *wc;
- struct ib_cq *ret;
+ struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
+ struct rvt_cq_wc *u_wc = NULL;
+ struct rvt_k_cq_wc *k_wc = NULL;
u32 sz;
unsigned int entries = attr->cqe;
int comp_vector = attr->comp_vector;
+ int err;
if (attr->flags)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (entries < 1 || entries > rdi->dparms.props.max_cqe)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (comp_vector < 0)
comp_vector = 0;
comp_vector = comp_vector % rdi->ibdev.num_comp_vectors;
- /* Allocate the completion queue structure. */
- cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, rdi->dparms.node);
- if (!cq)
- return ERR_PTR(-ENOMEM);
-
/*
* Allocate the completion queue entries and head/tail pointers.
* This is allocated separately so that it can be resized and
@@ -210,17 +228,18 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
* We need to use vmalloc() in order to support mmap and large
* numbers of entries.
*/
- sz = sizeof(*wc);
- if (udata && udata->outlen >= sizeof(__u64))
- sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
- else
- sz += sizeof(struct ib_wc) * (entries + 1);
- wc = udata ?
- vmalloc_user(sz) :
- vzalloc_node(sz, rdi->dparms.node);
- if (!wc) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_cq;
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ sz = sizeof(struct ib_uverbs_wc) * (entries + 1);
+ sz += sizeof(*u_wc);
+ u_wc = vmalloc_user(sz);
+ if (!u_wc)
+ return -ENOMEM;
+ } else {
+ sz = sizeof(struct ib_wc) * (entries + 1);
+ sz += sizeof(*k_wc);
+ k_wc = vzalloc_node(sz, rdi->dparms.node);
+ if (!k_wc)
+ return -ENOMEM;
}
/*
@@ -228,26 +247,22 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
* See rvt_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
- int err;
-
- cq->ip = rvt_create_mmap_info(rdi, sz, udata, wc);
+ cq->ip = rvt_create_mmap_info(rdi, sz, udata, u_wc);
if (!cq->ip) {
- ret = ERR_PTR(-ENOMEM);
+ err = -ENOMEM;
goto bail_wc;
}
err = ib_copy_to_udata(udata, &cq->ip->offset,
sizeof(cq->ip->offset));
- if (err) {
- ret = ERR_PTR(err);
+ if (err)
goto bail_ip;
- }
}
spin_lock_irq(&rdi->n_cqs_lock);
if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
spin_unlock_irq(&rdi->n_cqs_lock);
- ret = ERR_PTR(-ENOMEM);
+ err = -ENOMEM;
goto bail_ip;
}
@@ -277,21 +292,20 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
cq->notify = RVT_CQ_NONE;
spin_lock_init(&cq->lock);
INIT_WORK(&cq->comptask, send_complete);
- cq->queue = wc;
-
- ret = &cq->ibcq;
+ if (u_wc)
+ cq->queue = u_wc;
+ else
+ cq->kqueue = k_wc;
trace_rvt_create_cq(cq, attr);
- goto done;
+ return 0;
bail_ip:
kfree(cq->ip);
bail_wc:
- vfree(wc);
-bail_cq:
- kfree(cq);
-done:
- return ret;
+ vfree(u_wc);
+ vfree(k_wc);
+ return err;
}
/**
@@ -300,10 +314,8 @@ done:
* @udata: user data or NULL for kernel object
*
* Called by ib_destroy_cq() in the generic verbs code.
- *
- * Return: always 0
*/
-int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
struct rvt_dev_info *rdi = cq->rdi;
@@ -316,9 +328,6 @@ int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
kref_put(&cq->ip->ref, rvt_release_mmap_info);
else
vfree(cq->queue);
- kfree(cq);
-
- return 0;
}
/**
@@ -345,9 +354,16 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
if (cq->notify != IB_CQ_NEXT_COMP)
cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
- if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
- cq->queue->head != cq->queue->tail)
- ret = 1;
+ if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
+ if (cq->queue) {
+ if (RDMA_READ_UAPI_ATOMIC(cq->queue->head) !=
+ RDMA_READ_UAPI_ATOMIC(cq->queue->tail))
+ ret = 1;
+ } else {
+ if (cq->kqueue->head != cq->kqueue->tail)
+ ret = 1;
+ }
+ }
spin_unlock_irqrestore(&cq->lock, flags);
@@ -363,12 +379,14 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
- struct rvt_cq_wc *old_wc;
- struct rvt_cq_wc *wc;
u32 head, tail, n;
int ret;
u32 sz;
struct rvt_dev_info *rdi = cq->rdi;
+ struct rvt_cq_wc *u_wc = NULL;
+ struct rvt_cq_wc *old_u_wc = NULL;
+ struct rvt_k_cq_wc *k_wc = NULL;
+ struct rvt_k_cq_wc *old_k_wc = NULL;
if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
return -EINVAL;
@@ -376,17 +394,19 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
- sz = sizeof(*wc);
- if (udata && udata->outlen >= sizeof(__u64))
- sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
- else
- sz += sizeof(struct ib_wc) * (cqe + 1);
- wc = udata ?
- vmalloc_user(sz) :
- vzalloc_node(sz, rdi->dparms.node);
- if (!wc)
- return -ENOMEM;
-
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ sz = sizeof(struct ib_uverbs_wc) * (cqe + 1);
+ sz += sizeof(*u_wc);
+ u_wc = vmalloc_user(sz);
+ if (!u_wc)
+ return -ENOMEM;
+ } else {
+ sz = sizeof(struct ib_wc) * (cqe + 1);
+ sz += sizeof(*k_wc);
+ k_wc = vzalloc_node(sz, rdi->dparms.node);
+ if (!k_wc)
+ return -ENOMEM;
+ }
/* Check that we can write the offset to mmap. */
if (udata && udata->outlen >= sizeof(__u64)) {
__u64 offset = 0;
@@ -401,11 +421,18 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
* Make sure head and tail are sane since they
* might be user writable.
*/
- old_wc = cq->queue;
- head = old_wc->head;
+ if (u_wc) {
+ old_u_wc = cq->queue;
+ head = RDMA_READ_UAPI_ATOMIC(old_u_wc->head);
+ tail = RDMA_READ_UAPI_ATOMIC(old_u_wc->tail);
+ } else {
+ old_k_wc = cq->kqueue;
+ head = old_k_wc->head;
+ tail = old_k_wc->tail;
+ }
+
if (head > (u32)cq->ibcq.cqe)
head = (u32)cq->ibcq.cqe;
- tail = old_wc->tail;
if (tail > (u32)cq->ibcq.cqe)
tail = (u32)cq->ibcq.cqe;
if (head < tail)
@@ -417,27 +444,36 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
goto bail_unlock;
}
for (n = 0; tail != head; n++) {
- if (cq->ip)
- wc->uqueue[n] = old_wc->uqueue[tail];
+ if (u_wc)
+ u_wc->uqueue[n] = old_u_wc->uqueue[tail];
else
- wc->kqueue[n] = old_wc->kqueue[tail];
+ k_wc->kqueue[n] = old_k_wc->kqueue[tail];
if (tail == (u32)cq->ibcq.cqe)
tail = 0;
else
tail++;
}
cq->ibcq.cqe = cqe;
- wc->head = n;
- wc->tail = 0;
- cq->queue = wc;
+ if (u_wc) {
+ RDMA_WRITE_UAPI_ATOMIC(u_wc->head, n);
+ RDMA_WRITE_UAPI_ATOMIC(u_wc->tail, 0);
+ cq->queue = u_wc;
+ } else {
+ k_wc->head = n;
+ k_wc->tail = 0;
+ cq->kqueue = k_wc;
+ }
spin_unlock_irq(&cq->lock);
- vfree(old_wc);
+ if (u_wc)
+ vfree(old_u_wc);
+ else
+ vfree(old_k_wc);
if (cq->ip) {
struct rvt_mmap_info *ip = cq->ip;
- rvt_update_mmap_info(rdi, ip, sz, wc);
+ rvt_update_mmap_info(rdi, ip, sz, u_wc);
/*
* Return the offset to mmap.
@@ -461,7 +497,9 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
bail_unlock:
spin_unlock_irq(&cq->lock);
bail_free:
- vfree(wc);
+ vfree(u_wc);
+ vfree(k_wc);
+
return ret;
}
@@ -479,7 +517,7 @@ bail_free:
int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
- struct rvt_cq_wc *wc;
+ struct rvt_k_cq_wc *wc;
unsigned long flags;
int npolled;
u32 tail;
@@ -490,7 +528,7 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
spin_lock_irqsave(&cq->lock, flags);
- wc = cq->queue;
+ wc = cq->kqueue;
tail = wc->tail;
if (tail > (u32)cq->ibcq.cqe)
tail = (u32)cq->ibcq.cqe;
diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h
index 3ad6faf18ecb..5e26a2eb19a4 100644
--- a/drivers/infiniband/sw/rdmavt/cq.h
+++ b/drivers/infiniband/sw/rdmavt/cq.h
@@ -51,10 +51,9 @@
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_cq.h>
-struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
-int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index f48240f66b8f..a6a39f01dca3 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -562,8 +562,7 @@ int rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
if (ret)
goto out;
rvt_deinit_mregion(&mr->mr);
- if (mr->umem)
- ib_umem_release(mr->umem);
+ ib_umem_release(mr->umem);
kfree(mr);
out:
return ret;
@@ -613,8 +612,8 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
n = mapped_segs % RVT_SEGSZ;
mr->mr.map[m]->segs[n].vaddr = (void *)addr;
mr->mr.map[m]->segs[n].length = ps;
- trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
mr->mr.length += ps;
+ trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
return 0;
}
@@ -643,6 +642,7 @@ int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
mr->mr.iova = ibmr->iova;
mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
mr->mr.length = (size_t)ibmr->length;
+ trace_rvt_map_mr_sg(ibmr, sg_nents, sg_offset);
return ret;
}
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index c5a50614a6c6..0b0a241c57ff 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 - 2018 Intel Corporation.
+ * Copyright(c) 2016 - 2019 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -58,6 +58,8 @@
#include "vt.h"
#include "trace.h"
+#define RVT_RWQ_COUNT_THRESHOLD 16
+
static void rvt_rc_timeout(struct timer_list *t);
/*
@@ -803,6 +805,47 @@ static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
}
/**
+ * rvt_alloc_rq - allocate memory for user or kernel buffer
+ * @rq: receive queue data structure
+ * @size: number of request queue entries
+ * @node: The NUMA node
+ * @udata: True if user data is available or not false
+ *
+ * Return: If memory allocation failed, return -ENONEM
+ * This function is used by both shared receive
+ * queues and non-shared receive queues to allocate
+ * memory.
+ */
+int rvt_alloc_rq(struct rvt_rq *rq, u32 size, int node,
+ struct ib_udata *udata)
+{
+ if (udata) {
+ rq->wq = vmalloc_user(sizeof(struct rvt_rwq) + size);
+ if (!rq->wq)
+ goto bail;
+ /* need kwq with no buffers */
+ rq->kwq = kzalloc_node(sizeof(*rq->kwq), GFP_KERNEL, node);
+ if (!rq->kwq)
+ goto bail;
+ rq->kwq->curr_wq = rq->wq->wq;
+ } else {
+ /* need kwq with buffers */
+ rq->kwq =
+ vzalloc_node(sizeof(struct rvt_krwq) + size, node);
+ if (!rq->kwq)
+ goto bail;
+ rq->kwq->curr_wq = rq->kwq->wq;
+ }
+
+ spin_lock_init(&rq->kwq->p_lock);
+ spin_lock_init(&rq->kwq->c_lock);
+ return 0;
+bail:
+ rvt_free_rq(rq);
+ return -ENOMEM;
+}
+
+/**
* rvt_init_qp - initialize the QP state to the reset state
* @qp: the QP to init or reinit
* @type: the QP type
@@ -852,10 +895,8 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
qp->s_tail_ack_queue = 0;
qp->s_acked_ack_queue = 0;
qp->s_num_rd_atomic = 0;
- if (qp->r_rq.wq) {
- qp->r_rq.wq->head = 0;
- qp->r_rq.wq->tail = 0;
- }
+ if (qp->r_rq.kwq)
+ qp->r_rq.kwq->count = qp->r_rq.size;
qp->r_sge.num_sge = 0;
atomic_set(&qp->s_reserved_used, 0);
}
@@ -928,6 +969,61 @@ static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
}
/**
+ * get_allowed_ops - Given a QP type return the appropriate allowed OP
+ * @type: valid, supported, QP type
+ */
+static u8 get_allowed_ops(enum ib_qp_type type)
+{
+ return type == IB_QPT_RC ? IB_OPCODE_RC : type == IB_QPT_UC ?
+ IB_OPCODE_UC : IB_OPCODE_UD;
+}
+
+/**
+ * free_ud_wq_attr - Clean up AH attribute cache for UD QPs
+ * @qp: Valid QP with allowed_ops set
+ *
+ * The rvt_swqe data structure being used is a union, so this is
+ * only valid for UD QPs.
+ */
+static void free_ud_wq_attr(struct rvt_qp *qp)
+{
+ struct rvt_swqe *wqe;
+ int i;
+
+ for (i = 0; qp->allowed_ops == IB_OPCODE_UD && i < qp->s_size; i++) {
+ wqe = rvt_get_swqe_ptr(qp, i);
+ kfree(wqe->ud_wr.attr);
+ wqe->ud_wr.attr = NULL;
+ }
+}
+
+/**
+ * alloc_ud_wq_attr - AH attribute cache for UD QPs
+ * @qp: Valid QP with allowed_ops set
+ * @node: Numa node for allocation
+ *
+ * The rvt_swqe data structure being used is a union, so this is
+ * only valid for UD QPs.
+ */
+static int alloc_ud_wq_attr(struct rvt_qp *qp, int node)
+{
+ struct rvt_swqe *wqe;
+ int i;
+
+ for (i = 0; qp->allowed_ops == IB_OPCODE_UD && i < qp->s_size; i++) {
+ wqe = rvt_get_swqe_ptr(qp, i);
+ wqe->ud_wr.attr = kzalloc_node(sizeof(*wqe->ud_wr.attr),
+ GFP_KERNEL, node);
+ if (!wqe->ud_wr.attr) {
+ free_ud_wq_attr(qp);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+/**
* rvt_create_qp - create a queue pair for a device
* @ibpd: the protection domain who's device we create the queue pair for
* @init_attr: the attributes of the queue pair
@@ -989,9 +1085,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
case IB_QPT_UC:
case IB_QPT_RC:
case IB_QPT_UD:
- sz = sizeof(struct rvt_sge) *
- init_attr->cap.max_send_sge +
- sizeof(struct rvt_swqe);
+ sz = struct_size(swq, sg_list, init_attr->cap.max_send_sge);
swq = vzalloc_node(array_size(sz, sqsize), rdi->dparms.node);
if (!swq)
return ERR_PTR(-ENOMEM);
@@ -1011,6 +1105,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
rdi->dparms.node);
if (!qp)
goto bail_swq;
+ qp->allowed_ops = get_allowed_ops(init_attr->qp_type);
RCU_INIT_POINTER(qp->next, NULL);
if (init_attr->qp_type == IB_QPT_RC) {
@@ -1048,17 +1143,12 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
sizeof(struct rvt_rwqe);
- if (udata)
- qp->r_rq.wq = vmalloc_user(
- sizeof(struct rvt_rwq) +
- qp->r_rq.size * sz);
- else
- qp->r_rq.wq = vzalloc_node(
- sizeof(struct rvt_rwq) +
- qp->r_rq.size * sz,
- rdi->dparms.node);
- if (!qp->r_rq.wq)
+ err = rvt_alloc_rq(&qp->r_rq, qp->r_rq.size * sz,
+ rdi->dparms.node, udata);
+ if (err) {
+ ret = ERR_PTR(err);
goto bail_driver_priv;
+ }
}
/*
@@ -1068,7 +1158,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
spin_lock_init(&qp->r_lock);
spin_lock_init(&qp->s_hlock);
spin_lock_init(&qp->s_lock);
- spin_lock_init(&qp->r_rq.lock);
atomic_set(&qp->refcount, 0);
atomic_set(&qp->local_ops_pending, 0);
init_waitqueue_head(&qp->wait);
@@ -1080,6 +1169,11 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
qp->s_max_sge = init_attr->cap.max_send_sge;
if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
qp->s_flags = RVT_S_SIGNAL_REQ_WR;
+ err = alloc_ud_wq_attr(qp, rdi->dparms.node);
+ if (err) {
+ ret = (ERR_PTR(err));
+ goto bail_driver_priv;
+ }
err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
init_attr->qp_type,
@@ -1172,28 +1266,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
ret = &qp->ibqp;
- /*
- * We have our QP and its good, now keep track of what types of opcodes
- * can be processed on this QP. We do this by keeping track of what the
- * 3 high order bits of the opcode are.
- */
- switch (init_attr->qp_type) {
- case IB_QPT_SMI:
- case IB_QPT_GSI:
- case IB_QPT_UD:
- qp->allowed_ops = IB_OPCODE_UD;
- break;
- case IB_QPT_RC:
- qp->allowed_ops = IB_OPCODE_RC;
- break;
- case IB_QPT_UC:
- qp->allowed_ops = IB_OPCODE_UC;
- break;
- default:
- ret = ERR_PTR(-EINVAL);
- goto bail_ip;
- }
-
return ret;
bail_ip:
@@ -1204,8 +1276,8 @@ bail_qpn:
rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
bail_rq_wq:
- if (!qp->ip)
- vfree(qp->r_rq.wq);
+ rvt_free_rq(&qp->r_rq);
+ free_ud_wq_attr(qp);
bail_driver_priv:
rdi->driver_f.qp_priv_free(rdi, qp);
@@ -1271,19 +1343,26 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
}
wc.status = IB_WC_WR_FLUSH_ERR;
- if (qp->r_rq.wq) {
- struct rvt_rwq *wq;
+ if (qp->r_rq.kwq) {
u32 head;
u32 tail;
-
- spin_lock(&qp->r_rq.lock);
-
+ struct rvt_rwq *wq = NULL;
+ struct rvt_krwq *kwq = NULL;
+
+ spin_lock(&qp->r_rq.kwq->c_lock);
+ /* qp->ip used to validate if there is a user buffer mmaped */
+ if (qp->ip) {
+ wq = qp->r_rq.wq;
+ head = RDMA_READ_UAPI_ATOMIC(wq->head);
+ tail = RDMA_READ_UAPI_ATOMIC(wq->tail);
+ } else {
+ kwq = qp->r_rq.kwq;
+ head = kwq->head;
+ tail = kwq->tail;
+ }
/* sanity check pointers before trusting them */
- wq = qp->r_rq.wq;
- head = wq->head;
if (head >= qp->r_rq.size)
head = 0;
- tail = wq->tail;
if (tail >= qp->r_rq.size)
tail = 0;
while (tail != head) {
@@ -1292,9 +1371,11 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
tail = 0;
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
}
- wq->tail = tail;
-
- spin_unlock(&qp->r_rq.lock);
+ if (qp->ip)
+ RDMA_WRITE_UAPI_ATOMIC(wq->tail, tail);
+ else
+ kwq->tail = tail;
+ spin_unlock(&qp->r_rq.kwq->c_lock);
} else if (qp->ibqp.event_handler) {
ret = 1;
}
@@ -1636,12 +1717,12 @@ int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
if (qp->ip)
kref_put(&qp->ip->ref, rvt_release_mmap_info);
- else
- vfree(qp->r_rq.wq);
+ kvfree(qp->r_rq.kwq);
rdi->driver_f.qp_priv_free(rdi, qp);
kfree(qp->s_ack_queue);
rdma_destroy_ah_attr(&qp->remote_ah_attr);
rdma_destroy_ah_attr(&qp->alt_ah_attr);
+ free_ud_wq_attr(qp);
vfree(qp->s_wq);
kfree(qp);
return 0;
@@ -1723,7 +1804,7 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr)
{
struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
- struct rvt_rwq *wq = qp->r_rq.wq;
+ struct rvt_krwq *wq = qp->r_rq.kwq;
unsigned long flags;
int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) &&
!qp->ibqp.srq;
@@ -1744,12 +1825,12 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
return -EINVAL;
}
- spin_lock_irqsave(&qp->r_rq.lock, flags);
+ spin_lock_irqsave(&qp->r_rq.kwq->p_lock, flags);
next = wq->head + 1;
if (next >= qp->r_rq.size)
next = 0;
- if (next == wq->tail) {
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+ if (next == READ_ONCE(wq->tail)) {
+ spin_unlock_irqrestore(&qp->r_rq.kwq->p_lock, flags);
*bad_wr = wr;
return -ENOMEM;
}
@@ -1766,16 +1847,18 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head);
wqe->wr_id = wr->wr_id;
wqe->num_sge = wr->num_sge;
- for (i = 0; i < wr->num_sge; i++)
- wqe->sg_list[i] = wr->sg_list[i];
+ for (i = 0; i < wr->num_sge; i++) {
+ wqe->sg_list[i].addr = wr->sg_list[i].addr;
+ wqe->sg_list[i].length = wr->sg_list[i].length;
+ wqe->sg_list[i].lkey = wr->sg_list[i].lkey;
+ }
/*
* Make sure queue entry is written
* before the head index.
*/
- smp_wmb();
- wq->head = next;
+ smp_store_release(&wq->head, next);
}
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+ spin_unlock_irqrestore(&qp->r_rq.kwq->p_lock, flags);
}
return 0;
}
@@ -1856,10 +1939,9 @@ static inline int rvt_qp_is_avail(
/* see rvt_qp_wqe_unreserve() */
smp_mb__before_atomic();
- reserved_used = atomic_read(&qp->s_reserved_used);
if (unlikely(reserved_op)) {
/* see rvt_qp_wqe_unreserve() */
- smp_mb__before_atomic();
+ reserved_used = atomic_read(&qp->s_reserved_used);
if (reserved_used >= rdi->dparms.reserved_operations)
return -ENOMEM;
return 0;
@@ -1867,14 +1949,13 @@ static inline int rvt_qp_is_avail(
/* non-reserved operations */
if (likely(qp->s_avail))
return 0;
- slast = READ_ONCE(qp->s_last);
+ /* See rvt_qp_complete_swqe() */
+ slast = smp_load_acquire(&qp->s_last);
if (qp->s_head >= slast)
avail = qp->s_size - (qp->s_head - slast);
else
avail = slast - qp->s_head;
- /* see rvt_qp_wqe_unreserve() */
- smp_mb__before_atomic();
reserved_used = atomic_read(&qp->s_reserved_used);
avail = avail - 1 -
(rdi->dparms.reserved_operations - reserved_used);
@@ -2011,10 +2092,10 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
*/
log_pmtu = qp->log_pmtu;
if (qp->allowed_ops == IB_OPCODE_UD) {
- struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah);
+ struct rvt_ah *ah = rvt_get_swqe_ah(wqe);
log_pmtu = ah->log_pmtu;
- atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
+ rdma_copy_ah_attr(wqe->ud_wr.attr, &ah->attr);
}
if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) {
@@ -2059,7 +2140,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
bail_inval_free_ref:
if (qp->allowed_ops == IB_OPCODE_UD)
- atomic_dec(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
+ rdma_destroy_ah_attr(wqe->ud_wr.attr);
bail_inval_free:
/* release mr holds */
while (j) {
@@ -2145,7 +2226,7 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr)
{
struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
- struct rvt_rwq *wq;
+ struct rvt_krwq *wq;
unsigned long flags;
for (; wr; wr = wr->next) {
@@ -2158,13 +2239,13 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
return -EINVAL;
}
- spin_lock_irqsave(&srq->rq.lock, flags);
- wq = srq->rq.wq;
+ spin_lock_irqsave(&srq->rq.kwq->p_lock, flags);
+ wq = srq->rq.kwq;
next = wq->head + 1;
if (next >= srq->rq.size)
next = 0;
- if (next == wq->tail) {
- spin_unlock_irqrestore(&srq->rq.lock, flags);
+ if (next == READ_ONCE(wq->tail)) {
+ spin_unlock_irqrestore(&srq->rq.kwq->p_lock, flags);
*bad_wr = wr;
return -ENOMEM;
}
@@ -2172,17 +2253,35 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head);
wqe->wr_id = wr->wr_id;
wqe->num_sge = wr->num_sge;
- for (i = 0; i < wr->num_sge; i++)
- wqe->sg_list[i] = wr->sg_list[i];
+ for (i = 0; i < wr->num_sge; i++) {
+ wqe->sg_list[i].addr = wr->sg_list[i].addr;
+ wqe->sg_list[i].length = wr->sg_list[i].length;
+ wqe->sg_list[i].lkey = wr->sg_list[i].lkey;
+ }
/* Make sure queue entry is written before the head index. */
- smp_wmb();
- wq->head = next;
- spin_unlock_irqrestore(&srq->rq.lock, flags);
+ smp_store_release(&wq->head, next);
+ spin_unlock_irqrestore(&srq->rq.kwq->p_lock, flags);
}
return 0;
}
/*
+ * rvt used the internal kernel struct as part of its ABI, for now make sure
+ * the kernel struct does not change layout. FIXME: rvt should never cast the
+ * user struct to a kernel struct.
+ */
+static struct ib_sge *rvt_cast_sge(struct rvt_wqe_sge *sge)
+{
+ BUILD_BUG_ON(offsetof(struct ib_sge, addr) !=
+ offsetof(struct rvt_wqe_sge, addr));
+ BUILD_BUG_ON(offsetof(struct ib_sge, length) !=
+ offsetof(struct rvt_wqe_sge, length));
+ BUILD_BUG_ON(offsetof(struct ib_sge, lkey) !=
+ offsetof(struct rvt_wqe_sge, lkey));
+ return (struct ib_sge *)sge;
+}
+
+/*
* Validate a RWQE and fill in the SGE state.
* Return 1 if OK.
*/
@@ -2205,7 +2304,7 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
continue;
/* Check LKEY */
ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
- NULL, &wqe->sg_list[i],
+ NULL, rvt_cast_sge(&wqe->sg_list[i]),
IB_ACCESS_LOCAL_WRITE);
if (unlikely(ret <= 0))
goto bad_lkey;
@@ -2234,6 +2333,50 @@ bad_lkey:
}
/**
+ * get_count - count numbers of request work queue entries
+ * in circular buffer
+ * @rq: data structure for request queue entry
+ * @tail: tail indices of the circular buffer
+ * @head: head indices of the circular buffer
+ *
+ * Return - total number of entries in the circular buffer
+ */
+static u32 get_count(struct rvt_rq *rq, u32 tail, u32 head)
+{
+ u32 count;
+
+ count = head;
+
+ if (count >= rq->size)
+ count = 0;
+ if (count < tail)
+ count += rq->size - tail;
+ else
+ count -= tail;
+
+ return count;
+}
+
+/**
+ * get_rvt_head - get head indices of the circular buffer
+ * @rq: data structure for request queue entry
+ * @ip: the QP
+ *
+ * Return - head index value
+ */
+static inline u32 get_rvt_head(struct rvt_rq *rq, void *ip)
+{
+ u32 head;
+
+ if (ip)
+ head = RDMA_READ_UAPI_ATOMIC(rq->wq->head);
+ else
+ head = rq->kwq->head;
+
+ return head;
+}
+
+/**
* rvt_get_rwqe - copy the next RWQE into the QP's RWQE
* @qp: the QP
* @wr_id_only: update qp->r_wr_id only, not qp->r_sge
@@ -2247,39 +2390,54 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only)
{
unsigned long flags;
struct rvt_rq *rq;
+ struct rvt_krwq *kwq = NULL;
struct rvt_rwq *wq;
struct rvt_srq *srq;
struct rvt_rwqe *wqe;
void (*handler)(struct ib_event *, void *);
u32 tail;
+ u32 head;
int ret;
+ void *ip = NULL;
if (qp->ibqp.srq) {
srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
handler = srq->ibsrq.event_handler;
rq = &srq->rq;
+ ip = srq->ip;
} else {
srq = NULL;
handler = NULL;
rq = &qp->r_rq;
+ ip = qp->ip;
}
- spin_lock_irqsave(&rq->lock, flags);
+ spin_lock_irqsave(&rq->kwq->c_lock, flags);
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
ret = 0;
goto unlock;
}
+ kwq = rq->kwq;
+ if (ip) {
+ wq = rq->wq;
+ tail = RDMA_READ_UAPI_ATOMIC(wq->tail);
+ } else {
+ tail = kwq->tail;
+ }
- wq = rq->wq;
- tail = wq->tail;
/* Validate tail before using it since it is user writable. */
if (tail >= rq->size)
tail = 0;
- if (unlikely(tail == wq->head)) {
+
+ if (kwq->count < RVT_RWQ_COUNT_THRESHOLD) {
+ head = get_rvt_head(rq, ip);
+ kwq->count = get_count(rq, tail, head);
+ }
+ if (unlikely(kwq->count == 0)) {
ret = 0;
goto unlock;
}
- /* Make sure entry is read after head index is read. */
+ /* Make sure entry is read after the count is read. */
smp_rmb();
wqe = rvt_get_rwqe_ptr(rq, tail);
/*
@@ -2289,43 +2447,41 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only)
*/
if (++tail >= rq->size)
tail = 0;
- wq->tail = tail;
+ if (ip)
+ RDMA_WRITE_UAPI_ATOMIC(wq->tail, tail);
+ else
+ kwq->tail = tail;
if (!wr_id_only && !init_sge(qp, wqe)) {
ret = -1;
goto unlock;
}
qp->r_wr_id = wqe->wr_id;
+ kwq->count--;
ret = 1;
set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
if (handler) {
- u32 n;
-
/*
* Validate head pointer value and compute
* the number of remaining WQEs.
*/
- n = wq->head;
- if (n >= rq->size)
- n = 0;
- if (n < tail)
- n += rq->size - tail;
- else
- n -= tail;
- if (n < srq->limit) {
- struct ib_event ev;
-
- srq->limit = 0;
- spin_unlock_irqrestore(&rq->lock, flags);
- ev.device = qp->ibqp.device;
- ev.element.srq = qp->ibqp.srq;
- ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
- handler(&ev, srq->ibsrq.srq_context);
- goto bail;
+ if (kwq->count < srq->limit) {
+ kwq->count = get_count(rq, tail, get_rvt_head(rq, ip));
+ if (kwq->count < srq->limit) {
+ struct ib_event ev;
+
+ srq->limit = 0;
+ spin_unlock_irqrestore(&rq->kwq->c_lock, flags);
+ ev.device = qp->ibqp.device;
+ ev.element.srq = qp->ibqp.srq;
+ ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ handler(&ev, srq->ibsrq.srq_context);
+ goto bail;
+ }
}
}
unlock:
- spin_unlock_irqrestore(&rq->lock, flags);
+ spin_unlock_irqrestore(&rq->kwq->c_lock, flags);
bail:
return ret;
}
@@ -2667,27 +2823,16 @@ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status)
{
u32 old_last, last;
- struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+ struct rvt_dev_info *rdi;
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
+ rdi = ib_to_rvt(qp->ibqp.device);
- last = qp->s_last;
- old_last = last;
- trace_rvt_qp_send_completion(qp, wqe, last);
- if (++last >= qp->s_size)
- last = 0;
- trace_rvt_qp_send_completion(qp, wqe, last);
- qp->s_last = last;
- /* See post_send() */
- barrier();
- rvt_put_qp_swqe(qp, wqe);
-
- rvt_qp_swqe_complete(qp,
- wqe,
- rdi->wc_opcode[wqe->wr.opcode],
- status);
-
+ old_last = qp->s_last;
+ trace_rvt_qp_send_completion(qp, wqe, old_last);
+ last = rvt_qp_complete_swqe(qp, wqe, rdi->wc_opcode[wqe->wr.opcode],
+ status);
if (qp->s_acked == old_last)
qp->s_acked = last;
if (qp->s_cur == old_last)
@@ -3021,8 +3166,7 @@ do_write:
wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- wqe->wr.send_flags & IB_SEND_SOLICITED);
+ rvt_recv_cq(qp, &wc, wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
spin_unlock_irqrestore(&qp->r_lock, flags);
diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h
index 6db1619389b0..2cdba1283bf6 100644
--- a/drivers/infiniband/sw/rdmavt/qp.h
+++ b/drivers/infiniband/sw/rdmavt/qp.h
@@ -68,4 +68,6 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
int rvt_wss_init(struct rvt_dev_info *rdi);
void rvt_wss_exit(struct rvt_dev_info *rdi);
+int rvt_alloc_rq(struct rvt_rq *rq, u32 size, int node,
+ struct ib_udata *udata);
#endif /* DEF_RVTQP_H */
diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c
index 09f0cf538be6..890d7b760d2e 100644
--- a/drivers/infiniband/sw/rdmavt/rc.c
+++ b/drivers/infiniband/sw/rdmavt/rc.c
@@ -104,26 +104,33 @@ __be32 rvt_compute_aeth(struct rvt_qp *qp)
} else {
u32 min, max, x;
u32 credits;
- struct rvt_rwq *wq = qp->r_rq.wq;
u32 head;
u32 tail;
- /* sanity check pointers before trusting them */
- head = wq->head;
- if (head >= qp->r_rq.size)
- head = 0;
- tail = wq->tail;
- if (tail >= qp->r_rq.size)
- tail = 0;
- /*
- * Compute the number of credits available (RWQEs).
- * There is a small chance that the pair of reads are
- * not atomic, which is OK, since the fuzziness is
- * resolved as further ACKs go out.
- */
- credits = head - tail;
- if ((int)credits < 0)
- credits += qp->r_rq.size;
+ credits = READ_ONCE(qp->r_rq.kwq->count);
+ if (credits == 0) {
+ /* sanity check pointers before trusting them */
+ if (qp->ip) {
+ head = RDMA_READ_UAPI_ATOMIC(qp->r_rq.wq->head);
+ tail = RDMA_READ_UAPI_ATOMIC(qp->r_rq.wq->tail);
+ } else {
+ head = READ_ONCE(qp->r_rq.kwq->head);
+ tail = READ_ONCE(qp->r_rq.kwq->tail);
+ }
+ if (head >= qp->r_rq.size)
+ head = 0;
+ if (tail >= qp->r_rq.size)
+ tail = 0;
+ /*
+ * Compute the number of credits available (RWQEs).
+ * There is a small chance that the pair of reads are
+ * not atomic, which is OK, since the fuzziness is
+ * resolved as further ACKs go out.
+ */
+ credits = head - tail;
+ if ((int)credits < 0)
+ credits += qp->r_rq.size;
+ }
/*
* Binary search the credit table to find the code to
* use.
diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
index 8d6b3e764255..24fef021d51d 100644
--- a/drivers/infiniband/sw/rdmavt/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c
@@ -52,7 +52,7 @@
#include "srq.h"
#include "vt.h"
-
+#include "qp.h"
/**
* rvt_driver_srq_init - init srq resources on a per driver basis
* @rdi: rvt dev structure
@@ -97,11 +97,8 @@ int rvt_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *srq_init_attr,
srq->rq.max_sge = srq_init_attr->attr.max_sge;
sz = sizeof(struct ib_sge) * srq->rq.max_sge +
sizeof(struct rvt_rwqe);
- srq->rq.wq = udata ?
- vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz) :
- vzalloc_node(sizeof(struct rvt_rwq) + srq->rq.size * sz,
- dev->dparms.node);
- if (!srq->rq.wq) {
+ if (rvt_alloc_rq(&srq->rq, srq->rq.size * sz,
+ dev->dparms.node, udata)) {
ret = -ENOMEM;
goto bail_srq;
}
@@ -152,7 +149,7 @@ int rvt_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *srq_init_attr,
bail_ip:
kfree(srq->ip);
bail_wq:
- vfree(srq->rq.wq);
+ rvt_free_rq(&srq->rq);
bail_srq:
return ret;
}
@@ -172,11 +169,12 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
{
struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device);
- struct rvt_rwq *wq;
+ struct rvt_rq tmp_rq = {};
int ret = 0;
if (attr_mask & IB_SRQ_MAX_WR) {
- struct rvt_rwq *owq;
+ struct rvt_krwq *okwq = NULL;
+ struct rvt_rwq *owq = NULL;
struct rvt_rwqe *p;
u32 sz, size, n, head, tail;
@@ -185,17 +183,12 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
((attr_mask & IB_SRQ_LIMIT) ?
attr->srq_limit : srq->limit) > attr->max_wr)
return -EINVAL;
-
sz = sizeof(struct rvt_rwqe) +
srq->rq.max_sge * sizeof(struct ib_sge);
size = attr->max_wr + 1;
- wq = udata ?
- vmalloc_user(sizeof(struct rvt_rwq) + size * sz) :
- vzalloc_node(sizeof(struct rvt_rwq) + size * sz,
- dev->dparms.node);
- if (!wq)
+ if (rvt_alloc_rq(&tmp_rq, size * sz, dev->dparms.node,
+ udata))
return -ENOMEM;
-
/* Check that we can write the offset to mmap. */
if (udata && udata->inlen >= sizeof(__u64)) {
__u64 offset_addr;
@@ -213,14 +206,20 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
goto bail_free;
}
- spin_lock_irq(&srq->rq.lock);
+ spin_lock_irq(&srq->rq.kwq->c_lock);
/*
* validate head and tail pointer values and compute
* the number of remaining WQEs.
*/
- owq = srq->rq.wq;
- head = owq->head;
- tail = owq->tail;
+ if (udata) {
+ owq = srq->rq.wq;
+ head = RDMA_READ_UAPI_ATOMIC(owq->head);
+ tail = RDMA_READ_UAPI_ATOMIC(owq->tail);
+ } else {
+ okwq = srq->rq.kwq;
+ head = okwq->head;
+ tail = okwq->tail;
+ }
if (head >= srq->rq.size || tail >= srq->rq.size) {
ret = -EINVAL;
goto bail_unlock;
@@ -235,7 +234,7 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
goto bail_unlock;
}
n = 0;
- p = wq->wq;
+ p = tmp_rq.kwq->curr_wq;
while (tail != head) {
struct rvt_rwqe *wqe;
int i;
@@ -250,22 +249,29 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
if (++tail >= srq->rq.size)
tail = 0;
}
- srq->rq.wq = wq;
+ srq->rq.kwq = tmp_rq.kwq;
+ if (udata) {
+ srq->rq.wq = tmp_rq.wq;
+ RDMA_WRITE_UAPI_ATOMIC(tmp_rq.wq->head, n);
+ RDMA_WRITE_UAPI_ATOMIC(tmp_rq.wq->tail, 0);
+ } else {
+ tmp_rq.kwq->head = n;
+ tmp_rq.kwq->tail = 0;
+ }
srq->rq.size = size;
- wq->head = n;
- wq->tail = 0;
if (attr_mask & IB_SRQ_LIMIT)
srq->limit = attr->srq_limit;
- spin_unlock_irq(&srq->rq.lock);
+ spin_unlock_irq(&srq->rq.kwq->c_lock);
vfree(owq);
+ kvfree(okwq);
if (srq->ip) {
struct rvt_mmap_info *ip = srq->ip;
struct rvt_dev_info *dev = ib_to_rvt(srq->ibsrq.device);
u32 s = sizeof(struct rvt_rwq) + size * sz;
- rvt_update_mmap_info(dev, ip, s, wq);
+ rvt_update_mmap_info(dev, ip, s, tmp_rq.wq);
/*
* Return the offset to mmap.
@@ -289,19 +295,19 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
spin_unlock_irq(&dev->pending_lock);
}
} else if (attr_mask & IB_SRQ_LIMIT) {
- spin_lock_irq(&srq->rq.lock);
+ spin_lock_irq(&srq->rq.kwq->c_lock);
if (attr->srq_limit >= srq->rq.size)
ret = -EINVAL;
else
srq->limit = attr->srq_limit;
- spin_unlock_irq(&srq->rq.lock);
+ spin_unlock_irq(&srq->rq.kwq->c_lock);
}
return ret;
bail_unlock:
- spin_unlock_irq(&srq->rq.lock);
+ spin_unlock_irq(&srq->rq.kwq->c_lock);
bail_free:
- vfree(wq);
+ rvt_free_rq(&tmp_rq);
return ret;
}
@@ -336,6 +342,5 @@ void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
spin_unlock(&dev->n_srqs_lock);
if (srq->ip)
kref_put(&srq->ip->ref, rvt_release_mmap_info);
- else
- vfree(srq->rq.wq);
+ kvfree(srq->rq.kwq);
}
diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h
index 976e482930a3..95b8a0e3b8bd 100644
--- a/drivers/infiniband/sw/rdmavt/trace_mr.h
+++ b/drivers/infiniband/sw/rdmavt/trace_mr.h
@@ -54,6 +54,8 @@
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_mr.h>
+#include "mr.h"
+
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rvt_mr
DECLARE_EVENT_CLASS(
@@ -64,8 +66,12 @@ DECLARE_EVENT_CLASS(
RDI_DEV_ENTRY(ib_to_rvt(mr->pd->device))
__field(void *, vaddr)
__field(struct page *, page)
+ __field(u64, iova)
+ __field(u64, user_base)
__field(size_t, len)
+ __field(size_t, length)
__field(u32, lkey)
+ __field(u32, offset)
__field(u16, m)
__field(u16, n)
),
@@ -73,18 +79,28 @@ DECLARE_EVENT_CLASS(
RDI_DEV_ASSIGN(ib_to_rvt(mr->pd->device));
__entry->vaddr = v;
__entry->page = virt_to_page(v);
+ __entry->iova = mr->iova;
+ __entry->user_base = mr->user_base;
+ __entry->lkey = mr->lkey;
__entry->m = m;
__entry->n = n;
__entry->len = len;
+ __entry->length = mr->length;
+ __entry->offset = mr->offset;
),
TP_printk(
- "[%s] vaddr %p page %p m %u n %u len %ld",
+ "[%s] lkey %x iova %llx user_base %llx mr_len %lu vaddr %llx page %p m %u n %u len %lu off %u",
__get_str(dev),
- __entry->vaddr,
+ __entry->lkey,
+ __entry->iova,
+ __entry->user_base,
+ __entry->length,
+ (unsigned long long)__entry->vaddr,
__entry->page,
__entry->m,
__entry->n,
- __entry->len
+ __entry->len,
+ __entry->offset
)
);
@@ -165,6 +181,40 @@ DEFINE_EVENT(
TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
TP_ARGS(sge, isge));
+TRACE_EVENT(
+ rvt_map_mr_sg,
+ TP_PROTO(struct ib_mr *ibmr, int sg_nents, unsigned int *sg_offset),
+ TP_ARGS(ibmr, sg_nents, sg_offset),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(to_imr(ibmr)->mr.pd->device))
+ __field(u64, iova)
+ __field(u64, ibmr_iova)
+ __field(u64, user_base)
+ __field(u64, ibmr_length)
+ __field(int, sg_nents)
+ __field(uint, sg_offset)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(to_imr(ibmr)->mr.pd->device))
+ __entry->ibmr_iova = ibmr->iova;
+ __entry->iova = to_imr(ibmr)->mr.iova;
+ __entry->user_base = to_imr(ibmr)->mr.user_base;
+ __entry->ibmr_length = to_imr(ibmr)->mr.length;
+ __entry->sg_nents = sg_nents;
+ __entry->sg_offset = sg_offset ? *sg_offset : 0;
+ ),
+ TP_printk(
+ "[%s] ibmr_iova %llx iova %llx user_base %llx length %llx sg_nents %d sg_offset %u",
+ __get_str(dev),
+ __entry->ibmr_iova,
+ __entry->iova,
+ __entry->user_base,
+ __entry->ibmr_length,
+ __entry->sg_nents,
+ __entry->sg_offset
+ )
+);
+
#endif /* __RVT_TRACE_MR_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 9546a837a8ac..18da1e1ea979 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -382,6 +382,8 @@ enum {
};
static const struct ib_device_ops rvt_dev_ops = {
+ .uverbs_abi_ver = RVT_UVERBS_ABI_VERSION,
+
.alloc_fmr = rvt_alloc_fmr,
.alloc_mr = rvt_alloc_mr,
.alloc_pd = rvt_alloc_pd,
@@ -427,6 +429,7 @@ static const struct ib_device_ops rvt_dev_ops = {
.unmap_fmr = rvt_unmap_fmr,
INIT_RDMA_OBJ_SIZE(ib_ah, rvt_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, rvt_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, rvt_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_srq, rvt_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, rvt_ucontext, ibucontext),
@@ -530,7 +533,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
*
* Return: 0 on success otherwise an errno.
*/
-int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
+int rvt_register_device(struct rvt_dev_info *rdi)
{
int ret = 0, i;
@@ -600,7 +603,6 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
* exactly which functions rdmavt supports, nor do they know the ABI
* version, so we do all of this sort of stuff here.
*/
- rdi->ibdev.uverbs_abi_ver = RVT_UVERBS_ABI_VERSION;
rdi->ibdev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -636,7 +638,6 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
if (!rdi->ibdev.num_comp_vectors)
rdi->ibdev.num_comp_vectors = 1;
- rdi->ibdev.driver_id = driver_id;
/* We are now good to announce we exist */
ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev));
if (ret) {
diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h
index 0675ea6c3872..d19ff817c2c7 100644
--- a/drivers/infiniband/sw/rdmavt/vt.h
+++ b/drivers/infiniband/sw/rdmavt/vt.h
@@ -78,6 +78,12 @@
fmt, \
##__VA_ARGS__)
+#define rvt_pr_err_ratelimited(rdi, fmt, ...) \
+ __rvt_pr_err_ratelimited((rdi)->driver_f.get_pci_dev(rdi), \
+ rvt_get_ibdev_name(rdi), \
+ fmt, \
+ ##__VA_ARGS__)
+
#define __rvt_pr_info(pdev, name, fmt, ...) \
dev_info(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__)
@@ -87,6 +93,9 @@
#define __rvt_pr_err(pdev, name, fmt, ...) \
dev_err(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__)
+#define __rvt_pr_err_ratelimited(pdev, name, fmt, ...) \
+ dev_err_ratelimited(&(pdev)->dev, "%s: " fmt, name, ##__VA_ARGS__)
+
static inline int ibport_num_to_idx(struct ib_device *ibdev, u8 port_num)
{
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 00eb99d3df86..116cafc9afcf 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -558,7 +558,7 @@ int rxe_completer(void *arg)
{
struct rxe_qp *qp = (struct rxe_qp *)arg;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
- struct rxe_send_wqe *wqe = wqe;
+ struct rxe_send_wqe *wqe = NULL;
struct sk_buff *skb = NULL;
struct rxe_pkt_info *pkt = NULL;
enum comp_state state;
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index f501f72489d8..ea6a819b7167 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -96,8 +96,7 @@ void rxe_mem_cleanup(struct rxe_pool_entry *arg)
struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem);
int i;
- if (mem->umem)
- ib_umem_release(mem->umem);
+ ib_umem_release(mem->umem);
if (mem->map) {
for (i = 0; i < mem->num_map; i++)
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 56cf18af016a..fbcbac52290b 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -72,6 +72,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_CQ] = {
.name = "rxe-cq",
.size = sizeof(struct rxe_cq),
+ .flags = RXE_POOL_NO_ALLOC,
.cleanup = rxe_cq_cleanup,
},
[RXE_TYPE_MR] = {
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index aca9f60f9b21..1cbfbd98eb22 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -431,6 +431,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
qp->resp.va = reth_va(pkt);
qp->resp.rkey = reth_rkey(pkt);
qp->resp.resid = reth_len(pkt);
+ qp->resp.length = reth_len(pkt);
}
access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
: IB_ACCESS_REMOTE_WRITE;
@@ -856,7 +857,9 @@ static enum resp_states do_complete(struct rxe_qp *qp,
pkt->mask & RXE_WRITE_MASK) ?
IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
wc->vendor_err = 0;
- wc->byte_len = wqe->dma.length - wqe->dma.resid;
+ wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
+ pkt->mask & RXE_WRITE_MASK) ?
+ qp->resp.length : wqe->dma.length - wqe->dma.resid;
/* fields after byte_len are different between kernel and user
* space
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 8c3e2a18cfe4..4ebdfcf4d33e 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -778,55 +778,43 @@ err1:
return err;
}
-static struct ib_cq *rxe_create_cq(struct ib_device *dev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
int err;
+ struct ib_device *dev = ibcq->device;
struct rxe_dev *rxe = to_rdev(dev);
- struct rxe_cq *cq;
+ struct rxe_cq *cq = to_rcq(ibcq);
struct rxe_create_cq_resp __user *uresp = NULL;
if (udata) {
if (udata->outlen < sizeof(*uresp))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
uresp = udata->outbuf;
}
if (attr->flags)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
if (err)
- goto err1;
-
- cq = rxe_alloc(&rxe->cq_pool);
- if (!cq) {
- err = -ENOMEM;
- goto err1;
- }
+ return err;
err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, udata,
uresp);
if (err)
- goto err2;
-
- return &cq->ibcq;
+ return err;
-err2:
- rxe_drop_ref(cq);
-err1:
- return ERR_PTR(err);
+ return rxe_add_to_pool(&rxe->cq_pool, &cq->pelem);
}
-static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+static void rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct rxe_cq *cq = to_rcq(ibcq);
rxe_cq_disable(cq);
rxe_drop_ref(cq);
- return 0;
}
static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
@@ -1111,6 +1099,10 @@ static int rxe_enable_driver(struct ib_device *ib_dev)
}
static const struct ib_device_ops rxe_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_RXE,
+ .uverbs_abi_ver = RXE_UVERBS_ABI_VERSION,
+
.alloc_hw_stats = rxe_ib_alloc_hw_stats,
.alloc_mr = rxe_alloc_mr,
.alloc_pd = rxe_alloc_pd,
@@ -1157,6 +1149,7 @@ static const struct ib_device_ops rxe_dev_ops = {
.resize_cq = rxe_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc),
@@ -1170,7 +1163,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
- dev->owner = THIS_MODULE;
dev->node_type = RDMA_NODE_IB_CA;
dev->phys_port_cnt = 1;
dev->num_comp_vectors = num_possible_cpus();
@@ -1182,7 +1174,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
dma_coerce_mask_and_coherent(&dev->dev,
dma_get_required_mask(&dev->dev));
- dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
| BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
| BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
@@ -1230,7 +1221,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
rxe->tfm = tfm;
rdma_set_device_sysfs_group(dev, &rxe_attr_group);
- dev->driver_id = RDMA_DRIVER_RXE;
err = ib_register_device(dev, ibdev_name);
if (err)
pr_warn("%s failed with error %d\n", __func__, err);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index e8be7f44e3be..5c4b2239129c 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -85,8 +85,8 @@ struct rxe_cqe {
};
struct rxe_cq {
- struct rxe_pool_entry pelem;
struct ib_cq ibcq;
+ struct rxe_pool_entry pelem;
struct rxe_queue *queue;
spinlock_t cq_lock;
u8 notify;
@@ -213,6 +213,7 @@ struct rxe_resp_info {
struct rxe_mem *mr;
u32 resid;
u32 rkey;
+ u32 length;
u64 atomic_orig;
/* SRQ only */
diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig
new file mode 100644
index 000000000000..dace276aea14
--- /dev/null
+++ b/drivers/infiniband/sw/siw/Kconfig
@@ -0,0 +1,18 @@
+config RDMA_SIW
+ tristate "Software RDMA over TCP/IP (iWARP) driver"
+ depends on INET && INFINIBAND && LIBCRC32C && 64BIT
+ select DMA_VIRT_OPS
+ help
+ This driver implements the iWARP RDMA transport over
+ the Linux TCP/IP network stack. It enables a system with a
+ standard Ethernet adapter to interoperate with a iWARP
+ adapter or with another system running the SIW driver.
+ (See also RXE which is a similar software driver for RoCE.)
+
+ The driver interfaces with the Linux RDMA stack and
+ implements both a kernel and user space RDMA verbs API.
+ The user space verbs API requires a support
+ library named libsiw which is loaded by the generic user
+ space verbs API, libibverbs. To implement RDMA over
+ TCP/IP, the driver further interfaces with the Linux
+ in-kernel TCP socket layer.
diff --git a/drivers/infiniband/sw/siw/Makefile b/drivers/infiniband/sw/siw/Makefile
new file mode 100644
index 000000000000..f5f7e3867889
--- /dev/null
+++ b/drivers/infiniband/sw/siw/Makefile
@@ -0,0 +1,11 @@
+obj-$(CONFIG_RDMA_SIW) += siw.o
+
+siw-y := \
+ siw_cm.o \
+ siw_cq.o \
+ siw_main.o \
+ siw_mem.o \
+ siw_qp.o \
+ siw_qp_tx.o \
+ siw_qp_rx.o \
+ siw_verbs.o
diff --git a/drivers/infiniband/sw/siw/iwarp.h b/drivers/infiniband/sw/siw/iwarp.h
new file mode 100644
index 000000000000..e8a04d9c89cb
--- /dev/null
+++ b/drivers/infiniband/sw/siw/iwarp.h
@@ -0,0 +1,380 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+#ifndef _IWARP_H
+#define _IWARP_H
+
+#include <rdma/rdma_user_cm.h> /* RDMA_MAX_PRIVATE_DATA */
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+#define RDMAP_VERSION 1
+#define DDP_VERSION 1
+#define MPA_REVISION_1 1
+#define MPA_REVISION_2 2
+#define MPA_MAX_PRIVDATA RDMA_MAX_PRIVATE_DATA
+#define MPA_KEY_REQ "MPA ID Req Frame"
+#define MPA_KEY_REP "MPA ID Rep Frame"
+#define MPA_IRD_ORD_MASK 0x3fff
+
+struct mpa_rr_params {
+ __be16 bits;
+ __be16 pd_len;
+};
+
+/*
+ * MPA request/response header bits & fields
+ */
+enum {
+ MPA_RR_FLAG_MARKERS = cpu_to_be16(0x8000),
+ MPA_RR_FLAG_CRC = cpu_to_be16(0x4000),
+ MPA_RR_FLAG_REJECT = cpu_to_be16(0x2000),
+ MPA_RR_FLAG_ENHANCED = cpu_to_be16(0x1000),
+ MPA_RR_FLAG_GSO_EXP = cpu_to_be16(0x0800),
+ MPA_RR_MASK_REVISION = cpu_to_be16(0x00ff)
+};
+
+/*
+ * MPA request/reply header
+ */
+struct mpa_rr {
+ __u8 key[16];
+ struct mpa_rr_params params;
+};
+
+static inline void __mpa_rr_set_revision(__be16 *bits, u8 rev)
+{
+ *bits = (*bits & ~MPA_RR_MASK_REVISION) |
+ (cpu_to_be16(rev) & MPA_RR_MASK_REVISION);
+}
+
+static inline u8 __mpa_rr_revision(__be16 mpa_rr_bits)
+{
+ __be16 rev = mpa_rr_bits & MPA_RR_MASK_REVISION;
+
+ return be16_to_cpu(rev);
+}
+
+enum mpa_v2_ctrl {
+ MPA_V2_PEER_TO_PEER = cpu_to_be16(0x8000),
+ MPA_V2_ZERO_LENGTH_RTR = cpu_to_be16(0x4000),
+ MPA_V2_RDMA_WRITE_RTR = cpu_to_be16(0x8000),
+ MPA_V2_RDMA_READ_RTR = cpu_to_be16(0x4000),
+ MPA_V2_RDMA_NO_RTR = cpu_to_be16(0x0000),
+ MPA_V2_MASK_IRD_ORD = cpu_to_be16(0x3fff)
+};
+
+struct mpa_v2_data {
+ __be16 ird;
+ __be16 ord;
+};
+
+struct mpa_marker {
+ __be16 rsvd;
+ __be16 fpdu_hmd; /* FPDU header-marker distance (= MPA's FPDUPTR) */
+};
+
+/*
+ * maximum MPA trailer
+ */
+struct mpa_trailer {
+ __u8 pad[4];
+ __be32 crc;
+};
+
+#define MPA_HDR_SIZE 2
+#define MPA_CRC_SIZE 4
+
+/*
+ * Common portion of iWARP headers (MPA, DDP, RDMAP)
+ * for any FPDU
+ */
+struct iwarp_ctrl {
+ __be16 mpa_len;
+ __be16 ddp_rdmap_ctrl;
+};
+
+/*
+ * DDP/RDMAP Hdr bits & fields
+ */
+enum {
+ DDP_FLAG_TAGGED = cpu_to_be16(0x8000),
+ DDP_FLAG_LAST = cpu_to_be16(0x4000),
+ DDP_MASK_RESERVED = cpu_to_be16(0x3C00),
+ DDP_MASK_VERSION = cpu_to_be16(0x0300),
+ RDMAP_MASK_VERSION = cpu_to_be16(0x00C0),
+ RDMAP_MASK_RESERVED = cpu_to_be16(0x0030),
+ RDMAP_MASK_OPCODE = cpu_to_be16(0x000f)
+};
+
+static inline u8 __ddp_get_version(struct iwarp_ctrl *ctrl)
+{
+ return be16_to_cpu(ctrl->ddp_rdmap_ctrl & DDP_MASK_VERSION) >> 8;
+}
+
+static inline void __ddp_set_version(struct iwarp_ctrl *ctrl, u8 version)
+{
+ ctrl->ddp_rdmap_ctrl =
+ (ctrl->ddp_rdmap_ctrl & ~DDP_MASK_VERSION) |
+ (cpu_to_be16((u16)version << 8) & DDP_MASK_VERSION);
+}
+
+static inline u8 __rdmap_get_version(struct iwarp_ctrl *ctrl)
+{
+ __be16 ver = ctrl->ddp_rdmap_ctrl & RDMAP_MASK_VERSION;
+
+ return be16_to_cpu(ver) >> 6;
+}
+
+static inline void __rdmap_set_version(struct iwarp_ctrl *ctrl, u8 version)
+{
+ ctrl->ddp_rdmap_ctrl = (ctrl->ddp_rdmap_ctrl & ~RDMAP_MASK_VERSION) |
+ (cpu_to_be16(version << 6) & RDMAP_MASK_VERSION);
+}
+
+static inline u8 __rdmap_get_opcode(struct iwarp_ctrl *ctrl)
+{
+ return be16_to_cpu(ctrl->ddp_rdmap_ctrl & RDMAP_MASK_OPCODE);
+}
+
+static inline void __rdmap_set_opcode(struct iwarp_ctrl *ctrl, u8 opcode)
+{
+ ctrl->ddp_rdmap_ctrl = (ctrl->ddp_rdmap_ctrl & ~RDMAP_MASK_OPCODE) |
+ (cpu_to_be16(opcode) & RDMAP_MASK_OPCODE);
+}
+
+struct iwarp_rdma_write {
+ struct iwarp_ctrl ctrl;
+ __be32 sink_stag;
+ __be64 sink_to;
+};
+
+struct iwarp_rdma_rreq {
+ struct iwarp_ctrl ctrl;
+ __be32 rsvd;
+ __be32 ddp_qn;
+ __be32 ddp_msn;
+ __be32 ddp_mo;
+ __be32 sink_stag;
+ __be64 sink_to;
+ __be32 read_size;
+ __be32 source_stag;
+ __be64 source_to;
+};
+
+struct iwarp_rdma_rresp {
+ struct iwarp_ctrl ctrl;
+ __be32 sink_stag;
+ __be64 sink_to;
+};
+
+struct iwarp_send {
+ struct iwarp_ctrl ctrl;
+ __be32 rsvd;
+ __be32 ddp_qn;
+ __be32 ddp_msn;
+ __be32 ddp_mo;
+};
+
+struct iwarp_send_inv {
+ struct iwarp_ctrl ctrl;
+ __be32 inval_stag;
+ __be32 ddp_qn;
+ __be32 ddp_msn;
+ __be32 ddp_mo;
+};
+
+struct iwarp_terminate {
+ struct iwarp_ctrl ctrl;
+ __be32 rsvd;
+ __be32 ddp_qn;
+ __be32 ddp_msn;
+ __be32 ddp_mo;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __be32 layer : 4;
+ __be32 etype : 4;
+ __be32 ecode : 8;
+ __be32 flag_m : 1;
+ __be32 flag_d : 1;
+ __be32 flag_r : 1;
+ __be32 reserved : 13;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __be32 reserved : 13;
+ __be32 flag_r : 1;
+ __be32 flag_d : 1;
+ __be32 flag_m : 1;
+ __be32 ecode : 8;
+ __be32 etype : 4;
+ __be32 layer : 4;
+#else
+#error "undefined byte order"
+#endif
+};
+
+/*
+ * Terminate Hdr bits & fields
+ */
+enum {
+ TERM_MASK_LAYER = cpu_to_be32(0xf0000000),
+ TERM_MASK_ETYPE = cpu_to_be32(0x0f000000),
+ TERM_MASK_ECODE = cpu_to_be32(0x00ff0000),
+ TERM_FLAG_M = cpu_to_be32(0x00008000),
+ TERM_FLAG_D = cpu_to_be32(0x00004000),
+ TERM_FLAG_R = cpu_to_be32(0x00002000),
+ TERM_MASK_RESVD = cpu_to_be32(0x00001fff)
+};
+
+static inline u8 __rdmap_term_layer(struct iwarp_terminate *term)
+{
+ return term->layer;
+}
+
+static inline void __rdmap_term_set_layer(struct iwarp_terminate *term,
+ u8 layer)
+{
+ term->layer = layer & 0xf;
+}
+
+static inline u8 __rdmap_term_etype(struct iwarp_terminate *term)
+{
+ return term->etype;
+}
+
+static inline void __rdmap_term_set_etype(struct iwarp_terminate *term,
+ u8 etype)
+{
+ term->etype = etype & 0xf;
+}
+
+static inline u8 __rdmap_term_ecode(struct iwarp_terminate *term)
+{
+ return term->ecode;
+}
+
+static inline void __rdmap_term_set_ecode(struct iwarp_terminate *term,
+ u8 ecode)
+{
+ term->ecode = ecode;
+}
+
+/*
+ * Common portion of iWARP headers (MPA, DDP, RDMAP)
+ * for an FPDU carrying an untagged DDP segment
+ */
+struct iwarp_ctrl_untagged {
+ struct iwarp_ctrl ctrl;
+ __be32 rsvd;
+ __be32 ddp_qn;
+ __be32 ddp_msn;
+ __be32 ddp_mo;
+};
+
+/*
+ * Common portion of iWARP headers (MPA, DDP, RDMAP)
+ * for an FPDU carrying a tagged DDP segment
+ */
+struct iwarp_ctrl_tagged {
+ struct iwarp_ctrl ctrl;
+ __be32 ddp_stag;
+ __be64 ddp_to;
+};
+
+union iwarp_hdr {
+ struct iwarp_ctrl ctrl;
+ struct iwarp_ctrl_untagged c_untagged;
+ struct iwarp_ctrl_tagged c_tagged;
+ struct iwarp_rdma_write rwrite;
+ struct iwarp_rdma_rreq rreq;
+ struct iwarp_rdma_rresp rresp;
+ struct iwarp_terminate terminate;
+ struct iwarp_send send;
+ struct iwarp_send_inv send_inv;
+};
+
+enum term_elayer {
+ TERM_ERROR_LAYER_RDMAP = 0x00,
+ TERM_ERROR_LAYER_DDP = 0x01,
+ TERM_ERROR_LAYER_LLP = 0x02 /* eg., MPA */
+};
+
+enum ddp_etype {
+ DDP_ETYPE_CATASTROPHIC = 0x0,
+ DDP_ETYPE_TAGGED_BUF = 0x1,
+ DDP_ETYPE_UNTAGGED_BUF = 0x2,
+ DDP_ETYPE_RSVD = 0x3
+};
+
+enum ddp_ecode {
+ /* unspecified, set to zero */
+ DDP_ECODE_CATASTROPHIC = 0x00,
+ /* Tagged Buffer Errors */
+ DDP_ECODE_T_INVALID_STAG = 0x00,
+ DDP_ECODE_T_BASE_BOUNDS = 0x01,
+ DDP_ECODE_T_STAG_NOT_ASSOC = 0x02,
+ DDP_ECODE_T_TO_WRAP = 0x03,
+ DDP_ECODE_T_VERSION = 0x04,
+ /* Untagged Buffer Errors */
+ DDP_ECODE_UT_INVALID_QN = 0x01,
+ DDP_ECODE_UT_INVALID_MSN_NOBUF = 0x02,
+ DDP_ECODE_UT_INVALID_MSN_RANGE = 0x03,
+ DDP_ECODE_UT_INVALID_MO = 0x04,
+ DDP_ECODE_UT_MSG_TOOLONG = 0x05,
+ DDP_ECODE_UT_VERSION = 0x06
+};
+
+enum rdmap_untagged_qn {
+ RDMAP_UNTAGGED_QN_SEND = 0,
+ RDMAP_UNTAGGED_QN_RDMA_READ = 1,
+ RDMAP_UNTAGGED_QN_TERMINATE = 2,
+ RDMAP_UNTAGGED_QN_COUNT = 3
+};
+
+enum rdmap_etype {
+ RDMAP_ETYPE_CATASTROPHIC = 0x0,
+ RDMAP_ETYPE_REMOTE_PROTECTION = 0x1,
+ RDMAP_ETYPE_REMOTE_OPERATION = 0x2
+};
+
+enum rdmap_ecode {
+ RDMAP_ECODE_INVALID_STAG = 0x00,
+ RDMAP_ECODE_BASE_BOUNDS = 0x01,
+ RDMAP_ECODE_ACCESS_RIGHTS = 0x02,
+ RDMAP_ECODE_STAG_NOT_ASSOC = 0x03,
+ RDMAP_ECODE_TO_WRAP = 0x04,
+ RDMAP_ECODE_VERSION = 0x05,
+ RDMAP_ECODE_OPCODE = 0x06,
+ RDMAP_ECODE_CATASTROPHIC_STREAM = 0x07,
+ RDMAP_ECODE_CATASTROPHIC_GLOBAL = 0x08,
+ RDMAP_ECODE_CANNOT_INVALIDATE = 0x09,
+ RDMAP_ECODE_UNSPECIFIED = 0xff
+};
+
+enum llp_ecode {
+ LLP_ECODE_TCP_STREAM_LOST = 0x01, /* How to transfer this ?? */
+ LLP_ECODE_RECEIVED_CRC = 0x02,
+ LLP_ECODE_FPDU_START = 0x03,
+ LLP_ECODE_INVALID_REQ_RESP = 0x04,
+
+ /* Errors for Enhanced Connection Establishment only */
+ LLP_ECODE_LOCAL_CATASTROPHIC = 0x05,
+ LLP_ECODE_INSUFFICIENT_IRD = 0x06,
+ LLP_ECODE_NO_MATCHING_RTR = 0x07
+};
+
+enum llp_etype { LLP_ETYPE_MPA = 0x00 };
+
+enum rdma_opcode {
+ RDMAP_RDMA_WRITE = 0x0,
+ RDMAP_RDMA_READ_REQ = 0x1,
+ RDMAP_RDMA_READ_RESP = 0x2,
+ RDMAP_SEND = 0x3,
+ RDMAP_SEND_INVAL = 0x4,
+ RDMAP_SEND_SE = 0x5,
+ RDMAP_SEND_SE_INVAL = 0x6,
+ RDMAP_TERMINATE = 0x7,
+ RDMAP_NOT_SUPPORTED = RDMAP_TERMINATE + 1
+};
+
+#endif
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
new file mode 100644
index 000000000000..03fd7b2f595f
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -0,0 +1,745 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+#ifndef _SIW_H
+#define _SIW_H
+
+#include <rdma/ib_verbs.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <crypto/hash.h>
+#include <linux/crc32.h>
+#include <linux/crc32c.h>
+
+#include <rdma/siw-abi.h>
+#include "iwarp.h"
+
+#define SIW_VENDOR_ID 0x626d74 /* ascii 'bmt' for now */
+#define SIW_VENDORT_PART_ID 0
+#define SIW_MAX_QP (1024 * 100)
+#define SIW_MAX_QP_WR (1024 * 32)
+#define SIW_MAX_ORD_QP 128
+#define SIW_MAX_IRD_QP 128
+#define SIW_MAX_SGE_PBL 256 /* max num sge's for PBL */
+#define SIW_MAX_SGE_RD 1 /* iwarp limitation. we could relax */
+#define SIW_MAX_CQ (1024 * 100)
+#define SIW_MAX_CQE (SIW_MAX_QP_WR * 100)
+#define SIW_MAX_MR (SIW_MAX_QP * 10)
+#define SIW_MAX_PD SIW_MAX_QP
+#define SIW_MAX_MW 0 /* to be set if MW's are supported */
+#define SIW_MAX_FMR SIW_MAX_MR
+#define SIW_MAX_SRQ SIW_MAX_QP
+#define SIW_MAX_SRQ_WR (SIW_MAX_QP_WR * 10)
+#define SIW_MAX_CONTEXT SIW_MAX_PD
+
+/* Min number of bytes for using zero copy transmit */
+#define SENDPAGE_THRESH PAGE_SIZE
+
+/* Maximum number of frames which can be send in one SQ processing */
+#define SQ_USER_MAXBURST 100
+
+/* Maximum number of consecutive IRQ elements which get served
+ * if SQ has pending work. Prevents starving local SQ processing
+ * by serving peer Read Requests.
+ */
+#define SIW_IRQ_MAXBURST_SQ_ACTIVE 4
+
+struct siw_dev_cap {
+ int max_qp;
+ int max_qp_wr;
+ int max_ord; /* max. outbound read queue depth */
+ int max_ird; /* max. inbound read queue depth */
+ int max_sge;
+ int max_sge_rd;
+ int max_cq;
+ int max_cqe;
+ int max_mr;
+ int max_pd;
+ int max_mw;
+ int max_fmr;
+ int max_srq;
+ int max_srq_wr;
+ int max_srq_sge;
+};
+
+struct siw_pd {
+ struct ib_pd base_pd;
+};
+
+struct siw_device {
+ struct ib_device base_dev;
+ struct net_device *netdev;
+ struct siw_dev_cap attrs;
+
+ u32 vendor_part_id;
+ int numa_node;
+
+ /* physical port state (only one port per device) */
+ enum ib_port_state state;
+
+ spinlock_t lock;
+
+ struct xarray qp_xa;
+ struct xarray mem_xa;
+
+ struct list_head cep_list;
+ struct list_head qp_list;
+
+ /* active objects statistics to enforce limits */
+ atomic_t num_qp;
+ atomic_t num_cq;
+ atomic_t num_pd;
+ atomic_t num_mr;
+ atomic_t num_srq;
+ atomic_t num_ctx;
+
+ struct work_struct netdev_down;
+};
+
+struct siw_uobj {
+ void *addr;
+ u32 size;
+};
+
+struct siw_ucontext {
+ struct ib_ucontext base_ucontext;
+ struct siw_device *sdev;
+
+ /* xarray of user mappable objects */
+ struct xarray xa;
+ u32 uobj_nextkey;
+};
+
+/*
+ * The RDMA core does not define LOCAL_READ access, which is always
+ * enabled implictely.
+ */
+#define IWARP_ACCESS_MASK \
+ (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | \
+ IB_ACCESS_REMOTE_READ)
+
+/*
+ * siw presentation of user memory registered as source
+ * or target of RDMA operations.
+ */
+
+struct siw_page_chunk {
+ struct page **plist;
+};
+
+struct siw_umem {
+ struct siw_page_chunk *page_chunk;
+ int num_pages;
+ bool writable;
+ u64 fp_addr; /* First page base address */
+ struct mm_struct *owning_mm;
+};
+
+struct siw_pble {
+ u64 addr; /* Address of assigned user buffer */
+ u64 size; /* Size of this entry */
+ u64 pbl_off; /* Total offset from start of PBL */
+};
+
+struct siw_pbl {
+ unsigned int num_buf;
+ unsigned int max_buf;
+ struct siw_pble pbe[1];
+};
+
+struct siw_mr;
+
+/*
+ * Generic memory representation for registered siw memory.
+ * Memory lookup always via higher 24 bit of STag (STag index).
+ */
+struct siw_mem {
+ struct siw_device *sdev;
+ struct kref ref;
+ u64 va; /* VA of memory */
+ u64 len; /* length of the memory buffer in bytes */
+ u32 stag; /* iWarp memory access steering tag */
+ u8 stag_valid; /* VALID or INVALID */
+ u8 is_pbl; /* PBL or user space mem */
+ u8 is_mw; /* Memory Region or Memory Window */
+ enum ib_access_flags perms; /* local/remote READ & WRITE */
+ union {
+ struct siw_umem *umem;
+ struct siw_pbl *pbl;
+ void *mem_obj;
+ };
+ struct ib_pd *pd;
+};
+
+struct siw_mr {
+ struct ib_mr base_mr;
+ struct siw_mem *mem;
+ struct rcu_head rcu;
+};
+
+/*
+ * Error codes for local or remote
+ * access to registered memory
+ */
+enum siw_access_state {
+ E_ACCESS_OK,
+ E_STAG_INVALID,
+ E_BASE_BOUNDS,
+ E_ACCESS_PERM,
+ E_PD_MISMATCH
+};
+
+enum siw_wr_state {
+ SIW_WR_IDLE,
+ SIW_WR_QUEUED, /* processing has not started yet */
+ SIW_WR_INPROGRESS /* initiated processing of the WR */
+};
+
+/* The WQE currently being processed (RX or TX) */
+struct siw_wqe {
+ /* Copy of applications SQE or RQE */
+ union {
+ struct siw_sqe sqe;
+ struct siw_rqe rqe;
+ };
+ struct siw_mem *mem[SIW_MAX_SGE]; /* per sge's resolved mem */
+ enum siw_wr_state wr_status;
+ enum siw_wc_status wc_status;
+ u32 bytes; /* total bytes to process */
+ u32 processed; /* bytes processed */
+};
+
+struct siw_cq {
+ struct ib_cq base_cq;
+ spinlock_t lock;
+ u64 *notify;
+ struct siw_cqe *queue;
+ u32 cq_put;
+ u32 cq_get;
+ u32 num_cqe;
+ bool kernel_verbs;
+ u32 xa_cq_index; /* mmap information for CQE array */
+ u32 id; /* For debugging only */
+};
+
+enum siw_qp_state {
+ SIW_QP_STATE_IDLE,
+ SIW_QP_STATE_RTR,
+ SIW_QP_STATE_RTS,
+ SIW_QP_STATE_CLOSING,
+ SIW_QP_STATE_TERMINATE,
+ SIW_QP_STATE_ERROR,
+ SIW_QP_STATE_COUNT
+};
+
+enum siw_qp_flags {
+ SIW_RDMA_BIND_ENABLED = (1 << 0),
+ SIW_RDMA_WRITE_ENABLED = (1 << 1),
+ SIW_RDMA_READ_ENABLED = (1 << 2),
+ SIW_SIGNAL_ALL_WR = (1 << 3),
+ SIW_MPA_CRC = (1 << 4),
+ SIW_QP_IN_DESTROY = (1 << 5)
+};
+
+enum siw_qp_attr_mask {
+ SIW_QP_ATTR_STATE = (1 << 0),
+ SIW_QP_ATTR_ACCESS_FLAGS = (1 << 1),
+ SIW_QP_ATTR_LLP_HANDLE = (1 << 2),
+ SIW_QP_ATTR_ORD = (1 << 3),
+ SIW_QP_ATTR_IRD = (1 << 4),
+ SIW_QP_ATTR_SQ_SIZE = (1 << 5),
+ SIW_QP_ATTR_RQ_SIZE = (1 << 6),
+ SIW_QP_ATTR_MPA = (1 << 7)
+};
+
+struct siw_srq {
+ struct ib_srq base_srq;
+ spinlock_t lock;
+ u32 max_sge;
+ u32 limit; /* low watermark for async event */
+ struct siw_rqe *recvq;
+ u32 rq_put;
+ u32 rq_get;
+ u32 num_rqe; /* max # of wqe's allowed */
+ u32 xa_srq_index; /* mmap information for SRQ array */
+ char armed; /* inform user if limit hit */
+ char kernel_verbs; /* '1' if kernel client */
+};
+
+struct siw_qp_attrs {
+ enum siw_qp_state state;
+ u32 sq_size;
+ u32 rq_size;
+ u32 orq_size;
+ u32 irq_size;
+ u32 sq_max_sges;
+ u32 rq_max_sges;
+ enum siw_qp_flags flags;
+
+ struct socket *sk;
+};
+
+enum siw_tx_ctx {
+ SIW_SEND_HDR, /* start or continue sending HDR */
+ SIW_SEND_DATA, /* start or continue sending DDP payload */
+ SIW_SEND_TRAILER, /* start or continue sending TRAILER */
+ SIW_SEND_SHORT_FPDU/* send whole FPDU hdr|data|trailer at once */
+};
+
+enum siw_rx_state {
+ SIW_GET_HDR, /* await new hdr or within hdr */
+ SIW_GET_DATA_START, /* start of inbound DDP payload */
+ SIW_GET_DATA_MORE, /* continuation of (misaligned) DDP payload */
+ SIW_GET_TRAILER/* await new trailer or within trailer */
+};
+
+struct siw_rx_stream {
+ struct sk_buff *skb;
+ int skb_new; /* pending unread bytes in skb */
+ int skb_offset; /* offset in skb */
+ int skb_copied; /* processed bytes in skb */
+
+ union iwarp_hdr hdr;
+ struct mpa_trailer trailer;
+
+ enum siw_rx_state state;
+
+ /*
+ * For each FPDU, main RX loop runs through 3 stages:
+ * Receiving protocol headers, placing DDP payload and receiving
+ * trailer information (CRC + possibly padding).
+ * Next two variables keep state on receive status of the
+ * current FPDU part (hdr, data, trailer).
+ */
+ int fpdu_part_rcvd; /* bytes in pkt part copied */
+ int fpdu_part_rem; /* bytes in pkt part not seen */
+
+ /*
+ * Next expected DDP MSN for each QN +
+ * expected steering tag +
+ * expected DDP tagget offset (all HBO)
+ */
+ u32 ddp_msn[RDMAP_UNTAGGED_QN_COUNT];
+ u32 ddp_stag;
+ u64 ddp_to;
+ u32 inval_stag; /* Stag to be invalidated */
+
+ struct shash_desc *mpa_crc_hd;
+ u8 rx_suspend : 1;
+ u8 pad : 2; /* # of pad bytes expected */
+ u8 rdmap_op : 4; /* opcode of current frame */
+};
+
+struct siw_rx_fpdu {
+ /*
+ * Local destination memory of inbound RDMA operation.
+ * Valid, according to wqe->wr_status
+ */
+ struct siw_wqe wqe_active;
+
+ unsigned int pbl_idx; /* Index into current PBL */
+ unsigned int sge_idx; /* current sge in rx */
+ unsigned int sge_off; /* already rcvd in curr. sge */
+
+ char first_ddp_seg; /* this is the first DDP seg */
+ char more_ddp_segs; /* more DDP segs expected */
+ u8 prev_rdmap_op : 4; /* opcode of prev frame */
+};
+
+/*
+ * Shorthands for short packets w/o payload
+ * to be transmitted more efficient.
+ */
+struct siw_send_pkt {
+ struct iwarp_send send;
+ __be32 crc;
+};
+
+struct siw_write_pkt {
+ struct iwarp_rdma_write write;
+ __be32 crc;
+};
+
+struct siw_rreq_pkt {
+ struct iwarp_rdma_rreq rreq;
+ __be32 crc;
+};
+
+struct siw_rresp_pkt {
+ struct iwarp_rdma_rresp rresp;
+ __be32 crc;
+};
+
+struct siw_iwarp_tx {
+ union {
+ union iwarp_hdr hdr;
+
+ /* Generic part of FPDU header */
+ struct iwarp_ctrl ctrl;
+ struct iwarp_ctrl_untagged c_untagged;
+ struct iwarp_ctrl_tagged c_tagged;
+
+ /* FPDU headers */
+ struct iwarp_rdma_write rwrite;
+ struct iwarp_rdma_rreq rreq;
+ struct iwarp_rdma_rresp rresp;
+ struct iwarp_terminate terminate;
+ struct iwarp_send send;
+ struct iwarp_send_inv send_inv;
+
+ /* complete short FPDUs */
+ struct siw_send_pkt send_pkt;
+ struct siw_write_pkt write_pkt;
+ struct siw_rreq_pkt rreq_pkt;
+ struct siw_rresp_pkt rresp_pkt;
+ } pkt;
+
+ struct mpa_trailer trailer;
+ /* DDP MSN for untagged messages */
+ u32 ddp_msn[RDMAP_UNTAGGED_QN_COUNT];
+
+ enum siw_tx_ctx state;
+ u16 ctrl_len; /* ddp+rdmap hdr */
+ u16 ctrl_sent;
+ int burst;
+ int bytes_unsent; /* ddp payload bytes */
+
+ struct shash_desc *mpa_crc_hd;
+
+ u8 do_crc : 1; /* do crc for segment */
+ u8 use_sendpage : 1; /* send w/o copy */
+ u8 tx_suspend : 1; /* stop sending DDP segs. */
+ u8 pad : 2; /* # pad in current fpdu */
+ u8 orq_fence : 1; /* ORQ full or Send fenced */
+ u8 in_syscall : 1; /* TX out of user context */
+ u8 zcopy_tx : 1; /* Use TCP_SENDPAGE if possible */
+ u8 gso_seg_limit; /* Maximum segments for GSO, 0 = unbound */
+
+ u16 fpdu_len; /* len of FPDU to tx */
+ unsigned int tcp_seglen; /* remaining tcp seg space */
+
+ struct siw_wqe wqe_active;
+
+ int pbl_idx; /* Index into current PBL */
+ int sge_idx; /* current sge in tx */
+ u32 sge_off; /* already sent in curr. sge */
+};
+
+struct siw_qp {
+ struct siw_device *sdev;
+ struct ib_qp *ib_qp;
+ struct kref ref;
+ u32 qp_num;
+ struct list_head devq;
+ int tx_cpu;
+ bool kernel_verbs;
+ struct siw_qp_attrs attrs;
+
+ struct siw_cep *cep;
+ struct rw_semaphore state_lock;
+
+ struct ib_pd *pd;
+ struct siw_cq *scq;
+ struct siw_cq *rcq;
+ struct siw_srq *srq;
+
+ struct siw_iwarp_tx tx_ctx; /* Transmit context */
+ spinlock_t sq_lock;
+ struct siw_sqe *sendq; /* send queue element array */
+ uint32_t sq_get; /* consumer index into sq array */
+ uint32_t sq_put; /* kernel prod. index into sq array */
+ struct llist_node tx_list;
+
+ struct siw_sqe *orq; /* outbound read queue element array */
+ spinlock_t orq_lock;
+ uint32_t orq_get; /* consumer index into orq array */
+ uint32_t orq_put; /* shared producer index for ORQ */
+
+ struct siw_rx_stream rx_stream;
+ struct siw_rx_fpdu *rx_fpdu;
+ struct siw_rx_fpdu rx_tagged;
+ struct siw_rx_fpdu rx_untagged;
+ spinlock_t rq_lock;
+ struct siw_rqe *recvq; /* recv queue element array */
+ uint32_t rq_get; /* consumer index into rq array */
+ uint32_t rq_put; /* kernel prod. index into rq array */
+
+ struct siw_sqe *irq; /* inbound read queue element array */
+ uint32_t irq_get; /* consumer index into irq array */
+ uint32_t irq_put; /* producer index into irq array */
+ int irq_burst;
+
+ struct { /* information to be carried in TERMINATE pkt, if valid */
+ u8 valid;
+ u8 in_tx;
+ u8 layer : 4, etype : 4;
+ u8 ecode;
+ } term_info;
+ u32 xa_sq_index; /* mmap information for SQE array */
+ u32 xa_rq_index; /* mmap information for RQE array */
+ struct rcu_head rcu;
+};
+
+struct siw_base_qp {
+ struct ib_qp base_qp;
+ struct siw_qp *qp;
+};
+
+/* helper macros */
+#define rx_qp(rx) container_of(rx, struct siw_qp, rx_stream)
+#define tx_qp(tx) container_of(tx, struct siw_qp, tx_ctx)
+#define tx_wqe(qp) (&(qp)->tx_ctx.wqe_active)
+#define rx_wqe(rctx) (&(rctx)->wqe_active)
+#define rx_mem(rctx) ((rctx)->wqe_active.mem[0])
+#define tx_type(wqe) ((wqe)->sqe.opcode)
+#define rx_type(wqe) ((wqe)->rqe.opcode)
+#define tx_flags(wqe) ((wqe)->sqe.flags)
+
+struct iwarp_msg_info {
+ int hdr_len;
+ struct iwarp_ctrl ctrl;
+ int (*rx_data)(struct siw_qp *qp);
+};
+
+/* Global siw parameters. Currently set in siw_main.c */
+extern const bool zcopy_tx;
+extern const bool try_gso;
+extern const bool loopback_enabled;
+extern const bool mpa_crc_required;
+extern const bool mpa_crc_strict;
+extern const bool siw_tcp_nagle;
+extern u_char mpa_version;
+extern const bool peer_to_peer;
+extern struct task_struct *siw_tx_thread[];
+
+extern struct crypto_shash *siw_crypto_shash;
+extern struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1];
+
+/* QP general functions */
+int siw_qp_modify(struct siw_qp *qp, struct siw_qp_attrs *attr,
+ enum siw_qp_attr_mask mask);
+int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl);
+void siw_qp_llp_close(struct siw_qp *qp);
+void siw_qp_cm_drop(struct siw_qp *qp, int schedule);
+void siw_send_terminate(struct siw_qp *qp);
+
+void siw_qp_get_ref(struct ib_qp *qp);
+void siw_qp_put_ref(struct ib_qp *qp);
+int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp);
+void siw_free_qp(struct kref *ref);
+
+void siw_init_terminate(struct siw_qp *qp, enum term_elayer layer,
+ u8 etype, u8 ecode, int in_tx);
+enum ddp_ecode siw_tagged_error(enum siw_access_state state);
+enum rdmap_ecode siw_rdmap_error(enum siw_access_state state);
+
+void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe);
+int siw_sqe_complete(struct siw_qp *qp, struct siw_sqe *sqe, u32 bytes,
+ enum siw_wc_status status);
+int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes,
+ u32 inval_stag, enum siw_wc_status status);
+void siw_qp_llp_data_ready(struct sock *sk);
+void siw_qp_llp_write_space(struct sock *sk);
+
+/* QP TX path functions */
+int siw_run_sq(void *arg);
+int siw_qp_sq_process(struct siw_qp *qp);
+int siw_sq_start(struct siw_qp *qp);
+int siw_activate_tx(struct siw_qp *qp);
+void siw_stop_tx_thread(int nr_cpu);
+int siw_get_tx_cpu(struct siw_device *sdev);
+void siw_put_tx_cpu(int cpu);
+
+/* QP RX path functions */
+int siw_proc_send(struct siw_qp *qp);
+int siw_proc_rreq(struct siw_qp *qp);
+int siw_proc_rresp(struct siw_qp *qp);
+int siw_proc_write(struct siw_qp *qp);
+int siw_proc_terminate(struct siw_qp *qp);
+
+int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb,
+ unsigned int off, size_t len);
+
+static inline void set_rx_fpdu_context(struct siw_qp *qp, u8 opcode)
+{
+ if (opcode == RDMAP_RDMA_WRITE || opcode == RDMAP_RDMA_READ_RESP)
+ qp->rx_fpdu = &qp->rx_tagged;
+ else
+ qp->rx_fpdu = &qp->rx_untagged;
+
+ qp->rx_stream.rdmap_op = opcode;
+}
+
+static inline struct siw_ucontext *to_siw_ctx(struct ib_ucontext *base_ctx)
+{
+ return container_of(base_ctx, struct siw_ucontext, base_ucontext);
+}
+
+static inline struct siw_base_qp *to_siw_base_qp(struct ib_qp *base_qp)
+{
+ return container_of(base_qp, struct siw_base_qp, base_qp);
+}
+
+static inline struct siw_qp *to_siw_qp(struct ib_qp *base_qp)
+{
+ return to_siw_base_qp(base_qp)->qp;
+}
+
+static inline struct siw_cq *to_siw_cq(struct ib_cq *base_cq)
+{
+ return container_of(base_cq, struct siw_cq, base_cq);
+}
+
+static inline struct siw_srq *to_siw_srq(struct ib_srq *base_srq)
+{
+ return container_of(base_srq, struct siw_srq, base_srq);
+}
+
+static inline struct siw_device *to_siw_dev(struct ib_device *base_dev)
+{
+ return container_of(base_dev, struct siw_device, base_dev);
+}
+
+static inline struct siw_mr *to_siw_mr(struct ib_mr *base_mr)
+{
+ return container_of(base_mr, struct siw_mr, base_mr);
+}
+
+static inline struct siw_qp *siw_qp_id2obj(struct siw_device *sdev, int id)
+{
+ struct siw_qp *qp;
+
+ rcu_read_lock();
+ qp = xa_load(&sdev->qp_xa, id);
+ if (likely(qp && kref_get_unless_zero(&qp->ref))) {
+ rcu_read_unlock();
+ return qp;
+ }
+ rcu_read_unlock();
+ return NULL;
+}
+
+static inline u32 qp_id(struct siw_qp *qp)
+{
+ return qp->qp_num;
+}
+
+static inline void siw_qp_get(struct siw_qp *qp)
+{
+ kref_get(&qp->ref);
+}
+
+static inline void siw_qp_put(struct siw_qp *qp)
+{
+ kref_put(&qp->ref, siw_free_qp);
+}
+
+static inline int siw_sq_empty(struct siw_qp *qp)
+{
+ struct siw_sqe *sqe = &qp->sendq[qp->sq_get % qp->attrs.sq_size];
+
+ return READ_ONCE(sqe->flags) == 0;
+}
+
+static inline struct siw_sqe *sq_get_next(struct siw_qp *qp)
+{
+ struct siw_sqe *sqe = &qp->sendq[qp->sq_get % qp->attrs.sq_size];
+
+ if (READ_ONCE(sqe->flags) & SIW_WQE_VALID)
+ return sqe;
+
+ return NULL;
+}
+
+static inline struct siw_sqe *orq_get_current(struct siw_qp *qp)
+{
+ return &qp->orq[qp->orq_get % qp->attrs.orq_size];
+}
+
+static inline struct siw_sqe *orq_get_tail(struct siw_qp *qp)
+{
+ return &qp->orq[qp->orq_put % qp->attrs.orq_size];
+}
+
+static inline struct siw_sqe *orq_get_free(struct siw_qp *qp)
+{
+ struct siw_sqe *orq_e = orq_get_tail(qp);
+
+ if (orq_e && READ_ONCE(orq_e->flags) == 0)
+ return orq_e;
+
+ return NULL;
+}
+
+static inline int siw_orq_empty(struct siw_qp *qp)
+{
+ return qp->orq[qp->orq_get % qp->attrs.orq_size].flags == 0 ? 1 : 0;
+}
+
+static inline struct siw_sqe *irq_alloc_free(struct siw_qp *qp)
+{
+ struct siw_sqe *irq_e = &qp->irq[qp->irq_put % qp->attrs.irq_size];
+
+ if (READ_ONCE(irq_e->flags) == 0) {
+ qp->irq_put++;
+ return irq_e;
+ }
+ return NULL;
+}
+
+static inline __wsum siw_csum_update(const void *buff, int len, __wsum sum)
+{
+ return (__force __wsum)crc32c((__force __u32)sum, buff, len);
+}
+
+static inline __wsum siw_csum_combine(__wsum csum, __wsum csum2, int offset,
+ int len)
+{
+ return (__force __wsum)__crc32c_le_combine((__force __u32)csum,
+ (__force __u32)csum2, len);
+}
+
+static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len)
+{
+ const struct skb_checksum_ops siw_cs_ops = {
+ .update = siw_csum_update,
+ .combine = siw_csum_combine,
+ };
+ __wsum crc = *(u32 *)shash_desc_ctx(srx->mpa_crc_hd);
+
+ crc = __skb_checksum(srx->skb, srx->skb_offset, len, crc,
+ &siw_cs_ops);
+ *(u32 *)shash_desc_ctx(srx->mpa_crc_hd) = crc;
+}
+
+#define siw_dbg(ibdev, fmt, ...) \
+ ibdev_dbg(ibdev, "%s: " fmt, __func__, ##__VA_ARGS__)
+
+#define siw_dbg_qp(qp, fmt, ...) \
+ ibdev_dbg(&qp->sdev->base_dev, "QP[%u] %s: " fmt, qp_id(qp), __func__, \
+ ##__VA_ARGS__)
+
+#define siw_dbg_cq(cq, fmt, ...) \
+ ibdev_dbg(cq->base_cq.device, "CQ[%u] %s: " fmt, cq->id, __func__, \
+ ##__VA_ARGS__)
+
+#define siw_dbg_pd(pd, fmt, ...) \
+ ibdev_dbg(pd->device, "PD[%u] %s: " fmt, pd->res.id, __func__, \
+ ##__VA_ARGS__)
+
+#define siw_dbg_mem(mem, fmt, ...) \
+ ibdev_dbg(&mem->sdev->base_dev, \
+ "MEM[0x%08x] %s: " fmt, mem->stag, __func__, ##__VA_ARGS__)
+
+#define siw_dbg_cep(cep, fmt, ...) \
+ ibdev_dbg(&cep->sdev->base_dev, "CEP[0x%p] %s: " fmt, \
+ cep, __func__, ##__VA_ARGS__)
+
+void siw_cq_flush(struct siw_cq *cq);
+void siw_sq_flush(struct siw_qp *qp);
+void siw_rq_flush(struct siw_qp *qp);
+int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc);
+
+#endif
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
new file mode 100644
index 000000000000..a7cde98e73e8
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -0,0 +1,2070 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Fredy Neeser */
+/* Greg Joyce <greg@opengridcomputing.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+/* Copyright (c) 2017, Open Grid Computing, Inc. */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#include <linux/workqueue.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/inet.h>
+#include <linux/tcp.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "siw.h"
+#include "siw_cm.h"
+
+/*
+ * Set to any combination of
+ * MPA_V2_RDMA_NO_RTR, MPA_V2_RDMA_READ_RTR, MPA_V2_RDMA_WRITE_RTR
+ */
+static __be16 rtr_type = MPA_V2_RDMA_READ_RTR | MPA_V2_RDMA_WRITE_RTR;
+static const bool relaxed_ird_negotiation = 1;
+
+static void siw_cm_llp_state_change(struct sock *s);
+static void siw_cm_llp_data_ready(struct sock *s);
+static void siw_cm_llp_write_space(struct sock *s);
+static void siw_cm_llp_error_report(struct sock *s);
+static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason,
+ int status);
+
+static void siw_sk_assign_cm_upcalls(struct sock *sk)
+{
+ write_lock_bh(&sk->sk_callback_lock);
+ sk->sk_state_change = siw_cm_llp_state_change;
+ sk->sk_data_ready = siw_cm_llp_data_ready;
+ sk->sk_write_space = siw_cm_llp_write_space;
+ sk->sk_error_report = siw_cm_llp_error_report;
+ write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void siw_sk_save_upcalls(struct sock *sk)
+{
+ struct siw_cep *cep = sk_to_cep(sk);
+
+ write_lock_bh(&sk->sk_callback_lock);
+ cep->sk_state_change = sk->sk_state_change;
+ cep->sk_data_ready = sk->sk_data_ready;
+ cep->sk_write_space = sk->sk_write_space;
+ cep->sk_error_report = sk->sk_error_report;
+ write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void siw_sk_restore_upcalls(struct sock *sk, struct siw_cep *cep)
+{
+ sk->sk_state_change = cep->sk_state_change;
+ sk->sk_data_ready = cep->sk_data_ready;
+ sk->sk_write_space = cep->sk_write_space;
+ sk->sk_error_report = cep->sk_error_report;
+ sk->sk_user_data = NULL;
+}
+
+static void siw_qp_socket_assoc(struct siw_cep *cep, struct siw_qp *qp)
+{
+ struct socket *s = cep->sock;
+ struct sock *sk = s->sk;
+
+ write_lock_bh(&sk->sk_callback_lock);
+
+ qp->attrs.sk = s;
+ sk->sk_data_ready = siw_qp_llp_data_ready;
+ sk->sk_write_space = siw_qp_llp_write_space;
+
+ write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void siw_socket_disassoc(struct socket *s)
+{
+ struct sock *sk = s->sk;
+ struct siw_cep *cep;
+
+ if (sk) {
+ write_lock_bh(&sk->sk_callback_lock);
+ cep = sk_to_cep(sk);
+ if (cep) {
+ siw_sk_restore_upcalls(sk, cep);
+ siw_cep_put(cep);
+ } else {
+ pr_warn("siw: cannot restore sk callbacks: no ep\n");
+ }
+ write_unlock_bh(&sk->sk_callback_lock);
+ } else {
+ pr_warn("siw: cannot restore sk callbacks: no sk\n");
+ }
+}
+
+static void siw_rtr_data_ready(struct sock *sk)
+{
+ struct siw_cep *cep;
+ struct siw_qp *qp = NULL;
+ read_descriptor_t rd_desc;
+
+ read_lock(&sk->sk_callback_lock);
+
+ cep = sk_to_cep(sk);
+ if (!cep) {
+ WARN(1, "No connection endpoint\n");
+ goto out;
+ }
+ qp = sk_to_qp(sk);
+
+ memset(&rd_desc, 0, sizeof(rd_desc));
+ rd_desc.arg.data = qp;
+ rd_desc.count = 1;
+
+ tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data);
+ /*
+ * Check if first frame was successfully processed.
+ * Signal connection full establishment if yes.
+ * Failed data processing would have already scheduled
+ * connection drop.
+ */
+ if (!qp->rx_stream.rx_suspend)
+ siw_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0);
+out:
+ read_unlock(&sk->sk_callback_lock);
+ if (qp)
+ siw_qp_socket_assoc(cep, qp);
+}
+
+static void siw_sk_assign_rtr_upcalls(struct siw_cep *cep)
+{
+ struct sock *sk = cep->sock->sk;
+
+ write_lock_bh(&sk->sk_callback_lock);
+ sk->sk_data_ready = siw_rtr_data_ready;
+ sk->sk_write_space = siw_qp_llp_write_space;
+ write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void siw_cep_socket_assoc(struct siw_cep *cep, struct socket *s)
+{
+ cep->sock = s;
+ siw_cep_get(cep);
+ s->sk->sk_user_data = cep;
+
+ siw_sk_save_upcalls(s->sk);
+ siw_sk_assign_cm_upcalls(s->sk);
+}
+
+static struct siw_cep *siw_cep_alloc(struct siw_device *sdev)
+{
+ struct siw_cep *cep = kzalloc(sizeof(*cep), GFP_KERNEL);
+ unsigned long flags;
+
+ if (!cep)
+ return NULL;
+
+ INIT_LIST_HEAD(&cep->listenq);
+ INIT_LIST_HEAD(&cep->devq);
+ INIT_LIST_HEAD(&cep->work_freelist);
+
+ kref_init(&cep->ref);
+ cep->state = SIW_EPSTATE_IDLE;
+ init_waitqueue_head(&cep->waitq);
+ spin_lock_init(&cep->lock);
+ cep->sdev = sdev;
+ cep->enhanced_rdma_conn_est = false;
+
+ spin_lock_irqsave(&sdev->lock, flags);
+ list_add_tail(&cep->devq, &sdev->cep_list);
+ spin_unlock_irqrestore(&sdev->lock, flags);
+
+ siw_dbg_cep(cep, "new endpoint\n");
+ return cep;
+}
+
+static void siw_cm_free_work(struct siw_cep *cep)
+{
+ struct list_head *w, *tmp;
+ struct siw_cm_work *work;
+
+ list_for_each_safe(w, tmp, &cep->work_freelist) {
+ work = list_entry(w, struct siw_cm_work, list);
+ list_del(&work->list);
+ kfree(work);
+ }
+}
+
+static void siw_cancel_mpatimer(struct siw_cep *cep)
+{
+ spin_lock_bh(&cep->lock);
+ if (cep->mpa_timer) {
+ if (cancel_delayed_work(&cep->mpa_timer->work)) {
+ siw_cep_put(cep);
+ kfree(cep->mpa_timer); /* not needed again */
+ }
+ cep->mpa_timer = NULL;
+ }
+ spin_unlock_bh(&cep->lock);
+}
+
+static void siw_put_work(struct siw_cm_work *work)
+{
+ INIT_LIST_HEAD(&work->list);
+ spin_lock_bh(&work->cep->lock);
+ list_add(&work->list, &work->cep->work_freelist);
+ spin_unlock_bh(&work->cep->lock);
+}
+
+static void siw_cep_set_inuse(struct siw_cep *cep)
+{
+ unsigned long flags;
+ int rv;
+retry:
+ spin_lock_irqsave(&cep->lock, flags);
+
+ if (cep->in_use) {
+ spin_unlock_irqrestore(&cep->lock, flags);
+ rv = wait_event_interruptible(cep->waitq, !cep->in_use);
+ if (signal_pending(current))
+ flush_signals(current);
+ goto retry;
+ } else {
+ cep->in_use = 1;
+ spin_unlock_irqrestore(&cep->lock, flags);
+ }
+}
+
+static void siw_cep_set_free(struct siw_cep *cep)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cep->lock, flags);
+ cep->in_use = 0;
+ spin_unlock_irqrestore(&cep->lock, flags);
+
+ wake_up(&cep->waitq);
+}
+
+static void __siw_cep_dealloc(struct kref *ref)
+{
+ struct siw_cep *cep = container_of(ref, struct siw_cep, ref);
+ struct siw_device *sdev = cep->sdev;
+ unsigned long flags;
+
+ WARN_ON(cep->listen_cep);
+
+ /* kfree(NULL) is safe */
+ kfree(cep->mpa.pdata);
+ spin_lock_bh(&cep->lock);
+ if (!list_empty(&cep->work_freelist))
+ siw_cm_free_work(cep);
+ spin_unlock_bh(&cep->lock);
+
+ spin_lock_irqsave(&sdev->lock, flags);
+ list_del(&cep->devq);
+ spin_unlock_irqrestore(&sdev->lock, flags);
+
+ siw_dbg_cep(cep, "free endpoint\n");
+ kfree(cep);
+}
+
+static struct siw_cm_work *siw_get_work(struct siw_cep *cep)
+{
+ struct siw_cm_work *work = NULL;
+
+ spin_lock_bh(&cep->lock);
+ if (!list_empty(&cep->work_freelist)) {
+ work = list_entry(cep->work_freelist.next, struct siw_cm_work,
+ list);
+ list_del_init(&work->list);
+ }
+ spin_unlock_bh(&cep->lock);
+ return work;
+}
+
+static int siw_cm_alloc_work(struct siw_cep *cep, int num)
+{
+ struct siw_cm_work *work;
+
+ while (num--) {
+ work = kmalloc(sizeof(*work), GFP_KERNEL);
+ if (!work) {
+ if (!(list_empty(&cep->work_freelist)))
+ siw_cm_free_work(cep);
+ return -ENOMEM;
+ }
+ work->cep = cep;
+ INIT_LIST_HEAD(&work->list);
+ list_add(&work->list, &cep->work_freelist);
+ }
+ return 0;
+}
+
+/*
+ * siw_cm_upcall()
+ *
+ * Upcall to IWCM to inform about async connection events
+ */
+static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason,
+ int status)
+{
+ struct iw_cm_event event;
+ struct iw_cm_id *id;
+
+ memset(&event, 0, sizeof(event));
+ event.status = status;
+ event.event = reason;
+
+ if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
+ event.provider_data = cep;
+ id = cep->listen_cep->cm_id;
+ } else {
+ id = cep->cm_id;
+ }
+ /* Signal IRD and ORD */
+ if (reason == IW_CM_EVENT_ESTABLISHED ||
+ reason == IW_CM_EVENT_CONNECT_REPLY) {
+ /* Signal negotiated IRD/ORD values we will use */
+ event.ird = cep->ird;
+ event.ord = cep->ord;
+ } else if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
+ event.ird = cep->ord;
+ event.ord = cep->ird;
+ }
+ /* Signal private data and address information */
+ if (reason == IW_CM_EVENT_CONNECT_REQUEST ||
+ reason == IW_CM_EVENT_CONNECT_REPLY) {
+ u16 pd_len = be16_to_cpu(cep->mpa.hdr.params.pd_len);
+
+ if (pd_len) {
+ /*
+ * hand over MPA private data
+ */
+ event.private_data_len = pd_len;
+ event.private_data = cep->mpa.pdata;
+
+ /* Hide MPA V2 IRD/ORD control */
+ if (cep->enhanced_rdma_conn_est) {
+ event.private_data_len -=
+ sizeof(struct mpa_v2_data);
+ event.private_data +=
+ sizeof(struct mpa_v2_data);
+ }
+ }
+ getname_local(cep->sock, &event.local_addr);
+ getname_peer(cep->sock, &event.remote_addr);
+ }
+ siw_dbg_cep(cep, "[QP %u]: id 0x%p, reason=%d, status=%d\n",
+ cep->qp ? qp_id(cep->qp) : -1, id, reason, status);
+
+ return id->event_handler(id, &event);
+}
+
+/*
+ * siw_qp_cm_drop()
+ *
+ * Drops established LLP connection if present and not already
+ * scheduled for dropping. Called from user context, SQ workqueue
+ * or receive IRQ. Caller signals if socket can be immediately
+ * closed (basically, if not in IRQ).
+ */
+void siw_qp_cm_drop(struct siw_qp *qp, int schedule)
+{
+ struct siw_cep *cep = qp->cep;
+
+ qp->rx_stream.rx_suspend = 1;
+ qp->tx_ctx.tx_suspend = 1;
+
+ if (!qp->cep)
+ return;
+
+ if (schedule) {
+ siw_cm_queue_work(cep, SIW_CM_WORK_CLOSE_LLP);
+ } else {
+ siw_cep_set_inuse(cep);
+
+ if (cep->state == SIW_EPSTATE_CLOSED) {
+ siw_dbg_cep(cep, "already closed\n");
+ goto out;
+ }
+ siw_dbg_cep(cep, "immediate close, state %d\n", cep->state);
+
+ if (qp->term_info.valid)
+ siw_send_terminate(qp);
+
+ if (cep->cm_id) {
+ switch (cep->state) {
+ case SIW_EPSTATE_AWAIT_MPAREP:
+ siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -EINVAL);
+ break;
+
+ case SIW_EPSTATE_RDMA_MODE:
+ siw_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
+ break;
+
+ case SIW_EPSTATE_IDLE:
+ case SIW_EPSTATE_LISTENING:
+ case SIW_EPSTATE_CONNECTING:
+ case SIW_EPSTATE_AWAIT_MPAREQ:
+ case SIW_EPSTATE_RECVD_MPAREQ:
+ case SIW_EPSTATE_CLOSED:
+ default:
+ break;
+ }
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+ siw_cep_put(cep);
+ }
+ cep->state = SIW_EPSTATE_CLOSED;
+
+ if (cep->sock) {
+ siw_socket_disassoc(cep->sock);
+ /*
+ * Immediately close socket
+ */
+ sock_release(cep->sock);
+ cep->sock = NULL;
+ }
+ if (cep->qp) {
+ cep->qp = NULL;
+ siw_qp_put(qp);
+ }
+out:
+ siw_cep_set_free(cep);
+ }
+}
+
+void siw_cep_put(struct siw_cep *cep)
+{
+ WARN_ON(kref_read(&cep->ref) < 1);
+ kref_put(&cep->ref, __siw_cep_dealloc);
+}
+
+void siw_cep_get(struct siw_cep *cep)
+{
+ kref_get(&cep->ref);
+}
+
+/*
+ * Expects params->pd_len in host byte order
+ */
+static int siw_send_mpareqrep(struct siw_cep *cep, const void *pdata, u8 pd_len)
+{
+ struct socket *s = cep->sock;
+ struct mpa_rr *rr = &cep->mpa.hdr;
+ struct kvec iov[3];
+ struct msghdr msg;
+ int rv;
+ int iovec_num = 0;
+ int mpa_len;
+
+ memset(&msg, 0, sizeof(msg));
+
+ iov[iovec_num].iov_base = rr;
+ iov[iovec_num].iov_len = sizeof(*rr);
+ mpa_len = sizeof(*rr);
+
+ if (cep->enhanced_rdma_conn_est) {
+ iovec_num++;
+ iov[iovec_num].iov_base = &cep->mpa.v2_ctrl;
+ iov[iovec_num].iov_len = sizeof(cep->mpa.v2_ctrl);
+ mpa_len += sizeof(cep->mpa.v2_ctrl);
+ }
+ if (pd_len) {
+ iovec_num++;
+ iov[iovec_num].iov_base = (char *)pdata;
+ iov[iovec_num].iov_len = pd_len;
+ mpa_len += pd_len;
+ }
+ if (cep->enhanced_rdma_conn_est)
+ pd_len += sizeof(cep->mpa.v2_ctrl);
+
+ rr->params.pd_len = cpu_to_be16(pd_len);
+
+ rv = kernel_sendmsg(s, &msg, iov, iovec_num + 1, mpa_len);
+
+ return rv < 0 ? rv : 0;
+}
+
+/*
+ * Receive MPA Request/Reply header.
+ *
+ * Returns 0 if complete MPA Request/Reply header including
+ * eventual private data was received. Returns -EAGAIN if
+ * header was partially received or negative error code otherwise.
+ *
+ * Context: May be called in process context only
+ */
+static int siw_recv_mpa_rr(struct siw_cep *cep)
+{
+ struct mpa_rr *hdr = &cep->mpa.hdr;
+ struct socket *s = cep->sock;
+ u16 pd_len;
+ int rcvd, to_rcv;
+
+ if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) {
+ rcvd = ksock_recv(s, (char *)hdr + cep->mpa.bytes_rcvd,
+ sizeof(struct mpa_rr) - cep->mpa.bytes_rcvd,
+ 0);
+ if (rcvd <= 0)
+ return -ECONNABORTED;
+
+ cep->mpa.bytes_rcvd += rcvd;
+
+ if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr))
+ return -EAGAIN;
+
+ if (be16_to_cpu(hdr->params.pd_len) > MPA_MAX_PRIVDATA)
+ return -EPROTO;
+ }
+ pd_len = be16_to_cpu(hdr->params.pd_len);
+
+ /*
+ * At least the MPA Request/Reply header (frame not including
+ * private data) has been received.
+ * Receive (or continue receiving) any private data.
+ */
+ to_rcv = pd_len - (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr));
+
+ if (!to_rcv) {
+ /*
+ * We must have hdr->params.pd_len == 0 and thus received a
+ * complete MPA Request/Reply frame.
+ * Check against peer protocol violation.
+ */
+ u32 word;
+
+ rcvd = ksock_recv(s, (char *)&word, sizeof(word), MSG_DONTWAIT);
+ if (rcvd == -EAGAIN)
+ return 0;
+
+ if (rcvd == 0) {
+ siw_dbg_cep(cep, "peer EOF\n");
+ return -EPIPE;
+ }
+ if (rcvd < 0) {
+ siw_dbg_cep(cep, "error: %d\n", rcvd);
+ return rcvd;
+ }
+ siw_dbg_cep(cep, "peer sent extra data: %d\n", rcvd);
+
+ return -EPROTO;
+ }
+
+ /*
+ * At this point, we must have hdr->params.pd_len != 0.
+ * A private data buffer gets allocated if hdr->params.pd_len != 0.
+ */
+ if (!cep->mpa.pdata) {
+ cep->mpa.pdata = kmalloc(pd_len + 4, GFP_KERNEL);
+ if (!cep->mpa.pdata)
+ return -ENOMEM;
+ }
+ rcvd = ksock_recv(
+ s, cep->mpa.pdata + cep->mpa.bytes_rcvd - sizeof(struct mpa_rr),
+ to_rcv + 4, MSG_DONTWAIT);
+
+ if (rcvd < 0)
+ return rcvd;
+
+ if (rcvd > to_rcv)
+ return -EPROTO;
+
+ cep->mpa.bytes_rcvd += rcvd;
+
+ if (to_rcv == rcvd) {
+ siw_dbg_cep(cep, "%d bytes private data received\n", pd_len);
+ return 0;
+ }
+ return -EAGAIN;
+}
+
+/*
+ * siw_proc_mpareq()
+ *
+ * Read MPA Request from socket and signal new connection to IWCM
+ * if success. Caller must hold lock on corresponding listening CEP.
+ */
+static int siw_proc_mpareq(struct siw_cep *cep)
+{
+ struct mpa_rr *req;
+ int version, rv;
+ u16 pd_len;
+
+ rv = siw_recv_mpa_rr(cep);
+ if (rv)
+ return rv;
+
+ req = &cep->mpa.hdr;
+
+ version = __mpa_rr_revision(req->params.bits);
+ pd_len = be16_to_cpu(req->params.pd_len);
+
+ if (version > MPA_REVISION_2)
+ /* allow for 0, 1, and 2 only */
+ return -EPROTO;
+
+ if (memcmp(req->key, MPA_KEY_REQ, 16))
+ return -EPROTO;
+
+ /* Prepare for sending MPA reply */
+ memcpy(req->key, MPA_KEY_REP, 16);
+
+ if (version == MPA_REVISION_2 &&
+ (req->params.bits & MPA_RR_FLAG_ENHANCED)) {
+ /*
+ * MPA version 2 must signal IRD/ORD values and P2P mode
+ * in private data if header flag MPA_RR_FLAG_ENHANCED
+ * is set.
+ */
+ if (pd_len < sizeof(struct mpa_v2_data))
+ goto reject_conn;
+
+ cep->enhanced_rdma_conn_est = true;
+ }
+
+ /* MPA Markers: currently not supported. Marker TX to be added. */
+ if (req->params.bits & MPA_RR_FLAG_MARKERS)
+ goto reject_conn;
+
+ if (req->params.bits & MPA_RR_FLAG_CRC) {
+ /*
+ * RFC 5044, page 27: CRC MUST be used if peer requests it.
+ * siw specific: 'mpa_crc_strict' parameter to reject
+ * connection with CRC if local CRC off enforced by
+ * 'mpa_crc_strict' module parameter.
+ */
+ if (!mpa_crc_required && mpa_crc_strict)
+ goto reject_conn;
+
+ /* Enable CRC if requested by module parameter */
+ if (mpa_crc_required)
+ req->params.bits |= MPA_RR_FLAG_CRC;
+ }
+ if (cep->enhanced_rdma_conn_est) {
+ struct mpa_v2_data *v2 = (struct mpa_v2_data *)cep->mpa.pdata;
+
+ /*
+ * Peer requested ORD becomes requested local IRD,
+ * peer requested IRD becomes requested local ORD.
+ * IRD and ORD get limited by global maximum values.
+ */
+ cep->ord = ntohs(v2->ird) & MPA_IRD_ORD_MASK;
+ cep->ord = min(cep->ord, SIW_MAX_ORD_QP);
+ cep->ird = ntohs(v2->ord) & MPA_IRD_ORD_MASK;
+ cep->ird = min(cep->ird, SIW_MAX_IRD_QP);
+
+ /* May get overwritten by locally negotiated values */
+ cep->mpa.v2_ctrl.ird = htons(cep->ird);
+ cep->mpa.v2_ctrl.ord = htons(cep->ord);
+
+ /*
+ * Support for peer sent zero length Write or Read to
+ * let local side enter RTS. Writes are preferred.
+ * Sends would require pre-posting a Receive and are
+ * not supported.
+ * Propose zero length Write if none of Read and Write
+ * is indicated.
+ */
+ if (v2->ird & MPA_V2_PEER_TO_PEER) {
+ cep->mpa.v2_ctrl.ird |= MPA_V2_PEER_TO_PEER;
+
+ if (v2->ord & MPA_V2_RDMA_WRITE_RTR)
+ cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_WRITE_RTR;
+ else if (v2->ord & MPA_V2_RDMA_READ_RTR)
+ cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_READ_RTR;
+ else
+ cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_WRITE_RTR;
+ }
+ }
+
+ cep->state = SIW_EPSTATE_RECVD_MPAREQ;
+
+ /* Keep reference until IWCM accepts/rejects */
+ siw_cep_get(cep);
+ rv = siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST, 0);
+ if (rv)
+ siw_cep_put(cep);
+
+ return rv;
+
+reject_conn:
+ siw_dbg_cep(cep, "reject: crc %d:%d:%d, m %d:%d\n",
+ req->params.bits & MPA_RR_FLAG_CRC ? 1 : 0,
+ mpa_crc_required, mpa_crc_strict,
+ req->params.bits & MPA_RR_FLAG_MARKERS ? 1 : 0, 0);
+
+ req->params.bits &= ~MPA_RR_FLAG_MARKERS;
+ req->params.bits |= MPA_RR_FLAG_REJECT;
+
+ if (!mpa_crc_required && mpa_crc_strict)
+ req->params.bits &= ~MPA_RR_FLAG_CRC;
+
+ if (pd_len)
+ kfree(cep->mpa.pdata);
+
+ cep->mpa.pdata = NULL;
+
+ siw_send_mpareqrep(cep, NULL, 0);
+
+ return -EOPNOTSUPP;
+}
+
+static int siw_proc_mpareply(struct siw_cep *cep)
+{
+ struct siw_qp_attrs qp_attrs;
+ enum siw_qp_attr_mask qp_attr_mask;
+ struct siw_qp *qp = cep->qp;
+ struct mpa_rr *rep;
+ int rv;
+ u16 rep_ord;
+ u16 rep_ird;
+ bool ird_insufficient = false;
+ enum mpa_v2_ctrl mpa_p2p_mode = MPA_V2_RDMA_NO_RTR;
+
+ rv = siw_recv_mpa_rr(cep);
+ if (rv != -EAGAIN)
+ siw_cancel_mpatimer(cep);
+ if (rv)
+ goto out_err;
+
+ rep = &cep->mpa.hdr;
+
+ if (__mpa_rr_revision(rep->params.bits) > MPA_REVISION_2) {
+ /* allow for 0, 1, and 2 only */
+ rv = -EPROTO;
+ goto out_err;
+ }
+ if (memcmp(rep->key, MPA_KEY_REP, 16)) {
+ siw_init_terminate(qp, TERM_ERROR_LAYER_LLP, LLP_ETYPE_MPA,
+ LLP_ECODE_INVALID_REQ_RESP, 0);
+ siw_send_terminate(qp);
+ rv = -EPROTO;
+ goto out_err;
+ }
+ if (rep->params.bits & MPA_RR_FLAG_REJECT) {
+ siw_dbg_cep(cep, "got mpa reject\n");
+ siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET);
+
+ return -ECONNRESET;
+ }
+ if (try_gso && rep->params.bits & MPA_RR_FLAG_GSO_EXP) {
+ siw_dbg_cep(cep, "peer allows GSO on TX\n");
+ qp->tx_ctx.gso_seg_limit = 0;
+ }
+ if ((rep->params.bits & MPA_RR_FLAG_MARKERS) ||
+ (mpa_crc_required && !(rep->params.bits & MPA_RR_FLAG_CRC)) ||
+ (mpa_crc_strict && !mpa_crc_required &&
+ (rep->params.bits & MPA_RR_FLAG_CRC))) {
+ siw_dbg_cep(cep, "reply unsupp: crc %d:%d:%d, m %d:%d\n",
+ rep->params.bits & MPA_RR_FLAG_CRC ? 1 : 0,
+ mpa_crc_required, mpa_crc_strict,
+ rep->params.bits & MPA_RR_FLAG_MARKERS ? 1 : 0, 0);
+
+ siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);
+
+ return -EINVAL;
+ }
+ if (cep->enhanced_rdma_conn_est) {
+ struct mpa_v2_data *v2;
+
+ if (__mpa_rr_revision(rep->params.bits) < MPA_REVISION_2 ||
+ !(rep->params.bits & MPA_RR_FLAG_ENHANCED)) {
+ /*
+ * Protocol failure: The responder MUST reply with
+ * MPA version 2 and MUST set MPA_RR_FLAG_ENHANCED.
+ */
+ siw_dbg_cep(cep, "mpa reply error: vers %d, enhcd %d\n",
+ __mpa_rr_revision(rep->params.bits),
+ rep->params.bits & MPA_RR_FLAG_ENHANCED ?
+ 1 :
+ 0);
+
+ siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -ECONNRESET);
+ return -EINVAL;
+ }
+ v2 = (struct mpa_v2_data *)cep->mpa.pdata;
+ rep_ird = ntohs(v2->ird) & MPA_IRD_ORD_MASK;
+ rep_ord = ntohs(v2->ord) & MPA_IRD_ORD_MASK;
+
+ if (cep->ird < rep_ord &&
+ (relaxed_ird_negotiation == false ||
+ rep_ord > cep->sdev->attrs.max_ird)) {
+ siw_dbg_cep(cep, "ird %d, rep_ord %d, max_ord %d\n",
+ cep->ird, rep_ord,
+ cep->sdev->attrs.max_ord);
+ ird_insufficient = true;
+ }
+ if (cep->ord > rep_ird && relaxed_ird_negotiation == false) {
+ siw_dbg_cep(cep, "ord %d, rep_ird %d\n", cep->ord,
+ rep_ird);
+ ird_insufficient = true;
+ }
+ /*
+ * Always report negotiated peer values to user,
+ * even if IRD/ORD negotiation failed
+ */
+ cep->ird = rep_ord;
+ cep->ord = rep_ird;
+
+ if (ird_insufficient) {
+ /*
+ * If the initiator IRD is insuffient for the
+ * responder ORD, send a TERM.
+ */
+ siw_init_terminate(qp, TERM_ERROR_LAYER_LLP,
+ LLP_ETYPE_MPA,
+ LLP_ECODE_INSUFFICIENT_IRD, 0);
+ siw_send_terminate(qp);
+ rv = -ENOMEM;
+ goto out_err;
+ }
+ if (cep->mpa.v2_ctrl_req.ird & MPA_V2_PEER_TO_PEER)
+ mpa_p2p_mode =
+ cep->mpa.v2_ctrl_req.ord &
+ (MPA_V2_RDMA_WRITE_RTR | MPA_V2_RDMA_READ_RTR);
+
+ /*
+ * Check if we requested P2P mode, and if peer agrees
+ */
+ if (mpa_p2p_mode != MPA_V2_RDMA_NO_RTR) {
+ if ((mpa_p2p_mode & v2->ord) == 0) {
+ /*
+ * We requested RTR mode(s), but the peer
+ * did not pick any mode we support.
+ */
+ siw_dbg_cep(cep,
+ "rtr mode: req %2x, got %2x\n",
+ mpa_p2p_mode,
+ v2->ord & (MPA_V2_RDMA_WRITE_RTR |
+ MPA_V2_RDMA_READ_RTR));
+
+ siw_init_terminate(qp, TERM_ERROR_LAYER_LLP,
+ LLP_ETYPE_MPA,
+ LLP_ECODE_NO_MATCHING_RTR,
+ 0);
+ siw_send_terminate(qp);
+ rv = -EPROTO;
+ goto out_err;
+ }
+ mpa_p2p_mode = v2->ord & (MPA_V2_RDMA_WRITE_RTR |
+ MPA_V2_RDMA_READ_RTR);
+ }
+ }
+ memset(&qp_attrs, 0, sizeof(qp_attrs));
+
+ if (rep->params.bits & MPA_RR_FLAG_CRC)
+ qp_attrs.flags = SIW_MPA_CRC;
+
+ qp_attrs.irq_size = cep->ird;
+ qp_attrs.orq_size = cep->ord;
+ qp_attrs.sk = cep->sock;
+ qp_attrs.state = SIW_QP_STATE_RTS;
+
+ qp_attr_mask = SIW_QP_ATTR_STATE | SIW_QP_ATTR_LLP_HANDLE |
+ SIW_QP_ATTR_ORD | SIW_QP_ATTR_IRD | SIW_QP_ATTR_MPA;
+
+ /* Move socket RX/TX under QP control */
+ down_write(&qp->state_lock);
+ if (qp->attrs.state > SIW_QP_STATE_RTR) {
+ rv = -EINVAL;
+ up_write(&qp->state_lock);
+ goto out_err;
+ }
+ rv = siw_qp_modify(qp, &qp_attrs, qp_attr_mask);
+
+ siw_qp_socket_assoc(cep, qp);
+
+ up_write(&qp->state_lock);
+
+ /* Send extra RDMA frame to trigger peer RTS if negotiated */
+ if (mpa_p2p_mode != MPA_V2_RDMA_NO_RTR) {
+ rv = siw_qp_mpa_rts(qp, mpa_p2p_mode);
+ if (rv)
+ goto out_err;
+ }
+ if (!rv) {
+ rv = siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 0);
+ if (!rv)
+ cep->state = SIW_EPSTATE_RDMA_MODE;
+
+ return 0;
+ }
+
+out_err:
+ siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);
+
+ return rv;
+}
+
+/*
+ * siw_accept_newconn - accept an incoming pending connection
+ *
+ */
+static void siw_accept_newconn(struct siw_cep *cep)
+{
+ struct socket *s = cep->sock;
+ struct socket *new_s = NULL;
+ struct siw_cep *new_cep = NULL;
+ int rv = 0; /* debug only. should disappear */
+
+ if (cep->state != SIW_EPSTATE_LISTENING)
+ goto error;
+
+ new_cep = siw_cep_alloc(cep->sdev);
+ if (!new_cep)
+ goto error;
+
+ /*
+ * 4: Allocate a sufficient number of work elements
+ * to allow concurrent handling of local + peer close
+ * events, MPA header processing + MPA timeout.
+ */
+ if (siw_cm_alloc_work(new_cep, 4) != 0)
+ goto error;
+
+ /*
+ * Copy saved socket callbacks from listening CEP
+ * and assign new socket with new CEP
+ */
+ new_cep->sk_state_change = cep->sk_state_change;
+ new_cep->sk_data_ready = cep->sk_data_ready;
+ new_cep->sk_write_space = cep->sk_write_space;
+ new_cep->sk_error_report = cep->sk_error_report;
+
+ rv = kernel_accept(s, &new_s, O_NONBLOCK);
+ if (rv != 0) {
+ /*
+ * Connection already aborted by peer..?
+ */
+ siw_dbg_cep(cep, "kernel_accept() error: %d\n", rv);
+ goto error;
+ }
+ new_cep->sock = new_s;
+ siw_cep_get(new_cep);
+ new_s->sk->sk_user_data = new_cep;
+
+ siw_dbg_cep(cep, "listen socket 0x%p, new 0x%p\n", s, new_s);
+
+ if (siw_tcp_nagle == false) {
+ int val = 1;
+
+ rv = kernel_setsockopt(new_s, SOL_TCP, TCP_NODELAY,
+ (char *)&val, sizeof(val));
+ if (rv) {
+ siw_dbg_cep(cep, "setsockopt NODELAY error: %d\n", rv);
+ goto error;
+ }
+ }
+ new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ;
+
+ rv = siw_cm_queue_work(new_cep, SIW_CM_WORK_MPATIMEOUT);
+ if (rv)
+ goto error;
+ /*
+ * See siw_proc_mpareq() etc. for the use of new_cep->listen_cep.
+ */
+ new_cep->listen_cep = cep;
+ siw_cep_get(cep);
+
+ if (atomic_read(&new_s->sk->sk_rmem_alloc)) {
+ /*
+ * MPA REQ already queued
+ */
+ siw_dbg_cep(cep, "immediate mpa request\n");
+
+ siw_cep_set_inuse(new_cep);
+ rv = siw_proc_mpareq(new_cep);
+ siw_cep_set_free(new_cep);
+
+ if (rv != -EAGAIN) {
+ siw_cep_put(cep);
+ new_cep->listen_cep = NULL;
+ if (rv)
+ goto error;
+ }
+ }
+ return;
+
+error:
+ if (new_cep)
+ siw_cep_put(new_cep);
+
+ if (new_s) {
+ siw_socket_disassoc(new_s);
+ sock_release(new_s);
+ new_cep->sock = NULL;
+ }
+ siw_dbg_cep(cep, "error %d\n", rv);
+}
+
+static void siw_cm_work_handler(struct work_struct *w)
+{
+ struct siw_cm_work *work;
+ struct siw_cep *cep;
+ int release_cep = 0, rv = 0;
+
+ work = container_of(w, struct siw_cm_work, work.work);
+ cep = work->cep;
+
+ siw_dbg_cep(cep, "[QP %u]: work type: %d, state %d\n",
+ cep->qp ? qp_id(cep->qp) : -1, work->type, cep->state);
+
+ siw_cep_set_inuse(cep);
+
+ switch (work->type) {
+ case SIW_CM_WORK_ACCEPT:
+ siw_accept_newconn(cep);
+ break;
+
+ case SIW_CM_WORK_READ_MPAHDR:
+ if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) {
+ if (cep->listen_cep) {
+ siw_cep_set_inuse(cep->listen_cep);
+
+ if (cep->listen_cep->state ==
+ SIW_EPSTATE_LISTENING)
+ rv = siw_proc_mpareq(cep);
+ else
+ rv = -EFAULT;
+
+ siw_cep_set_free(cep->listen_cep);
+
+ if (rv != -EAGAIN) {
+ siw_cep_put(cep->listen_cep);
+ cep->listen_cep = NULL;
+ if (rv)
+ siw_cep_put(cep);
+ }
+ }
+ } else if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) {
+ rv = siw_proc_mpareply(cep);
+ } else {
+ /*
+ * CEP already moved out of MPA handshake.
+ * any connection management already done.
+ * silently ignore the mpa packet.
+ */
+ if (cep->state == SIW_EPSTATE_RDMA_MODE) {
+ cep->sock->sk->sk_data_ready(cep->sock->sk);
+ siw_dbg_cep(cep, "already in RDMA mode");
+ } else {
+ siw_dbg_cep(cep, "out of state: %d\n",
+ cep->state);
+ }
+ }
+ if (rv && rv != EAGAIN)
+ release_cep = 1;
+ break;
+
+ case SIW_CM_WORK_CLOSE_LLP:
+ /*
+ * QP scheduled LLP close
+ */
+ if (cep->qp && cep->qp->term_info.valid)
+ siw_send_terminate(cep->qp);
+
+ if (cep->cm_id)
+ siw_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
+
+ release_cep = 1;
+ break;
+
+ case SIW_CM_WORK_PEER_CLOSE:
+ if (cep->cm_id) {
+ if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) {
+ /*
+ * MPA reply not received, but connection drop
+ */
+ siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -ECONNRESET);
+ } else if (cep->state == SIW_EPSTATE_RDMA_MODE) {
+ /*
+ * NOTE: IW_CM_EVENT_DISCONNECT is given just
+ * to transition IWCM into CLOSING.
+ */
+ siw_cm_upcall(cep, IW_CM_EVENT_DISCONNECT, 0);
+ siw_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
+ }
+ /*
+ * for other states there is no connection
+ * known to the IWCM.
+ */
+ } else {
+ if (cep->state == SIW_EPSTATE_RECVD_MPAREQ) {
+ /*
+ * Wait for the ulp/CM to call accept/reject
+ */
+ siw_dbg_cep(cep,
+ "mpa req recvd, wait for ULP\n");
+ } else if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) {
+ /*
+ * Socket close before MPA request received.
+ */
+ siw_dbg_cep(cep, "no mpareq: drop listener\n");
+ siw_cep_put(cep->listen_cep);
+ cep->listen_cep = NULL;
+ }
+ }
+ release_cep = 1;
+ break;
+
+ case SIW_CM_WORK_MPATIMEOUT:
+ cep->mpa_timer = NULL;
+
+ if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) {
+ /*
+ * MPA request timed out:
+ * Hide any partially received private data and signal
+ * timeout
+ */
+ cep->mpa.hdr.params.pd_len = 0;
+
+ if (cep->cm_id)
+ siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -ETIMEDOUT);
+ release_cep = 1;
+
+ } else if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) {
+ /*
+ * No MPA request received after peer TCP stream setup.
+ */
+ if (cep->listen_cep) {
+ siw_cep_put(cep->listen_cep);
+ cep->listen_cep = NULL;
+ }
+ release_cep = 1;
+ }
+ break;
+
+ default:
+ WARN(1, "Undefined CM work type: %d\n", work->type);
+ }
+ if (release_cep) {
+ siw_dbg_cep(cep,
+ "release: timer=%s, QP[%u], id 0x%p\n",
+ cep->mpa_timer ? "y" : "n",
+ cep->qp ? qp_id(cep->qp) : -1, cep->cm_id);
+
+ siw_cancel_mpatimer(cep);
+
+ cep->state = SIW_EPSTATE_CLOSED;
+
+ if (cep->qp) {
+ struct siw_qp *qp = cep->qp;
+ /*
+ * Serialize a potential race with application
+ * closing the QP and calling siw_qp_cm_drop()
+ */
+ siw_qp_get(qp);
+ siw_cep_set_free(cep);
+
+ siw_qp_llp_close(qp);
+ siw_qp_put(qp);
+
+ siw_cep_set_inuse(cep);
+ cep->qp = NULL;
+ siw_qp_put(qp);
+ }
+ if (cep->sock) {
+ siw_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+ }
+ if (cep->cm_id) {
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+ siw_cep_put(cep);
+ }
+ }
+ siw_cep_set_free(cep);
+ siw_put_work(work);
+ siw_cep_put(cep);
+}
+
+static struct workqueue_struct *siw_cm_wq;
+
+int siw_cm_queue_work(struct siw_cep *cep, enum siw_work_type type)
+{
+ struct siw_cm_work *work = siw_get_work(cep);
+ unsigned long delay = 0;
+
+ if (!work) {
+ siw_dbg_cep(cep, "failed with no work available\n");
+ return -ENOMEM;
+ }
+ work->type = type;
+ work->cep = cep;
+
+ siw_cep_get(cep);
+
+ INIT_DELAYED_WORK(&work->work, siw_cm_work_handler);
+
+ if (type == SIW_CM_WORK_MPATIMEOUT) {
+ cep->mpa_timer = work;
+
+ if (cep->state == SIW_EPSTATE_AWAIT_MPAREP)
+ delay = MPAREQ_TIMEOUT;
+ else
+ delay = MPAREP_TIMEOUT;
+ }
+ siw_dbg_cep(cep, "[QP %u]: work type: %d, work 0x%p, timeout %lu\n",
+ cep->qp ? qp_id(cep->qp) : -1, type, work, delay);
+
+ queue_delayed_work(siw_cm_wq, &work->work, delay);
+
+ return 0;
+}
+
+static void siw_cm_llp_data_ready(struct sock *sk)
+{
+ struct siw_cep *cep;
+
+ read_lock(&sk->sk_callback_lock);
+
+ cep = sk_to_cep(sk);
+ if (!cep) {
+ WARN_ON(1);
+ goto out;
+ }
+ siw_dbg_cep(cep, "state: %d\n", cep->state);
+
+ switch (cep->state) {
+ case SIW_EPSTATE_RDMA_MODE:
+ /* fall through */
+ case SIW_EPSTATE_LISTENING:
+ break;
+
+ case SIW_EPSTATE_AWAIT_MPAREQ:
+ /* fall through */
+ case SIW_EPSTATE_AWAIT_MPAREP:
+ siw_cm_queue_work(cep, SIW_CM_WORK_READ_MPAHDR);
+ break;
+
+ default:
+ siw_dbg_cep(cep, "unexpected data, state %d\n", cep->state);
+ break;
+ }
+out:
+ read_unlock(&sk->sk_callback_lock);
+}
+
+static void siw_cm_llp_write_space(struct sock *sk)
+{
+ struct siw_cep *cep = sk_to_cep(sk);
+
+ if (cep)
+ siw_dbg_cep(cep, "state: %d\n", cep->state);
+}
+
+static void siw_cm_llp_error_report(struct sock *sk)
+{
+ struct siw_cep *cep = sk_to_cep(sk);
+
+ if (cep) {
+ siw_dbg_cep(cep, "error %d, socket state: %d, cep state: %d\n",
+ sk->sk_err, sk->sk_state, cep->state);
+ cep->sk_error_report(sk);
+ }
+}
+
+static void siw_cm_llp_state_change(struct sock *sk)
+{
+ struct siw_cep *cep;
+ void (*orig_state_change)(struct sock *s);
+
+ read_lock(&sk->sk_callback_lock);
+
+ cep = sk_to_cep(sk);
+ if (!cep) {
+ /* endpoint already disassociated */
+ read_unlock(&sk->sk_callback_lock);
+ return;
+ }
+ orig_state_change = cep->sk_state_change;
+
+ siw_dbg_cep(cep, "state: %d\n", cep->state);
+
+ switch (sk->sk_state) {
+ case TCP_ESTABLISHED:
+ /*
+ * handle accepting socket as special case where only
+ * new connection is possible
+ */
+ siw_cm_queue_work(cep, SIW_CM_WORK_ACCEPT);
+ break;
+
+ case TCP_CLOSE:
+ case TCP_CLOSE_WAIT:
+ if (cep->qp)
+ cep->qp->tx_ctx.tx_suspend = 1;
+ siw_cm_queue_work(cep, SIW_CM_WORK_PEER_CLOSE);
+ break;
+
+ default:
+ siw_dbg_cep(cep, "unexpected socket state %d\n", sk->sk_state);
+ }
+ read_unlock(&sk->sk_callback_lock);
+ orig_state_change(sk);
+}
+
+static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr,
+ struct sockaddr *raddr)
+{
+ int rv, flags = 0, s_val = 1;
+ size_t size = laddr->sa_family == AF_INET ?
+ sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
+
+ /*
+ * Make address available again asap.
+ */
+ rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val,
+ sizeof(s_val));
+ if (rv < 0)
+ return rv;
+
+ rv = s->ops->bind(s, laddr, size);
+ if (rv < 0)
+ return rv;
+
+ rv = s->ops->connect(s, raddr, size, flags);
+
+ return rv < 0 ? rv : 0;
+}
+
+int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
+{
+ struct siw_device *sdev = to_siw_dev(id->device);
+ struct siw_qp *qp;
+ struct siw_cep *cep = NULL;
+ struct socket *s = NULL;
+ struct sockaddr *laddr = (struct sockaddr *)&id->local_addr,
+ *raddr = (struct sockaddr *)&id->remote_addr;
+ bool p2p_mode = peer_to_peer, v4 = true;
+ u16 pd_len = params->private_data_len;
+ int version = mpa_version, rv;
+
+ if (pd_len > MPA_MAX_PRIVDATA)
+ return -EINVAL;
+
+ if (params->ird > sdev->attrs.max_ird ||
+ params->ord > sdev->attrs.max_ord)
+ return -ENOMEM;
+
+ if (laddr->sa_family == AF_INET6)
+ v4 = false;
+ else if (laddr->sa_family != AF_INET)
+ return -EAFNOSUPPORT;
+
+ /*
+ * Respect any iwarp port mapping: Use mapped remote address
+ * if valid. Local address must not be mapped, since siw
+ * uses kernel TCP stack.
+ */
+ if ((v4 && to_sockaddr_in(id->remote_addr).sin_port != 0) ||
+ to_sockaddr_in6(id->remote_addr).sin6_port != 0)
+ raddr = (struct sockaddr *)&id->m_remote_addr;
+
+ qp = siw_qp_id2obj(sdev, params->qpn);
+ if (!qp) {
+ WARN(1, "[QP %u] does not exist\n", params->qpn);
+ rv = -EINVAL;
+ goto error;
+ }
+ if (v4)
+ siw_dbg_qp(qp,
+ "id 0x%p, pd_len %d, laddr %pI4 %d, raddr %pI4 %d\n",
+ id, pd_len,
+ &((struct sockaddr_in *)(laddr))->sin_addr,
+ ntohs(((struct sockaddr_in *)(laddr))->sin_port),
+ &((struct sockaddr_in *)(raddr))->sin_addr,
+ ntohs(((struct sockaddr_in *)(raddr))->sin_port));
+ else
+ siw_dbg_qp(qp,
+ "id 0x%p, pd_len %d, laddr %pI6 %d, raddr %pI6 %d\n",
+ id, pd_len,
+ &((struct sockaddr_in6 *)(laddr))->sin6_addr,
+ ntohs(((struct sockaddr_in6 *)(laddr))->sin6_port),
+ &((struct sockaddr_in6 *)(raddr))->sin6_addr,
+ ntohs(((struct sockaddr_in6 *)(raddr))->sin6_port));
+
+ rv = sock_create(v4 ? AF_INET : AF_INET6, SOCK_STREAM, IPPROTO_TCP, &s);
+ if (rv < 0)
+ goto error;
+
+ /*
+ * NOTE: For simplification, connect() is called in blocking
+ * mode. Might be reconsidered for async connection setup at
+ * TCP level.
+ */
+ rv = kernel_bindconnect(s, laddr, raddr);
+ if (rv != 0) {
+ siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv);
+ goto error;
+ }
+ if (siw_tcp_nagle == false) {
+ int val = 1;
+
+ rv = kernel_setsockopt(s, SOL_TCP, TCP_NODELAY, (char *)&val,
+ sizeof(val));
+ if (rv) {
+ siw_dbg_qp(qp, "setsockopt NODELAY error: %d\n", rv);
+ goto error;
+ }
+ }
+ cep = siw_cep_alloc(sdev);
+ if (!cep) {
+ rv = -ENOMEM;
+ goto error;
+ }
+ siw_cep_set_inuse(cep);
+
+ /* Associate QP with CEP */
+ siw_cep_get(cep);
+ qp->cep = cep;
+
+ /* siw_qp_get(qp) already done by QP lookup */
+ cep->qp = qp;
+
+ id->add_ref(id);
+ cep->cm_id = id;
+
+ /*
+ * 4: Allocate a sufficient number of work elements
+ * to allow concurrent handling of local + peer close
+ * events, MPA header processing + MPA timeout.
+ */
+ rv = siw_cm_alloc_work(cep, 4);
+ if (rv != 0) {
+ rv = -ENOMEM;
+ goto error;
+ }
+ cep->ird = params->ird;
+ cep->ord = params->ord;
+
+ if (p2p_mode && cep->ord == 0)
+ cep->ord = 1;
+
+ cep->state = SIW_EPSTATE_CONNECTING;
+
+ /*
+ * Associate CEP with socket
+ */
+ siw_cep_socket_assoc(cep, s);
+
+ cep->state = SIW_EPSTATE_AWAIT_MPAREP;
+
+ /*
+ * Set MPA Request bits: CRC if required, no MPA Markers,
+ * MPA Rev. according to module parameter 'mpa_version', Key 'Request'.
+ */
+ cep->mpa.hdr.params.bits = 0;
+ if (version > MPA_REVISION_2) {
+ pr_warn("Setting MPA version to %u\n", MPA_REVISION_2);
+ version = MPA_REVISION_2;
+ /* Adjust also module parameter */
+ mpa_version = MPA_REVISION_2;
+ }
+ __mpa_rr_set_revision(&cep->mpa.hdr.params.bits, version);
+
+ if (try_gso)
+ cep->mpa.hdr.params.bits |= MPA_RR_FLAG_GSO_EXP;
+
+ if (mpa_crc_required)
+ cep->mpa.hdr.params.bits |= MPA_RR_FLAG_CRC;
+
+ /*
+ * If MPA version == 2:
+ * o Include ORD and IRD.
+ * o Indicate peer-to-peer mode, if required by module
+ * parameter 'peer_to_peer'.
+ */
+ if (version == MPA_REVISION_2) {
+ cep->enhanced_rdma_conn_est = true;
+ cep->mpa.hdr.params.bits |= MPA_RR_FLAG_ENHANCED;
+
+ cep->mpa.v2_ctrl.ird = htons(cep->ird);
+ cep->mpa.v2_ctrl.ord = htons(cep->ord);
+
+ if (p2p_mode) {
+ cep->mpa.v2_ctrl.ird |= MPA_V2_PEER_TO_PEER;
+ cep->mpa.v2_ctrl.ord |= rtr_type;
+ }
+ /* Remember own P2P mode requested */
+ cep->mpa.v2_ctrl_req.ird = cep->mpa.v2_ctrl.ird;
+ cep->mpa.v2_ctrl_req.ord = cep->mpa.v2_ctrl.ord;
+ }
+ memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, 16);
+
+ rv = siw_send_mpareqrep(cep, params->private_data, pd_len);
+ /*
+ * Reset private data.
+ */
+ cep->mpa.hdr.params.pd_len = 0;
+
+ if (rv >= 0) {
+ rv = siw_cm_queue_work(cep, SIW_CM_WORK_MPATIMEOUT);
+ if (!rv) {
+ siw_dbg_cep(cep, "id 0x%p, [QP %u]: exit\n", id,
+ qp_id(qp));
+ siw_cep_set_free(cep);
+ return 0;
+ }
+ }
+error:
+ siw_dbg_qp(qp, "failed: %d\n", rv);
+
+ if (cep) {
+ siw_socket_disassoc(s);
+ sock_release(s);
+ cep->sock = NULL;
+
+ cep->qp = NULL;
+
+ cep->cm_id = NULL;
+ id->rem_ref(id);
+ siw_cep_put(cep);
+
+ qp->cep = NULL;
+ siw_cep_put(cep);
+
+ cep->state = SIW_EPSTATE_CLOSED;
+
+ siw_cep_set_free(cep);
+
+ siw_cep_put(cep);
+
+ } else if (s) {
+ sock_release(s);
+ }
+ siw_qp_put(qp);
+
+ return rv;
+}
+
+/*
+ * siw_accept - Let SoftiWARP accept an RDMA connection request
+ *
+ * @id: New connection management id to be used for accepted
+ * connection request
+ * @params: Connection parameters provided by ULP for accepting connection
+ *
+ * Transition QP to RTS state, associate new CM id @id with accepted CEP
+ * and get prepared for TCP input by installing socket callbacks.
+ * Then send MPA Reply and generate the "connection established" event.
+ * Socket callbacks must be installed before sending MPA Reply, because
+ * the latter may cause a first RDMA message to arrive from the RDMA Initiator
+ * side very quickly, at which time the socket callbacks must be ready.
+ */
+int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
+{
+ struct siw_device *sdev = to_siw_dev(id->device);
+ struct siw_cep *cep = (struct siw_cep *)id->provider_data;
+ struct siw_qp *qp;
+ struct siw_qp_attrs qp_attrs;
+ int rv, max_priv_data = MPA_MAX_PRIVDATA;
+ bool wait_for_peer_rts = false;
+
+ siw_cep_set_inuse(cep);
+ siw_cep_put(cep);
+
+ /* Free lingering inbound private data */
+ if (cep->mpa.hdr.params.pd_len) {
+ cep->mpa.hdr.params.pd_len = 0;
+ kfree(cep->mpa.pdata);
+ cep->mpa.pdata = NULL;
+ }
+ siw_cancel_mpatimer(cep);
+
+ if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) {
+ siw_dbg_cep(cep, "id 0x%p: out of state\n", id);
+
+ siw_cep_set_free(cep);
+ siw_cep_put(cep);
+
+ return -ECONNRESET;
+ }
+ qp = siw_qp_id2obj(sdev, params->qpn);
+ if (!qp) {
+ WARN(1, "[QP %d] does not exist\n", params->qpn);
+ siw_cep_set_free(cep);
+ siw_cep_put(cep);
+
+ return -EINVAL;
+ }
+ down_write(&qp->state_lock);
+ if (qp->attrs.state > SIW_QP_STATE_RTR) {
+ rv = -EINVAL;
+ up_write(&qp->state_lock);
+ goto error;
+ }
+ siw_dbg_cep(cep, "id 0x%p\n", id);
+
+ if (try_gso && cep->mpa.hdr.params.bits & MPA_RR_FLAG_GSO_EXP) {
+ siw_dbg_cep(cep, "peer allows GSO on TX\n");
+ qp->tx_ctx.gso_seg_limit = 0;
+ }
+ if (params->ord > sdev->attrs.max_ord ||
+ params->ird > sdev->attrs.max_ird) {
+ siw_dbg_cep(
+ cep,
+ "id 0x%p, [QP %u]: ord %d (max %d), ird %d (max %d)\n",
+ id, qp_id(qp), params->ord, sdev->attrs.max_ord,
+ params->ird, sdev->attrs.max_ird);
+ rv = -EINVAL;
+ up_write(&qp->state_lock);
+ goto error;
+ }
+ if (cep->enhanced_rdma_conn_est)
+ max_priv_data -= sizeof(struct mpa_v2_data);
+
+ if (params->private_data_len > max_priv_data) {
+ siw_dbg_cep(
+ cep,
+ "id 0x%p, [QP %u]: private data length: %d (max %d)\n",
+ id, qp_id(qp), params->private_data_len, max_priv_data);
+ rv = -EINVAL;
+ up_write(&qp->state_lock);
+ goto error;
+ }
+ if (cep->enhanced_rdma_conn_est) {
+ if (params->ord > cep->ord) {
+ if (relaxed_ird_negotiation) {
+ params->ord = cep->ord;
+ } else {
+ cep->ird = params->ird;
+ cep->ord = params->ord;
+ rv = -EINVAL;
+ up_write(&qp->state_lock);
+ goto error;
+ }
+ }
+ if (params->ird < cep->ird) {
+ if (relaxed_ird_negotiation &&
+ cep->ird <= sdev->attrs.max_ird)
+ params->ird = cep->ird;
+ else {
+ rv = -ENOMEM;
+ up_write(&qp->state_lock);
+ goto error;
+ }
+ }
+ if (cep->mpa.v2_ctrl.ord &
+ (MPA_V2_RDMA_WRITE_RTR | MPA_V2_RDMA_READ_RTR))
+ wait_for_peer_rts = true;
+ /*
+ * Signal back negotiated IRD and ORD values
+ */
+ cep->mpa.v2_ctrl.ord =
+ htons(params->ord & MPA_IRD_ORD_MASK) |
+ (cep->mpa.v2_ctrl.ord & ~MPA_V2_MASK_IRD_ORD);
+ cep->mpa.v2_ctrl.ird =
+ htons(params->ird & MPA_IRD_ORD_MASK) |
+ (cep->mpa.v2_ctrl.ird & ~MPA_V2_MASK_IRD_ORD);
+ }
+ cep->ird = params->ird;
+ cep->ord = params->ord;
+
+ cep->cm_id = id;
+ id->add_ref(id);
+
+ memset(&qp_attrs, 0, sizeof(qp_attrs));
+ qp_attrs.orq_size = cep->ord;
+ qp_attrs.irq_size = cep->ird;
+ qp_attrs.sk = cep->sock;
+ if (cep->mpa.hdr.params.bits & MPA_RR_FLAG_CRC)
+ qp_attrs.flags = SIW_MPA_CRC;
+ qp_attrs.state = SIW_QP_STATE_RTS;
+
+ siw_dbg_cep(cep, "id 0x%p, [QP%u]: moving to rts\n", id, qp_id(qp));
+
+ /* Associate QP with CEP */
+ siw_cep_get(cep);
+ qp->cep = cep;
+
+ /* siw_qp_get(qp) already done by QP lookup */
+ cep->qp = qp;
+
+ cep->state = SIW_EPSTATE_RDMA_MODE;
+
+ /* Move socket RX/TX under QP control */
+ rv = siw_qp_modify(qp, &qp_attrs,
+ SIW_QP_ATTR_STATE | SIW_QP_ATTR_LLP_HANDLE |
+ SIW_QP_ATTR_ORD | SIW_QP_ATTR_IRD |
+ SIW_QP_ATTR_MPA);
+ up_write(&qp->state_lock);
+
+ if (rv)
+ goto error;
+
+ siw_dbg_cep(cep, "id 0x%p, [QP %u]: send mpa reply, %d byte pdata\n",
+ id, qp_id(qp), params->private_data_len);
+
+ rv = siw_send_mpareqrep(cep, params->private_data,
+ params->private_data_len);
+ if (rv != 0)
+ goto error;
+
+ if (wait_for_peer_rts) {
+ siw_sk_assign_rtr_upcalls(cep);
+ } else {
+ siw_qp_socket_assoc(cep, qp);
+ rv = siw_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0);
+ if (rv)
+ goto error;
+ }
+ siw_cep_set_free(cep);
+
+ return 0;
+error:
+ siw_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+
+ cep->state = SIW_EPSTATE_CLOSED;
+
+ if (cep->cm_id) {
+ cep->cm_id->rem_ref(id);
+ cep->cm_id = NULL;
+ }
+ if (qp->cep) {
+ siw_cep_put(cep);
+ qp->cep = NULL;
+ }
+ cep->qp = NULL;
+ siw_qp_put(qp);
+
+ siw_cep_set_free(cep);
+ siw_cep_put(cep);
+
+ return rv;
+}
+
+/*
+ * siw_reject()
+ *
+ * Local connection reject case. Send private data back to peer,
+ * close connection and dereference connection id.
+ */
+int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len)
+{
+ struct siw_cep *cep = (struct siw_cep *)id->provider_data;
+
+ siw_cep_set_inuse(cep);
+ siw_cep_put(cep);
+
+ siw_cancel_mpatimer(cep);
+
+ if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) {
+ siw_dbg_cep(cep, "id 0x%p: out of state\n", id);
+
+ siw_cep_set_free(cep);
+ siw_cep_put(cep); /* put last reference */
+
+ return -ECONNRESET;
+ }
+ siw_dbg_cep(cep, "id 0x%p, cep->state %d, pd_len %d\n", id, cep->state,
+ pd_len);
+
+ if (__mpa_rr_revision(cep->mpa.hdr.params.bits) >= MPA_REVISION_1) {
+ cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */
+ siw_send_mpareqrep(cep, pdata, pd_len);
+ }
+ siw_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+
+ cep->state = SIW_EPSTATE_CLOSED;
+
+ siw_cep_set_free(cep);
+ siw_cep_put(cep);
+
+ return 0;
+}
+
+static int siw_listen_address(struct iw_cm_id *id, int backlog,
+ struct sockaddr *laddr, int addr_family)
+{
+ struct socket *s;
+ struct siw_cep *cep = NULL;
+ struct siw_device *sdev = to_siw_dev(id->device);
+ int rv = 0, s_val;
+
+ rv = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s);
+ if (rv < 0)
+ return rv;
+
+ /*
+ * Allow binding local port when still in TIME_WAIT from last close.
+ */
+ s_val = 1;
+ rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val,
+ sizeof(s_val));
+ if (rv) {
+ siw_dbg(id->device, "id 0x%p: setsockopt error: %d\n", id, rv);
+ goto error;
+ }
+ rv = s->ops->bind(s, laddr, addr_family == AF_INET ?
+ sizeof(struct sockaddr_in) :
+ sizeof(struct sockaddr_in6));
+ if (rv) {
+ siw_dbg(id->device, "id 0x%p: socket bind error: %d\n", id, rv);
+ goto error;
+ }
+ cep = siw_cep_alloc(sdev);
+ if (!cep) {
+ rv = -ENOMEM;
+ goto error;
+ }
+ siw_cep_socket_assoc(cep, s);
+
+ rv = siw_cm_alloc_work(cep, backlog);
+ if (rv) {
+ siw_dbg(id->device,
+ "id 0x%p: alloc_work error %d, backlog %d\n", id,
+ rv, backlog);
+ goto error;
+ }
+ rv = s->ops->listen(s, backlog);
+ if (rv) {
+ siw_dbg(id->device, "id 0x%p: listen error %d\n", id, rv);
+ goto error;
+ }
+ cep->cm_id = id;
+ id->add_ref(id);
+
+ /*
+ * In case of a wildcard rdma_listen on a multi-homed device,
+ * a listener's IWCM id is associated with more than one listening CEP.
+ *
+ * We currently use id->provider_data in three different ways:
+ *
+ * o For a listener's IWCM id, id->provider_data points to
+ * the list_head of the list of listening CEPs.
+ * Uses: siw_create_listen(), siw_destroy_listen()
+ *
+ * o For each accepted passive-side IWCM id, id->provider_data
+ * points to the CEP itself. This is a consequence of
+ * - siw_cm_upcall() setting event.provider_data = cep and
+ * - the IWCM's cm_conn_req_handler() setting provider_data of the
+ * new passive-side IWCM id equal to event.provider_data
+ * Uses: siw_accept(), siw_reject()
+ *
+ * o For an active-side IWCM id, id->provider_data is not used at all.
+ *
+ */
+ if (!id->provider_data) {
+ id->provider_data =
+ kmalloc(sizeof(struct list_head), GFP_KERNEL);
+ if (!id->provider_data) {
+ rv = -ENOMEM;
+ goto error;
+ }
+ INIT_LIST_HEAD((struct list_head *)id->provider_data);
+ }
+ list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
+ cep->state = SIW_EPSTATE_LISTENING;
+
+ if (addr_family == AF_INET)
+ siw_dbg(id->device, "Listen at laddr %pI4 %u\n",
+ &(((struct sockaddr_in *)laddr)->sin_addr),
+ ((struct sockaddr_in *)laddr)->sin_port);
+ else
+ siw_dbg(id->device, "Listen at laddr %pI6 %u\n",
+ &(((struct sockaddr_in6 *)laddr)->sin6_addr),
+ ((struct sockaddr_in6 *)laddr)->sin6_port);
+
+ return 0;
+
+error:
+ siw_dbg(id->device, "failed: %d\n", rv);
+
+ if (cep) {
+ siw_cep_set_inuse(cep);
+
+ if (cep->cm_id) {
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+ }
+ cep->sock = NULL;
+ siw_socket_disassoc(s);
+ cep->state = SIW_EPSTATE_CLOSED;
+
+ siw_cep_set_free(cep);
+ siw_cep_put(cep);
+ }
+ sock_release(s);
+
+ return rv;
+}
+
+static void siw_drop_listeners(struct iw_cm_id *id)
+{
+ struct list_head *p, *tmp;
+
+ /*
+ * In case of a wildcard rdma_listen on a multi-homed device,
+ * a listener's IWCM id is associated with more than one listening CEP.
+ */
+ list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) {
+ struct siw_cep *cep = list_entry(p, struct siw_cep, listenq);
+
+ list_del(p);
+
+ siw_dbg_cep(cep, "id 0x%p: drop cep, state %d\n", id,
+ cep->state);
+
+ siw_cep_set_inuse(cep);
+
+ if (cep->cm_id) {
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+ }
+ if (cep->sock) {
+ siw_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+ }
+ cep->state = SIW_EPSTATE_CLOSED;
+ siw_cep_set_free(cep);
+ siw_cep_put(cep);
+ }
+}
+
+/*
+ * siw_create_listen - Create resources for a listener's IWCM ID @id
+ *
+ * Listens on the socket addresses id->local_addr and id->remote_addr.
+ *
+ * If the listener's @id provides a specific local IP address, at most one
+ * listening socket is created and associated with @id.
+ *
+ * If the listener's @id provides the wildcard (zero) local IP address,
+ * a separate listen is performed for each local IP address of the device
+ * by creating a listening socket and binding to that local IP address.
+ *
+ */
+int siw_create_listen(struct iw_cm_id *id, int backlog)
+{
+ struct net_device *dev = to_siw_dev(id->device)->netdev;
+ int rv = 0, listeners = 0;
+
+ siw_dbg(id->device, "id 0x%p: backlog %d\n", id, backlog);
+
+ /*
+ * For each attached address of the interface, create a
+ * listening socket, if id->local_addr is the wildcard
+ * IP address or matches the IP address.
+ */
+ if (id->local_addr.ss_family == AF_INET) {
+ struct in_device *in_dev = in_dev_get(dev);
+ struct sockaddr_in s_laddr, *s_raddr;
+ const struct in_ifaddr *ifa;
+
+ memcpy(&s_laddr, &id->local_addr, sizeof(s_laddr));
+ s_raddr = (struct sockaddr_in *)&id->remote_addr;
+
+ siw_dbg(id->device,
+ "id 0x%p: laddr %pI4:%d, raddr %pI4:%d\n",
+ id, &s_laddr.sin_addr, ntohs(s_laddr.sin_port),
+ &s_raddr->sin_addr, ntohs(s_raddr->sin_port));
+
+ rtnl_lock();
+ in_dev_for_each_ifa_rtnl(ifa, in_dev) {
+ if (ipv4_is_zeronet(s_laddr.sin_addr.s_addr) ||
+ s_laddr.sin_addr.s_addr == ifa->ifa_address) {
+ s_laddr.sin_addr.s_addr = ifa->ifa_address;
+
+ rv = siw_listen_address(id, backlog,
+ (struct sockaddr *)&s_laddr,
+ AF_INET);
+ if (!rv)
+ listeners++;
+ }
+ }
+ rtnl_unlock();
+ in_dev_put(in_dev);
+ } else if (id->local_addr.ss_family == AF_INET6) {
+ struct inet6_dev *in6_dev = in6_dev_get(dev);
+ struct inet6_ifaddr *ifp;
+ struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr),
+ *s_raddr = &to_sockaddr_in6(id->remote_addr);
+
+ siw_dbg(id->device,
+ "id 0x%p: laddr %pI6:%d, raddr %pI6:%d\n",
+ id, &s_laddr->sin6_addr, ntohs(s_laddr->sin6_port),
+ &s_raddr->sin6_addr, ntohs(s_raddr->sin6_port));
+
+ read_lock_bh(&in6_dev->lock);
+ list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
+ struct sockaddr_in6 bind_addr;
+
+ if (ipv6_addr_any(&s_laddr->sin6_addr) ||
+ ipv6_addr_equal(&s_laddr->sin6_addr, &ifp->addr)) {
+ bind_addr.sin6_family = AF_INET6;
+ bind_addr.sin6_port = s_laddr->sin6_port;
+ bind_addr.sin6_flowinfo = 0;
+ bind_addr.sin6_addr = ifp->addr;
+ bind_addr.sin6_scope_id = dev->ifindex;
+
+ rv = siw_listen_address(id, backlog,
+ (struct sockaddr *)&bind_addr,
+ AF_INET6);
+ if (!rv)
+ listeners++;
+ }
+ }
+ read_unlock_bh(&in6_dev->lock);
+
+ in6_dev_put(in6_dev);
+ } else {
+ return -EAFNOSUPPORT;
+ }
+ if (listeners)
+ rv = 0;
+ else if (!rv)
+ rv = -EINVAL;
+
+ siw_dbg(id->device, "id 0x%p: %s\n", id, rv ? "FAIL" : "OK");
+
+ return rv;
+}
+
+int siw_destroy_listen(struct iw_cm_id *id)
+{
+ siw_dbg(id->device, "id 0x%p\n", id);
+
+ if (!id->provider_data) {
+ siw_dbg(id->device, "id 0x%p: no cep(s)\n", id);
+ return 0;
+ }
+ siw_drop_listeners(id);
+ kfree(id->provider_data);
+ id->provider_data = NULL;
+
+ return 0;
+}
+
+int siw_cm_init(void)
+{
+ /*
+ * create_single_workqueue for strict ordering
+ */
+ siw_cm_wq = create_singlethread_workqueue("siw_cm_wq");
+ if (!siw_cm_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void siw_cm_exit(void)
+{
+ if (siw_cm_wq) {
+ flush_workqueue(siw_cm_wq);
+ destroy_workqueue(siw_cm_wq);
+ }
+}
diff --git a/drivers/infiniband/sw/siw/siw_cm.h b/drivers/infiniband/sw/siw/siw_cm.h
new file mode 100644
index 000000000000..8c59cb3e2868
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_cm.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Greg Joyce <greg@opengridcomputing.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+/* Copyright (c) 2017, Open Grid Computing, Inc. */
+
+#ifndef _SIW_CM_H
+#define _SIW_CM_H
+
+#include <net/sock.h>
+#include <linux/tcp.h>
+
+#include <rdma/iw_cm.h>
+
+enum siw_cep_state {
+ SIW_EPSTATE_IDLE = 1,
+ SIW_EPSTATE_LISTENING,
+ SIW_EPSTATE_CONNECTING,
+ SIW_EPSTATE_AWAIT_MPAREQ,
+ SIW_EPSTATE_RECVD_MPAREQ,
+ SIW_EPSTATE_AWAIT_MPAREP,
+ SIW_EPSTATE_RDMA_MODE,
+ SIW_EPSTATE_CLOSED
+};
+
+struct siw_mpa_info {
+ struct mpa_rr hdr; /* peer mpa hdr in host byte order */
+ struct mpa_v2_data v2_ctrl;
+ struct mpa_v2_data v2_ctrl_req;
+ char *pdata;
+ int bytes_rcvd;
+};
+
+struct siw_device;
+
+struct siw_cep {
+ struct iw_cm_id *cm_id;
+ struct siw_device *sdev;
+ struct list_head devq;
+ spinlock_t lock;
+ struct kref ref;
+ int in_use;
+ wait_queue_head_t waitq;
+ enum siw_cep_state state;
+
+ struct list_head listenq;
+ struct siw_cep *listen_cep;
+
+ struct siw_qp *qp;
+ struct socket *sock;
+
+ struct siw_cm_work *mpa_timer;
+ struct list_head work_freelist;
+
+ struct siw_mpa_info mpa;
+ int ord;
+ int ird;
+ bool enhanced_rdma_conn_est;
+
+ /* Saved upcalls of socket */
+ void (*sk_state_change)(struct sock *sk);
+ void (*sk_data_ready)(struct sock *sk);
+ void (*sk_write_space)(struct sock *sk);
+ void (*sk_error_report)(struct sock *sk);
+};
+
+/*
+ * Connection initiator waits 10 seconds to receive an
+ * MPA reply after sending out MPA request. Reponder waits for
+ * 5 seconds for MPA request to arrive if new TCP connection
+ * was set up.
+ */
+#define MPAREQ_TIMEOUT (HZ * 10)
+#define MPAREP_TIMEOUT (HZ * 5)
+
+enum siw_work_type {
+ SIW_CM_WORK_ACCEPT = 1,
+ SIW_CM_WORK_READ_MPAHDR,
+ SIW_CM_WORK_CLOSE_LLP, /* close socket */
+ SIW_CM_WORK_PEER_CLOSE, /* socket indicated peer close */
+ SIW_CM_WORK_MPATIMEOUT
+};
+
+struct siw_cm_work {
+ struct delayed_work work;
+ struct list_head list;
+ enum siw_work_type type;
+ struct siw_cep *cep;
+};
+
+#define to_sockaddr_in(a) (*(struct sockaddr_in *)(&(a)))
+#define to_sockaddr_in6(a) (*(struct sockaddr_in6 *)(&(a)))
+
+static inline int getname_peer(struct socket *s, struct sockaddr_storage *a)
+{
+ return s->ops->getname(s, (struct sockaddr *)a, 1);
+}
+
+static inline int getname_local(struct socket *s, struct sockaddr_storage *a)
+{
+ return s->ops->getname(s, (struct sockaddr *)a, 0);
+}
+
+static inline int ksock_recv(struct socket *sock, char *buf, size_t size,
+ int flags)
+{
+ struct kvec iov = { buf, size };
+ struct msghdr msg = { .msg_name = NULL, .msg_flags = flags };
+
+ return kernel_recvmsg(sock, &msg, &iov, 1, size, flags);
+}
+
+int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *parm);
+int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *param);
+int siw_reject(struct iw_cm_id *id, const void *data, u8 len);
+int siw_create_listen(struct iw_cm_id *id, int backlog);
+int siw_destroy_listen(struct iw_cm_id *id);
+
+void siw_cep_get(struct siw_cep *cep);
+void siw_cep_put(struct siw_cep *cep);
+int siw_cm_queue_work(struct siw_cep *cep, enum siw_work_type type);
+
+int siw_cm_init(void);
+void siw_cm_exit(void);
+
+/*
+ * TCP socket interface
+ */
+#define sk_to_qp(sk) (((struct siw_cep *)((sk)->sk_user_data))->qp)
+#define sk_to_cep(sk) ((struct siw_cep *)((sk)->sk_user_data))
+
+#endif
diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c
new file mode 100644
index 000000000000..e381ae9b7d62
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_cq.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "siw.h"
+
+static int map_wc_opcode[SIW_NUM_OPCODES] = {
+ [SIW_OP_WRITE] = IB_WC_RDMA_WRITE,
+ [SIW_OP_SEND] = IB_WC_SEND,
+ [SIW_OP_SEND_WITH_IMM] = IB_WC_SEND,
+ [SIW_OP_READ] = IB_WC_RDMA_READ,
+ [SIW_OP_READ_LOCAL_INV] = IB_WC_RDMA_READ,
+ [SIW_OP_COMP_AND_SWAP] = IB_WC_COMP_SWAP,
+ [SIW_OP_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
+ [SIW_OP_INVAL_STAG] = IB_WC_LOCAL_INV,
+ [SIW_OP_REG_MR] = IB_WC_REG_MR,
+ [SIW_OP_RECEIVE] = IB_WC_RECV,
+ [SIW_OP_READ_RESPONSE] = -1 /* not used */
+};
+
+static struct {
+ enum siw_wc_status siw;
+ enum ib_wc_status ib;
+} map_cqe_status[SIW_NUM_WC_STATUS] = {
+ { SIW_WC_SUCCESS, IB_WC_SUCCESS },
+ { SIW_WC_LOC_LEN_ERR, IB_WC_LOC_LEN_ERR },
+ { SIW_WC_LOC_PROT_ERR, IB_WC_LOC_PROT_ERR },
+ { SIW_WC_LOC_QP_OP_ERR, IB_WC_LOC_QP_OP_ERR },
+ { SIW_WC_WR_FLUSH_ERR, IB_WC_WR_FLUSH_ERR },
+ { SIW_WC_BAD_RESP_ERR, IB_WC_BAD_RESP_ERR },
+ { SIW_WC_LOC_ACCESS_ERR, IB_WC_LOC_ACCESS_ERR },
+ { SIW_WC_REM_ACCESS_ERR, IB_WC_REM_ACCESS_ERR },
+ { SIW_WC_REM_INV_REQ_ERR, IB_WC_REM_INV_REQ_ERR },
+ { SIW_WC_GENERAL_ERR, IB_WC_GENERAL_ERR }
+};
+
+/*
+ * Reap one CQE from the CQ. Only used by kernel clients
+ * during CQ normal operation. Might be called during CQ
+ * flush for user mapped CQE array as well.
+ */
+int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
+{
+ struct siw_cqe *cqe;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->lock, flags);
+
+ cqe = &cq->queue[cq->cq_get % cq->num_cqe];
+ if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) {
+ memset(wc, 0, sizeof(*wc));
+ wc->wr_id = cqe->id;
+ wc->status = map_cqe_status[cqe->status].ib;
+ wc->opcode = map_wc_opcode[cqe->opcode];
+ wc->byte_len = cqe->bytes;
+
+ /*
+ * During CQ flush, also user land CQE's may get
+ * reaped here, which do not hold a QP reference
+ * and do not qualify for memory extension verbs.
+ */
+ if (likely(cq->kernel_verbs)) {
+ if (cqe->flags & SIW_WQE_REM_INVAL) {
+ wc->ex.invalidate_rkey = cqe->inval_stag;
+ wc->wc_flags = IB_WC_WITH_INVALIDATE;
+ }
+ wc->qp = cqe->base_qp;
+ siw_dbg_cq(cq, "idx %u, type %d, flags %2x, id 0x%p\n",
+ cq->cq_get % cq->num_cqe, cqe->opcode,
+ cqe->flags, (void *)cqe->id);
+ }
+ WRITE_ONCE(cqe->flags, 0);
+ cq->cq_get++;
+
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+ return 1;
+ }
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+ return 0;
+}
+
+/*
+ * siw_cq_flush()
+ *
+ * Flush all CQ elements.
+ */
+void siw_cq_flush(struct siw_cq *cq)
+{
+ struct ib_wc wc;
+
+ while (siw_reap_cqe(cq, &wc))
+ ;
+}
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
new file mode 100644
index 000000000000..f55c4e80aea4
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -0,0 +1,685 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <net/net_namespace.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_arp.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/rdma_netlink.h>
+#include <linux/kthread.h>
+
+#include "siw.h"
+#include "siw_verbs.h"
+
+MODULE_AUTHOR("Bernard Metzler");
+MODULE_DESCRIPTION("Software iWARP Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+
+/* transmit from user buffer, if possible */
+const bool zcopy_tx = true;
+
+/* Restrict usage of GSO, if hardware peer iwarp is unable to process
+ * large packets. try_gso = true lets siw try to use local GSO,
+ * if peer agrees. Not using GSO severly limits siw maximum tx bandwidth.
+ */
+const bool try_gso;
+
+/* Attach siw also with loopback devices */
+const bool loopback_enabled = true;
+
+/* We try to negotiate CRC on, if true */
+const bool mpa_crc_required;
+
+/* MPA CRC on/off enforced */
+const bool mpa_crc_strict;
+
+/* Control TCP_NODELAY socket option */
+const bool siw_tcp_nagle;
+
+/* Select MPA version to be used during connection setup */
+u_char mpa_version = MPA_REVISION_2;
+
+/* Selects MPA P2P mode (additional handshake during connection
+ * setup, if true.
+ */
+const bool peer_to_peer;
+
+struct task_struct *siw_tx_thread[NR_CPUS];
+struct crypto_shash *siw_crypto_shash;
+
+static int siw_device_register(struct siw_device *sdev, const char *name)
+{
+ struct ib_device *base_dev = &sdev->base_dev;
+ static int dev_id = 1;
+ int rv;
+
+ rv = ib_register_device(base_dev, name);
+ if (rv) {
+ pr_warn("siw: device registration error %d\n", rv);
+ return rv;
+ }
+ sdev->vendor_part_id = dev_id++;
+
+ siw_dbg(base_dev, "HWaddr=%pM\n", sdev->netdev->dev_addr);
+
+ return 0;
+}
+
+static void siw_device_cleanup(struct ib_device *base_dev)
+{
+ struct siw_device *sdev = to_siw_dev(base_dev);
+
+ xa_destroy(&sdev->qp_xa);
+ xa_destroy(&sdev->mem_xa);
+}
+
+static int siw_create_tx_threads(void)
+{
+ int cpu, assigned = 0;
+
+ for_each_online_cpu(cpu) {
+ /* Skip HT cores */
+ if (cpu % cpumask_weight(topology_sibling_cpumask(cpu)))
+ continue;
+
+ siw_tx_thread[cpu] =
+ kthread_create(siw_run_sq, (unsigned long *)(long)cpu,
+ "siw_tx/%d", cpu);
+ if (IS_ERR(siw_tx_thread[cpu])) {
+ siw_tx_thread[cpu] = NULL;
+ continue;
+ }
+ kthread_bind(siw_tx_thread[cpu], cpu);
+
+ wake_up_process(siw_tx_thread[cpu]);
+ assigned++;
+ }
+ return assigned;
+}
+
+static int siw_dev_qualified(struct net_device *netdev)
+{
+ /*
+ * Additional hardware support can be added here
+ * (e.g. ARPHRD_FDDI, ARPHRD_ATM, ...) - see
+ * <linux/if_arp.h> for type identifiers.
+ */
+ if (netdev->type == ARPHRD_ETHER || netdev->type == ARPHRD_IEEE802 ||
+ (netdev->type == ARPHRD_LOOPBACK && loopback_enabled))
+ return 1;
+
+ return 0;
+}
+
+static DEFINE_PER_CPU(atomic_t, siw_use_cnt);
+
+static struct {
+ struct cpumask **tx_valid_cpus;
+ int num_nodes;
+} siw_cpu_info;
+
+static int siw_init_cpulist(void)
+{
+ int i, num_nodes = num_possible_nodes();
+
+ memset(siw_tx_thread, 0, sizeof(siw_tx_thread));
+
+ siw_cpu_info.num_nodes = num_nodes;
+
+ siw_cpu_info.tx_valid_cpus =
+ kcalloc(num_nodes, sizeof(struct cpumask *), GFP_KERNEL);
+ if (!siw_cpu_info.tx_valid_cpus) {
+ siw_cpu_info.num_nodes = 0;
+ return -ENOMEM;
+ }
+ for (i = 0; i < siw_cpu_info.num_nodes; i++) {
+ siw_cpu_info.tx_valid_cpus[i] =
+ kzalloc(sizeof(struct cpumask), GFP_KERNEL);
+ if (!siw_cpu_info.tx_valid_cpus[i])
+ goto out_err;
+
+ cpumask_clear(siw_cpu_info.tx_valid_cpus[i]);
+ }
+ for_each_possible_cpu(i)
+ cpumask_set_cpu(i, siw_cpu_info.tx_valid_cpus[cpu_to_node(i)]);
+
+ return 0;
+
+out_err:
+ siw_cpu_info.num_nodes = 0;
+ while (i) {
+ kfree(siw_cpu_info.tx_valid_cpus[i]);
+ siw_cpu_info.tx_valid_cpus[i--] = NULL;
+ }
+ kfree(siw_cpu_info.tx_valid_cpus);
+ siw_cpu_info.tx_valid_cpus = NULL;
+
+ return -ENOMEM;
+}
+
+static void siw_destroy_cpulist(void)
+{
+ int i = 0;
+
+ while (i < siw_cpu_info.num_nodes)
+ kfree(siw_cpu_info.tx_valid_cpus[i++]);
+
+ kfree(siw_cpu_info.tx_valid_cpus);
+}
+
+/*
+ * Choose CPU with least number of active QP's from NUMA node of
+ * TX interface.
+ */
+int siw_get_tx_cpu(struct siw_device *sdev)
+{
+ const struct cpumask *tx_cpumask;
+ int i, num_cpus, cpu, min_use, node = sdev->numa_node, tx_cpu = -1;
+
+ if (node < 0)
+ tx_cpumask = cpu_online_mask;
+ else
+ tx_cpumask = siw_cpu_info.tx_valid_cpus[node];
+
+ num_cpus = cpumask_weight(tx_cpumask);
+ if (!num_cpus) {
+ /* no CPU on this NUMA node */
+ tx_cpumask = cpu_online_mask;
+ num_cpus = cpumask_weight(tx_cpumask);
+ }
+ if (!num_cpus)
+ goto out;
+
+ cpu = cpumask_first(tx_cpumask);
+
+ for (i = 0, min_use = SIW_MAX_QP; i < num_cpus;
+ i++, cpu = cpumask_next(cpu, tx_cpumask)) {
+ int usage;
+
+ /* Skip any cores which have no TX thread */
+ if (!siw_tx_thread[cpu])
+ continue;
+
+ usage = atomic_read(&per_cpu(siw_use_cnt, cpu));
+ if (usage <= min_use) {
+ tx_cpu = cpu;
+ min_use = usage;
+ }
+ }
+ siw_dbg(&sdev->base_dev,
+ "tx cpu %d, node %d, %d qp's\n", tx_cpu, node, min_use);
+
+out:
+ if (tx_cpu >= 0)
+ atomic_inc(&per_cpu(siw_use_cnt, tx_cpu));
+ else
+ pr_warn("siw: no tx cpu found\n");
+
+ return tx_cpu;
+}
+
+void siw_put_tx_cpu(int cpu)
+{
+ atomic_dec(&per_cpu(siw_use_cnt, cpu));
+}
+
+static struct ib_qp *siw_get_base_qp(struct ib_device *base_dev, int id)
+{
+ struct siw_qp *qp = siw_qp_id2obj(to_siw_dev(base_dev), id);
+
+ if (qp) {
+ /*
+ * siw_qp_id2obj() increments object reference count
+ */
+ siw_qp_put(qp);
+ return qp->ib_qp;
+ }
+ return NULL;
+}
+
+static void siw_verbs_sq_flush(struct ib_qp *base_qp)
+{
+ struct siw_qp *qp = to_siw_qp(base_qp);
+
+ down_write(&qp->state_lock);
+ siw_sq_flush(qp);
+ up_write(&qp->state_lock);
+}
+
+static void siw_verbs_rq_flush(struct ib_qp *base_qp)
+{
+ struct siw_qp *qp = to_siw_qp(base_qp);
+
+ down_write(&qp->state_lock);
+ siw_rq_flush(qp);
+ up_write(&qp->state_lock);
+}
+
+static const struct ib_device_ops siw_device_ops = {
+ .owner = THIS_MODULE,
+ .uverbs_abi_ver = SIW_ABI_VERSION,
+ .driver_id = RDMA_DRIVER_SIW,
+
+ .alloc_mr = siw_alloc_mr,
+ .alloc_pd = siw_alloc_pd,
+ .alloc_ucontext = siw_alloc_ucontext,
+ .create_cq = siw_create_cq,
+ .create_qp = siw_create_qp,
+ .create_srq = siw_create_srq,
+ .dealloc_driver = siw_device_cleanup,
+ .dealloc_pd = siw_dealloc_pd,
+ .dealloc_ucontext = siw_dealloc_ucontext,
+ .dereg_mr = siw_dereg_mr,
+ .destroy_cq = siw_destroy_cq,
+ .destroy_qp = siw_destroy_qp,
+ .destroy_srq = siw_destroy_srq,
+ .drain_rq = siw_verbs_rq_flush,
+ .drain_sq = siw_verbs_sq_flush,
+ .get_dma_mr = siw_get_dma_mr,
+ .get_port_immutable = siw_get_port_immutable,
+ .iw_accept = siw_accept,
+ .iw_add_ref = siw_qp_get_ref,
+ .iw_connect = siw_connect,
+ .iw_create_listen = siw_create_listen,
+ .iw_destroy_listen = siw_destroy_listen,
+ .iw_get_qp = siw_get_base_qp,
+ .iw_reject = siw_reject,
+ .iw_rem_ref = siw_qp_put_ref,
+ .map_mr_sg = siw_map_mr_sg,
+ .mmap = siw_mmap,
+ .modify_qp = siw_verbs_modify_qp,
+ .modify_srq = siw_modify_srq,
+ .poll_cq = siw_poll_cq,
+ .post_recv = siw_post_receive,
+ .post_send = siw_post_send,
+ .post_srq_recv = siw_post_srq_recv,
+ .query_device = siw_query_device,
+ .query_gid = siw_query_gid,
+ .query_pkey = siw_query_pkey,
+ .query_port = siw_query_port,
+ .query_qp = siw_query_qp,
+ .query_srq = siw_query_srq,
+ .req_notify_cq = siw_req_notify_cq,
+ .reg_user_mr = siw_reg_user_mr,
+
+ INIT_RDMA_OBJ_SIZE(ib_cq, siw_cq, base_cq),
+ INIT_RDMA_OBJ_SIZE(ib_pd, siw_pd, base_pd),
+ INIT_RDMA_OBJ_SIZE(ib_srq, siw_srq, base_srq),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, siw_ucontext, base_ucontext),
+};
+
+static struct siw_device *siw_device_create(struct net_device *netdev)
+{
+ struct siw_device *sdev = NULL;
+ struct ib_device *base_dev;
+ struct device *parent = netdev->dev.parent;
+ int rv;
+
+ if (!parent) {
+ /*
+ * The loopback device has no parent device,
+ * so it appears as a top-level device. To support
+ * loopback device connectivity, take this device
+ * as the parent device. Skip all other devices
+ * w/o parent device.
+ */
+ if (netdev->type != ARPHRD_LOOPBACK) {
+ pr_warn("siw: device %s error: no parent device\n",
+ netdev->name);
+ return NULL;
+ }
+ parent = &netdev->dev;
+ }
+ sdev = ib_alloc_device(siw_device, base_dev);
+ if (!sdev)
+ return NULL;
+
+ base_dev = &sdev->base_dev;
+
+ sdev->netdev = netdev;
+
+ if (netdev->type != ARPHRD_LOOPBACK) {
+ memcpy(&base_dev->node_guid, netdev->dev_addr, 6);
+ } else {
+ /*
+ * The loopback device does not have a HW address,
+ * but connection mangagement lib expects gid != 0
+ */
+ size_t gidlen = min_t(size_t, strlen(base_dev->name), 6);
+
+ memcpy(&base_dev->node_guid, base_dev->name, gidlen);
+ }
+ base_dev->uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+ (1ull << IB_USER_VERBS_CMD_POST_RECV) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
+
+ base_dev->node_type = RDMA_NODE_RNIC;
+ memcpy(base_dev->node_desc, SIW_NODE_DESC_COMMON,
+ sizeof(SIW_NODE_DESC_COMMON));
+
+ /*
+ * Current model (one-to-one device association):
+ * One Softiwarp device per net_device or, equivalently,
+ * per physical port.
+ */
+ base_dev->phys_port_cnt = 1;
+ base_dev->dev.parent = parent;
+ base_dev->dev.dma_ops = &dma_virt_ops;
+ base_dev->num_comp_vectors = num_possible_cpus();
+
+ ib_set_device_ops(base_dev, &siw_device_ops);
+ rv = ib_device_set_netdev(base_dev, netdev, 1);
+ if (rv)
+ goto error;
+
+ memcpy(base_dev->iw_ifname, netdev->name,
+ sizeof(base_dev->iw_ifname));
+
+ /* Disable TCP port mapping */
+ base_dev->iw_driver_flags = IW_F_NO_PORT_MAP,
+
+ sdev->attrs.max_qp = SIW_MAX_QP;
+ sdev->attrs.max_qp_wr = SIW_MAX_QP_WR;
+ sdev->attrs.max_ord = SIW_MAX_ORD_QP;
+ sdev->attrs.max_ird = SIW_MAX_IRD_QP;
+ sdev->attrs.max_sge = SIW_MAX_SGE;
+ sdev->attrs.max_sge_rd = SIW_MAX_SGE_RD;
+ sdev->attrs.max_cq = SIW_MAX_CQ;
+ sdev->attrs.max_cqe = SIW_MAX_CQE;
+ sdev->attrs.max_mr = SIW_MAX_MR;
+ sdev->attrs.max_pd = SIW_MAX_PD;
+ sdev->attrs.max_mw = SIW_MAX_MW;
+ sdev->attrs.max_fmr = SIW_MAX_FMR;
+ sdev->attrs.max_srq = SIW_MAX_SRQ;
+ sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR;
+ sdev->attrs.max_srq_sge = SIW_MAX_SGE;
+
+ xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1);
+ xa_init_flags(&sdev->mem_xa, XA_FLAGS_ALLOC1);
+
+ INIT_LIST_HEAD(&sdev->cep_list);
+ INIT_LIST_HEAD(&sdev->qp_list);
+
+ atomic_set(&sdev->num_ctx, 0);
+ atomic_set(&sdev->num_srq, 0);
+ atomic_set(&sdev->num_qp, 0);
+ atomic_set(&sdev->num_cq, 0);
+ atomic_set(&sdev->num_mr, 0);
+ atomic_set(&sdev->num_pd, 0);
+
+ sdev->numa_node = dev_to_node(parent);
+ spin_lock_init(&sdev->lock);
+
+ return sdev;
+error:
+ ib_dealloc_device(base_dev);
+
+ return NULL;
+}
+
+/*
+ * Network link becomes unavailable. Mark all
+ * affected QP's accordingly.
+ */
+static void siw_netdev_down(struct work_struct *work)
+{
+ struct siw_device *sdev =
+ container_of(work, struct siw_device, netdev_down);
+
+ struct siw_qp_attrs qp_attrs;
+ struct list_head *pos, *tmp;
+
+ memset(&qp_attrs, 0, sizeof(qp_attrs));
+ qp_attrs.state = SIW_QP_STATE_ERROR;
+
+ list_for_each_safe(pos, tmp, &sdev->qp_list) {
+ struct siw_qp *qp = list_entry(pos, struct siw_qp, devq);
+
+ down_write(&qp->state_lock);
+ WARN_ON(siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE));
+ up_write(&qp->state_lock);
+ }
+ ib_device_put(&sdev->base_dev);
+}
+
+static void siw_device_goes_down(struct siw_device *sdev)
+{
+ if (ib_device_try_get(&sdev->base_dev)) {
+ INIT_WORK(&sdev->netdev_down, siw_netdev_down);
+ schedule_work(&sdev->netdev_down);
+ }
+}
+
+static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
+ void *arg)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(arg);
+ struct ib_device *base_dev;
+ struct siw_device *sdev;
+
+ dev_dbg(&netdev->dev, "siw: event %lu\n", event);
+
+ if (dev_net(netdev) != &init_net)
+ return NOTIFY_OK;
+
+ base_dev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_SIW);
+ if (!base_dev)
+ return NOTIFY_OK;
+
+ sdev = to_siw_dev(base_dev);
+
+ switch (event) {
+ case NETDEV_UP:
+ sdev->state = IB_PORT_ACTIVE;
+ siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE);
+ break;
+
+ case NETDEV_GOING_DOWN:
+ siw_device_goes_down(sdev);
+ break;
+
+ case NETDEV_DOWN:
+ sdev->state = IB_PORT_DOWN;
+ siw_port_event(sdev, 1, IB_EVENT_PORT_ERR);
+ break;
+
+ case NETDEV_REGISTER:
+ /*
+ * Device registration now handled only by
+ * rdma netlink commands. So it shall be impossible
+ * to end up here with a valid siw device.
+ */
+ siw_dbg(base_dev, "unexpected NETDEV_REGISTER event\n");
+ break;
+
+ case NETDEV_UNREGISTER:
+ ib_unregister_device_queued(&sdev->base_dev);
+ break;
+
+ case NETDEV_CHANGEADDR:
+ siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE);
+ break;
+ /*
+ * Todo: Below netdev events are currently not handled.
+ */
+ case NETDEV_CHANGEMTU:
+ case NETDEV_CHANGE:
+ break;
+
+ default:
+ break;
+ }
+ ib_device_put(&sdev->base_dev);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block siw_netdev_nb = {
+ .notifier_call = siw_netdev_event,
+};
+
+static int siw_newlink(const char *basedev_name, struct net_device *netdev)
+{
+ struct ib_device *base_dev;
+ struct siw_device *sdev = NULL;
+ int rv = -ENOMEM;
+
+ if (!siw_dev_qualified(netdev))
+ return -EINVAL;
+
+ base_dev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_SIW);
+ if (base_dev) {
+ ib_device_put(base_dev);
+ return -EEXIST;
+ }
+ sdev = siw_device_create(netdev);
+ if (sdev) {
+ dev_dbg(&netdev->dev, "siw: new device\n");
+
+ if (netif_running(netdev) && netif_carrier_ok(netdev))
+ sdev->state = IB_PORT_ACTIVE;
+ else
+ sdev->state = IB_PORT_DOWN;
+
+ rv = siw_device_register(sdev, basedev_name);
+ if (rv)
+ ib_dealloc_device(&sdev->base_dev);
+ }
+ return rv;
+}
+
+static struct rdma_link_ops siw_link_ops = {
+ .type = "siw",
+ .newlink = siw_newlink,
+};
+
+/*
+ * siw_init_module - Initialize Softiwarp module and register with netdev
+ * subsystem.
+ */
+static __init int siw_init_module(void)
+{
+ int rv;
+ int nr_cpu;
+
+ if (SENDPAGE_THRESH < SIW_MAX_INLINE) {
+ pr_info("siw: sendpage threshold too small: %u\n",
+ (int)SENDPAGE_THRESH);
+ rv = -EINVAL;
+ goto out_error;
+ }
+ rv = siw_init_cpulist();
+ if (rv)
+ goto out_error;
+
+ rv = siw_cm_init();
+ if (rv)
+ goto out_error;
+
+ if (!siw_create_tx_threads()) {
+ pr_info("siw: Could not start any TX thread\n");
+ goto out_error;
+ }
+ /*
+ * Locate CRC32 algorithm. If unsuccessful, fail
+ * loading siw only, if CRC is required.
+ */
+ siw_crypto_shash = crypto_alloc_shash("crc32c", 0, 0);
+ if (IS_ERR(siw_crypto_shash)) {
+ pr_info("siw: Loading CRC32c failed: %ld\n",
+ PTR_ERR(siw_crypto_shash));
+ siw_crypto_shash = NULL;
+ if (mpa_crc_required) {
+ rv = -EOPNOTSUPP;
+ goto out_error;
+ }
+ }
+ rv = register_netdevice_notifier(&siw_netdev_nb);
+ if (rv)
+ goto out_error;
+
+ rdma_link_register(&siw_link_ops);
+
+ pr_info("SoftiWARP attached\n");
+ return 0;
+
+out_error:
+ for (nr_cpu = 0; nr_cpu < nr_cpu_ids; nr_cpu++) {
+ if (siw_tx_thread[nr_cpu]) {
+ siw_stop_tx_thread(nr_cpu);
+ siw_tx_thread[nr_cpu] = NULL;
+ }
+ }
+ if (siw_crypto_shash)
+ crypto_free_shash(siw_crypto_shash);
+
+ pr_info("SoftIWARP attach failed. Error: %d\n", rv);
+
+ siw_cm_exit();
+ siw_destroy_cpulist();
+
+ return rv;
+}
+
+static void __exit siw_exit_module(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ if (siw_tx_thread[cpu]) {
+ siw_stop_tx_thread(cpu);
+ siw_tx_thread[cpu] = NULL;
+ }
+ }
+ unregister_netdevice_notifier(&siw_netdev_nb);
+ rdma_link_unregister(&siw_link_ops);
+ ib_unregister_driver(RDMA_DRIVER_SIW);
+
+ siw_cm_exit();
+
+ siw_destroy_cpulist();
+
+ if (siw_crypto_shash)
+ crypto_free_shash(siw_crypto_shash);
+
+ pr_info("SoftiWARP detached\n");
+}
+
+module_init(siw_init_module);
+module_exit(siw_exit_module);
+
+MODULE_ALIAS_RDMA_LINK("siw");
diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
new file mode 100644
index 000000000000..67171c82b0c4
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+#include <linux/gfp.h>
+#include <rdma/ib_verbs.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/sched/mm.h>
+#include <linux/resource.h>
+
+#include "siw.h"
+#include "siw_mem.h"
+
+/*
+ * Stag lookup is based on its index part only (24 bits).
+ * The code avoids special Stag of zero and tries to randomize
+ * STag values between 1 and SIW_STAG_MAX_INDEX.
+ */
+int siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
+{
+ struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
+ u32 id, next;
+
+ get_random_bytes(&next, 4);
+ next &= 0x00ffffff;
+
+ if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
+ GFP_KERNEL) < 0)
+ return -ENOMEM;
+
+ /* Set the STag index part */
+ m->stag = id << 8;
+
+ siw_dbg_mem(m, "new MEM object\n");
+
+ return 0;
+}
+
+/*
+ * siw_mem_id2obj()
+ *
+ * resolves memory from stag given by id. might be called from:
+ * o process context before sending out of sgl, or
+ * o in softirq when resolving target memory
+ */
+struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
+{
+ struct siw_mem *mem;
+
+ rcu_read_lock();
+ mem = xa_load(&sdev->mem_xa, stag_index);
+ if (likely(mem && kref_get_unless_zero(&mem->ref))) {
+ rcu_read_unlock();
+ return mem;
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
+ bool dirty)
+{
+ struct page **p = chunk->plist;
+
+ while (num_pages--) {
+ if (!PageDirty(*p) && dirty)
+ put_user_pages_dirty_lock(p, 1);
+ else
+ put_user_page(*p);
+ p++;
+ }
+}
+
+void siw_umem_release(struct siw_umem *umem, bool dirty)
+{
+ struct mm_struct *mm_s = umem->owning_mm;
+ int i, num_pages = umem->num_pages;
+
+ for (i = 0; num_pages; i++) {
+ int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);
+
+ siw_free_plist(&umem->page_chunk[i], to_free,
+ umem->writable && dirty);
+ kfree(umem->page_chunk[i].plist);
+ num_pages -= to_free;
+ }
+ atomic64_sub(umem->num_pages, &mm_s->pinned_vm);
+
+ mmdrop(mm_s);
+ kfree(umem->page_chunk);
+ kfree(umem);
+}
+
+int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
+ u64 start, u64 len, int rights)
+{
+ struct siw_device *sdev = to_siw_dev(pd->device);
+ struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+ struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
+ u32 id, next;
+
+ if (!mem)
+ return -ENOMEM;
+
+ mem->mem_obj = mem_obj;
+ mem->stag_valid = 0;
+ mem->sdev = sdev;
+ mem->va = start;
+ mem->len = len;
+ mem->pd = pd;
+ mem->perms = rights & IWARP_ACCESS_MASK;
+ kref_init(&mem->ref);
+
+ mr->mem = mem;
+
+ get_random_bytes(&next, 4);
+ next &= 0x00ffffff;
+
+ if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
+ GFP_KERNEL) < 0) {
+ kfree(mem);
+ return -ENOMEM;
+ }
+ /* Set the STag index part */
+ mem->stag = id << 8;
+ mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;
+
+ return 0;
+}
+
+void siw_mr_drop_mem(struct siw_mr *mr)
+{
+ struct siw_mem *mem = mr->mem, *found;
+
+ mem->stag_valid = 0;
+
+ /* make STag invalid visible asap */
+ smp_mb();
+
+ found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
+ WARN_ON(found != mem);
+ siw_mem_put(mem);
+}
+
+void siw_free_mem(struct kref *ref)
+{
+ struct siw_mem *mem = container_of(ref, struct siw_mem, ref);
+
+ siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");
+
+ if (!mem->is_mw && mem->mem_obj) {
+ if (mem->is_pbl == 0)
+ siw_umem_release(mem->umem, true);
+ else
+ kfree(mem->pbl);
+ }
+ kfree(mem);
+}
+
+/*
+ * siw_check_mem()
+ *
+ * Check protection domain, STAG state, access permissions and
+ * address range for memory object.
+ *
+ * @pd: Protection Domain memory should belong to
+ * @mem: memory to be checked
+ * @addr: starting addr of mem
+ * @perms: requested access permissions
+ * @len: len of memory interval to be checked
+ *
+ */
+int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
+ enum ib_access_flags perms, int len)
+{
+ if (!mem->stag_valid) {
+ siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
+ return -E_STAG_INVALID;
+ }
+ if (mem->pd != pd) {
+ siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
+ return -E_PD_MISMATCH;
+ }
+ /*
+ * check access permissions
+ */
+ if ((mem->perms & perms) < perms) {
+ siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
+ mem->perms, perms);
+ return -E_ACCESS_PERM;
+ }
+ /*
+ * Check if access falls into valid memory interval.
+ */
+ if (addr < mem->va || addr + len > mem->va + mem->len) {
+ siw_dbg_pd(pd, "MEM interval len %d\n", len);
+ siw_dbg_pd(pd, "[0x%016llx, 0x%016llx] out of bounds\n",
+ (unsigned long long)addr,
+ (unsigned long long)(addr + len));
+ siw_dbg_pd(pd, "[0x%016llx, 0x%016llx] STag=0x%08x\n",
+ (unsigned long long)mem->va,
+ (unsigned long long)(mem->va + mem->len),
+ mem->stag);
+
+ return -E_BASE_BOUNDS;
+ }
+ return E_ACCESS_OK;
+}
+
+/*
+ * siw_check_sge()
+ *
+ * Check SGE for access rights in given interval
+ *
+ * @pd: Protection Domain memory should belong to
+ * @sge: SGE to be checked
+ * @mem: location of memory reference within array
+ * @perms: requested access permissions
+ * @off: starting offset in SGE
+ * @len: len of memory interval to be checked
+ *
+ * NOTE: Function references SGE's memory object (mem->obj)
+ * if not yet done. New reference is kept if check went ok and
+ * released if check failed. If mem->obj is already valid, no new
+ * lookup is being done and mem is not released it check fails.
+ */
+int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
+ enum ib_access_flags perms, u32 off, int len)
+{
+ struct siw_device *sdev = to_siw_dev(pd->device);
+ struct siw_mem *new = NULL;
+ int rv = E_ACCESS_OK;
+
+ if (len + off > sge->length) {
+ rv = -E_BASE_BOUNDS;
+ goto fail;
+ }
+ if (*mem == NULL) {
+ new = siw_mem_id2obj(sdev, sge->lkey >> 8);
+ if (unlikely(!new)) {
+ siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
+ rv = -E_STAG_INVALID;
+ goto fail;
+ }
+ *mem = new;
+ }
+ /* Check if user re-registered with different STag key */
+ if (unlikely((*mem)->stag != sge->lkey)) {
+ siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
+ rv = -E_STAG_INVALID;
+ goto fail;
+ }
+ rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
+ if (unlikely(rv))
+ goto fail;
+
+ return 0;
+
+fail:
+ if (new) {
+ *mem = NULL;
+ siw_mem_put(new);
+ }
+ return rv;
+}
+
+void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
+{
+ switch (op) {
+ case SIW_OP_SEND:
+ case SIW_OP_WRITE:
+ case SIW_OP_SEND_WITH_IMM:
+ case SIW_OP_SEND_REMOTE_INV:
+ case SIW_OP_READ:
+ case SIW_OP_READ_LOCAL_INV:
+ if (!(wqe->sqe.flags & SIW_WQE_INLINE))
+ siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
+ break;
+
+ case SIW_OP_RECEIVE:
+ siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
+ break;
+
+ case SIW_OP_READ_RESPONSE:
+ siw_unref_mem_sgl(wqe->mem, 1);
+ break;
+
+ default:
+ /*
+ * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
+ * do not hold memory references
+ */
+ break;
+ }
+}
+
+int siw_invalidate_stag(struct ib_pd *pd, u32 stag)
+{
+ struct siw_device *sdev = to_siw_dev(pd->device);
+ struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
+ int rv = 0;
+
+ if (unlikely(!mem)) {
+ siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
+ return -EINVAL;
+ }
+ if (unlikely(mem->pd != pd)) {
+ siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
+ rv = -EACCES;
+ goto out;
+ }
+ /*
+ * Per RDMA verbs definition, an STag may already be in invalid
+ * state if invalidation is requested. So no state check here.
+ */
+ mem->stag_valid = 0;
+
+ siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
+out:
+ siw_mem_put(mem);
+ return rv;
+}
+
+/*
+ * Gets physical address backed by PBL element. Address is referenced
+ * by linear byte offset into list of variably sized PB elements.
+ * Optionally, provides remaining len within current element, and
+ * current PBL index for later resume at same element.
+ */
+u64 siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
+{
+ int i = idx ? *idx : 0;
+
+ while (i < pbl->num_buf) {
+ struct siw_pble *pble = &pbl->pbe[i];
+
+ if (pble->pbl_off + pble->size > off) {
+ u64 pble_off = off - pble->pbl_off;
+
+ if (len)
+ *len = pble->size - pble_off;
+ if (idx)
+ *idx = i;
+
+ return pble->addr + pble_off;
+ }
+ i++;
+ }
+ if (len)
+ *len = 0;
+ return 0;
+}
+
+struct siw_pbl *siw_pbl_alloc(u32 num_buf)
+{
+ struct siw_pbl *pbl;
+ int buf_size = sizeof(*pbl);
+
+ if (num_buf == 0)
+ return ERR_PTR(-EINVAL);
+
+ buf_size += ((num_buf - 1) * sizeof(struct siw_pble));
+
+ pbl = kzalloc(buf_size, GFP_KERNEL);
+ if (!pbl)
+ return ERR_PTR(-ENOMEM);
+
+ pbl->max_buf = num_buf;
+
+ return pbl;
+}
+
+struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
+{
+ struct siw_umem *umem;
+ struct mm_struct *mm_s;
+ u64 first_page_va;
+ unsigned long mlock_limit;
+ unsigned int foll_flags = FOLL_WRITE;
+ int num_pages, num_chunks, i, rv = 0;
+
+ if (!can_do_mlock())
+ return ERR_PTR(-EPERM);
+
+ if (!len)
+ return ERR_PTR(-EINVAL);
+
+ first_page_va = start & PAGE_MASK;
+ num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
+ num_chunks = (num_pages >> CHUNK_SHIFT) + 1;
+
+ umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+
+ mm_s = current->mm;
+ umem->owning_mm = mm_s;
+ umem->writable = writable;
+
+ mmgrab(mm_s);
+
+ if (!writable)
+ foll_flags |= FOLL_FORCE;
+
+ down_read(&mm_s->mmap_sem);
+
+ mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+ if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
+ rv = -ENOMEM;
+ goto out_sem_up;
+ }
+ umem->fp_addr = first_page_va;
+
+ umem->page_chunk =
+ kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
+ if (!umem->page_chunk) {
+ rv = -ENOMEM;
+ goto out_sem_up;
+ }
+ for (i = 0; num_pages; i++) {
+ int got, nents = min_t(int, num_pages, PAGES_PER_CHUNK);
+
+ umem->page_chunk[i].plist =
+ kcalloc(nents, sizeof(struct page *), GFP_KERNEL);
+ if (!umem->page_chunk[i].plist) {
+ rv = -ENOMEM;
+ goto out_sem_up;
+ }
+ got = 0;
+ while (nents) {
+ struct page **plist = &umem->page_chunk[i].plist[got];
+
+ rv = get_user_pages(first_page_va, nents,
+ foll_flags | FOLL_LONGTERM,
+ plist, NULL);
+ if (rv < 0)
+ goto out_sem_up;
+
+ umem->num_pages += rv;
+ atomic64_add(rv, &mm_s->pinned_vm);
+ first_page_va += rv * PAGE_SIZE;
+ nents -= rv;
+ got += rv;
+ }
+ num_pages -= got;
+ }
+out_sem_up:
+ up_read(&mm_s->mmap_sem);
+
+ if (rv > 0)
+ return umem;
+
+ siw_umem_release(umem, false);
+
+ return ERR_PTR(rv);
+}
diff --git a/drivers/infiniband/sw/siw/siw_mem.h b/drivers/infiniband/sw/siw/siw_mem.h
new file mode 100644
index 000000000000..f43daf280891
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_mem.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+#ifndef _SIW_MEM_H
+#define _SIW_MEM_H
+
+struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable);
+void siw_umem_release(struct siw_umem *umem, bool dirty);
+struct siw_pbl *siw_pbl_alloc(u32 num_buf);
+u64 siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx);
+struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index);
+int siw_mem_add(struct siw_device *sdev, struct siw_mem *m);
+int siw_invalidate_stag(struct ib_pd *pd, u32 stag);
+int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
+ enum ib_access_flags perms, int len);
+int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge,
+ struct siw_mem *mem[], enum ib_access_flags perms,
+ u32 off, int len);
+void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op);
+int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
+ u64 start, u64 len, int rights);
+void siw_mr_drop_mem(struct siw_mr *mr);
+void siw_free_mem(struct kref *ref);
+
+static inline void siw_mem_put(struct siw_mem *mem)
+{
+ kref_put(&mem->ref, siw_free_mem);
+}
+
+static inline struct siw_mr *siw_mem2mr(struct siw_mem *m)
+{
+ return container_of(m, struct siw_mr, mem);
+}
+
+static inline void siw_unref_mem_sgl(struct siw_mem **mem, unsigned int num_sge)
+{
+ while (num_sge) {
+ if (*mem == NULL)
+ break;
+
+ siw_mem_put(*mem);
+ *mem = NULL;
+ mem++;
+ num_sge--;
+ }
+}
+
+#define CHUNK_SHIFT 9 /* sets number of pages per chunk */
+#define PAGES_PER_CHUNK (_AC(1, UL) << CHUNK_SHIFT)
+#define CHUNK_MASK (~(PAGES_PER_CHUNK - 1))
+#define PAGE_CHUNK_SIZE (PAGES_PER_CHUNK * sizeof(struct page *))
+
+/*
+ * siw_get_upage()
+ *
+ * Get page pointer for address on given umem.
+ *
+ * @umem: two dimensional list of page pointers
+ * @addr: user virtual address
+ */
+static inline struct page *siw_get_upage(struct siw_umem *umem, u64 addr)
+{
+ unsigned int page_idx = (addr - umem->fp_addr) >> PAGE_SHIFT,
+ chunk_idx = page_idx >> CHUNK_SHIFT,
+ page_in_chunk = page_idx & ~CHUNK_MASK;
+
+ if (likely(page_idx < umem->num_pages))
+ return umem->page_chunk[chunk_idx].plist[page_in_chunk];
+
+ return NULL;
+}
+#endif
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
new file mode 100644
index 000000000000..11383d9f95ef
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -0,0 +1,1322 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/scatterlist.h>
+#include <linux/llist.h>
+#include <asm/barrier.h>
+#include <net/tcp.h>
+
+#include "siw.h"
+#include "siw_verbs.h"
+#include "siw_mem.h"
+
+static char siw_qp_state_to_string[SIW_QP_STATE_COUNT][sizeof "TERMINATE"] = {
+ [SIW_QP_STATE_IDLE] = "IDLE",
+ [SIW_QP_STATE_RTR] = "RTR",
+ [SIW_QP_STATE_RTS] = "RTS",
+ [SIW_QP_STATE_CLOSING] = "CLOSING",
+ [SIW_QP_STATE_TERMINATE] = "TERMINATE",
+ [SIW_QP_STATE_ERROR] = "ERROR"
+};
+
+/*
+ * iWARP (RDMAP, DDP and MPA) parameters as well as Softiwarp settings on a
+ * per-RDMAP message basis. Please keep order of initializer. All MPA len
+ * is initialized to minimum packet size.
+ */
+struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1] = {
+ { /* RDMAP_RDMA_WRITE */
+ .hdr_len = sizeof(struct iwarp_rdma_write),
+ .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_write) - 2),
+ .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST |
+ cpu_to_be16(DDP_VERSION << 8) |
+ cpu_to_be16(RDMAP_VERSION << 6) |
+ cpu_to_be16(RDMAP_RDMA_WRITE),
+ .rx_data = siw_proc_write },
+ { /* RDMAP_RDMA_READ_REQ */
+ .hdr_len = sizeof(struct iwarp_rdma_rreq),
+ .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rreq) - 2),
+ .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
+ cpu_to_be16(RDMAP_VERSION << 6) |
+ cpu_to_be16(RDMAP_RDMA_READ_REQ),
+ .rx_data = siw_proc_rreq },
+ { /* RDMAP_RDMA_READ_RESP */
+ .hdr_len = sizeof(struct iwarp_rdma_rresp),
+ .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rresp) - 2),
+ .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST |
+ cpu_to_be16(DDP_VERSION << 8) |
+ cpu_to_be16(RDMAP_VERSION << 6) |
+ cpu_to_be16(RDMAP_RDMA_READ_RESP),
+ .rx_data = siw_proc_rresp },
+ { /* RDMAP_SEND */
+ .hdr_len = sizeof(struct iwarp_send),
+ .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
+ .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
+ cpu_to_be16(RDMAP_VERSION << 6) |
+ cpu_to_be16(RDMAP_SEND),
+ .rx_data = siw_proc_send },
+ { /* RDMAP_SEND_INVAL */
+ .hdr_len = sizeof(struct iwarp_send_inv),
+ .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
+ .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
+ cpu_to_be16(RDMAP_VERSION << 6) |
+ cpu_to_be16(RDMAP_SEND_INVAL),
+ .rx_data = siw_proc_send },
+ { /* RDMAP_SEND_SE */
+ .hdr_len = sizeof(struct iwarp_send),
+ .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
+ .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
+ cpu_to_be16(RDMAP_VERSION << 6) |
+ cpu_to_be16(RDMAP_SEND_SE),
+ .rx_data = siw_proc_send },
+ { /* RDMAP_SEND_SE_INVAL */
+ .hdr_len = sizeof(struct iwarp_send_inv),
+ .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
+ .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
+ cpu_to_be16(RDMAP_VERSION << 6) |
+ cpu_to_be16(RDMAP_SEND_SE_INVAL),
+ .rx_data = siw_proc_send },
+ { /* RDMAP_TERMINATE */
+ .hdr_len = sizeof(struct iwarp_terminate),
+ .ctrl.mpa_len = htons(sizeof(struct iwarp_terminate) - 2),
+ .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
+ cpu_to_be16(RDMAP_VERSION << 6) |
+ cpu_to_be16(RDMAP_TERMINATE),
+ .rx_data = siw_proc_terminate }
+};
+
+void siw_qp_llp_data_ready(struct sock *sk)
+{
+ struct siw_qp *qp;
+
+ read_lock(&sk->sk_callback_lock);
+
+ if (unlikely(!sk->sk_user_data || !sk_to_qp(sk)))
+ goto done;
+
+ qp = sk_to_qp(sk);
+
+ if (likely(!qp->rx_stream.rx_suspend &&
+ down_read_trylock(&qp->state_lock))) {
+ read_descriptor_t rd_desc = { .arg.data = qp, .count = 1 };
+
+ if (likely(qp->attrs.state == SIW_QP_STATE_RTS))
+ /*
+ * Implements data receive operation during
+ * socket callback. TCP gracefully catches
+ * the case where there is nothing to receive
+ * (not calling siw_tcp_rx_data() then).
+ */
+ tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data);
+
+ up_read(&qp->state_lock);
+ } else {
+ siw_dbg_qp(qp, "unable to process RX, suspend: %d\n",
+ qp->rx_stream.rx_suspend);
+ }
+done:
+ read_unlock(&sk->sk_callback_lock);
+}
+
+void siw_qp_llp_close(struct siw_qp *qp)
+{
+ siw_dbg_qp(qp, "enter llp close, state = %s\n",
+ siw_qp_state_to_string[qp->attrs.state]);
+
+ down_write(&qp->state_lock);
+
+ qp->rx_stream.rx_suspend = 1;
+ qp->tx_ctx.tx_suspend = 1;
+ qp->attrs.sk = NULL;
+
+ switch (qp->attrs.state) {
+ case SIW_QP_STATE_RTS:
+ case SIW_QP_STATE_RTR:
+ case SIW_QP_STATE_IDLE:
+ case SIW_QP_STATE_TERMINATE:
+ qp->attrs.state = SIW_QP_STATE_ERROR;
+ break;
+ /*
+ * SIW_QP_STATE_CLOSING:
+ *
+ * This is a forced close. shall the QP be moved to
+ * ERROR or IDLE ?
+ */
+ case SIW_QP_STATE_CLOSING:
+ if (tx_wqe(qp)->wr_status == SIW_WR_IDLE)
+ qp->attrs.state = SIW_QP_STATE_ERROR;
+ else
+ qp->attrs.state = SIW_QP_STATE_IDLE;
+ break;
+
+ default:
+ siw_dbg_qp(qp, "llp close: no state transition needed: %s\n",
+ siw_qp_state_to_string[qp->attrs.state]);
+ break;
+ }
+ siw_sq_flush(qp);
+ siw_rq_flush(qp);
+
+ /*
+ * Dereference closing CEP
+ */
+ if (qp->cep) {
+ siw_cep_put(qp->cep);
+ qp->cep = NULL;
+ }
+
+ up_write(&qp->state_lock);
+
+ siw_dbg_qp(qp, "llp close exit: state %s\n",
+ siw_qp_state_to_string[qp->attrs.state]);
+}
+
+/*
+ * socket callback routine informing about newly available send space.
+ * Function schedules SQ work for processing SQ items.
+ */
+void siw_qp_llp_write_space(struct sock *sk)
+{
+ struct siw_cep *cep = sk_to_cep(sk);
+
+ cep->sk_write_space(sk);
+
+ if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+ (void)siw_sq_start(cep->qp);
+}
+
+static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
+{
+ irq_size = roundup_pow_of_two(irq_size);
+ orq_size = roundup_pow_of_two(orq_size);
+
+ qp->attrs.irq_size = irq_size;
+ qp->attrs.orq_size = orq_size;
+
+ qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe));
+ if (!qp->irq) {
+ siw_dbg_qp(qp, "irq malloc for %d failed\n", irq_size);
+ qp->attrs.irq_size = 0;
+ return -ENOMEM;
+ }
+ qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe));
+ if (!qp->orq) {
+ siw_dbg_qp(qp, "orq malloc for %d failed\n", orq_size);
+ qp->attrs.orq_size = 0;
+ qp->attrs.irq_size = 0;
+ vfree(qp->irq);
+ return -ENOMEM;
+ }
+ siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size);
+ return 0;
+}
+
+static int siw_qp_enable_crc(struct siw_qp *qp)
+{
+ struct siw_rx_stream *c_rx = &qp->rx_stream;
+ struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
+ int size = crypto_shash_descsize(siw_crypto_shash) +
+ sizeof(struct shash_desc);
+
+ if (siw_crypto_shash == NULL)
+ return -ENOENT;
+
+ c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
+ c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
+ if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) {
+ kfree(c_tx->mpa_crc_hd);
+ kfree(c_rx->mpa_crc_hd);
+ c_tx->mpa_crc_hd = NULL;
+ c_rx->mpa_crc_hd = NULL;
+ return -ENOMEM;
+ }
+ c_tx->mpa_crc_hd->tfm = siw_crypto_shash;
+ c_rx->mpa_crc_hd->tfm = siw_crypto_shash;
+
+ return 0;
+}
+
+/*
+ * Send a non signalled READ or WRITE to peer side as negotiated
+ * with MPAv2 P2P setup protocol. The work request is only created
+ * as a current active WR and does not consume Send Queue space.
+ *
+ * Caller must hold QP state lock.
+ */
+int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl)
+{
+ struct siw_wqe *wqe = tx_wqe(qp);
+ unsigned long flags;
+ int rv = 0;
+
+ spin_lock_irqsave(&qp->sq_lock, flags);
+
+ if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
+ spin_unlock_irqrestore(&qp->sq_lock, flags);
+ return -EIO;
+ }
+ memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
+
+ wqe->wr_status = SIW_WR_QUEUED;
+ wqe->sqe.flags = 0;
+ wqe->sqe.num_sge = 1;
+ wqe->sqe.sge[0].length = 0;
+ wqe->sqe.sge[0].laddr = 0;
+ wqe->sqe.sge[0].lkey = 0;
+ /*
+ * While it must not be checked for inbound zero length
+ * READ/WRITE, some HW may treat STag 0 special.
+ */
+ wqe->sqe.rkey = 1;
+ wqe->sqe.raddr = 0;
+ wqe->processed = 0;
+
+ if (ctrl & MPA_V2_RDMA_WRITE_RTR)
+ wqe->sqe.opcode = SIW_OP_WRITE;
+ else if (ctrl & MPA_V2_RDMA_READ_RTR) {
+ struct siw_sqe *rreq;
+
+ wqe->sqe.opcode = SIW_OP_READ;
+
+ spin_lock(&qp->orq_lock);
+
+ rreq = orq_get_free(qp);
+ if (rreq) {
+ siw_read_to_orq(rreq, &wqe->sqe);
+ qp->orq_put++;
+ } else
+ rv = -EIO;
+
+ spin_unlock(&qp->orq_lock);
+ } else
+ rv = -EINVAL;
+
+ if (rv)
+ wqe->wr_status = SIW_WR_IDLE;
+
+ spin_unlock_irqrestore(&qp->sq_lock, flags);
+
+ if (!rv)
+ rv = siw_sq_start(qp);
+
+ return rv;
+}
+
+/*
+ * Map memory access error to DDP tagged error
+ */
+enum ddp_ecode siw_tagged_error(enum siw_access_state state)
+{
+ switch (state) {
+ case E_STAG_INVALID:
+ return DDP_ECODE_T_INVALID_STAG;
+ case E_BASE_BOUNDS:
+ return DDP_ECODE_T_BASE_BOUNDS;
+ case E_PD_MISMATCH:
+ return DDP_ECODE_T_STAG_NOT_ASSOC;
+ case E_ACCESS_PERM:
+ /*
+ * RFC 5041 (DDP) lacks an ecode for insufficient access
+ * permissions. 'Invalid STag' seem to be the closest
+ * match though.
+ */
+ return DDP_ECODE_T_INVALID_STAG;
+ default:
+ WARN_ON(1);
+ return DDP_ECODE_T_INVALID_STAG;
+ }
+}
+
+/*
+ * Map memory access error to RDMAP protection error
+ */
+enum rdmap_ecode siw_rdmap_error(enum siw_access_state state)
+{
+ switch (state) {
+ case E_STAG_INVALID:
+ return RDMAP_ECODE_INVALID_STAG;
+ case E_BASE_BOUNDS:
+ return RDMAP_ECODE_BASE_BOUNDS;
+ case E_PD_MISMATCH:
+ return RDMAP_ECODE_STAG_NOT_ASSOC;
+ case E_ACCESS_PERM:
+ return RDMAP_ECODE_ACCESS_RIGHTS;
+ default:
+ return RDMAP_ECODE_UNSPECIFIED;
+ }
+}
+
+void siw_init_terminate(struct siw_qp *qp, enum term_elayer layer, u8 etype,
+ u8 ecode, int in_tx)
+{
+ if (!qp->term_info.valid) {
+ memset(&qp->term_info, 0, sizeof(qp->term_info));
+ qp->term_info.layer = layer;
+ qp->term_info.etype = etype;
+ qp->term_info.ecode = ecode;
+ qp->term_info.in_tx = in_tx;
+ qp->term_info.valid = 1;
+ }
+ siw_dbg_qp(qp, "init TERM: layer %d, type %d, code %d, in tx %s\n",
+ layer, etype, ecode, in_tx ? "yes" : "no");
+}
+
+/*
+ * Send a TERMINATE message, as defined in RFC's 5040/5041/5044/6581.
+ * Sending TERMINATE messages is best effort - such messages
+ * can only be send if the QP is still connected and it does
+ * not have another outbound message in-progress, i.e. the
+ * TERMINATE message must not interfer with an incomplete current
+ * transmit operation.
+ */
+void siw_send_terminate(struct siw_qp *qp)
+{
+ struct kvec iov[3];
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR };
+ struct iwarp_terminate *term = NULL;
+ union iwarp_hdr *err_hdr = NULL;
+ struct socket *s = qp->attrs.sk;
+ struct siw_rx_stream *srx = &qp->rx_stream;
+ union iwarp_hdr *rx_hdr = &srx->hdr;
+ u32 crc = 0;
+ int num_frags, len_terminate, rv;
+
+ if (!qp->term_info.valid)
+ return;
+
+ qp->term_info.valid = 0;
+
+ if (tx_wqe(qp)->wr_status == SIW_WR_INPROGRESS) {
+ siw_dbg_qp(qp, "cannot send TERMINATE: op %d in progress\n",
+ tx_type(tx_wqe(qp)));
+ return;
+ }
+ if (!s && qp->cep)
+ /* QP not yet in RTS. Take socket from connection end point */
+ s = qp->cep->sock;
+
+ if (!s) {
+ siw_dbg_qp(qp, "cannot send TERMINATE: not connected\n");
+ return;
+ }
+
+ term = kzalloc(sizeof(*term), GFP_KERNEL);
+ if (!term)
+ return;
+
+ term->ddp_qn = cpu_to_be32(RDMAP_UNTAGGED_QN_TERMINATE);
+ term->ddp_mo = 0;
+ term->ddp_msn = cpu_to_be32(1);
+
+ iov[0].iov_base = term;
+ iov[0].iov_len = sizeof(*term);
+
+ if ((qp->term_info.layer == TERM_ERROR_LAYER_DDP) ||
+ ((qp->term_info.layer == TERM_ERROR_LAYER_RDMAP) &&
+ (qp->term_info.etype != RDMAP_ETYPE_CATASTROPHIC))) {
+ err_hdr = kzalloc(sizeof(*err_hdr), GFP_KERNEL);
+ if (!err_hdr) {
+ kfree(term);
+ return;
+ }
+ }
+ memcpy(&term->ctrl, &iwarp_pktinfo[RDMAP_TERMINATE].ctrl,
+ sizeof(struct iwarp_ctrl));
+
+ __rdmap_term_set_layer(term, qp->term_info.layer);
+ __rdmap_term_set_etype(term, qp->term_info.etype);
+ __rdmap_term_set_ecode(term, qp->term_info.ecode);
+
+ switch (qp->term_info.layer) {
+ case TERM_ERROR_LAYER_RDMAP:
+ if (qp->term_info.etype == RDMAP_ETYPE_CATASTROPHIC)
+ /* No additional DDP/RDMAP header to be included */
+ break;
+
+ if (qp->term_info.etype == RDMAP_ETYPE_REMOTE_PROTECTION) {
+ /*
+ * Complete RDMAP frame will get attached, and
+ * DDP segment length is valid
+ */
+ term->flag_m = 1;
+ term->flag_d = 1;
+ term->flag_r = 1;
+
+ if (qp->term_info.in_tx) {
+ struct iwarp_rdma_rreq *rreq;
+ struct siw_wqe *wqe = tx_wqe(qp);
+
+ /* Inbound RREQ error, detected during
+ * RRESP creation. Take state from
+ * current TX work queue element to
+ * reconstruct peers RREQ.
+ */
+ rreq = (struct iwarp_rdma_rreq *)err_hdr;
+
+ memcpy(&rreq->ctrl,
+ &iwarp_pktinfo[RDMAP_RDMA_READ_REQ].ctrl,
+ sizeof(struct iwarp_ctrl));
+
+ rreq->rsvd = 0;
+ rreq->ddp_qn =
+ htonl(RDMAP_UNTAGGED_QN_RDMA_READ);
+
+ /* Provide RREQ's MSN as kept aside */
+ rreq->ddp_msn = htonl(wqe->sqe.sge[0].length);
+
+ rreq->ddp_mo = htonl(wqe->processed);
+ rreq->sink_stag = htonl(wqe->sqe.rkey);
+ rreq->sink_to = cpu_to_be64(wqe->sqe.raddr);
+ rreq->read_size = htonl(wqe->sqe.sge[0].length);
+ rreq->source_stag = htonl(wqe->sqe.sge[0].lkey);
+ rreq->source_to =
+ cpu_to_be64(wqe->sqe.sge[0].laddr);
+
+ iov[1].iov_base = rreq;
+ iov[1].iov_len = sizeof(*rreq);
+
+ rx_hdr = (union iwarp_hdr *)rreq;
+ } else {
+ /* Take RDMAP/DDP information from
+ * current (failed) inbound frame.
+ */
+ iov[1].iov_base = rx_hdr;
+
+ if (__rdmap_get_opcode(&rx_hdr->ctrl) ==
+ RDMAP_RDMA_READ_REQ)
+ iov[1].iov_len =
+ sizeof(struct iwarp_rdma_rreq);
+ else /* SEND type */
+ iov[1].iov_len =
+ sizeof(struct iwarp_send);
+ }
+ } else {
+ /* Do not report DDP hdr information if packet
+ * layout is unknown
+ */
+ if ((qp->term_info.ecode == RDMAP_ECODE_VERSION) ||
+ (qp->term_info.ecode == RDMAP_ECODE_OPCODE))
+ break;
+
+ iov[1].iov_base = rx_hdr;
+
+ /* Only DDP frame will get attached */
+ if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)
+ iov[1].iov_len =
+ sizeof(struct iwarp_rdma_write);
+ else
+ iov[1].iov_len = sizeof(struct iwarp_send);
+
+ term->flag_m = 1;
+ term->flag_d = 1;
+ }
+ term->ctrl.mpa_len = cpu_to_be16(iov[1].iov_len);
+ break;
+
+ case TERM_ERROR_LAYER_DDP:
+ /* Report error encountered while DDP processing.
+ * This can only happen as a result of inbound
+ * DDP processing
+ */
+
+ /* Do not report DDP hdr information if packet
+ * layout is unknown
+ */
+ if (((qp->term_info.etype == DDP_ETYPE_TAGGED_BUF) &&
+ (qp->term_info.ecode == DDP_ECODE_T_VERSION)) ||
+ ((qp->term_info.etype == DDP_ETYPE_UNTAGGED_BUF) &&
+ (qp->term_info.ecode == DDP_ECODE_UT_VERSION)))
+ break;
+
+ iov[1].iov_base = rx_hdr;
+
+ if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)
+ iov[1].iov_len = sizeof(struct iwarp_ctrl_tagged);
+ else
+ iov[1].iov_len = sizeof(struct iwarp_ctrl_untagged);
+
+ term->flag_m = 1;
+ term->flag_d = 1;
+ break;
+
+ default:
+ break;
+ }
+ if (term->flag_m || term->flag_d || term->flag_r) {
+ iov[2].iov_base = &crc;
+ iov[2].iov_len = sizeof(crc);
+ len_terminate = sizeof(*term) + iov[1].iov_len + MPA_CRC_SIZE;
+ num_frags = 3;
+ } else {
+ iov[1].iov_base = &crc;
+ iov[1].iov_len = sizeof(crc);
+ len_terminate = sizeof(*term) + MPA_CRC_SIZE;
+ num_frags = 2;
+ }
+
+ /* Adjust DDP Segment Length parameter, if valid */
+ if (term->flag_m) {
+ u32 real_ddp_len = be16_to_cpu(rx_hdr->ctrl.mpa_len);
+ enum rdma_opcode op = __rdmap_get_opcode(&rx_hdr->ctrl);
+
+ real_ddp_len -= iwarp_pktinfo[op].hdr_len - MPA_HDR_SIZE;
+ rx_hdr->ctrl.mpa_len = cpu_to_be16(real_ddp_len);
+ }
+
+ term->ctrl.mpa_len =
+ cpu_to_be16(len_terminate - (MPA_HDR_SIZE + MPA_CRC_SIZE));
+ if (qp->tx_ctx.mpa_crc_hd) {
+ crypto_shash_init(qp->tx_ctx.mpa_crc_hd);
+ if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
+ (u8 *)iov[0].iov_base,
+ iov[0].iov_len))
+ goto out;
+
+ if (num_frags == 3) {
+ if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
+ (u8 *)iov[1].iov_base,
+ iov[1].iov_len))
+ goto out;
+ }
+ crypto_shash_final(qp->tx_ctx.mpa_crc_hd, (u8 *)&crc);
+ }
+
+ rv = kernel_sendmsg(s, &msg, iov, num_frags, len_terminate);
+ siw_dbg_qp(qp, "sent TERM: %s, layer %d, type %d, code %d (%d bytes)\n",
+ rv == len_terminate ? "success" : "failure",
+ __rdmap_term_layer(term), __rdmap_term_etype(term),
+ __rdmap_term_ecode(term), rv);
+out:
+ kfree(term);
+ kfree(err_hdr);
+}
+
+/*
+ * Handle all attrs other than state
+ */
+static void siw_qp_modify_nonstate(struct siw_qp *qp,
+ struct siw_qp_attrs *attrs,
+ enum siw_qp_attr_mask mask)
+{
+ if (mask & SIW_QP_ATTR_ACCESS_FLAGS) {
+ if (attrs->flags & SIW_RDMA_BIND_ENABLED)
+ qp->attrs.flags |= SIW_RDMA_BIND_ENABLED;
+ else
+ qp->attrs.flags &= ~SIW_RDMA_BIND_ENABLED;
+
+ if (attrs->flags & SIW_RDMA_WRITE_ENABLED)
+ qp->attrs.flags |= SIW_RDMA_WRITE_ENABLED;
+ else
+ qp->attrs.flags &= ~SIW_RDMA_WRITE_ENABLED;
+
+ if (attrs->flags & SIW_RDMA_READ_ENABLED)
+ qp->attrs.flags |= SIW_RDMA_READ_ENABLED;
+ else
+ qp->attrs.flags &= ~SIW_RDMA_READ_ENABLED;
+ }
+}
+
+static int siw_qp_nextstate_from_idle(struct siw_qp *qp,
+ struct siw_qp_attrs *attrs,
+ enum siw_qp_attr_mask mask)
+{
+ int rv = 0;
+
+ switch (attrs->state) {
+ case SIW_QP_STATE_RTS:
+ if (attrs->flags & SIW_MPA_CRC) {
+ rv = siw_qp_enable_crc(qp);
+ if (rv)
+ break;
+ }
+ if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) {
+ siw_dbg_qp(qp, "no socket\n");
+ rv = -EINVAL;
+ break;
+ }
+ if (!(mask & SIW_QP_ATTR_MPA)) {
+ siw_dbg_qp(qp, "no MPA\n");
+ rv = -EINVAL;
+ break;
+ }
+ /*
+ * Initialize iWARP TX state
+ */
+ qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 0;
+ qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 0;
+ qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 0;
+
+ /*
+ * Initialize iWARP RX state
+ */
+ qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 1;
+ qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 1;
+ qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 1;
+
+ /*
+ * init IRD free queue, caller has already checked
+ * limits.
+ */
+ rv = siw_qp_readq_init(qp, attrs->irq_size,
+ attrs->orq_size);
+ if (rv)
+ break;
+
+ qp->attrs.sk = attrs->sk;
+ qp->attrs.state = SIW_QP_STATE_RTS;
+
+ siw_dbg_qp(qp, "enter RTS: crc=%s, ord=%u, ird=%u\n",
+ attrs->flags & SIW_MPA_CRC ? "y" : "n",
+ qp->attrs.orq_size, qp->attrs.irq_size);
+ break;
+
+ case SIW_QP_STATE_ERROR:
+ siw_rq_flush(qp);
+ qp->attrs.state = SIW_QP_STATE_ERROR;
+ if (qp->cep) {
+ siw_cep_put(qp->cep);
+ qp->cep = NULL;
+ }
+ break;
+
+ default:
+ break;
+ }
+ return rv;
+}
+
+static int siw_qp_nextstate_from_rts(struct siw_qp *qp,
+ struct siw_qp_attrs *attrs)
+{
+ int drop_conn = 0;
+
+ switch (attrs->state) {
+ case SIW_QP_STATE_CLOSING:
+ /*
+ * Verbs: move to IDLE if SQ and ORQ are empty.
+ * Move to ERROR otherwise. But first of all we must
+ * close the connection. So we keep CLOSING or ERROR
+ * as a transient state, schedule connection drop work
+ * and wait for the socket state change upcall to
+ * come back closed.
+ */
+ if (tx_wqe(qp)->wr_status == SIW_WR_IDLE) {
+ qp->attrs.state = SIW_QP_STATE_CLOSING;
+ } else {
+ qp->attrs.state = SIW_QP_STATE_ERROR;
+ siw_sq_flush(qp);
+ }
+ siw_rq_flush(qp);
+
+ drop_conn = 1;
+ break;
+
+ case SIW_QP_STATE_TERMINATE:
+ qp->attrs.state = SIW_QP_STATE_TERMINATE;
+
+ siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
+ RDMAP_ETYPE_CATASTROPHIC,
+ RDMAP_ECODE_UNSPECIFIED, 1);
+ drop_conn = 1;
+ break;
+
+ case SIW_QP_STATE_ERROR:
+ /*
+ * This is an emergency close.
+ *
+ * Any in progress transmit operation will get
+ * cancelled.
+ * This will likely result in a protocol failure,
+ * if a TX operation is in transit. The caller
+ * could unconditional wait to give the current
+ * operation a chance to complete.
+ * Esp., how to handle the non-empty IRQ case?
+ * The peer was asking for data transfer at a valid
+ * point in time.
+ */
+ siw_sq_flush(qp);
+ siw_rq_flush(qp);
+ qp->attrs.state = SIW_QP_STATE_ERROR;
+ drop_conn = 1;
+ break;
+
+ default:
+ break;
+ }
+ return drop_conn;
+}
+
+static void siw_qp_nextstate_from_term(struct siw_qp *qp,
+ struct siw_qp_attrs *attrs)
+{
+ switch (attrs->state) {
+ case SIW_QP_STATE_ERROR:
+ siw_rq_flush(qp);
+ qp->attrs.state = SIW_QP_STATE_ERROR;
+
+ if (tx_wqe(qp)->wr_status != SIW_WR_IDLE)
+ siw_sq_flush(qp);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static int siw_qp_nextstate_from_close(struct siw_qp *qp,
+ struct siw_qp_attrs *attrs)
+{
+ int rv = 0;
+
+ switch (attrs->state) {
+ case SIW_QP_STATE_IDLE:
+ WARN_ON(tx_wqe(qp)->wr_status != SIW_WR_IDLE);
+ qp->attrs.state = SIW_QP_STATE_IDLE;
+ break;
+
+ case SIW_QP_STATE_CLOSING:
+ /*
+ * The LLP may already moved the QP to closing
+ * due to graceful peer close init
+ */
+ break;
+
+ case SIW_QP_STATE_ERROR:
+ /*
+ * QP was moved to CLOSING by LLP event
+ * not yet seen by user.
+ */
+ qp->attrs.state = SIW_QP_STATE_ERROR;
+
+ if (tx_wqe(qp)->wr_status != SIW_WR_IDLE)
+ siw_sq_flush(qp);
+
+ siw_rq_flush(qp);
+ break;
+
+ default:
+ siw_dbg_qp(qp, "state transition undefined: %s => %s\n",
+ siw_qp_state_to_string[qp->attrs.state],
+ siw_qp_state_to_string[attrs->state]);
+
+ rv = -ECONNABORTED;
+ }
+ return rv;
+}
+
+/*
+ * Caller must hold qp->state_lock
+ */
+int siw_qp_modify(struct siw_qp *qp, struct siw_qp_attrs *attrs,
+ enum siw_qp_attr_mask mask)
+{
+ int drop_conn = 0, rv = 0;
+
+ if (!mask)
+ return 0;
+
+ siw_dbg_qp(qp, "state: %s => %s\n",
+ siw_qp_state_to_string[qp->attrs.state],
+ siw_qp_state_to_string[attrs->state]);
+
+ if (mask != SIW_QP_ATTR_STATE)
+ siw_qp_modify_nonstate(qp, attrs, mask);
+
+ if (!(mask & SIW_QP_ATTR_STATE))
+ return 0;
+
+ switch (qp->attrs.state) {
+ case SIW_QP_STATE_IDLE:
+ case SIW_QP_STATE_RTR:
+ rv = siw_qp_nextstate_from_idle(qp, attrs, mask);
+ break;
+
+ case SIW_QP_STATE_RTS:
+ drop_conn = siw_qp_nextstate_from_rts(qp, attrs);
+ break;
+
+ case SIW_QP_STATE_TERMINATE:
+ siw_qp_nextstate_from_term(qp, attrs);
+ break;
+
+ case SIW_QP_STATE_CLOSING:
+ siw_qp_nextstate_from_close(qp, attrs);
+ break;
+ default:
+ break;
+ }
+ if (drop_conn)
+ siw_qp_cm_drop(qp, 0);
+
+ return rv;
+}
+
+void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe)
+{
+ rreq->id = sqe->id;
+ rreq->opcode = sqe->opcode;
+ rreq->sge[0].laddr = sqe->sge[0].laddr;
+ rreq->sge[0].length = sqe->sge[0].length;
+ rreq->sge[0].lkey = sqe->sge[0].lkey;
+ rreq->sge[1].lkey = sqe->sge[1].lkey;
+ rreq->flags = sqe->flags | SIW_WQE_VALID;
+ rreq->num_sge = 1;
+}
+
+/*
+ * Must be called with SQ locked.
+ * To avoid complete SQ starvation by constant inbound READ requests,
+ * the active IRQ will not be served after qp->irq_burst, if the
+ * SQ has pending work.
+ */
+int siw_activate_tx(struct siw_qp *qp)
+{
+ struct siw_sqe *irqe, *sqe;
+ struct siw_wqe *wqe = tx_wqe(qp);
+ int rv = 1;
+
+ irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];
+
+ if (irqe->flags & SIW_WQE_VALID) {
+ sqe = sq_get_next(qp);
+
+ /*
+ * Avoid local WQE processing starvation in case
+ * of constant inbound READ request stream
+ */
+ if (sqe && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
+ qp->irq_burst = 0;
+ goto skip_irq;
+ }
+ memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
+ wqe->wr_status = SIW_WR_QUEUED;
+
+ /* start READ RESPONSE */
+ wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
+ wqe->sqe.flags = 0;
+ if (irqe->num_sge) {
+ wqe->sqe.num_sge = 1;
+ wqe->sqe.sge[0].length = irqe->sge[0].length;
+ wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
+ wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
+ } else {
+ wqe->sqe.num_sge = 0;
+ }
+
+ /* Retain original RREQ's message sequence number for
+ * potential error reporting cases.
+ */
+ wqe->sqe.sge[1].length = irqe->sge[1].length;
+
+ wqe->sqe.rkey = irqe->rkey;
+ wqe->sqe.raddr = irqe->raddr;
+
+ wqe->processed = 0;
+ qp->irq_get++;
+
+ /* mark current IRQ entry free */
+ smp_store_mb(irqe->flags, 0);
+
+ goto out;
+ }
+ sqe = sq_get_next(qp);
+ if (sqe) {
+skip_irq:
+ memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
+ wqe->wr_status = SIW_WR_QUEUED;
+
+ /* First copy SQE to kernel private memory */
+ memcpy(&wqe->sqe, sqe, sizeof(*sqe));
+
+ if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
+ rv = -EINVAL;
+ goto out;
+ }
+ if (wqe->sqe.flags & SIW_WQE_INLINE) {
+ if (wqe->sqe.opcode != SIW_OP_SEND &&
+ wqe->sqe.opcode != SIW_OP_WRITE) {
+ rv = -EINVAL;
+ goto out;
+ }
+ if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
+ rv = -EINVAL;
+ goto out;
+ }
+ wqe->sqe.sge[0].laddr = (u64)&wqe->sqe.sge[1];
+ wqe->sqe.sge[0].lkey = 0;
+ wqe->sqe.num_sge = 1;
+ }
+ if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
+ /* A READ cannot be fenced */
+ if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
+ wqe->sqe.opcode ==
+ SIW_OP_READ_LOCAL_INV)) {
+ siw_dbg_qp(qp, "cannot fence read\n");
+ rv = -EINVAL;
+ goto out;
+ }
+ spin_lock(&qp->orq_lock);
+
+ if (!siw_orq_empty(qp)) {
+ qp->tx_ctx.orq_fence = 1;
+ rv = 0;
+ }
+ spin_unlock(&qp->orq_lock);
+
+ } else if (wqe->sqe.opcode == SIW_OP_READ ||
+ wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
+ struct siw_sqe *rreq;
+
+ wqe->sqe.num_sge = 1;
+
+ spin_lock(&qp->orq_lock);
+
+ rreq = orq_get_free(qp);
+ if (rreq) {
+ /*
+ * Make an immediate copy in ORQ to be ready
+ * to process loopback READ reply
+ */
+ siw_read_to_orq(rreq, &wqe->sqe);
+ qp->orq_put++;
+ } else {
+ qp->tx_ctx.orq_fence = 1;
+ rv = 0;
+ }
+ spin_unlock(&qp->orq_lock);
+ }
+
+ /* Clear SQE, can be re-used by application */
+ smp_store_mb(sqe->flags, 0);
+ qp->sq_get++;
+ } else {
+ rv = 0;
+ }
+out:
+ if (unlikely(rv < 0)) {
+ siw_dbg_qp(qp, "error %d\n", rv);
+ wqe->wr_status = SIW_WR_IDLE;
+ }
+ return rv;
+}
+
+/*
+ * Check if current CQ state qualifies for calling CQ completion
+ * handler. Must be called with CQ lock held.
+ */
+static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags)
+{
+ u64 cq_notify;
+
+ if (!cq->base_cq.comp_handler)
+ return false;
+
+ cq_notify = READ_ONCE(*cq->notify);
+
+ if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) ||
+ ((cq_notify & SIW_NOTIFY_SOLICITED) &&
+ (flags & SIW_WQE_SOLICITED))) {
+ /* dis-arm CQ */
+ smp_store_mb(*cq->notify, SIW_NOTIFY_NOT);
+
+ return true;
+ }
+ return false;
+}
+
+int siw_sqe_complete(struct siw_qp *qp, struct siw_sqe *sqe, u32 bytes,
+ enum siw_wc_status status)
+{
+ struct siw_cq *cq = qp->scq;
+ int rv = 0;
+
+ if (cq) {
+ u32 sqe_flags = sqe->flags;
+ struct siw_cqe *cqe;
+ u32 idx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->lock, flags);
+
+ idx = cq->cq_put % cq->num_cqe;
+ cqe = &cq->queue[idx];
+
+ if (!READ_ONCE(cqe->flags)) {
+ bool notify;
+
+ cqe->id = sqe->id;
+ cqe->opcode = sqe->opcode;
+ cqe->status = status;
+ cqe->imm_data = 0;
+ cqe->bytes = bytes;
+
+ if (cq->kernel_verbs)
+ cqe->base_qp = qp->ib_qp;
+ else
+ cqe->qp_id = qp_id(qp);
+
+ /* mark CQE valid for application */
+ WRITE_ONCE(cqe->flags, SIW_WQE_VALID);
+ /* recycle SQE */
+ smp_store_mb(sqe->flags, 0);
+
+ cq->cq_put++;
+ notify = siw_cq_notify_now(cq, sqe_flags);
+
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+ if (notify) {
+ siw_dbg_cq(cq, "Call completion handler\n");
+ cq->base_cq.comp_handler(&cq->base_cq,
+ cq->base_cq.cq_context);
+ }
+ } else {
+ spin_unlock_irqrestore(&cq->lock, flags);
+ rv = -ENOMEM;
+ siw_cq_event(cq, IB_EVENT_CQ_ERR);
+ }
+ } else {
+ /* recycle SQE */
+ smp_store_mb(sqe->flags, 0);
+ }
+ return rv;
+}
+
+int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes,
+ u32 inval_stag, enum siw_wc_status status)
+{
+ struct siw_cq *cq = qp->rcq;
+ int rv = 0;
+
+ if (cq) {
+ struct siw_cqe *cqe;
+ u32 idx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->lock, flags);
+
+ idx = cq->cq_put % cq->num_cqe;
+ cqe = &cq->queue[idx];
+
+ if (!READ_ONCE(cqe->flags)) {
+ bool notify;
+ u8 cqe_flags = SIW_WQE_VALID;
+
+ cqe->id = rqe->id;
+ cqe->opcode = SIW_OP_RECEIVE;
+ cqe->status = status;
+ cqe->imm_data = 0;
+ cqe->bytes = bytes;
+
+ if (cq->kernel_verbs) {
+ cqe->base_qp = qp->ib_qp;
+ if (inval_stag) {
+ cqe_flags |= SIW_WQE_REM_INVAL;
+ cqe->inval_stag = inval_stag;
+ }
+ } else {
+ cqe->qp_id = qp_id(qp);
+ }
+ /* mark CQE valid for application */
+ WRITE_ONCE(cqe->flags, cqe_flags);
+ /* recycle RQE */
+ smp_store_mb(rqe->flags, 0);
+
+ cq->cq_put++;
+ notify = siw_cq_notify_now(cq, SIW_WQE_SIGNALLED);
+
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+ if (notify) {
+ siw_dbg_cq(cq, "Call completion handler\n");
+ cq->base_cq.comp_handler(&cq->base_cq,
+ cq->base_cq.cq_context);
+ }
+ } else {
+ spin_unlock_irqrestore(&cq->lock, flags);
+ rv = -ENOMEM;
+ siw_cq_event(cq, IB_EVENT_CQ_ERR);
+ }
+ } else {
+ /* recycle RQE */
+ smp_store_mb(rqe->flags, 0);
+ }
+ return rv;
+}
+
+/*
+ * siw_sq_flush()
+ *
+ * Flush SQ and ORRQ entries to CQ.
+ *
+ * Must be called with QP state write lock held.
+ * Therefore, SQ and ORQ lock must not be taken.
+ */
+void siw_sq_flush(struct siw_qp *qp)
+{
+ struct siw_sqe *sqe;
+ struct siw_wqe *wqe = tx_wqe(qp);
+ int async_event = 0;
+
+ /*
+ * Start with completing any work currently on the ORQ
+ */
+ while (qp->attrs.orq_size) {
+ sqe = &qp->orq[qp->orq_get % qp->attrs.orq_size];
+ if (!READ_ONCE(sqe->flags))
+ break;
+
+ if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0)
+ break;
+
+ WRITE_ONCE(sqe->flags, 0);
+ qp->orq_get++;
+ }
+ /*
+ * Flush an in-progress WQE if present
+ */
+ if (wqe->wr_status != SIW_WR_IDLE) {
+ siw_dbg_qp(qp, "flush current SQE, type %d, status %d\n",
+ tx_type(wqe), wqe->wr_status);
+
+ siw_wqe_put_mem(wqe, tx_type(wqe));
+
+ if (tx_type(wqe) != SIW_OP_READ_RESPONSE &&
+ ((tx_type(wqe) != SIW_OP_READ &&
+ tx_type(wqe) != SIW_OP_READ_LOCAL_INV) ||
+ wqe->wr_status == SIW_WR_QUEUED))
+ /*
+ * An in-progress Read Request is already in
+ * the ORQ
+ */
+ siw_sqe_complete(qp, &wqe->sqe, wqe->bytes,
+ SIW_WC_WR_FLUSH_ERR);
+
+ wqe->wr_status = SIW_WR_IDLE;
+ }
+ /*
+ * Flush the Send Queue
+ */
+ while (qp->attrs.sq_size) {
+ sqe = &qp->sendq[qp->sq_get % qp->attrs.sq_size];
+ if (!READ_ONCE(sqe->flags))
+ break;
+
+ async_event = 1;
+ if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0)
+ /*
+ * Shall IB_EVENT_SQ_DRAINED be supressed if work
+ * completion fails?
+ */
+ break;
+
+ WRITE_ONCE(sqe->flags, 0);
+ qp->sq_get++;
+ }
+ if (async_event)
+ siw_qp_event(qp, IB_EVENT_SQ_DRAINED);
+}
+
+/*
+ * siw_rq_flush()
+ *
+ * Flush recv queue entries to CQ. Also
+ * takes care of pending active tagged and untagged
+ * inbound transfers, which have target memory
+ * referenced.
+ *
+ * Must be called with QP state write lock held.
+ * Therefore, RQ lock must not be taken.
+ */
+void siw_rq_flush(struct siw_qp *qp)
+{
+ struct siw_wqe *wqe = &qp->rx_untagged.wqe_active;
+
+ /*
+ * Flush an in-progress untagged operation if present
+ */
+ if (wqe->wr_status != SIW_WR_IDLE) {
+ siw_dbg_qp(qp, "flush current rqe, type %d, status %d\n",
+ rx_type(wqe), wqe->wr_status);
+
+ siw_wqe_put_mem(wqe, rx_type(wqe));
+
+ if (rx_type(wqe) == SIW_OP_RECEIVE) {
+ siw_rqe_complete(qp, &wqe->rqe, wqe->bytes,
+ 0, SIW_WC_WR_FLUSH_ERR);
+ } else if (rx_type(wqe) != SIW_OP_READ &&
+ rx_type(wqe) != SIW_OP_READ_RESPONSE &&
+ rx_type(wqe) != SIW_OP_WRITE) {
+ siw_sqe_complete(qp, &wqe->sqe, 0, SIW_WC_WR_FLUSH_ERR);
+ }
+ wqe->wr_status = SIW_WR_IDLE;
+ }
+ wqe = &qp->rx_tagged.wqe_active;
+
+ if (wqe->wr_status != SIW_WR_IDLE) {
+ siw_wqe_put_mem(wqe, rx_type(wqe));
+ wqe->wr_status = SIW_WR_IDLE;
+ }
+ /*
+ * Flush the Receive Queue
+ */
+ while (qp->attrs.rq_size) {
+ struct siw_rqe *rqe =
+ &qp->recvq[qp->rq_get % qp->attrs.rq_size];
+
+ if (!READ_ONCE(rqe->flags))
+ break;
+
+ if (siw_rqe_complete(qp, rqe, 0, 0, SIW_WC_WR_FLUSH_ERR) != 0)
+ break;
+
+ WRITE_ONCE(rqe->flags, 0);
+ qp->rq_get++;
+ }
+}
+
+int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp)
+{
+ int rv = xa_alloc(&sdev->qp_xa, &qp->ib_qp->qp_num, qp, xa_limit_32b,
+ GFP_KERNEL);
+
+ if (!rv) {
+ kref_init(&qp->ref);
+ qp->sdev = sdev;
+ qp->qp_num = qp->ib_qp->qp_num;
+ siw_dbg_qp(qp, "new QP\n");
+ }
+ return rv;
+}
+
+void siw_free_qp(struct kref *ref)
+{
+ struct siw_qp *found, *qp = container_of(ref, struct siw_qp, ref);
+ struct siw_device *sdev = qp->sdev;
+ unsigned long flags;
+
+ if (qp->cep)
+ siw_cep_put(qp->cep);
+
+ found = xa_erase(&sdev->qp_xa, qp_id(qp));
+ WARN_ON(found != qp);
+ spin_lock_irqsave(&sdev->lock, flags);
+ list_del(&qp->devq);
+ spin_unlock_irqrestore(&sdev->lock, flags);
+
+ vfree(qp->sendq);
+ vfree(qp->recvq);
+ vfree(qp->irq);
+ vfree(qp->orq);
+
+ siw_put_tx_cpu(qp->tx_cpu);
+
+ atomic_dec(&sdev->num_qp);
+ siw_dbg_qp(qp, "free QP\n");
+ kfree_rcu(qp, rcu);
+}
diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c
new file mode 100644
index 000000000000..f87657a11657
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_qp_rx.c
@@ -0,0 +1,1458 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/scatterlist.h>
+#include <linux/highmem.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+
+#include "siw.h"
+#include "siw_verbs.h"
+#include "siw_mem.h"
+
+/*
+ * siw_rx_umem()
+ *
+ * Receive data of @len into target referenced by @dest_addr.
+ *
+ * @srx: Receive Context
+ * @umem: siw representation of target memory
+ * @dest_addr: user virtual address
+ * @len: number of bytes to place
+ */
+static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
+ u64 dest_addr, int len)
+{
+ int copied = 0;
+
+ while (len) {
+ struct page *p;
+ int pg_off, bytes, rv;
+ void *dest;
+
+ p = siw_get_upage(umem, dest_addr);
+ if (unlikely(!p)) {
+ pr_warn("siw: %s: [QP %u]: bogus addr: %p, %p\n",
+ __func__, qp_id(rx_qp(srx)),
+ (void *)dest_addr, (void *)umem->fp_addr);
+ /* siw internal error */
+ srx->skb_copied += copied;
+ srx->skb_new -= copied;
+
+ return -EFAULT;
+ }
+ pg_off = dest_addr & ~PAGE_MASK;
+ bytes = min(len, (int)PAGE_SIZE - pg_off);
+
+ siw_dbg_qp(rx_qp(srx), "page %p, bytes=%u\n", p, bytes);
+
+ dest = kmap_atomic(p);
+ rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off,
+ bytes);
+
+ if (unlikely(rv)) {
+ kunmap_atomic(dest);
+ srx->skb_copied += copied;
+ srx->skb_new -= copied;
+
+ pr_warn("siw: [QP %u]: %s, len %d, page %p, rv %d\n",
+ qp_id(rx_qp(srx)), __func__, len, p, rv);
+
+ return -EFAULT;
+ }
+ if (srx->mpa_crc_hd) {
+ if (rx_qp(srx)->kernel_verbs) {
+ crypto_shash_update(srx->mpa_crc_hd,
+ (u8 *)(dest + pg_off), bytes);
+ kunmap_atomic(dest);
+ } else {
+ kunmap_atomic(dest);
+ /*
+ * Do CRC on original, not target buffer.
+ * Some user land applications may
+ * concurrently write the target buffer,
+ * which would yield a broken CRC.
+ * Walking the skb twice is very ineffcient.
+ * Folding the CRC into skb_copy_bits()
+ * would be much better, but is currently
+ * not supported.
+ */
+ siw_crc_skb(srx, bytes);
+ }
+ } else {
+ kunmap_atomic(dest);
+ }
+ srx->skb_offset += bytes;
+ copied += bytes;
+ len -= bytes;
+ dest_addr += bytes;
+ pg_off = 0;
+ }
+ srx->skb_copied += copied;
+ srx->skb_new -= copied;
+
+ return copied;
+}
+
+static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len)
+{
+ int rv;
+
+ siw_dbg_qp(rx_qp(srx), "kva: 0x%p, len: %u\n", kva, len);
+
+ rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len);
+ if (unlikely(rv)) {
+ pr_warn("siw: [QP %u]: %s, len %d, kva 0x%p, rv %d\n",
+ qp_id(rx_qp(srx)), __func__, len, kva, rv);
+
+ return rv;
+ }
+ if (srx->mpa_crc_hd)
+ crypto_shash_update(srx->mpa_crc_hd, (u8 *)kva, len);
+
+ srx->skb_offset += len;
+ srx->skb_copied += len;
+ srx->skb_new -= len;
+
+ return len;
+}
+
+static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx,
+ struct siw_mem *mem, u64 addr, int len)
+{
+ struct siw_pbl *pbl = mem->pbl;
+ u64 offset = addr - mem->va;
+ int copied = 0;
+
+ while (len) {
+ int bytes;
+ u64 buf_addr =
+ siw_pbl_get_buffer(pbl, offset, &bytes, pbl_idx);
+ if (!buf_addr)
+ break;
+
+ bytes = min(bytes, len);
+ if (siw_rx_kva(srx, (void *)buf_addr, bytes) == bytes) {
+ copied += bytes;
+ offset += bytes;
+ len -= bytes;
+ } else {
+ break;
+ }
+ }
+ return copied;
+}
+
+/*
+ * siw_rresp_check_ntoh()
+ *
+ * Check incoming RRESP fragment header against expected
+ * header values and update expected values for potential next
+ * fragment.
+ *
+ * NOTE: This function must be called only if a RRESP DDP segment
+ * starts but not for fragmented consecutive pieces of an
+ * already started DDP segment.
+ */
+static int siw_rresp_check_ntoh(struct siw_rx_stream *srx,
+ struct siw_rx_fpdu *frx)
+{
+ struct iwarp_rdma_rresp *rresp = &srx->hdr.rresp;
+ struct siw_wqe *wqe = &frx->wqe_active;
+ enum ddp_ecode ecode;
+
+ u32 sink_stag = be32_to_cpu(rresp->sink_stag);
+ u64 sink_to = be64_to_cpu(rresp->sink_to);
+
+ if (frx->first_ddp_seg) {
+ srx->ddp_stag = wqe->sqe.sge[0].lkey;
+ srx->ddp_to = wqe->sqe.sge[0].laddr;
+ frx->pbl_idx = 0;
+ }
+ /* Below checks extend beyond the semantics of DDP, and
+ * into RDMAP:
+ * We check if the read response matches exactly the
+ * read request which was send to the remote peer to
+ * trigger this read response. RFC5040/5041 do not
+ * always have a proper error code for the detected
+ * error cases. We choose 'base or bounds error' for
+ * cases where the inbound STag is valid, but offset
+ * or length do not match our response receive state.
+ */
+ if (unlikely(srx->ddp_stag != sink_stag)) {
+ pr_warn("siw: [QP %u]: rresp stag: %08x != %08x\n",
+ qp_id(rx_qp(srx)), sink_stag, srx->ddp_stag);
+ ecode = DDP_ECODE_T_INVALID_STAG;
+ goto error;
+ }
+ if (unlikely(srx->ddp_to != sink_to)) {
+ pr_warn("siw: [QP %u]: rresp off: %016llx != %016llx\n",
+ qp_id(rx_qp(srx)), (unsigned long long)sink_to,
+ (unsigned long long)srx->ddp_to);
+ ecode = DDP_ECODE_T_BASE_BOUNDS;
+ goto error;
+ }
+ if (unlikely(!frx->more_ddp_segs &&
+ (wqe->processed + srx->fpdu_part_rem != wqe->bytes))) {
+ pr_warn("siw: [QP %u]: rresp len: %d != %d\n",
+ qp_id(rx_qp(srx)),
+ wqe->processed + srx->fpdu_part_rem, wqe->bytes);
+ ecode = DDP_ECODE_T_BASE_BOUNDS;
+ goto error;
+ }
+ return 0;
+error:
+ siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
+ DDP_ETYPE_TAGGED_BUF, ecode, 0);
+ return -EINVAL;
+}
+
+/*
+ * siw_write_check_ntoh()
+ *
+ * Check incoming WRITE fragment header against expected
+ * header values and update expected values for potential next
+ * fragment
+ *
+ * NOTE: This function must be called only if a WRITE DDP segment
+ * starts but not for fragmented consecutive pieces of an
+ * already started DDP segment.
+ */
+static int siw_write_check_ntoh(struct siw_rx_stream *srx,
+ struct siw_rx_fpdu *frx)
+{
+ struct iwarp_rdma_write *write = &srx->hdr.rwrite;
+ enum ddp_ecode ecode;
+
+ u32 sink_stag = be32_to_cpu(write->sink_stag);
+ u64 sink_to = be64_to_cpu(write->sink_to);
+
+ if (frx->first_ddp_seg) {
+ srx->ddp_stag = sink_stag;
+ srx->ddp_to = sink_to;
+ frx->pbl_idx = 0;
+ } else {
+ if (unlikely(srx->ddp_stag != sink_stag)) {
+ pr_warn("siw: [QP %u]: write stag: %08x != %08x\n",
+ qp_id(rx_qp(srx)), sink_stag,
+ srx->ddp_stag);
+ ecode = DDP_ECODE_T_INVALID_STAG;
+ goto error;
+ }
+ if (unlikely(srx->ddp_to != sink_to)) {
+ pr_warn("siw: [QP %u]: write off: %016llx != %016llx\n",
+ qp_id(rx_qp(srx)),
+ (unsigned long long)sink_to,
+ (unsigned long long)srx->ddp_to);
+ ecode = DDP_ECODE_T_BASE_BOUNDS;
+ goto error;
+ }
+ }
+ return 0;
+error:
+ siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
+ DDP_ETYPE_TAGGED_BUF, ecode, 0);
+ return -EINVAL;
+}
+
+/*
+ * siw_send_check_ntoh()
+ *
+ * Check incoming SEND fragment header against expected
+ * header values and update expected MSN if no next
+ * fragment expected
+ *
+ * NOTE: This function must be called only if a SEND DDP segment
+ * starts but not for fragmented consecutive pieces of an
+ * already started DDP segment.
+ */
+static int siw_send_check_ntoh(struct siw_rx_stream *srx,
+ struct siw_rx_fpdu *frx)
+{
+ struct iwarp_send_inv *send = &srx->hdr.send_inv;
+ struct siw_wqe *wqe = &frx->wqe_active;
+ enum ddp_ecode ecode;
+
+ u32 ddp_msn = be32_to_cpu(send->ddp_msn);
+ u32 ddp_mo = be32_to_cpu(send->ddp_mo);
+ u32 ddp_qn = be32_to_cpu(send->ddp_qn);
+
+ if (unlikely(ddp_qn != RDMAP_UNTAGGED_QN_SEND)) {
+ pr_warn("siw: [QP %u]: invalid ddp qn %d for send\n",
+ qp_id(rx_qp(srx)), ddp_qn);
+ ecode = DDP_ECODE_UT_INVALID_QN;
+ goto error;
+ }
+ if (unlikely(ddp_msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND])) {
+ pr_warn("siw: [QP %u]: send msn: %u != %u\n",
+ qp_id(rx_qp(srx)), ddp_msn,
+ srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
+ ecode = DDP_ECODE_UT_INVALID_MSN_RANGE;
+ goto error;
+ }
+ if (unlikely(ddp_mo != wqe->processed)) {
+ pr_warn("siw: [QP %u], send mo: %u != %u\n",
+ qp_id(rx_qp(srx)), ddp_mo, wqe->processed);
+ ecode = DDP_ECODE_UT_INVALID_MO;
+ goto error;
+ }
+ if (frx->first_ddp_seg) {
+ /* initialize user memory write position */
+ frx->sge_idx = 0;
+ frx->sge_off = 0;
+ frx->pbl_idx = 0;
+
+ /* only valid for SEND_INV and SEND_SE_INV operations */
+ srx->inval_stag = be32_to_cpu(send->inval_stag);
+ }
+ if (unlikely(wqe->bytes < wqe->processed + srx->fpdu_part_rem)) {
+ siw_dbg_qp(rx_qp(srx), "receive space short: %d - %d < %d\n",
+ wqe->bytes, wqe->processed, srx->fpdu_part_rem);
+ wqe->wc_status = SIW_WC_LOC_LEN_ERR;
+ ecode = DDP_ECODE_UT_INVALID_MSN_NOBUF;
+ goto error;
+ }
+ return 0;
+error:
+ siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
+ DDP_ETYPE_UNTAGGED_BUF, ecode, 0);
+ return -EINVAL;
+}
+
+static struct siw_wqe *siw_rqe_get(struct siw_qp *qp)
+{
+ struct siw_rqe *rqe;
+ struct siw_srq *srq;
+ struct siw_wqe *wqe = NULL;
+ bool srq_event = false;
+ unsigned long flags;
+
+ srq = qp->srq;
+ if (srq) {
+ spin_lock_irqsave(&srq->lock, flags);
+ if (unlikely(!srq->num_rqe))
+ goto out;
+
+ rqe = &srq->recvq[srq->rq_get % srq->num_rqe];
+ } else {
+ if (unlikely(!qp->recvq))
+ goto out;
+
+ rqe = &qp->recvq[qp->rq_get % qp->attrs.rq_size];
+ }
+ if (likely(rqe->flags == SIW_WQE_VALID)) {
+ int num_sge = rqe->num_sge;
+
+ if (likely(num_sge <= SIW_MAX_SGE)) {
+ int i = 0;
+
+ wqe = rx_wqe(&qp->rx_untagged);
+ rx_type(wqe) = SIW_OP_RECEIVE;
+ wqe->wr_status = SIW_WR_INPROGRESS;
+ wqe->bytes = 0;
+ wqe->processed = 0;
+
+ wqe->rqe.id = rqe->id;
+ wqe->rqe.num_sge = num_sge;
+
+ while (i < num_sge) {
+ wqe->rqe.sge[i].laddr = rqe->sge[i].laddr;
+ wqe->rqe.sge[i].lkey = rqe->sge[i].lkey;
+ wqe->rqe.sge[i].length = rqe->sge[i].length;
+ wqe->bytes += wqe->rqe.sge[i].length;
+ wqe->mem[i] = NULL;
+ i++;
+ }
+ /* can be re-used by appl */
+ smp_store_mb(rqe->flags, 0);
+ } else {
+ siw_dbg_qp(qp, "too many sge's: %d\n", rqe->num_sge);
+ if (srq)
+ spin_unlock_irqrestore(&srq->lock, flags);
+ return NULL;
+ }
+ if (!srq) {
+ qp->rq_get++;
+ } else {
+ if (srq->armed) {
+ /* Test SRQ limit */
+ u32 off = (srq->rq_get + srq->limit) %
+ srq->num_rqe;
+ struct siw_rqe *rqe2 = &srq->recvq[off];
+
+ if (!(rqe2->flags & SIW_WQE_VALID)) {
+ srq->armed = 0;
+ srq_event = true;
+ }
+ }
+ srq->rq_get++;
+ }
+ }
+out:
+ if (srq) {
+ spin_unlock_irqrestore(&srq->lock, flags);
+ if (srq_event)
+ siw_srq_event(srq, IB_EVENT_SRQ_LIMIT_REACHED);
+ }
+ return wqe;
+}
+
+/*
+ * siw_proc_send:
+ *
+ * Process one incoming SEND and place data into memory referenced by
+ * receive wqe.
+ *
+ * Function supports partially received sends (suspending/resuming
+ * current receive wqe processing)
+ *
+ * return value:
+ * 0: reached the end of a DDP segment
+ * -EAGAIN: to be called again to finish the DDP segment
+ */
+int siw_proc_send(struct siw_qp *qp)
+{
+ struct siw_rx_stream *srx = &qp->rx_stream;
+ struct siw_rx_fpdu *frx = &qp->rx_untagged;
+ struct siw_wqe *wqe;
+ u32 data_bytes; /* all data bytes available */
+ u32 rcvd_bytes; /* sum of data bytes rcvd */
+ int rv = 0;
+
+ if (frx->first_ddp_seg) {
+ wqe = siw_rqe_get(qp);
+ if (unlikely(!wqe)) {
+ siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
+ DDP_ETYPE_UNTAGGED_BUF,
+ DDP_ECODE_UT_INVALID_MSN_NOBUF, 0);
+ return -ENOENT;
+ }
+ } else {
+ wqe = rx_wqe(frx);
+ }
+ if (srx->state == SIW_GET_DATA_START) {
+ rv = siw_send_check_ntoh(srx, frx);
+ if (unlikely(rv)) {
+ siw_qp_event(qp, IB_EVENT_QP_FATAL);
+ return rv;
+ }
+ if (!srx->fpdu_part_rem) /* zero length SEND */
+ return 0;
+ }
+ data_bytes = min(srx->fpdu_part_rem, srx->skb_new);
+ rcvd_bytes = 0;
+
+ /* A zero length SEND will skip below loop */
+ while (data_bytes) {
+ struct ib_pd *pd;
+ struct siw_mem **mem, *mem_p;
+ struct siw_sge *sge;
+ u32 sge_bytes; /* data bytes avail for SGE */
+
+ sge = &wqe->rqe.sge[frx->sge_idx];
+
+ if (!sge->length) {
+ /* just skip empty sge's */
+ frx->sge_idx++;
+ frx->sge_off = 0;
+ frx->pbl_idx = 0;
+ continue;
+ }
+ sge_bytes = min(data_bytes, sge->length - frx->sge_off);
+ mem = &wqe->mem[frx->sge_idx];
+
+ /*
+ * check with QP's PD if no SRQ present, SRQ's PD otherwise
+ */
+ pd = qp->srq == NULL ? qp->pd : qp->srq->base_srq.pd;
+
+ rv = siw_check_sge(pd, sge, mem, IB_ACCESS_LOCAL_WRITE,
+ frx->sge_off, sge_bytes);
+ if (unlikely(rv)) {
+ siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
+ DDP_ETYPE_CATASTROPHIC,
+ DDP_ECODE_CATASTROPHIC, 0);
+
+ siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
+ break;
+ }
+ mem_p = *mem;
+ if (mem_p->mem_obj == NULL)
+ rv = siw_rx_kva(srx,
+ (void *)(sge->laddr + frx->sge_off),
+ sge_bytes);
+ else if (!mem_p->is_pbl)
+ rv = siw_rx_umem(srx, mem_p->umem,
+ sge->laddr + frx->sge_off, sge_bytes);
+ else
+ rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
+ sge->laddr + frx->sge_off, sge_bytes);
+
+ if (unlikely(rv != sge_bytes)) {
+ wqe->processed += rcvd_bytes;
+
+ siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
+ DDP_ETYPE_CATASTROPHIC,
+ DDP_ECODE_CATASTROPHIC, 0);
+ return -EINVAL;
+ }
+ frx->sge_off += rv;
+
+ if (frx->sge_off == sge->length) {
+ frx->sge_idx++;
+ frx->sge_off = 0;
+ frx->pbl_idx = 0;
+ }
+ data_bytes -= rv;
+ rcvd_bytes += rv;
+
+ srx->fpdu_part_rem -= rv;
+ srx->fpdu_part_rcvd += rv;
+ }
+ wqe->processed += rcvd_bytes;
+
+ if (!srx->fpdu_part_rem)
+ return 0;
+
+ return (rv < 0) ? rv : -EAGAIN;
+}
+
+/*
+ * siw_proc_write:
+ *
+ * Place incoming WRITE after referencing and checking target buffer
+
+ * Function supports partially received WRITEs (suspending/resuming
+ * current receive processing)
+ *
+ * return value:
+ * 0: reached the end of a DDP segment
+ * -EAGAIN: to be called again to finish the DDP segment
+ */
+int siw_proc_write(struct siw_qp *qp)
+{
+ struct siw_rx_stream *srx = &qp->rx_stream;
+ struct siw_rx_fpdu *frx = &qp->rx_tagged;
+ struct siw_mem *mem;
+ int bytes, rv;
+
+ if (srx->state == SIW_GET_DATA_START) {
+ if (!srx->fpdu_part_rem) /* zero length WRITE */
+ return 0;
+
+ rv = siw_write_check_ntoh(srx, frx);
+ if (unlikely(rv)) {
+ siw_qp_event(qp, IB_EVENT_QP_FATAL);
+ return rv;
+ }
+ }
+ bytes = min(srx->fpdu_part_rem, srx->skb_new);
+
+ if (frx->first_ddp_seg) {
+ struct siw_wqe *wqe = rx_wqe(frx);
+
+ rx_mem(frx) = siw_mem_id2obj(qp->sdev, srx->ddp_stag >> 8);
+ if (unlikely(!rx_mem(frx))) {
+ siw_dbg_qp(qp,
+ "sink stag not found/invalid, stag 0x%08x\n",
+ srx->ddp_stag);
+
+ siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
+ DDP_ETYPE_TAGGED_BUF,
+ DDP_ECODE_T_INVALID_STAG, 0);
+ return -EINVAL;
+ }
+ wqe->rqe.num_sge = 1;
+ rx_type(wqe) = SIW_OP_WRITE;
+ wqe->wr_status = SIW_WR_INPROGRESS;
+ }
+ mem = rx_mem(frx);
+
+ /*
+ * Check if application re-registered memory with different
+ * key field of STag.
+ */
+ if (unlikely(mem->stag != srx->ddp_stag)) {
+ siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
+ DDP_ETYPE_TAGGED_BUF,
+ DDP_ECODE_T_INVALID_STAG, 0);
+ return -EINVAL;
+ }
+ rv = siw_check_mem(qp->pd, mem, srx->ddp_to + srx->fpdu_part_rcvd,
+ IB_ACCESS_REMOTE_WRITE, bytes);
+ if (unlikely(rv)) {
+ siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
+ DDP_ETYPE_TAGGED_BUF, siw_tagged_error(-rv),
+ 0);
+
+ siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
+
+ return -EINVAL;
+ }
+
+ if (mem->mem_obj == NULL)
+ rv = siw_rx_kva(srx,
+ (void *)(srx->ddp_to + srx->fpdu_part_rcvd),
+ bytes);
+ else if (!mem->is_pbl)
+ rv = siw_rx_umem(srx, mem->umem,
+ srx->ddp_to + srx->fpdu_part_rcvd, bytes);
+ else
+ rv = siw_rx_pbl(srx, &frx->pbl_idx, mem,
+ srx->ddp_to + srx->fpdu_part_rcvd, bytes);
+
+ if (unlikely(rv != bytes)) {
+ siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
+ DDP_ETYPE_CATASTROPHIC,
+ DDP_ECODE_CATASTROPHIC, 0);
+ return -EINVAL;
+ }
+ srx->fpdu_part_rem -= rv;
+ srx->fpdu_part_rcvd += rv;
+
+ if (!srx->fpdu_part_rem) {
+ srx->ddp_to += srx->fpdu_part_rcvd;
+ return 0;
+ }
+ return -EAGAIN;
+}
+
+/*
+ * Inbound RREQ's cannot carry user data.
+ */
+int siw_proc_rreq(struct siw_qp *qp)
+{
+ struct siw_rx_stream *srx = &qp->rx_stream;
+
+ if (!srx->fpdu_part_rem)
+ return 0;
+
+ pr_warn("siw: [QP %u]: rreq with mpa len %d\n", qp_id(qp),
+ be16_to_cpu(srx->hdr.ctrl.mpa_len));
+
+ return -EPROTO;
+}
+
+/*
+ * siw_init_rresp:
+ *
+ * Process inbound RDMA READ REQ. Produce a pseudo READ RESPONSE WQE.
+ * Put it at the tail of the IRQ, if there is another WQE currently in
+ * transmit processing. If not, make it the current WQE to be processed
+ * and schedule transmit processing.
+ *
+ * Can be called from softirq context and from process
+ * context (RREAD socket loopback case!)
+ *
+ * return value:
+ * 0: success,
+ * failure code otherwise
+ */
+
+static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx)
+{
+ struct siw_wqe *tx_work = tx_wqe(qp);
+ struct siw_sqe *resp;
+
+ uint64_t raddr = be64_to_cpu(srx->hdr.rreq.sink_to),
+ laddr = be64_to_cpu(srx->hdr.rreq.source_to);
+ uint32_t length = be32_to_cpu(srx->hdr.rreq.read_size),
+ lkey = be32_to_cpu(srx->hdr.rreq.source_stag),
+ rkey = be32_to_cpu(srx->hdr.rreq.sink_stag),
+ msn = be32_to_cpu(srx->hdr.rreq.ddp_msn);
+
+ int run_sq = 1, rv = 0;
+ unsigned long flags;
+
+ if (unlikely(msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ])) {
+ siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
+ DDP_ETYPE_UNTAGGED_BUF,
+ DDP_ECODE_UT_INVALID_MSN_RANGE, 0);
+ return -EPROTO;
+ }
+ spin_lock_irqsave(&qp->sq_lock, flags);
+
+ if (tx_work->wr_status == SIW_WR_IDLE) {
+ /*
+ * immediately schedule READ response w/o
+ * consuming IRQ entry: IRQ must be empty.
+ */
+ tx_work->processed = 0;
+ tx_work->mem[0] = NULL;
+ tx_work->wr_status = SIW_WR_QUEUED;
+ resp = &tx_work->sqe;
+ } else {
+ resp = irq_alloc_free(qp);
+ run_sq = 0;
+ }
+ if (likely(resp)) {
+ resp->opcode = SIW_OP_READ_RESPONSE;
+
+ resp->sge[0].length = length;
+ resp->sge[0].laddr = laddr;
+ resp->sge[0].lkey = lkey;
+
+ /* Keep aside message sequence number for potential
+ * error reporting during Read Response generation.
+ */
+ resp->sge[1].length = msn;
+
+ resp->raddr = raddr;
+ resp->rkey = rkey;
+ resp->num_sge = length ? 1 : 0;
+
+ /* RRESP now valid as current TX wqe or placed into IRQ */
+ smp_store_mb(resp->flags, SIW_WQE_VALID);
+ } else {
+ pr_warn("siw: [QP %u]: irq %d exceeded %d\n", qp_id(qp),
+ qp->irq_put % qp->attrs.irq_size, qp->attrs.irq_size);
+
+ siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
+ RDMAP_ETYPE_REMOTE_OPERATION,
+ RDMAP_ECODE_CATASTROPHIC_STREAM, 0);
+ rv = -EPROTO;
+ }
+
+ spin_unlock_irqrestore(&qp->sq_lock, flags);
+
+ if (run_sq)
+ rv = siw_sq_start(qp);
+
+ return rv;
+}
+
+/*
+ * Only called at start of Read.Resonse processing.
+ * Transfer pending Read from tip of ORQ into currrent rx wqe,
+ * but keep ORQ entry valid until Read.Response processing done.
+ * No Queue locking needed.
+ */
+static int siw_orqe_start_rx(struct siw_qp *qp)
+{
+ struct siw_sqe *orqe;
+ struct siw_wqe *wqe = NULL;
+
+ /* make sure ORQ indices are current */
+ smp_mb();
+
+ orqe = orq_get_current(qp);
+ if (READ_ONCE(orqe->flags) & SIW_WQE_VALID) {
+ /* RRESP is a TAGGED RDMAP operation */
+ wqe = rx_wqe(&qp->rx_tagged);
+ wqe->sqe.id = orqe->id;
+ wqe->sqe.opcode = orqe->opcode;
+ wqe->sqe.sge[0].laddr = orqe->sge[0].laddr;
+ wqe->sqe.sge[0].lkey = orqe->sge[0].lkey;
+ wqe->sqe.sge[0].length = orqe->sge[0].length;
+ wqe->sqe.flags = orqe->flags;
+ wqe->sqe.num_sge = 1;
+ wqe->bytes = orqe->sge[0].length;
+ wqe->processed = 0;
+ wqe->mem[0] = NULL;
+ /* make sure WQE is completely written before valid */
+ smp_wmb();
+ wqe->wr_status = SIW_WR_INPROGRESS;
+
+ return 0;
+ }
+ return -EPROTO;
+}
+
+/*
+ * siw_proc_rresp:
+ *
+ * Place incoming RRESP data into memory referenced by RREQ WQE
+ * which is at the tip of the ORQ
+ *
+ * Function supports partially received RRESP's (suspending/resuming
+ * current receive processing)
+ */
+int siw_proc_rresp(struct siw_qp *qp)
+{
+ struct siw_rx_stream *srx = &qp->rx_stream;
+ struct siw_rx_fpdu *frx = &qp->rx_tagged;
+ struct siw_wqe *wqe = rx_wqe(frx);
+ struct siw_mem **mem, *mem_p;
+ struct siw_sge *sge;
+ int bytes, rv;
+
+ if (frx->first_ddp_seg) {
+ if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
+ pr_warn("siw: [QP %u]: proc RRESP: status %d, op %d\n",
+ qp_id(qp), wqe->wr_status, wqe->sqe.opcode);
+ rv = -EPROTO;
+ goto error_term;
+ }
+ /*
+ * fetch pending RREQ from orq
+ */
+ rv = siw_orqe_start_rx(qp);
+ if (rv) {
+ pr_warn("siw: [QP %u]: ORQ empty at idx %d\n",
+ qp_id(qp), qp->orq_get % qp->attrs.orq_size);
+ goto error_term;
+ }
+ rv = siw_rresp_check_ntoh(srx, frx);
+ if (unlikely(rv)) {
+ siw_qp_event(qp, IB_EVENT_QP_FATAL);
+ return rv;
+ }
+ } else {
+ if (unlikely(wqe->wr_status != SIW_WR_INPROGRESS)) {
+ pr_warn("siw: [QP %u]: resume RRESP: status %d\n",
+ qp_id(qp), wqe->wr_status);
+ rv = -EPROTO;
+ goto error_term;
+ }
+ }
+ if (!srx->fpdu_part_rem) /* zero length RRESPONSE */
+ return 0;
+
+ sge = wqe->sqe.sge; /* there is only one */
+ mem = &wqe->mem[0];
+
+ if (!(*mem)) {
+ /*
+ * check target memory which resolves memory on first fragment
+ */
+ rv = siw_check_sge(qp->pd, sge, mem, IB_ACCESS_LOCAL_WRITE, 0,
+ wqe->bytes);
+ if (unlikely(rv)) {
+ siw_dbg_qp(qp, "target mem check: %d\n", rv);
+ wqe->wc_status = SIW_WC_LOC_PROT_ERR;
+
+ siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
+ DDP_ETYPE_TAGGED_BUF,
+ siw_tagged_error(-rv), 0);
+
+ siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
+
+ return -EINVAL;
+ }
+ }
+ mem_p = *mem;
+
+ bytes = min(srx->fpdu_part_rem, srx->skb_new);
+
+ if (mem_p->mem_obj == NULL)
+ rv = siw_rx_kva(srx, (void *)(sge->laddr + wqe->processed),
+ bytes);
+ else if (!mem_p->is_pbl)
+ rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed,
+ bytes);
+ else
+ rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
+ sge->laddr + wqe->processed, bytes);
+ if (rv != bytes) {
+ wqe->wc_status = SIW_WC_GENERAL_ERR;
+ rv = -EINVAL;
+ goto error_term;
+ }
+ srx->fpdu_part_rem -= rv;
+ srx->fpdu_part_rcvd += rv;
+ wqe->processed += rv;
+
+ if (!srx->fpdu_part_rem) {
+ srx->ddp_to += srx->fpdu_part_rcvd;
+ return 0;
+ }
+ return -EAGAIN;
+
+error_term:
+ siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_CATASTROPHIC,
+ DDP_ECODE_CATASTROPHIC, 0);
+ return rv;
+}
+
+int siw_proc_terminate(struct siw_qp *qp)
+{
+ struct siw_rx_stream *srx = &qp->rx_stream;
+ struct sk_buff *skb = srx->skb;
+ struct iwarp_terminate *term = &srx->hdr.terminate;
+ union iwarp_hdr term_info;
+ u8 *infop = (u8 *)&term_info;
+ enum rdma_opcode op;
+ u16 to_copy = sizeof(struct iwarp_ctrl);
+
+ pr_warn("siw: got TERMINATE. layer %d, type %d, code %d\n",
+ __rdmap_term_layer(term), __rdmap_term_etype(term),
+ __rdmap_term_ecode(term));
+
+ if (be32_to_cpu(term->ddp_qn) != RDMAP_UNTAGGED_QN_TERMINATE ||
+ be32_to_cpu(term->ddp_msn) !=
+ qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] ||
+ be32_to_cpu(term->ddp_mo) != 0) {
+ pr_warn("siw: rx bogus TERM [QN x%08x, MSN x%08x, MO x%08x]\n",
+ be32_to_cpu(term->ddp_qn), be32_to_cpu(term->ddp_msn),
+ be32_to_cpu(term->ddp_mo));
+ return -ECONNRESET;
+ }
+ /*
+ * Receive remaining pieces of TERM if indicated
+ */
+ if (!term->flag_m)
+ return -ECONNRESET;
+
+ /* Do not take the effort to reassemble a network fragmented
+ * TERM message
+ */
+ if (srx->skb_new < sizeof(struct iwarp_ctrl_tagged))
+ return -ECONNRESET;
+
+ memset(infop, 0, sizeof(term_info));
+
+ skb_copy_bits(skb, srx->skb_offset, infop, to_copy);
+
+ op = __rdmap_get_opcode(&term_info.ctrl);
+ if (op >= RDMAP_TERMINATE)
+ goto out;
+
+ infop += to_copy;
+ srx->skb_offset += to_copy;
+ srx->skb_new -= to_copy;
+ srx->skb_copied += to_copy;
+ srx->fpdu_part_rcvd += to_copy;
+ srx->fpdu_part_rem -= to_copy;
+
+ to_copy = iwarp_pktinfo[op].hdr_len - to_copy;
+
+ /* Again, no network fragmented TERM's */
+ if (to_copy + MPA_CRC_SIZE > srx->skb_new)
+ return -ECONNRESET;
+
+ skb_copy_bits(skb, srx->skb_offset, infop, to_copy);
+
+ if (term->flag_r) {
+ siw_dbg_qp(qp, "TERM reports RDMAP hdr type %u, len %u (%s)\n",
+ op, be16_to_cpu(term_info.ctrl.mpa_len),
+ term->flag_m ? "valid" : "invalid");
+ } else if (term->flag_d) {
+ siw_dbg_qp(qp, "TERM reports DDP hdr type %u, len %u (%s)\n",
+ op, be16_to_cpu(term_info.ctrl.mpa_len),
+ term->flag_m ? "valid" : "invalid");
+ }
+out:
+ srx->skb_new -= to_copy;
+ srx->skb_offset += to_copy;
+ srx->skb_copied += to_copy;
+ srx->fpdu_part_rcvd += to_copy;
+ srx->fpdu_part_rem -= to_copy;
+
+ return -ECONNRESET;
+}
+
+static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx)
+{
+ struct sk_buff *skb = srx->skb;
+ u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad;
+ __wsum crc_in, crc_own = 0;
+
+ siw_dbg_qp(qp, "expected %d, available %d, pad %u\n",
+ srx->fpdu_part_rem, srx->skb_new, srx->pad);
+
+ if (srx->skb_new < srx->fpdu_part_rem)
+ return -EAGAIN;
+
+ skb_copy_bits(skb, srx->skb_offset, tbuf, srx->fpdu_part_rem);
+
+ if (srx->mpa_crc_hd && srx->pad)
+ crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad);
+
+ srx->skb_new -= srx->fpdu_part_rem;
+ srx->skb_offset += srx->fpdu_part_rem;
+ srx->skb_copied += srx->fpdu_part_rem;
+
+ if (!srx->mpa_crc_hd)
+ return 0;
+
+ /*
+ * CRC32 is computed, transmitted and received directly in NBO,
+ * so there's never a reason to convert byte order.
+ */
+ crypto_shash_final(srx->mpa_crc_hd, (u8 *)&crc_own);
+ crc_in = (__force __wsum)srx->trailer.crc;
+
+ if (unlikely(crc_in != crc_own)) {
+ pr_warn("siw: crc error. in: %08x, own %08x, op %u\n",
+ crc_in, crc_own, qp->rx_stream.rdmap_op);
+
+ siw_init_terminate(qp, TERM_ERROR_LAYER_LLP,
+ LLP_ETYPE_MPA,
+ LLP_ECODE_RECEIVED_CRC, 0);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+#define MIN_DDP_HDR sizeof(struct iwarp_ctrl_tagged)
+
+static int siw_get_hdr(struct siw_rx_stream *srx)
+{
+ struct sk_buff *skb = srx->skb;
+ struct siw_qp *qp = rx_qp(srx);
+ struct iwarp_ctrl *c_hdr = &srx->hdr.ctrl;
+ struct siw_rx_fpdu *frx;
+ u8 opcode;
+ int bytes;
+
+ if (srx->fpdu_part_rcvd < MIN_DDP_HDR) {
+ /*
+ * copy a mimimum sized (tagged) DDP frame control part
+ */
+ bytes = min_t(int, srx->skb_new,
+ MIN_DDP_HDR - srx->fpdu_part_rcvd);
+
+ skb_copy_bits(skb, srx->skb_offset,
+ (char *)c_hdr + srx->fpdu_part_rcvd, bytes);
+
+ srx->fpdu_part_rcvd += bytes;
+
+ srx->skb_new -= bytes;
+ srx->skb_offset += bytes;
+ srx->skb_copied += bytes;
+
+ if (srx->fpdu_part_rcvd < MIN_DDP_HDR)
+ return -EAGAIN;
+
+ if (unlikely(__ddp_get_version(c_hdr) != DDP_VERSION)) {
+ enum ddp_etype etype;
+ enum ddp_ecode ecode;
+
+ pr_warn("siw: received ddp version unsupported %d\n",
+ __ddp_get_version(c_hdr));
+
+ if (c_hdr->ddp_rdmap_ctrl & DDP_FLAG_TAGGED) {
+ etype = DDP_ETYPE_TAGGED_BUF;
+ ecode = DDP_ECODE_T_VERSION;
+ } else {
+ etype = DDP_ETYPE_UNTAGGED_BUF;
+ ecode = DDP_ECODE_UT_VERSION;
+ }
+ siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
+ etype, ecode, 0);
+ return -EINVAL;
+ }
+ if (unlikely(__rdmap_get_version(c_hdr) != RDMAP_VERSION)) {
+ pr_warn("siw: received rdmap version unsupported %d\n",
+ __rdmap_get_version(c_hdr));
+
+ siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
+ RDMAP_ETYPE_REMOTE_OPERATION,
+ RDMAP_ECODE_VERSION, 0);
+ return -EINVAL;
+ }
+ opcode = __rdmap_get_opcode(c_hdr);
+
+ if (opcode > RDMAP_TERMINATE) {
+ pr_warn("siw: received unknown packet type %u\n",
+ opcode);
+
+ siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
+ RDMAP_ETYPE_REMOTE_OPERATION,
+ RDMAP_ECODE_OPCODE, 0);
+ return -EINVAL;
+ }
+ siw_dbg_qp(rx_qp(srx), "new header, opcode %u\n", opcode);
+ } else {
+ opcode = __rdmap_get_opcode(c_hdr);
+ }
+ set_rx_fpdu_context(qp, opcode);
+ frx = qp->rx_fpdu;
+
+ /*
+ * Figure out len of current hdr: variable length of
+ * iwarp hdr may force us to copy hdr information in
+ * two steps. Only tagged DDP messages are already
+ * completely received.
+ */
+ if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) {
+ bytes = iwarp_pktinfo[opcode].hdr_len - MIN_DDP_HDR;
+
+ if (srx->skb_new < bytes)
+ return -EAGAIN;
+
+ skb_copy_bits(skb, srx->skb_offset,
+ (char *)c_hdr + srx->fpdu_part_rcvd, bytes);
+
+ srx->fpdu_part_rcvd += bytes;
+
+ srx->skb_new -= bytes;
+ srx->skb_offset += bytes;
+ srx->skb_copied += bytes;
+ }
+
+ /*
+ * DDP/RDMAP header receive completed. Check if the current
+ * DDP segment starts a new RDMAP message or continues a previously
+ * started RDMAP message.
+ *
+ * Alternating reception of DDP segments (or FPDUs) from incomplete
+ * tagged and untagged RDMAP messages is supported, as long as
+ * the current tagged or untagged message gets eventually completed
+ * w/o intersection from another message of the same type
+ * (tagged/untagged). E.g., a WRITE can get intersected by a SEND,
+ * but not by a READ RESPONSE etc.
+ */
+ if (srx->mpa_crc_hd) {
+ /*
+ * Restart CRC computation
+ */
+ crypto_shash_init(srx->mpa_crc_hd);
+ crypto_shash_update(srx->mpa_crc_hd, (u8 *)c_hdr,
+ srx->fpdu_part_rcvd);
+ }
+ if (frx->more_ddp_segs) {
+ frx->first_ddp_seg = 0;
+ if (frx->prev_rdmap_op != opcode) {
+ pr_warn("siw: packet intersection: %u : %u\n",
+ frx->prev_rdmap_op, opcode);
+ /*
+ * The last inbound RDMA operation of same type
+ * (tagged or untagged) is left unfinished.
+ * To complete it in error, make it the current
+ * operation again, even with the header already
+ * overwritten. For error handling, only the opcode
+ * and current rx context are relevant.
+ */
+ set_rx_fpdu_context(qp, frx->prev_rdmap_op);
+ __rdmap_set_opcode(c_hdr, frx->prev_rdmap_op);
+ return -EPROTO;
+ }
+ } else {
+ frx->prev_rdmap_op = opcode;
+ frx->first_ddp_seg = 1;
+ }
+ frx->more_ddp_segs = c_hdr->ddp_rdmap_ctrl & DDP_FLAG_LAST ? 0 : 1;
+
+ return 0;
+}
+
+static int siw_check_tx_fence(struct siw_qp *qp)
+{
+ struct siw_wqe *tx_waiting = tx_wqe(qp);
+ struct siw_sqe *rreq;
+ int resume_tx = 0, rv = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->orq_lock, flags);
+
+ rreq = orq_get_current(qp);
+
+ /* free current orq entry */
+ WRITE_ONCE(rreq->flags, 0);
+
+ if (qp->tx_ctx.orq_fence) {
+ if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) {
+ pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
+ qp_id(qp), tx_waiting->wr_status);
+ rv = -EPROTO;
+ goto out;
+ }
+ /* resume SQ processing */
+ if (tx_waiting->sqe.opcode == SIW_OP_READ ||
+ tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
+ rreq = orq_get_tail(qp);
+ if (unlikely(!rreq)) {
+ pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
+ rv = -EPROTO;
+ goto out;
+ }
+ siw_read_to_orq(rreq, &tx_waiting->sqe);
+
+ qp->orq_put++;
+ qp->tx_ctx.orq_fence = 0;
+ resume_tx = 1;
+
+ } else if (siw_orq_empty(qp)) {
+ qp->tx_ctx.orq_fence = 0;
+ resume_tx = 1;
+ } else {
+ pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n",
+ qp_id(qp), qp->orq_get, qp->orq_put);
+ rv = -EPROTO;
+ }
+ }
+ qp->orq_get++;
+out:
+ spin_unlock_irqrestore(&qp->orq_lock, flags);
+
+ if (resume_tx)
+ rv = siw_sq_start(qp);
+
+ return rv;
+}
+
+/*
+ * siw_rdmap_complete()
+ *
+ * Complete processing of an RDMA message after receiving all
+ * DDP segmens or ABort processing after encountering error case.
+ *
+ * o SENDs + RRESPs will need for completion,
+ * o RREQs need for READ RESPONSE initialization
+ * o WRITEs need memory dereferencing
+ *
+ * TODO: Failed WRITEs need local error to be surfaced.
+ */
+static int siw_rdmap_complete(struct siw_qp *qp, int error)
+{
+ struct siw_rx_stream *srx = &qp->rx_stream;
+ struct siw_wqe *wqe = rx_wqe(qp->rx_fpdu);
+ enum siw_wc_status wc_status = wqe->wc_status;
+ u8 opcode = __rdmap_get_opcode(&srx->hdr.ctrl);
+ int rv = 0;
+
+ switch (opcode) {
+ case RDMAP_SEND_SE:
+ case RDMAP_SEND_SE_INVAL:
+ wqe->rqe.flags |= SIW_WQE_SOLICITED;
+ /* Fall through */
+
+ case RDMAP_SEND:
+ case RDMAP_SEND_INVAL:
+ if (wqe->wr_status == SIW_WR_IDLE)
+ break;
+
+ srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++;
+
+ if (error != 0 && wc_status == SIW_WC_SUCCESS)
+ wc_status = SIW_WC_GENERAL_ERR;
+ /*
+ * Handle STag invalidation request
+ */
+ if (wc_status == SIW_WC_SUCCESS &&
+ (opcode == RDMAP_SEND_INVAL ||
+ opcode == RDMAP_SEND_SE_INVAL)) {
+ rv = siw_invalidate_stag(qp->pd, srx->inval_stag);
+ if (rv) {
+ siw_init_terminate(
+ qp, TERM_ERROR_LAYER_RDMAP,
+ rv == -EACCES ?
+ RDMAP_ETYPE_REMOTE_PROTECTION :
+ RDMAP_ETYPE_REMOTE_OPERATION,
+ RDMAP_ECODE_CANNOT_INVALIDATE, 0);
+
+ wc_status = SIW_WC_REM_INV_REQ_ERR;
+ }
+ rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
+ rv ? 0 : srx->inval_stag,
+ wc_status);
+ } else {
+ rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
+ 0, wc_status);
+ }
+ siw_wqe_put_mem(wqe, SIW_OP_RECEIVE);
+ break;
+
+ case RDMAP_RDMA_READ_RESP:
+ if (wqe->wr_status == SIW_WR_IDLE)
+ break;
+
+ if (error != 0) {
+ if ((srx->state == SIW_GET_HDR &&
+ qp->rx_fpdu->first_ddp_seg) || error == -ENODATA)
+ /* possible RREQ in ORQ left untouched */
+ break;
+
+ if (wc_status == SIW_WC_SUCCESS)
+ wc_status = SIW_WC_GENERAL_ERR;
+ } else if (qp->kernel_verbs &&
+ rx_type(wqe) == SIW_OP_READ_LOCAL_INV) {
+ /*
+ * Handle any STag invalidation request
+ */
+ rv = siw_invalidate_stag(qp->pd, wqe->sqe.sge[0].lkey);
+ if (rv) {
+ siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
+ RDMAP_ETYPE_CATASTROPHIC,
+ RDMAP_ECODE_UNSPECIFIED, 0);
+
+ if (wc_status == SIW_WC_SUCCESS) {
+ wc_status = SIW_WC_GENERAL_ERR;
+ error = rv;
+ }
+ }
+ }
+ /*
+ * All errors turn the wqe into signalled.
+ */
+ if ((wqe->sqe.flags & SIW_WQE_SIGNALLED) || error != 0)
+ rv = siw_sqe_complete(qp, &wqe->sqe, wqe->processed,
+ wc_status);
+ siw_wqe_put_mem(wqe, SIW_OP_READ);
+
+ if (!error)
+ rv = siw_check_tx_fence(qp);
+ else
+ /* Disable current ORQ eleement */
+ WRITE_ONCE(orq_get_current(qp)->flags, 0);
+ break;
+
+ case RDMAP_RDMA_READ_REQ:
+ if (!error) {
+ rv = siw_init_rresp(qp, srx);
+ srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++;
+ }
+ break;
+
+ case RDMAP_RDMA_WRITE:
+ if (wqe->wr_status == SIW_WR_IDLE)
+ break;
+
+ /*
+ * Free References from memory object if
+ * attached to receive context (inbound WRITE).
+ * While a zero-length WRITE is allowed,
+ * no memory reference got created.
+ */
+ if (rx_mem(&qp->rx_tagged)) {
+ siw_mem_put(rx_mem(&qp->rx_tagged));
+ rx_mem(&qp->rx_tagged) = NULL;
+ }
+ break;
+
+ default:
+ break;
+ }
+ wqe->wr_status = SIW_WR_IDLE;
+
+ return rv;
+}
+
+/*
+ * siw_tcp_rx_data()
+ *
+ * Main routine to consume inbound TCP payload
+ *
+ * @rd_desc: read descriptor
+ * @skb: socket buffer
+ * @off: offset in skb
+ * @len: skb->len - offset : payload in skb
+ */
+int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb,
+ unsigned int off, size_t len)
+{
+ struct siw_qp *qp = rd_desc->arg.data;
+ struct siw_rx_stream *srx = &qp->rx_stream;
+ int rv;
+
+ srx->skb = skb;
+ srx->skb_new = skb->len - off;
+ srx->skb_offset = off;
+ srx->skb_copied = 0;
+
+ siw_dbg_qp(qp, "new data, len %d\n", srx->skb_new);
+
+ while (srx->skb_new) {
+ int run_completion = 1;
+
+ if (unlikely(srx->rx_suspend)) {
+ /* Do not process any more data */
+ srx->skb_copied += srx->skb_new;
+ break;
+ }
+ switch (srx->state) {
+ case SIW_GET_HDR:
+ rv = siw_get_hdr(srx);
+ if (!rv) {
+ srx->fpdu_part_rem =
+ be16_to_cpu(srx->hdr.ctrl.mpa_len) -
+ srx->fpdu_part_rcvd + MPA_HDR_SIZE;
+
+ if (srx->fpdu_part_rem)
+ srx->pad = -srx->fpdu_part_rem & 0x3;
+ else
+ srx->pad = 0;
+
+ srx->state = SIW_GET_DATA_START;
+ srx->fpdu_part_rcvd = 0;
+ }
+ break;
+
+ case SIW_GET_DATA_MORE:
+ /*
+ * Another data fragment of the same DDP segment.
+ * Setting first_ddp_seg = 0 avoids repeating
+ * initializations that shall occur only once per
+ * DDP segment.
+ */
+ qp->rx_fpdu->first_ddp_seg = 0;
+ /* Fall through */
+
+ case SIW_GET_DATA_START:
+ /*
+ * Headers will be checked by the opcode-specific
+ * data receive function below.
+ */
+ rv = iwarp_pktinfo[qp->rx_stream.rdmap_op].rx_data(qp);
+ if (!rv) {
+ int mpa_len =
+ be16_to_cpu(srx->hdr.ctrl.mpa_len)
+ + MPA_HDR_SIZE;
+
+ srx->fpdu_part_rem = (-mpa_len & 0x3)
+ + MPA_CRC_SIZE;
+ srx->fpdu_part_rcvd = 0;
+ srx->state = SIW_GET_TRAILER;
+ } else {
+ if (unlikely(rv == -ECONNRESET))
+ run_completion = 0;
+ else
+ srx->state = SIW_GET_DATA_MORE;
+ }
+ break;
+
+ case SIW_GET_TRAILER:
+ /*
+ * read CRC + any padding
+ */
+ rv = siw_get_trailer(qp, srx);
+ if (likely(!rv)) {
+ /*
+ * FPDU completed.
+ * complete RDMAP message if last fragment
+ */
+ srx->state = SIW_GET_HDR;
+ srx->fpdu_part_rcvd = 0;
+
+ if (!(srx->hdr.ctrl.ddp_rdmap_ctrl &
+ DDP_FLAG_LAST))
+ /* more frags */
+ break;
+
+ rv = siw_rdmap_complete(qp, 0);
+ run_completion = 0;
+ }
+ break;
+
+ default:
+ pr_warn("QP[%u]: RX out of state\n", qp_id(qp));
+ rv = -EPROTO;
+ run_completion = 0;
+ }
+ if (unlikely(rv != 0 && rv != -EAGAIN)) {
+ if ((srx->state > SIW_GET_HDR ||
+ qp->rx_fpdu->more_ddp_segs) && run_completion)
+ siw_rdmap_complete(qp, rv);
+
+ siw_dbg_qp(qp, "rx error %d, rx state %d\n", rv,
+ srx->state);
+
+ siw_qp_cm_drop(qp, 1);
+
+ break;
+ }
+ if (rv) {
+ siw_dbg_qp(qp, "fpdu fragment, state %d, missing %d\n",
+ srx->state, srx->fpdu_part_rem);
+ break;
+ }
+ }
+ return srx->skb_copied;
+}
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
new file mode 100644
index 000000000000..43020d2040fc
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -0,0 +1,1269 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/scatterlist.h>
+#include <linux/highmem.h>
+#include <net/tcp.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "siw.h"
+#include "siw_verbs.h"
+#include "siw_mem.h"
+
+#define MAX_HDR_INLINE \
+ (((uint32_t)(sizeof(struct siw_rreq_pkt) - \
+ sizeof(struct iwarp_send))) & 0xF8)
+
+static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx)
+{
+ struct siw_pbl *pbl = mem->pbl;
+ u64 offset = addr - mem->va;
+ u64 paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx);
+
+ if (paddr)
+ return virt_to_page(paddr);
+
+ return NULL;
+}
+
+/*
+ * Copy short payload at provided destination payload address
+ */
+static int siw_try_1seg(struct siw_iwarp_tx *c_tx, u64 paddr)
+{
+ struct siw_wqe *wqe = &c_tx->wqe_active;
+ struct siw_sge *sge = &wqe->sqe.sge[0];
+ u32 bytes = sge->length;
+
+ if (bytes > MAX_HDR_INLINE || wqe->sqe.num_sge != 1)
+ return MAX_HDR_INLINE + 1;
+
+ if (!bytes)
+ return 0;
+
+ if (tx_flags(wqe) & SIW_WQE_INLINE) {
+ memcpy((void *)paddr, &wqe->sqe.sge[1], bytes);
+ } else {
+ struct siw_mem *mem = wqe->mem[0];
+
+ if (!mem->mem_obj) {
+ /* Kernel client using kva */
+ memcpy((void *)paddr, (void *)sge->laddr, bytes);
+ } else if (c_tx->in_syscall) {
+ if (copy_from_user((void *)paddr,
+ (const void __user *)sge->laddr,
+ bytes))
+ return -EFAULT;
+ } else {
+ unsigned int off = sge->laddr & ~PAGE_MASK;
+ struct page *p;
+ char *buffer;
+ int pbl_idx = 0;
+
+ if (!mem->is_pbl)
+ p = siw_get_upage(mem->umem, sge->laddr);
+ else
+ p = siw_get_pblpage(mem, sge->laddr, &pbl_idx);
+
+ if (unlikely(!p))
+ return -EFAULT;
+
+ buffer = kmap_atomic(p);
+
+ if (likely(PAGE_SIZE - off >= bytes)) {
+ memcpy((void *)paddr, buffer + off, bytes);
+ kunmap_atomic(buffer);
+ } else {
+ unsigned long part = bytes - (PAGE_SIZE - off);
+
+ memcpy((void *)paddr, buffer + off, part);
+ kunmap_atomic(buffer);
+
+ if (!mem->is_pbl)
+ p = siw_get_upage(mem->umem,
+ sge->laddr + part);
+ else
+ p = siw_get_pblpage(mem,
+ sge->laddr + part,
+ &pbl_idx);
+ if (unlikely(!p))
+ return -EFAULT;
+
+ buffer = kmap_atomic(p);
+ memcpy((void *)(paddr + part), buffer,
+ bytes - part);
+ kunmap_atomic(buffer);
+ }
+ }
+ }
+ return (int)bytes;
+}
+
+#define PKT_FRAGMENTED 1
+#define PKT_COMPLETE 0
+
+/*
+ * siw_qp_prepare_tx()
+ *
+ * Prepare tx state for sending out one fpdu. Builds complete pkt
+ * if no user data or only immediate data are present.
+ *
+ * returns PKT_COMPLETE if complete pkt built, PKT_FRAGMENTED otherwise.
+ */
+static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx)
+{
+ struct siw_wqe *wqe = &c_tx->wqe_active;
+ char *crc = NULL;
+ int data = 0;
+
+ switch (tx_type(wqe)) {
+ case SIW_OP_READ:
+ case SIW_OP_READ_LOCAL_INV:
+ memcpy(&c_tx->pkt.ctrl,
+ &iwarp_pktinfo[RDMAP_RDMA_READ_REQ].ctrl,
+ sizeof(struct iwarp_ctrl));
+
+ c_tx->pkt.rreq.rsvd = 0;
+ c_tx->pkt.rreq.ddp_qn = htonl(RDMAP_UNTAGGED_QN_RDMA_READ);
+ c_tx->pkt.rreq.ddp_msn =
+ htonl(++c_tx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]);
+ c_tx->pkt.rreq.ddp_mo = 0;
+ c_tx->pkt.rreq.sink_stag = htonl(wqe->sqe.sge[0].lkey);
+ c_tx->pkt.rreq.sink_to =
+ cpu_to_be64(wqe->sqe.sge[0].laddr);
+ c_tx->pkt.rreq.source_stag = htonl(wqe->sqe.rkey);
+ c_tx->pkt.rreq.source_to = cpu_to_be64(wqe->sqe.raddr);
+ c_tx->pkt.rreq.read_size = htonl(wqe->sqe.sge[0].length);
+
+ c_tx->ctrl_len = sizeof(struct iwarp_rdma_rreq);
+ crc = (char *)&c_tx->pkt.rreq_pkt.crc;
+ break;
+
+ case SIW_OP_SEND:
+ if (tx_flags(wqe) & SIW_WQE_SOLICITED)
+ memcpy(&c_tx->pkt.ctrl,
+ &iwarp_pktinfo[RDMAP_SEND_SE].ctrl,
+ sizeof(struct iwarp_ctrl));
+ else
+ memcpy(&c_tx->pkt.ctrl, &iwarp_pktinfo[RDMAP_SEND].ctrl,
+ sizeof(struct iwarp_ctrl));
+
+ c_tx->pkt.send.ddp_qn = RDMAP_UNTAGGED_QN_SEND;
+ c_tx->pkt.send.ddp_msn =
+ htonl(++c_tx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
+ c_tx->pkt.send.ddp_mo = 0;
+
+ c_tx->pkt.send_inv.inval_stag = 0;
+
+ c_tx->ctrl_len = sizeof(struct iwarp_send);
+
+ crc = (char *)&c_tx->pkt.send_pkt.crc;
+ data = siw_try_1seg(c_tx, (u64)crc);
+ break;
+
+ case SIW_OP_SEND_REMOTE_INV:
+ if (tx_flags(wqe) & SIW_WQE_SOLICITED)
+ memcpy(&c_tx->pkt.ctrl,
+ &iwarp_pktinfo[RDMAP_SEND_SE_INVAL].ctrl,
+ sizeof(struct iwarp_ctrl));
+ else
+ memcpy(&c_tx->pkt.ctrl,
+ &iwarp_pktinfo[RDMAP_SEND_INVAL].ctrl,
+ sizeof(struct iwarp_ctrl));
+
+ c_tx->pkt.send.ddp_qn = RDMAP_UNTAGGED_QN_SEND;
+ c_tx->pkt.send.ddp_msn =
+ htonl(++c_tx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
+ c_tx->pkt.send.ddp_mo = 0;
+
+ c_tx->pkt.send_inv.inval_stag = cpu_to_be32(wqe->sqe.rkey);
+
+ c_tx->ctrl_len = sizeof(struct iwarp_send_inv);
+
+ crc = (char *)&c_tx->pkt.send_pkt.crc;
+ data = siw_try_1seg(c_tx, (u64)crc);
+ break;
+
+ case SIW_OP_WRITE:
+ memcpy(&c_tx->pkt.ctrl, &iwarp_pktinfo[RDMAP_RDMA_WRITE].ctrl,
+ sizeof(struct iwarp_ctrl));
+
+ c_tx->pkt.rwrite.sink_stag = htonl(wqe->sqe.rkey);
+ c_tx->pkt.rwrite.sink_to = cpu_to_be64(wqe->sqe.raddr);
+ c_tx->ctrl_len = sizeof(struct iwarp_rdma_write);
+
+ crc = (char *)&c_tx->pkt.write_pkt.crc;
+ data = siw_try_1seg(c_tx, (u64)crc);
+ break;
+
+ case SIW_OP_READ_RESPONSE:
+ memcpy(&c_tx->pkt.ctrl,
+ &iwarp_pktinfo[RDMAP_RDMA_READ_RESP].ctrl,
+ sizeof(struct iwarp_ctrl));
+
+ /* NBO */
+ c_tx->pkt.rresp.sink_stag = cpu_to_be32(wqe->sqe.rkey);
+ c_tx->pkt.rresp.sink_to = cpu_to_be64(wqe->sqe.raddr);
+
+ c_tx->ctrl_len = sizeof(struct iwarp_rdma_rresp);
+
+ crc = (char *)&c_tx->pkt.write_pkt.crc;
+ data = siw_try_1seg(c_tx, (u64)crc);
+ break;
+
+ default:
+ siw_dbg_qp(tx_qp(c_tx), "stale wqe type %d\n", tx_type(wqe));
+ return -EOPNOTSUPP;
+ }
+ if (unlikely(data < 0))
+ return data;
+
+ c_tx->ctrl_sent = 0;
+
+ if (data <= MAX_HDR_INLINE) {
+ if (data) {
+ wqe->processed = data;
+
+ c_tx->pkt.ctrl.mpa_len =
+ htons(c_tx->ctrl_len + data - MPA_HDR_SIZE);
+
+ /* Add pad, if needed */
+ data += -(int)data & 0x3;
+ /* advance CRC location after payload */
+ crc += data;
+ c_tx->ctrl_len += data;
+
+ if (!(c_tx->pkt.ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED))
+ c_tx->pkt.c_untagged.ddp_mo = 0;
+ else
+ c_tx->pkt.c_tagged.ddp_to =
+ cpu_to_be64(wqe->sqe.raddr);
+ }
+
+ *(u32 *)crc = 0;
+ /*
+ * Do complete CRC if enabled and short packet
+ */
+ if (c_tx->mpa_crc_hd) {
+ crypto_shash_init(c_tx->mpa_crc_hd);
+ if (crypto_shash_update(c_tx->mpa_crc_hd,
+ (u8 *)&c_tx->pkt,
+ c_tx->ctrl_len))
+ return -EINVAL;
+ crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)crc);
+ }
+ c_tx->ctrl_len += MPA_CRC_SIZE;
+
+ return PKT_COMPLETE;
+ }
+ c_tx->ctrl_len += MPA_CRC_SIZE;
+ c_tx->sge_idx = 0;
+ c_tx->sge_off = 0;
+ c_tx->pbl_idx = 0;
+
+ /*
+ * Allow direct sending out of user buffer if WR is non signalled
+ * and payload is over threshold.
+ * Per RDMA verbs, the application should not change the send buffer
+ * until the work completed. In iWarp, work completion is only
+ * local delivery to TCP. TCP may reuse the buffer for
+ * retransmission. Changing unsent data also breaks the CRC,
+ * if applied.
+ */
+ if (c_tx->zcopy_tx && wqe->bytes >= SENDPAGE_THRESH &&
+ !(tx_flags(wqe) & SIW_WQE_SIGNALLED))
+ c_tx->use_sendpage = 1;
+ else
+ c_tx->use_sendpage = 0;
+
+ return PKT_FRAGMENTED;
+}
+
+/*
+ * Send out one complete control type FPDU, or header of FPDU carrying
+ * data. Used for fixed sized packets like Read.Requests or zero length
+ * SENDs, WRITEs, READ.Responses, or header only.
+ */
+static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s,
+ int flags)
+{
+ struct msghdr msg = { .msg_flags = flags };
+ struct kvec iov = { .iov_base =
+ (char *)&c_tx->pkt.ctrl + c_tx->ctrl_sent,
+ .iov_len = c_tx->ctrl_len - c_tx->ctrl_sent };
+
+ int rv = kernel_sendmsg(s, &msg, &iov, 1,
+ c_tx->ctrl_len - c_tx->ctrl_sent);
+
+ if (rv >= 0) {
+ c_tx->ctrl_sent += rv;
+
+ if (c_tx->ctrl_sent == c_tx->ctrl_len)
+ rv = 0;
+ else
+ rv = -EAGAIN;
+ }
+ return rv;
+}
+
+/*
+ * 0copy TCP transmit interface: Use do_tcp_sendpages.
+ *
+ * Using sendpage to push page by page appears to be less efficient
+ * than using sendmsg, even if data are copied.
+ *
+ * A general performance limitation might be the extra four bytes
+ * trailer checksum segment to be pushed after user data.
+ */
+static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset,
+ size_t size)
+{
+ struct sock *sk = s->sk;
+ int i = 0, rv = 0, sent = 0,
+ flags = MSG_MORE | MSG_DONTWAIT | MSG_SENDPAGE_NOTLAST;
+
+ while (size) {
+ size_t bytes = min_t(size_t, PAGE_SIZE - offset, size);
+
+ if (size + offset <= PAGE_SIZE)
+ flags = MSG_MORE | MSG_DONTWAIT;
+
+ tcp_rate_check_app_limited(sk);
+try_page_again:
+ lock_sock(sk);
+ rv = do_tcp_sendpages(sk, page[i], offset, bytes, flags);
+ release_sock(sk);
+
+ if (rv > 0) {
+ size -= rv;
+ sent += rv;
+ if (rv != bytes) {
+ offset += rv;
+ bytes -= rv;
+ goto try_page_again;
+ }
+ offset = 0;
+ } else {
+ if (rv == -EAGAIN || rv == 0)
+ break;
+ return rv;
+ }
+ i++;
+ }
+ return sent;
+}
+
+/*
+ * siw_0copy_tx()
+ *
+ * Pushes list of pages to TCP socket. If pages from multiple
+ * SGE's, all referenced pages of each SGE are pushed in one
+ * shot.
+ */
+static int siw_0copy_tx(struct socket *s, struct page **page,
+ struct siw_sge *sge, unsigned int offset,
+ unsigned int size)
+{
+ int i = 0, sent = 0, rv;
+ int sge_bytes = min(sge->length - offset, size);
+
+ offset = (sge->laddr + offset) & ~PAGE_MASK;
+
+ while (sent != size) {
+ rv = siw_tcp_sendpages(s, &page[i], offset, sge_bytes);
+ if (rv >= 0) {
+ sent += rv;
+ if (size == sent || sge_bytes > rv)
+ break;
+
+ i += PAGE_ALIGN(sge_bytes + offset) >> PAGE_SHIFT;
+ sge++;
+ sge_bytes = min(sge->length, size - sent);
+ offset = sge->laddr & ~PAGE_MASK;
+ } else {
+ sent = rv;
+ break;
+ }
+ }
+ return sent;
+}
+
+#define MAX_TRAILER (MPA_CRC_SIZE + 4)
+
+static void siw_unmap_pages(struct page **pages, int hdr_len, int num_maps)
+{
+ if (hdr_len) {
+ ++pages;
+ --num_maps;
+ }
+ while (num_maps-- > 0) {
+ kunmap(*pages);
+ pages++;
+ }
+}
+
+/*
+ * siw_tx_hdt() tries to push a complete packet to TCP where all
+ * packet fragments are referenced by the elements of one iovec.
+ * For the data portion, each involved page must be referenced by
+ * one extra element. All sge's data can be non-aligned to page
+ * boundaries. Two more elements are referencing iWARP header
+ * and trailer:
+ * MAX_ARRAY = 64KB/PAGE_SIZE + 1 + (2 * (SIW_MAX_SGE - 1) + HDR + TRL
+ */
+#define MAX_ARRAY ((0xffff / PAGE_SIZE) + 1 + (2 * (SIW_MAX_SGE - 1) + 2))
+
+/*
+ * Write out iov referencing hdr, data and trailer of current FPDU.
+ * Update transmit state dependent on write return status
+ */
+static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
+{
+ struct siw_wqe *wqe = &c_tx->wqe_active;
+ struct siw_sge *sge = &wqe->sqe.sge[c_tx->sge_idx];
+ struct kvec iov[MAX_ARRAY];
+ struct page *page_array[MAX_ARRAY];
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR };
+
+ int seg = 0, do_crc = c_tx->do_crc, is_kva = 0, rv;
+ unsigned int data_len = c_tx->bytes_unsent, hdr_len = 0, trl_len = 0,
+ sge_off = c_tx->sge_off, sge_idx = c_tx->sge_idx,
+ pbl_idx = c_tx->pbl_idx;
+
+ if (c_tx->state == SIW_SEND_HDR) {
+ if (c_tx->use_sendpage) {
+ rv = siw_tx_ctrl(c_tx, s, MSG_DONTWAIT | MSG_MORE);
+ if (rv)
+ goto done;
+
+ c_tx->state = SIW_SEND_DATA;
+ } else {
+ iov[0].iov_base =
+ (char *)&c_tx->pkt.ctrl + c_tx->ctrl_sent;
+ iov[0].iov_len = hdr_len =
+ c_tx->ctrl_len - c_tx->ctrl_sent;
+ seg = 1;
+ }
+ }
+
+ wqe->processed += data_len;
+
+ while (data_len) { /* walk the list of SGE's */
+ unsigned int sge_len = min(sge->length - sge_off, data_len);
+ unsigned int fp_off = (sge->laddr + sge_off) & ~PAGE_MASK;
+ struct siw_mem *mem;
+
+ if (!(tx_flags(wqe) & SIW_WQE_INLINE)) {
+ mem = wqe->mem[sge_idx];
+ if (!mem->mem_obj)
+ is_kva = 1;
+ } else {
+ is_kva = 1;
+ }
+ if (is_kva && !c_tx->use_sendpage) {
+ /*
+ * tx from kernel virtual address: either inline data
+ * or memory region with assigned kernel buffer
+ */
+ iov[seg].iov_base = (void *)(sge->laddr + sge_off);
+ iov[seg].iov_len = sge_len;
+
+ if (do_crc)
+ crypto_shash_update(c_tx->mpa_crc_hd,
+ iov[seg].iov_base,
+ sge_len);
+ sge_off += sge_len;
+ data_len -= sge_len;
+ seg++;
+ goto sge_done;
+ }
+
+ while (sge_len) {
+ size_t plen = min((int)PAGE_SIZE - fp_off, sge_len);
+
+ if (!is_kva) {
+ struct page *p;
+
+ if (mem->is_pbl)
+ p = siw_get_pblpage(
+ mem, sge->laddr + sge_off,
+ &pbl_idx);
+ else
+ p = siw_get_upage(mem->umem,
+ sge->laddr + sge_off);
+ if (unlikely(!p)) {
+ if (hdr_len)
+ seg--;
+ if (!c_tx->use_sendpage && seg) {
+ siw_unmap_pages(page_array,
+ hdr_len, seg);
+ }
+ wqe->processed -= c_tx->bytes_unsent;
+ rv = -EFAULT;
+ goto done_crc;
+ }
+ page_array[seg] = p;
+
+ if (!c_tx->use_sendpage) {
+ iov[seg].iov_base = kmap(p) + fp_off;
+ iov[seg].iov_len = plen;
+ if (do_crc)
+ crypto_shash_update(
+ c_tx->mpa_crc_hd,
+ iov[seg].iov_base,
+ plen);
+ } else if (do_crc)
+ crypto_shash_update(
+ c_tx->mpa_crc_hd,
+ page_address(p) + fp_off,
+ plen);
+ } else {
+ u64 pa = ((sge->laddr + sge_off) & PAGE_MASK);
+
+ page_array[seg] = virt_to_page(pa);
+ if (do_crc)
+ crypto_shash_update(
+ c_tx->mpa_crc_hd,
+ (void *)(sge->laddr + sge_off),
+ plen);
+ }
+
+ sge_len -= plen;
+ sge_off += plen;
+ data_len -= plen;
+ fp_off = 0;
+
+ if (++seg > (int)MAX_ARRAY) {
+ siw_dbg_qp(tx_qp(c_tx), "to many fragments\n");
+ if (!is_kva && !c_tx->use_sendpage) {
+ siw_unmap_pages(page_array, hdr_len,
+ seg - 1);
+ }
+ wqe->processed -= c_tx->bytes_unsent;
+ rv = -EMSGSIZE;
+ goto done_crc;
+ }
+ }
+sge_done:
+ /* Update SGE variables at end of SGE */
+ if (sge_off == sge->length &&
+ (data_len != 0 || wqe->processed < wqe->bytes)) {
+ sge_idx++;
+ sge++;
+ sge_off = 0;
+ }
+ }
+ /* trailer */
+ if (likely(c_tx->state != SIW_SEND_TRAILER)) {
+ iov[seg].iov_base = &c_tx->trailer.pad[4 - c_tx->pad];
+ iov[seg].iov_len = trl_len = MAX_TRAILER - (4 - c_tx->pad);
+ } else {
+ iov[seg].iov_base = &c_tx->trailer.pad[c_tx->ctrl_sent];
+ iov[seg].iov_len = trl_len = MAX_TRAILER - c_tx->ctrl_sent;
+ }
+
+ if (c_tx->pad) {
+ *(u32 *)c_tx->trailer.pad = 0;
+ if (do_crc)
+ crypto_shash_update(c_tx->mpa_crc_hd,
+ (u8 *)&c_tx->trailer.crc - c_tx->pad,
+ c_tx->pad);
+ }
+ if (!c_tx->mpa_crc_hd)
+ c_tx->trailer.crc = 0;
+ else if (do_crc)
+ crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)&c_tx->trailer.crc);
+
+ data_len = c_tx->bytes_unsent;
+
+ if (c_tx->use_sendpage) {
+ rv = siw_0copy_tx(s, page_array, &wqe->sqe.sge[c_tx->sge_idx],
+ c_tx->sge_off, data_len);
+ if (rv == data_len) {
+ rv = kernel_sendmsg(s, &msg, &iov[seg], 1, trl_len);
+ if (rv > 0)
+ rv += data_len;
+ else
+ rv = data_len;
+ }
+ } else {
+ rv = kernel_sendmsg(s, &msg, iov, seg + 1,
+ hdr_len + data_len + trl_len);
+ if (!is_kva)
+ siw_unmap_pages(page_array, hdr_len, seg);
+ }
+ if (rv < (int)hdr_len) {
+ /* Not even complete hdr pushed or negative rv */
+ wqe->processed -= data_len;
+ if (rv >= 0) {
+ c_tx->ctrl_sent += rv;
+ rv = -EAGAIN;
+ }
+ goto done_crc;
+ }
+ rv -= hdr_len;
+
+ if (rv >= (int)data_len) {
+ /* all user data pushed to TCP or no data to push */
+ if (data_len > 0 && wqe->processed < wqe->bytes) {
+ /* Save the current state for next tx */
+ c_tx->sge_idx = sge_idx;
+ c_tx->sge_off = sge_off;
+ c_tx->pbl_idx = pbl_idx;
+ }
+ rv -= data_len;
+
+ if (rv == trl_len) /* all pushed */
+ rv = 0;
+ else {
+ c_tx->state = SIW_SEND_TRAILER;
+ c_tx->ctrl_len = MAX_TRAILER;
+ c_tx->ctrl_sent = rv + 4 - c_tx->pad;
+ c_tx->bytes_unsent = 0;
+ rv = -EAGAIN;
+ }
+
+ } else if (data_len > 0) {
+ /* Maybe some user data pushed to TCP */
+ c_tx->state = SIW_SEND_DATA;
+ wqe->processed -= data_len - rv;
+
+ if (rv) {
+ /*
+ * Some bytes out. Recompute tx state based
+ * on old state and bytes pushed
+ */
+ unsigned int sge_unsent;
+
+ c_tx->bytes_unsent -= rv;
+ sge = &wqe->sqe.sge[c_tx->sge_idx];
+ sge_unsent = sge->length - c_tx->sge_off;
+
+ while (sge_unsent <= rv) {
+ rv -= sge_unsent;
+ c_tx->sge_idx++;
+ c_tx->sge_off = 0;
+ sge++;
+ sge_unsent = sge->length;
+ }
+ c_tx->sge_off += rv;
+ }
+ rv = -EAGAIN;
+ }
+done_crc:
+ c_tx->do_crc = 0;
+done:
+ return rv;
+}
+
+static void siw_update_tcpseg(struct siw_iwarp_tx *c_tx,
+ struct socket *s)
+{
+ struct tcp_sock *tp = tcp_sk(s->sk);
+
+ if (tp->gso_segs) {
+ if (c_tx->gso_seg_limit == 0)
+ c_tx->tcp_seglen = tp->mss_cache * tp->gso_segs;
+ else
+ c_tx->tcp_seglen =
+ tp->mss_cache *
+ min_t(u16, c_tx->gso_seg_limit, tp->gso_segs);
+ } else {
+ c_tx->tcp_seglen = tp->mss_cache;
+ }
+ /* Loopback may give odd numbers */
+ c_tx->tcp_seglen &= 0xfffffff8;
+}
+
+/*
+ * siw_prepare_fpdu()
+ *
+ * Prepares transmit context to send out one FPDU if FPDU will contain
+ * user data and user data are not immediate data.
+ * Computes maximum FPDU length to fill up TCP MSS if possible.
+ *
+ * @qp: QP from which to transmit
+ * @wqe: Current WQE causing transmission
+ *
+ * TODO: Take into account real available sendspace on socket
+ * to avoid header misalignment due to send pausing within
+ * fpdu transmission
+ */
+static void siw_prepare_fpdu(struct siw_qp *qp, struct siw_wqe *wqe)
+{
+ struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
+ int data_len;
+
+ c_tx->ctrl_len =
+ iwarp_pktinfo[__rdmap_get_opcode(&c_tx->pkt.ctrl)].hdr_len;
+ c_tx->ctrl_sent = 0;
+
+ /*
+ * Update target buffer offset if any
+ */
+ if (!(c_tx->pkt.ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED))
+ /* Untagged message */
+ c_tx->pkt.c_untagged.ddp_mo = cpu_to_be32(wqe->processed);
+ else /* Tagged message */
+ c_tx->pkt.c_tagged.ddp_to =
+ cpu_to_be64(wqe->sqe.raddr + wqe->processed);
+
+ data_len = wqe->bytes - wqe->processed;
+ if (data_len + c_tx->ctrl_len + MPA_CRC_SIZE > c_tx->tcp_seglen) {
+ /* Trim DDP payload to fit into current TCP segment */
+ data_len = c_tx->tcp_seglen - (c_tx->ctrl_len + MPA_CRC_SIZE);
+ c_tx->pkt.ctrl.ddp_rdmap_ctrl &= ~DDP_FLAG_LAST;
+ c_tx->pad = 0;
+ } else {
+ c_tx->pkt.ctrl.ddp_rdmap_ctrl |= DDP_FLAG_LAST;
+ c_tx->pad = -data_len & 0x3;
+ }
+ c_tx->bytes_unsent = data_len;
+
+ c_tx->pkt.ctrl.mpa_len =
+ htons(c_tx->ctrl_len + data_len - MPA_HDR_SIZE);
+
+ /*
+ * Init MPA CRC computation
+ */
+ if (c_tx->mpa_crc_hd) {
+ crypto_shash_init(c_tx->mpa_crc_hd);
+ crypto_shash_update(c_tx->mpa_crc_hd, (u8 *)&c_tx->pkt,
+ c_tx->ctrl_len);
+ c_tx->do_crc = 1;
+ }
+}
+
+/*
+ * siw_check_sgl_tx()
+ *
+ * Check permissions for a list of SGE's (SGL).
+ * A successful check will have all memory referenced
+ * for transmission resolved and assigned to the WQE.
+ *
+ * @pd: Protection Domain SGL should belong to
+ * @wqe: WQE to be checked
+ * @perms: requested access permissions
+ *
+ */
+
+static int siw_check_sgl_tx(struct ib_pd *pd, struct siw_wqe *wqe,
+ enum ib_access_flags perms)
+{
+ struct siw_sge *sge = &wqe->sqe.sge[0];
+ int i, len, num_sge = wqe->sqe.num_sge;
+
+ if (unlikely(num_sge > SIW_MAX_SGE))
+ return -EINVAL;
+
+ for (i = 0, len = 0; num_sge; num_sge--, i++, sge++) {
+ /*
+ * rdma verbs: do not check stag for a zero length sge
+ */
+ if (sge->length) {
+ int rv = siw_check_sge(pd, sge, &wqe->mem[i], perms, 0,
+ sge->length);
+
+ if (unlikely(rv != E_ACCESS_OK))
+ return rv;
+ }
+ len += sge->length;
+ }
+ return len;
+}
+
+/*
+ * siw_qp_sq_proc_tx()
+ *
+ * Process one WQE which needs transmission on the wire.
+ */
+static int siw_qp_sq_proc_tx(struct siw_qp *qp, struct siw_wqe *wqe)
+{
+ struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
+ struct socket *s = qp->attrs.sk;
+ int rv = 0, burst_len = qp->tx_ctx.burst;
+ enum rdmap_ecode ecode = RDMAP_ECODE_CATASTROPHIC_STREAM;
+
+ if (unlikely(wqe->wr_status == SIW_WR_IDLE))
+ return 0;
+
+ if (!burst_len)
+ burst_len = SQ_USER_MAXBURST;
+
+ if (wqe->wr_status == SIW_WR_QUEUED) {
+ if (!(wqe->sqe.flags & SIW_WQE_INLINE)) {
+ if (tx_type(wqe) == SIW_OP_READ_RESPONSE)
+ wqe->sqe.num_sge = 1;
+
+ if (tx_type(wqe) != SIW_OP_READ &&
+ tx_type(wqe) != SIW_OP_READ_LOCAL_INV) {
+ /*
+ * Reference memory to be tx'd w/o checking
+ * access for LOCAL_READ permission, since
+ * not defined in RDMA core.
+ */
+ rv = siw_check_sgl_tx(qp->pd, wqe, 0);
+ if (rv < 0) {
+ if (tx_type(wqe) ==
+ SIW_OP_READ_RESPONSE)
+ ecode = siw_rdmap_error(-rv);
+ rv = -EINVAL;
+ goto tx_error;
+ }
+ wqe->bytes = rv;
+ } else {
+ wqe->bytes = 0;
+ }
+ } else {
+ wqe->bytes = wqe->sqe.sge[0].length;
+ if (!qp->kernel_verbs) {
+ if (wqe->bytes > SIW_MAX_INLINE) {
+ rv = -EINVAL;
+ goto tx_error;
+ }
+ wqe->sqe.sge[0].laddr = (u64)&wqe->sqe.sge[1];
+ }
+ }
+ wqe->wr_status = SIW_WR_INPROGRESS;
+ wqe->processed = 0;
+
+ siw_update_tcpseg(c_tx, s);
+
+ rv = siw_qp_prepare_tx(c_tx);
+ if (rv == PKT_FRAGMENTED) {
+ c_tx->state = SIW_SEND_HDR;
+ siw_prepare_fpdu(qp, wqe);
+ } else if (rv == PKT_COMPLETE) {
+ c_tx->state = SIW_SEND_SHORT_FPDU;
+ } else {
+ goto tx_error;
+ }
+ }
+
+next_segment:
+ siw_dbg_qp(qp, "wr type %d, state %d, data %u, sent %u, id %llx\n",
+ tx_type(wqe), wqe->wr_status, wqe->bytes, wqe->processed,
+ wqe->sqe.id);
+
+ if (--burst_len == 0) {
+ rv = -EINPROGRESS;
+ goto tx_done;
+ }
+ if (c_tx->state == SIW_SEND_SHORT_FPDU) {
+ enum siw_opcode tx_type = tx_type(wqe);
+ unsigned int msg_flags;
+
+ if (siw_sq_empty(qp) || !siw_tcp_nagle || burst_len == 1)
+ /*
+ * End current TCP segment, if SQ runs empty,
+ * or siw_tcp_nagle is not set, or we bail out
+ * soon due to no burst credit left.
+ */
+ msg_flags = MSG_DONTWAIT;
+ else
+ msg_flags = MSG_DONTWAIT | MSG_MORE;
+
+ rv = siw_tx_ctrl(c_tx, s, msg_flags);
+
+ if (!rv && tx_type != SIW_OP_READ &&
+ tx_type != SIW_OP_READ_LOCAL_INV)
+ wqe->processed = wqe->bytes;
+
+ goto tx_done;
+
+ } else {
+ rv = siw_tx_hdt(c_tx, s);
+ }
+ if (!rv) {
+ /*
+ * One segment sent. Processing completed if last
+ * segment, Do next segment otherwise.
+ */
+ if (unlikely(c_tx->tx_suspend)) {
+ /*
+ * Verbs, 6.4.: Try stopping sending after a full
+ * DDP segment if the connection goes down
+ * (== peer halfclose)
+ */
+ rv = -ECONNABORTED;
+ goto tx_done;
+ }
+ if (c_tx->pkt.ctrl.ddp_rdmap_ctrl & DDP_FLAG_LAST) {
+ siw_dbg_qp(qp, "WQE completed\n");
+ goto tx_done;
+ }
+ c_tx->state = SIW_SEND_HDR;
+
+ siw_update_tcpseg(c_tx, s);
+
+ siw_prepare_fpdu(qp, wqe);
+ goto next_segment;
+ }
+tx_done:
+ qp->tx_ctx.burst = burst_len;
+ return rv;
+
+tx_error:
+ if (ecode != RDMAP_ECODE_CATASTROPHIC_STREAM)
+ siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
+ RDMAP_ETYPE_REMOTE_PROTECTION, ecode, 1);
+ else
+ siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
+ RDMAP_ETYPE_CATASTROPHIC,
+ RDMAP_ECODE_UNSPECIFIED, 1);
+ return rv;
+}
+
+static int siw_fastreg_mr(struct ib_pd *pd, struct siw_sqe *sqe)
+{
+ struct ib_mr *base_mr = (struct ib_mr *)sqe->base_mr;
+ struct siw_device *sdev = to_siw_dev(pd->device);
+ struct siw_mem *mem = siw_mem_id2obj(sdev, sqe->rkey >> 8);
+ int rv = 0;
+
+ siw_dbg_pd(pd, "STag 0x%08x\n", sqe->rkey);
+
+ if (unlikely(!mem || !base_mr)) {
+ pr_warn("siw: fastreg: STag 0x%08x unknown\n", sqe->rkey);
+ return -EINVAL;
+ }
+ if (unlikely(base_mr->rkey >> 8 != sqe->rkey >> 8)) {
+ pr_warn("siw: fastreg: STag 0x%08x: bad MR\n", sqe->rkey);
+ rv = -EINVAL;
+ goto out;
+ }
+ if (unlikely(mem->pd != pd)) {
+ pr_warn("siw: fastreg: PD mismatch\n");
+ rv = -EINVAL;
+ goto out;
+ }
+ if (unlikely(mem->stag_valid)) {
+ pr_warn("siw: fastreg: STag 0x%08x already valid\n", sqe->rkey);
+ rv = -EINVAL;
+ goto out;
+ }
+ /* Refresh STag since user may have changed key part */
+ mem->stag = sqe->rkey;
+ mem->perms = sqe->access;
+
+ siw_dbg_mem(mem, "STag now valid, MR va: 0x%016llx -> 0x%016llx\n",
+ mem->va, base_mr->iova);
+ mem->va = base_mr->iova;
+ mem->stag_valid = 1;
+out:
+ siw_mem_put(mem);
+ return rv;
+}
+
+static int siw_qp_sq_proc_local(struct siw_qp *qp, struct siw_wqe *wqe)
+{
+ int rv;
+
+ switch (tx_type(wqe)) {
+ case SIW_OP_REG_MR:
+ rv = siw_fastreg_mr(qp->pd, &wqe->sqe);
+ break;
+
+ case SIW_OP_INVAL_STAG:
+ rv = siw_invalidate_stag(qp->pd, wqe->sqe.rkey);
+ break;
+
+ default:
+ rv = -EINVAL;
+ }
+ return rv;
+}
+
+/*
+ * siw_qp_sq_process()
+ *
+ * Core TX path routine for RDMAP/DDP/MPA using a TCP kernel socket.
+ * Sends RDMAP payload for the current SQ WR @wqe of @qp in one or more
+ * MPA FPDUs, each containing a DDP segment.
+ *
+ * SQ processing may occur in user context as a result of posting
+ * new WQE's or from siw_sq_work_handler() context. Processing in
+ * user context is limited to non-kernel verbs users.
+ *
+ * SQ processing may get paused anytime, possibly in the middle of a WR
+ * or FPDU, if insufficient send space is available. SQ processing
+ * gets resumed from siw_sq_work_handler(), if send space becomes
+ * available again.
+ *
+ * Must be called with the QP state read-locked.
+ *
+ * Note:
+ * An outbound RREQ can be satisfied by the corresponding RRESP
+ * _before_ it gets assigned to the ORQ. This happens regularly
+ * in RDMA READ via loopback case. Since both outbound RREQ and
+ * inbound RRESP can be handled by the same CPU, locking the ORQ
+ * is dead-lock prone and thus not an option. With that, the
+ * RREQ gets assigned to the ORQ _before_ being sent - see
+ * siw_activate_tx() - and pulled back in case of send failure.
+ */
+int siw_qp_sq_process(struct siw_qp *qp)
+{
+ struct siw_wqe *wqe = tx_wqe(qp);
+ enum siw_opcode tx_type;
+ unsigned long flags;
+ int rv = 0;
+
+ siw_dbg_qp(qp, "enter for type %d\n", tx_type(wqe));
+
+next_wqe:
+ /*
+ * Stop QP processing if SQ state changed
+ */
+ if (unlikely(qp->tx_ctx.tx_suspend)) {
+ siw_dbg_qp(qp, "tx suspended\n");
+ goto done;
+ }
+ tx_type = tx_type(wqe);
+
+ if (tx_type <= SIW_OP_READ_RESPONSE)
+ rv = siw_qp_sq_proc_tx(qp, wqe);
+ else
+ rv = siw_qp_sq_proc_local(qp, wqe);
+
+ if (!rv) {
+ /*
+ * WQE processing done
+ */
+ switch (tx_type) {
+ case SIW_OP_SEND:
+ case SIW_OP_SEND_REMOTE_INV:
+ case SIW_OP_WRITE:
+ siw_wqe_put_mem(wqe, tx_type);
+ /* Fall through */
+
+ case SIW_OP_INVAL_STAG:
+ case SIW_OP_REG_MR:
+ if (tx_flags(wqe) & SIW_WQE_SIGNALLED)
+ siw_sqe_complete(qp, &wqe->sqe, wqe->bytes,
+ SIW_WC_SUCCESS);
+ break;
+
+ case SIW_OP_READ:
+ case SIW_OP_READ_LOCAL_INV:
+ /*
+ * already enqueued to ORQ queue
+ */
+ break;
+
+ case SIW_OP_READ_RESPONSE:
+ siw_wqe_put_mem(wqe, tx_type);
+ break;
+
+ default:
+ WARN(1, "undefined WQE type %d\n", tx_type);
+ rv = -EINVAL;
+ goto done;
+ }
+
+ spin_lock_irqsave(&qp->sq_lock, flags);
+ wqe->wr_status = SIW_WR_IDLE;
+ rv = siw_activate_tx(qp);
+ spin_unlock_irqrestore(&qp->sq_lock, flags);
+
+ if (rv <= 0)
+ goto done;
+
+ goto next_wqe;
+
+ } else if (rv == -EAGAIN) {
+ siw_dbg_qp(qp, "sq paused: hd/tr %d of %d, data %d\n",
+ qp->tx_ctx.ctrl_sent, qp->tx_ctx.ctrl_len,
+ qp->tx_ctx.bytes_unsent);
+ rv = 0;
+ goto done;
+ } else if (rv == -EINPROGRESS) {
+ rv = siw_sq_start(qp);
+ goto done;
+ } else {
+ /*
+ * WQE processing failed.
+ * Verbs 8.3.2:
+ * o It turns any WQE into a signalled WQE.
+ * o Local catastrophic error must be surfaced
+ * o QP must be moved into Terminate state: done by code
+ * doing socket state change processing
+ *
+ * o TODO: Termination message must be sent.
+ * o TODO: Implement more precise work completion errors,
+ * see enum ib_wc_status in ib_verbs.h
+ */
+ siw_dbg_qp(qp, "wqe type %d processing failed: %d\n",
+ tx_type(wqe), rv);
+
+ spin_lock_irqsave(&qp->sq_lock, flags);
+ /*
+ * RREQ may have already been completed by inbound RRESP!
+ */
+ if (tx_type == SIW_OP_READ ||
+ tx_type == SIW_OP_READ_LOCAL_INV) {
+ /* Cleanup pending entry in ORQ */
+ qp->orq_put--;
+ qp->orq[qp->orq_put % qp->attrs.orq_size].flags = 0;
+ }
+ spin_unlock_irqrestore(&qp->sq_lock, flags);
+ /*
+ * immediately suspends further TX processing
+ */
+ if (!qp->tx_ctx.tx_suspend)
+ siw_qp_cm_drop(qp, 0);
+
+ switch (tx_type) {
+ case SIW_OP_SEND:
+ case SIW_OP_SEND_REMOTE_INV:
+ case SIW_OP_SEND_WITH_IMM:
+ case SIW_OP_WRITE:
+ case SIW_OP_READ:
+ case SIW_OP_READ_LOCAL_INV:
+ siw_wqe_put_mem(wqe, tx_type);
+ /* Fall through */
+
+ case SIW_OP_INVAL_STAG:
+ case SIW_OP_REG_MR:
+ siw_sqe_complete(qp, &wqe->sqe, wqe->bytes,
+ SIW_WC_LOC_QP_OP_ERR);
+
+ siw_qp_event(qp, IB_EVENT_QP_FATAL);
+
+ break;
+
+ case SIW_OP_READ_RESPONSE:
+ siw_dbg_qp(qp, "proc. read.response failed: %d\n", rv);
+
+ siw_qp_event(qp, IB_EVENT_QP_REQ_ERR);
+
+ siw_wqe_put_mem(wqe, SIW_OP_READ_RESPONSE);
+
+ break;
+
+ default:
+ WARN(1, "undefined WQE type %d\n", tx_type);
+ rv = -EINVAL;
+ }
+ wqe->wr_status = SIW_WR_IDLE;
+ }
+done:
+ return rv;
+}
+
+static void siw_sq_resume(struct siw_qp *qp)
+{
+ if (down_read_trylock(&qp->state_lock)) {
+ if (likely(qp->attrs.state == SIW_QP_STATE_RTS &&
+ !qp->tx_ctx.tx_suspend)) {
+ int rv = siw_qp_sq_process(qp);
+
+ up_read(&qp->state_lock);
+
+ if (unlikely(rv < 0)) {
+ siw_dbg_qp(qp, "SQ task failed: err %d\n", rv);
+
+ if (!qp->tx_ctx.tx_suspend)
+ siw_qp_cm_drop(qp, 0);
+ }
+ } else {
+ up_read(&qp->state_lock);
+ }
+ } else {
+ siw_dbg_qp(qp, "Resume SQ while QP locked\n");
+ }
+ siw_qp_put(qp);
+}
+
+struct tx_task_t {
+ struct llist_head active;
+ wait_queue_head_t waiting;
+};
+
+static DEFINE_PER_CPU(struct tx_task_t, siw_tx_task_g);
+
+void siw_stop_tx_thread(int nr_cpu)
+{
+ kthread_stop(siw_tx_thread[nr_cpu]);
+ wake_up(&per_cpu(siw_tx_task_g, nr_cpu).waiting);
+}
+
+int siw_run_sq(void *data)
+{
+ const int nr_cpu = (unsigned int)(long)data;
+ struct llist_node *active;
+ struct siw_qp *qp;
+ struct tx_task_t *tx_task = &per_cpu(siw_tx_task_g, nr_cpu);
+
+ init_llist_head(&tx_task->active);
+ init_waitqueue_head(&tx_task->waiting);
+
+ while (1) {
+ struct llist_node *fifo_list = NULL;
+
+ wait_event_interruptible(tx_task->waiting,
+ !llist_empty(&tx_task->active) ||
+ kthread_should_stop());
+
+ if (kthread_should_stop())
+ break;
+
+ active = llist_del_all(&tx_task->active);
+ /*
+ * llist_del_all returns a list with newest entry first.
+ * Re-order list for fairness among QP's.
+ */
+ while (active) {
+ struct llist_node *tmp = active;
+
+ active = llist_next(active);
+ tmp->next = fifo_list;
+ fifo_list = tmp;
+ }
+ while (fifo_list) {
+ qp = container_of(fifo_list, struct siw_qp, tx_list);
+ fifo_list = llist_next(fifo_list);
+ qp->tx_list.next = NULL;
+
+ siw_sq_resume(qp);
+ }
+ }
+ active = llist_del_all(&tx_task->active);
+ if (active) {
+ llist_for_each_entry(qp, active, tx_list) {
+ qp->tx_list.next = NULL;
+ siw_sq_resume(qp);
+ }
+ }
+ return 0;
+}
+
+int siw_sq_start(struct siw_qp *qp)
+{
+ if (tx_wqe(qp)->wr_status == SIW_WR_IDLE)
+ return 0;
+
+ if (unlikely(!cpu_online(qp->tx_cpu))) {
+ siw_put_tx_cpu(qp->tx_cpu);
+ qp->tx_cpu = siw_get_tx_cpu(qp->sdev);
+ if (qp->tx_cpu < 0) {
+ pr_warn("siw: no tx cpu available\n");
+
+ return -EIO;
+ }
+ }
+ siw_qp_get(qp);
+
+ llist_add(&qp->tx_list, &per_cpu(siw_tx_task_g, qp->tx_cpu).active);
+
+ wake_up(&per_cpu(siw_tx_task_g, qp->tx_cpu).waiting);
+
+ return 0;
+}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
new file mode 100644
index 000000000000..32dc79d0e898
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -0,0 +1,1760 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/xarray.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_ioctl.h>
+
+#include "siw.h"
+#include "siw_verbs.h"
+#include "siw_mem.h"
+
+static int ib_qp_state_to_siw_qp_state[IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = SIW_QP_STATE_IDLE,
+ [IB_QPS_INIT] = SIW_QP_STATE_IDLE,
+ [IB_QPS_RTR] = SIW_QP_STATE_RTR,
+ [IB_QPS_RTS] = SIW_QP_STATE_RTS,
+ [IB_QPS_SQD] = SIW_QP_STATE_CLOSING,
+ [IB_QPS_SQE] = SIW_QP_STATE_TERMINATE,
+ [IB_QPS_ERR] = SIW_QP_STATE_ERROR
+};
+
+static char ib_qp_state_to_string[IB_QPS_ERR + 1][sizeof("RESET")] = {
+ [IB_QPS_RESET] = "RESET", [IB_QPS_INIT] = "INIT", [IB_QPS_RTR] = "RTR",
+ [IB_QPS_RTS] = "RTS", [IB_QPS_SQD] = "SQD", [IB_QPS_SQE] = "SQE",
+ [IB_QPS_ERR] = "ERR"
+};
+
+static u32 siw_create_uobj(struct siw_ucontext *uctx, void *vaddr, u32 size)
+{
+ struct siw_uobj *uobj;
+ struct xa_limit limit = XA_LIMIT(0, SIW_UOBJ_MAX_KEY);
+ u32 key;
+
+ uobj = kzalloc(sizeof(*uobj), GFP_KERNEL);
+ if (!uobj)
+ return SIW_INVAL_UOBJ_KEY;
+
+ if (xa_alloc_cyclic(&uctx->xa, &key, uobj, limit, &uctx->uobj_nextkey,
+ GFP_KERNEL) < 0) {
+ kfree(uobj);
+ return SIW_INVAL_UOBJ_KEY;
+ }
+ uobj->size = PAGE_ALIGN(size);
+ uobj->addr = vaddr;
+
+ return key;
+}
+
+static struct siw_uobj *siw_get_uobj(struct siw_ucontext *uctx,
+ unsigned long off, u32 size)
+{
+ struct siw_uobj *uobj = xa_load(&uctx->xa, off);
+
+ if (uobj && uobj->size == size)
+ return uobj;
+
+ return NULL;
+}
+
+int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
+{
+ struct siw_ucontext *uctx = to_siw_ctx(ctx);
+ struct siw_uobj *uobj;
+ unsigned long off = vma->vm_pgoff;
+ int size = vma->vm_end - vma->vm_start;
+ int rv = -EINVAL;
+
+ /*
+ * Must be page aligned
+ */
+ if (vma->vm_start & (PAGE_SIZE - 1)) {
+ pr_warn("siw: mmap not page aligned\n");
+ goto out;
+ }
+ uobj = siw_get_uobj(uctx, off, size);
+ if (!uobj) {
+ siw_dbg(&uctx->sdev->base_dev, "mmap lookup failed: %lu, %u\n",
+ off, size);
+ goto out;
+ }
+ rv = remap_vmalloc_range(vma, uobj->addr, 0);
+ if (rv)
+ pr_warn("remap_vmalloc_range failed: %lu, %u\n", off, size);
+out:
+ return rv;
+}
+
+int siw_alloc_ucontext(struct ib_ucontext *base_ctx, struct ib_udata *udata)
+{
+ struct siw_device *sdev = to_siw_dev(base_ctx->device);
+ struct siw_ucontext *ctx = to_siw_ctx(base_ctx);
+ struct siw_uresp_alloc_ctx uresp = {};
+ int rv;
+
+ if (atomic_inc_return(&sdev->num_ctx) > SIW_MAX_CONTEXT) {
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ xa_init_flags(&ctx->xa, XA_FLAGS_ALLOC);
+ ctx->uobj_nextkey = 0;
+ ctx->sdev = sdev;
+
+ uresp.dev_id = sdev->vendor_part_id;
+
+ if (udata->outlen < sizeof(uresp)) {
+ rv = -EINVAL;
+ goto err_out;
+ }
+ rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (rv)
+ goto err_out;
+
+ siw_dbg(base_ctx->device, "success. now %d context(s)\n",
+ atomic_read(&sdev->num_ctx));
+
+ return 0;
+
+err_out:
+ atomic_dec(&sdev->num_ctx);
+ siw_dbg(base_ctx->device, "failure %d. now %d context(s)\n", rv,
+ atomic_read(&sdev->num_ctx));
+
+ return rv;
+}
+
+void siw_dealloc_ucontext(struct ib_ucontext *base_ctx)
+{
+ struct siw_ucontext *uctx = to_siw_ctx(base_ctx);
+ void *entry;
+ unsigned long index;
+
+ /*
+ * Make sure all user mmap objects are gone. Since QP, CQ
+ * and SRQ destroy routines destroy related objects, nothing
+ * should be found here.
+ */
+ xa_for_each(&uctx->xa, index, entry) {
+ kfree(xa_erase(&uctx->xa, index));
+ pr_warn("siw: dropping orphaned uobj at %lu\n", index);
+ }
+ xa_destroy(&uctx->xa);
+ atomic_dec(&uctx->sdev->num_ctx);
+}
+
+int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
+ struct ib_udata *udata)
+{
+ struct siw_device *sdev = to_siw_dev(base_dev);
+
+ if (udata->inlen || udata->outlen)
+ return -EINVAL;
+
+ memset(attr, 0, sizeof(*attr));
+
+ /* Revisit atomic caps if RFC 7306 gets supported */
+ attr->atomic_cap = 0;
+ attr->device_cap_flags =
+ IB_DEVICE_MEM_MGT_EXTENSIONS | IB_DEVICE_ALLOW_USER_UNREG;
+ attr->max_cq = sdev->attrs.max_cq;
+ attr->max_cqe = sdev->attrs.max_cqe;
+ attr->max_fast_reg_page_list_len = SIW_MAX_SGE_PBL;
+ attr->max_fmr = sdev->attrs.max_fmr;
+ attr->max_mr = sdev->attrs.max_mr;
+ attr->max_mw = sdev->attrs.max_mw;
+ attr->max_mr_size = ~0ull;
+ attr->max_pd = sdev->attrs.max_pd;
+ attr->max_qp = sdev->attrs.max_qp;
+ attr->max_qp_init_rd_atom = sdev->attrs.max_ird;
+ attr->max_qp_rd_atom = sdev->attrs.max_ord;
+ attr->max_qp_wr = sdev->attrs.max_qp_wr;
+ attr->max_recv_sge = sdev->attrs.max_sge;
+ attr->max_res_rd_atom = sdev->attrs.max_qp * sdev->attrs.max_ird;
+ attr->max_send_sge = sdev->attrs.max_sge;
+ attr->max_sge_rd = sdev->attrs.max_sge_rd;
+ attr->max_srq = sdev->attrs.max_srq;
+ attr->max_srq_sge = sdev->attrs.max_srq_sge;
+ attr->max_srq_wr = sdev->attrs.max_srq_wr;
+ attr->page_size_cap = PAGE_SIZE;
+ attr->vendor_id = SIW_VENDOR_ID;
+ attr->vendor_part_id = sdev->vendor_part_id;
+
+ memcpy(&attr->sys_image_guid, sdev->netdev->dev_addr, 6);
+
+ return 0;
+}
+
+int siw_query_port(struct ib_device *base_dev, u8 port,
+ struct ib_port_attr *attr)
+{
+ struct siw_device *sdev = to_siw_dev(base_dev);
+
+ memset(attr, 0, sizeof(*attr));
+
+ attr->active_mtu = attr->max_mtu;
+ attr->active_speed = 2;
+ attr->active_width = 2;
+ attr->gid_tbl_len = 1;
+ attr->max_msg_sz = -1;
+ attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
+ attr->phys_state = sdev->state == IB_PORT_ACTIVE ? 5 : 3;
+ attr->pkey_tbl_len = 1;
+ attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
+ attr->state = sdev->state;
+ /*
+ * All zero
+ *
+ * attr->lid = 0;
+ * attr->bad_pkey_cntr = 0;
+ * attr->qkey_viol_cntr = 0;
+ * attr->sm_lid = 0;
+ * attr->lmc = 0;
+ * attr->max_vl_num = 0;
+ * attr->sm_sl = 0;
+ * attr->subnet_timeout = 0;
+ * attr->init_type_repy = 0;
+ */
+ return 0;
+}
+
+int siw_get_port_immutable(struct ib_device *base_dev, u8 port,
+ struct ib_port_immutable *port_immutable)
+{
+ struct ib_port_attr attr;
+ int rv = siw_query_port(base_dev, port, &attr);
+
+ if (rv)
+ return rv;
+
+ port_immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ port_immutable->gid_tbl_len = attr.gid_tbl_len;
+ port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ return 0;
+}
+
+int siw_query_pkey(struct ib_device *base_dev, u8 port, u16 idx, u16 *pkey)
+{
+ /* Report the default pkey */
+ *pkey = 0xffff;
+ return 0;
+}
+
+int siw_query_gid(struct ib_device *base_dev, u8 port, int idx,
+ union ib_gid *gid)
+{
+ struct siw_device *sdev = to_siw_dev(base_dev);
+
+ /* subnet_prefix == interface_id == 0; */
+ memset(gid, 0, sizeof(*gid));
+ memcpy(&gid->raw[0], sdev->netdev->dev_addr, 6);
+
+ return 0;
+}
+
+int siw_alloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+{
+ struct siw_device *sdev = to_siw_dev(pd->device);
+
+ if (atomic_inc_return(&sdev->num_pd) > SIW_MAX_PD) {
+ atomic_dec(&sdev->num_pd);
+ return -ENOMEM;
+ }
+ siw_dbg_pd(pd, "now %d PD's(s)\n", atomic_read(&sdev->num_pd));
+
+ return 0;
+}
+
+void siw_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+{
+ struct siw_device *sdev = to_siw_dev(pd->device);
+
+ siw_dbg_pd(pd, "free PD\n");
+ atomic_dec(&sdev->num_pd);
+}
+
+void siw_qp_get_ref(struct ib_qp *base_qp)
+{
+ siw_qp_get(to_siw_qp(base_qp));
+}
+
+void siw_qp_put_ref(struct ib_qp *base_qp)
+{
+ siw_qp_put(to_siw_qp(base_qp));
+}
+
+/*
+ * siw_create_qp()
+ *
+ * Create QP of requested size on given device.
+ *
+ * @pd: Protection Domain
+ * @attrs: Initial QP attributes.
+ * @udata: used to provide QP ID, SQ and RQ size back to user.
+ */
+
+struct ib_qp *siw_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
+{
+ struct siw_qp *qp = NULL;
+ struct siw_base_qp *siw_base_qp = NULL;
+ struct ib_device *base_dev = pd->device;
+ struct siw_device *sdev = to_siw_dev(base_dev);
+ struct siw_ucontext *uctx =
+ rdma_udata_to_drv_context(udata, struct siw_ucontext,
+ base_ucontext);
+ struct siw_cq *scq = NULL, *rcq = NULL;
+ unsigned long flags;
+ int num_sqe, num_rqe, rv = 0;
+
+ siw_dbg(base_dev, "create new QP\n");
+
+ if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) {
+ siw_dbg(base_dev, "too many QP's\n");
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ if (attrs->qp_type != IB_QPT_RC) {
+ siw_dbg(base_dev, "only RC QP's supported\n");
+ rv = -EINVAL;
+ goto err_out;
+ }
+ if ((attrs->cap.max_send_wr > SIW_MAX_QP_WR) ||
+ (attrs->cap.max_recv_wr > SIW_MAX_QP_WR) ||
+ (attrs->cap.max_send_sge > SIW_MAX_SGE) ||
+ (attrs->cap.max_recv_sge > SIW_MAX_SGE)) {
+ siw_dbg(base_dev, "QP size error\n");
+ rv = -EINVAL;
+ goto err_out;
+ }
+ if (attrs->cap.max_inline_data > SIW_MAX_INLINE) {
+ siw_dbg(base_dev, "max inline send: %d > %d\n",
+ attrs->cap.max_inline_data, (int)SIW_MAX_INLINE);
+ rv = -EINVAL;
+ goto err_out;
+ }
+ /*
+ * NOTE: we allow for zero element SQ and RQ WQE's SGL's
+ * but not for a QP unable to hold any WQE (SQ + RQ)
+ */
+ if (attrs->cap.max_send_wr + attrs->cap.max_recv_wr == 0) {
+ siw_dbg(base_dev, "QP must have send or receive queue\n");
+ rv = -EINVAL;
+ goto err_out;
+ }
+ scq = to_siw_cq(attrs->send_cq);
+ rcq = to_siw_cq(attrs->recv_cq);
+
+ if (!scq || (!rcq && !attrs->srq)) {
+ siw_dbg(base_dev, "send CQ or receive CQ invalid\n");
+ rv = -EINVAL;
+ goto err_out;
+ }
+ siw_base_qp = kzalloc(sizeof(*siw_base_qp), GFP_KERNEL);
+ if (!siw_base_qp) {
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp) {
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ siw_base_qp->qp = qp;
+ qp->ib_qp = &siw_base_qp->base_qp;
+
+ init_rwsem(&qp->state_lock);
+ spin_lock_init(&qp->sq_lock);
+ spin_lock_init(&qp->rq_lock);
+ spin_lock_init(&qp->orq_lock);
+
+ qp->kernel_verbs = !udata;
+ qp->xa_sq_index = SIW_INVAL_UOBJ_KEY;
+ qp->xa_rq_index = SIW_INVAL_UOBJ_KEY;
+
+ rv = siw_qp_add(sdev, qp);
+ if (rv)
+ goto err_out;
+
+ /* All queue indices are derived from modulo operations
+ * on a free running 'get' (consumer) and 'put' (producer)
+ * unsigned counter. Having queue sizes at power of two
+ * avoids handling counter wrap around.
+ */
+ num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr);
+ num_rqe = roundup_pow_of_two(attrs->cap.max_recv_wr);
+
+ if (qp->kernel_verbs)
+ qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe));
+ else
+ qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe));
+
+ if (qp->sendq == NULL) {
+ siw_dbg(base_dev, "SQ size %d alloc failed\n", num_sqe);
+ rv = -ENOMEM;
+ goto err_out_xa;
+ }
+ if (attrs->sq_sig_type != IB_SIGNAL_REQ_WR) {
+ if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR)
+ qp->attrs.flags |= SIW_SIGNAL_ALL_WR;
+ else {
+ rv = -EINVAL;
+ goto err_out_xa;
+ }
+ }
+ qp->pd = pd;
+ qp->scq = scq;
+ qp->rcq = rcq;
+
+ if (attrs->srq) {
+ /*
+ * SRQ support.
+ * Verbs 6.3.7: ignore RQ size, if SRQ present
+ * Verbs 6.3.5: do not check PD of SRQ against PD of QP
+ */
+ qp->srq = to_siw_srq(attrs->srq);
+ qp->attrs.rq_size = 0;
+ siw_dbg(base_dev, "QP [%u]: [SRQ 0x%p] attached\n",
+ qp->qp_num, qp->srq);
+ } else if (num_rqe) {
+ if (qp->kernel_verbs)
+ qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe));
+ else
+ qp->recvq =
+ vmalloc_user(num_rqe * sizeof(struct siw_rqe));
+
+ if (qp->recvq == NULL) {
+ siw_dbg(base_dev, "RQ size %d alloc failed\n", num_rqe);
+ rv = -ENOMEM;
+ goto err_out_xa;
+ }
+ qp->attrs.rq_size = num_rqe;
+ }
+ qp->attrs.sq_size = num_sqe;
+ qp->attrs.sq_max_sges = attrs->cap.max_send_sge;
+ qp->attrs.rq_max_sges = attrs->cap.max_recv_sge;
+
+ /* Make those two tunables fixed for now. */
+ qp->tx_ctx.gso_seg_limit = 1;
+ qp->tx_ctx.zcopy_tx = zcopy_tx;
+
+ qp->attrs.state = SIW_QP_STATE_IDLE;
+
+ if (udata) {
+ struct siw_uresp_create_qp uresp = {};
+
+ uresp.num_sqe = num_sqe;
+ uresp.num_rqe = num_rqe;
+ uresp.qp_id = qp_id(qp);
+
+ if (qp->sendq) {
+ qp->xa_sq_index =
+ siw_create_uobj(uctx, qp->sendq,
+ num_sqe * sizeof(struct siw_sqe));
+ }
+ if (qp->recvq) {
+ qp->xa_rq_index =
+ siw_create_uobj(uctx, qp->recvq,
+ num_rqe * sizeof(struct siw_rqe));
+ }
+ if (qp->xa_sq_index == SIW_INVAL_UOBJ_KEY ||
+ qp->xa_rq_index == SIW_INVAL_UOBJ_KEY) {
+ rv = -ENOMEM;
+ goto err_out_xa;
+ }
+ uresp.sq_key = qp->xa_sq_index << PAGE_SHIFT;
+ uresp.rq_key = qp->xa_rq_index << PAGE_SHIFT;
+
+ if (udata->outlen < sizeof(uresp)) {
+ rv = -EINVAL;
+ goto err_out_xa;
+ }
+ rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (rv)
+ goto err_out_xa;
+ }
+ qp->tx_cpu = siw_get_tx_cpu(sdev);
+ if (qp->tx_cpu < 0) {
+ rv = -EINVAL;
+ goto err_out_xa;
+ }
+ INIT_LIST_HEAD(&qp->devq);
+ spin_lock_irqsave(&sdev->lock, flags);
+ list_add_tail(&qp->devq, &sdev->qp_list);
+ spin_unlock_irqrestore(&sdev->lock, flags);
+
+ return qp->ib_qp;
+
+err_out_xa:
+ xa_erase(&sdev->qp_xa, qp_id(qp));
+err_out:
+ kfree(siw_base_qp);
+
+ if (qp) {
+ if (qp->xa_sq_index != SIW_INVAL_UOBJ_KEY)
+ kfree(xa_erase(&uctx->xa, qp->xa_sq_index));
+ if (qp->xa_rq_index != SIW_INVAL_UOBJ_KEY)
+ kfree(xa_erase(&uctx->xa, qp->xa_rq_index));
+
+ vfree(qp->sendq);
+ vfree(qp->recvq);
+ kfree(qp);
+ }
+ atomic_dec(&sdev->num_qp);
+
+ return ERR_PTR(rv);
+}
+
+/*
+ * Minimum siw_query_qp() verb interface.
+ *
+ * @qp_attr_mask is not used but all available information is provided
+ */
+int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+ struct siw_qp *qp;
+ struct siw_device *sdev;
+
+ if (base_qp && qp_attr && qp_init_attr) {
+ qp = to_siw_qp(base_qp);
+ sdev = to_siw_dev(base_qp->device);
+ } else {
+ return -EINVAL;
+ }
+ qp_attr->cap.max_inline_data = SIW_MAX_INLINE;
+ qp_attr->cap.max_send_wr = qp->attrs.sq_size;
+ qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges;
+ qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
+ qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges;
+ qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
+ qp_attr->max_rd_atomic = qp->attrs.irq_size;
+ qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
+
+ qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ;
+
+ qp_init_attr->qp_type = base_qp->qp_type;
+ qp_init_attr->send_cq = base_qp->send_cq;
+ qp_init_attr->recv_cq = base_qp->recv_cq;
+ qp_init_attr->srq = base_qp->srq;
+
+ qp_init_attr->cap = qp_attr->cap;
+
+ return 0;
+}
+
+int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct siw_qp_attrs new_attrs;
+ enum siw_qp_attr_mask siw_attr_mask = 0;
+ struct siw_qp *qp = to_siw_qp(base_qp);
+ int rv = 0;
+
+ if (!attr_mask)
+ return 0;
+
+ memset(&new_attrs, 0, sizeof(new_attrs));
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS) {
+ siw_attr_mask = SIW_QP_ATTR_ACCESS_FLAGS;
+
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
+ new_attrs.flags |= SIW_RDMA_READ_ENABLED;
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
+ new_attrs.flags |= SIW_RDMA_WRITE_ENABLED;
+ if (attr->qp_access_flags & IB_ACCESS_MW_BIND)
+ new_attrs.flags |= SIW_RDMA_BIND_ENABLED;
+ }
+ if (attr_mask & IB_QP_STATE) {
+ siw_dbg_qp(qp, "desired IB QP state: %s\n",
+ ib_qp_state_to_string[attr->qp_state]);
+
+ new_attrs.state = ib_qp_state_to_siw_qp_state[attr->qp_state];
+
+ if (new_attrs.state > SIW_QP_STATE_RTS)
+ qp->tx_ctx.tx_suspend = 1;
+
+ siw_attr_mask |= SIW_QP_ATTR_STATE;
+ }
+ if (!siw_attr_mask)
+ goto out;
+
+ down_write(&qp->state_lock);
+
+ rv = siw_qp_modify(qp, &new_attrs, siw_attr_mask);
+
+ up_write(&qp->state_lock);
+out:
+ return rv;
+}
+
+int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata)
+{
+ struct siw_qp *qp = to_siw_qp(base_qp);
+ struct siw_base_qp *siw_base_qp = to_siw_base_qp(base_qp);
+ struct siw_ucontext *uctx =
+ rdma_udata_to_drv_context(udata, struct siw_ucontext,
+ base_ucontext);
+ struct siw_qp_attrs qp_attrs;
+
+ siw_dbg_qp(qp, "state %d, cep 0x%p\n", qp->attrs.state, qp->cep);
+
+ /*
+ * Mark QP as in process of destruction to prevent from
+ * any async callbacks to RDMA core
+ */
+ qp->attrs.flags |= SIW_QP_IN_DESTROY;
+ qp->rx_stream.rx_suspend = 1;
+
+ if (uctx && qp->xa_sq_index != SIW_INVAL_UOBJ_KEY)
+ kfree(xa_erase(&uctx->xa, qp->xa_sq_index));
+ if (uctx && qp->xa_rq_index != SIW_INVAL_UOBJ_KEY)
+ kfree(xa_erase(&uctx->xa, qp->xa_rq_index));
+
+ down_write(&qp->state_lock);
+
+ qp_attrs.state = SIW_QP_STATE_ERROR;
+ siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE);
+
+ if (qp->cep) {
+ siw_cep_put(qp->cep);
+ qp->cep = NULL;
+ }
+ up_write(&qp->state_lock);
+
+ kfree(qp->tx_ctx.mpa_crc_hd);
+ kfree(qp->rx_stream.mpa_crc_hd);
+
+ qp->scq = qp->rcq = NULL;
+
+ siw_qp_put(qp);
+ kfree(siw_base_qp);
+
+ return 0;
+}
+
+/*
+ * siw_copy_inline_sgl()
+ *
+ * Prepare sgl of inlined data for sending. For userland callers
+ * function checks if given buffer addresses and len's are within
+ * process context bounds.
+ * Data from all provided sge's are copied together into the wqe,
+ * referenced by a single sge.
+ */
+static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr,
+ struct siw_sqe *sqe)
+{
+ struct ib_sge *core_sge = core_wr->sg_list;
+ void *kbuf = &sqe->sge[1];
+ int num_sge = core_wr->num_sge, bytes = 0;
+
+ sqe->sge[0].laddr = (u64)kbuf;
+ sqe->sge[0].lkey = 0;
+
+ while (num_sge--) {
+ if (!core_sge->length) {
+ core_sge++;
+ continue;
+ }
+ bytes += core_sge->length;
+ if (bytes > SIW_MAX_INLINE) {
+ bytes = -EINVAL;
+ break;
+ }
+ memcpy(kbuf, (void *)(uintptr_t)core_sge->addr,
+ core_sge->length);
+
+ kbuf += core_sge->length;
+ core_sge++;
+ }
+ sqe->sge[0].length = bytes > 0 ? bytes : 0;
+ sqe->num_sge = bytes > 0 ? 1 : 0;
+
+ return bytes;
+}
+
+/*
+ * siw_post_send()
+ *
+ * Post a list of S-WR's to a SQ.
+ *
+ * @base_qp: Base QP contained in siw QP
+ * @wr: Null terminated list of user WR's
+ * @bad_wr: Points to failing WR in case of synchronous failure.
+ */
+int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ struct siw_qp *qp = to_siw_qp(base_qp);
+ struct siw_wqe *wqe = tx_wqe(qp);
+
+ unsigned long flags;
+ int rv = 0;
+
+ /*
+ * Try to acquire QP state lock. Must be non-blocking
+ * to accommodate kernel clients needs.
+ */
+ if (!down_read_trylock(&qp->state_lock)) {
+ *bad_wr = wr;
+ siw_dbg_qp(qp, "QP locked, state %d\n", qp->attrs.state);
+ return -ENOTCONN;
+ }
+ if (unlikely(qp->attrs.state != SIW_QP_STATE_RTS)) {
+ up_read(&qp->state_lock);
+ *bad_wr = wr;
+ siw_dbg_qp(qp, "QP out of state %d\n", qp->attrs.state);
+ return -ENOTCONN;
+ }
+ if (wr && !qp->kernel_verbs) {
+ siw_dbg_qp(qp, "wr must be empty for user mapped sq\n");
+ up_read(&qp->state_lock);
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+ spin_lock_irqsave(&qp->sq_lock, flags);
+
+ while (wr) {
+ u32 idx = qp->sq_put % qp->attrs.sq_size;
+ struct siw_sqe *sqe = &qp->sendq[idx];
+
+ if (sqe->flags) {
+ siw_dbg_qp(qp, "sq full\n");
+ rv = -ENOMEM;
+ break;
+ }
+ if (wr->num_sge > qp->attrs.sq_max_sges) {
+ siw_dbg_qp(qp, "too many sge's: %d\n", wr->num_sge);
+ rv = -EINVAL;
+ break;
+ }
+ sqe->id = wr->wr_id;
+
+ if ((wr->send_flags & IB_SEND_SIGNALED) ||
+ (qp->attrs.flags & SIW_SIGNAL_ALL_WR))
+ sqe->flags |= SIW_WQE_SIGNALLED;
+
+ if (wr->send_flags & IB_SEND_FENCE)
+ sqe->flags |= SIW_WQE_READ_FENCE;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_INV:
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ sqe->flags |= SIW_WQE_SOLICITED;
+
+ if (!(wr->send_flags & IB_SEND_INLINE)) {
+ siw_copy_sgl(wr->sg_list, sqe->sge,
+ wr->num_sge);
+ sqe->num_sge = wr->num_sge;
+ } else {
+ rv = siw_copy_inline_sgl(wr, sqe);
+ if (rv <= 0) {
+ rv = -EINVAL;
+ break;
+ }
+ sqe->flags |= SIW_WQE_INLINE;
+ sqe->num_sge = 1;
+ }
+ if (wr->opcode == IB_WR_SEND)
+ sqe->opcode = SIW_OP_SEND;
+ else {
+ sqe->opcode = SIW_OP_SEND_REMOTE_INV;
+ sqe->rkey = wr->ex.invalidate_rkey;
+ }
+ break;
+
+ case IB_WR_RDMA_READ_WITH_INV:
+ case IB_WR_RDMA_READ:
+ /*
+ * iWarp restricts RREAD sink to SGL containing
+ * 1 SGE only. we could relax to SGL with multiple
+ * elements referring the SAME ltag or even sending
+ * a private per-rreq tag referring to a checked
+ * local sgl with MULTIPLE ltag's.
+ */
+ if (unlikely(wr->num_sge != 1)) {
+ rv = -EINVAL;
+ break;
+ }
+ siw_copy_sgl(wr->sg_list, &sqe->sge[0], 1);
+ /*
+ * NOTE: zero length RREAD is allowed!
+ */
+ sqe->raddr = rdma_wr(wr)->remote_addr;
+ sqe->rkey = rdma_wr(wr)->rkey;
+ sqe->num_sge = 1;
+
+ if (wr->opcode == IB_WR_RDMA_READ)
+ sqe->opcode = SIW_OP_READ;
+ else
+ sqe->opcode = SIW_OP_READ_LOCAL_INV;
+ break;
+
+ case IB_WR_RDMA_WRITE:
+ if (!(wr->send_flags & IB_SEND_INLINE)) {
+ siw_copy_sgl(wr->sg_list, &sqe->sge[0],
+ wr->num_sge);
+ sqe->num_sge = wr->num_sge;
+ } else {
+ rv = siw_copy_inline_sgl(wr, sqe);
+ if (unlikely(rv < 0)) {
+ rv = -EINVAL;
+ break;
+ }
+ sqe->flags |= SIW_WQE_INLINE;
+ sqe->num_sge = 1;
+ }
+ sqe->raddr = rdma_wr(wr)->remote_addr;
+ sqe->rkey = rdma_wr(wr)->rkey;
+ sqe->opcode = SIW_OP_WRITE;
+ break;
+
+ case IB_WR_REG_MR:
+ sqe->base_mr = (uint64_t)reg_wr(wr)->mr;
+ sqe->rkey = reg_wr(wr)->key;
+ sqe->access = reg_wr(wr)->access & IWARP_ACCESS_MASK;
+ sqe->opcode = SIW_OP_REG_MR;
+ break;
+
+ case IB_WR_LOCAL_INV:
+ sqe->rkey = wr->ex.invalidate_rkey;
+ sqe->opcode = SIW_OP_INVAL_STAG;
+ break;
+
+ default:
+ siw_dbg_qp(qp, "ib wr type %d unsupported\n",
+ wr->opcode);
+ rv = -EINVAL;
+ break;
+ }
+ siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%p\n",
+ sqe->opcode, sqe->flags, (void *)sqe->id);
+
+ if (unlikely(rv < 0))
+ break;
+
+ /* make SQE only valid after completely written */
+ smp_wmb();
+ sqe->flags |= SIW_WQE_VALID;
+
+ qp->sq_put++;
+ wr = wr->next;
+ }
+
+ /*
+ * Send directly if SQ processing is not in progress.
+ * Eventual immediate errors (rv < 0) do not affect the involved
+ * RI resources (Verbs, 8.3.1) and thus do not prevent from SQ
+ * processing, if new work is already pending. But rv must be passed
+ * to caller.
+ */
+ if (wqe->wr_status != SIW_WR_IDLE) {
+ spin_unlock_irqrestore(&qp->sq_lock, flags);
+ goto skip_direct_sending;
+ }
+ rv = siw_activate_tx(qp);
+ spin_unlock_irqrestore(&qp->sq_lock, flags);
+
+ if (rv <= 0)
+ goto skip_direct_sending;
+
+ if (qp->kernel_verbs) {
+ rv = siw_sq_start(qp);
+ } else {
+ qp->tx_ctx.in_syscall = 1;
+
+ if (siw_qp_sq_process(qp) != 0 && !(qp->tx_ctx.tx_suspend))
+ siw_qp_cm_drop(qp, 0);
+
+ qp->tx_ctx.in_syscall = 0;
+ }
+skip_direct_sending:
+
+ up_read(&qp->state_lock);
+
+ if (rv >= 0)
+ return 0;
+ /*
+ * Immediate error
+ */
+ siw_dbg_qp(qp, "error %d\n", rv);
+
+ *bad_wr = wr;
+ return rv;
+}
+
+/*
+ * siw_post_receive()
+ *
+ * Post a list of R-WR's to a RQ.
+ *
+ * @base_qp: Base QP contained in siw QP
+ * @wr: Null terminated list of user WR's
+ * @bad_wr: Points to failing WR in case of synchronous failure.
+ */
+int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct siw_qp *qp = to_siw_qp(base_qp);
+ unsigned long flags;
+ int rv = 0;
+
+ if (qp->srq) {
+ *bad_wr = wr;
+ return -EOPNOTSUPP; /* what else from errno.h? */
+ }
+ /*
+ * Try to acquire QP state lock. Must be non-blocking
+ * to accommodate kernel clients needs.
+ */
+ if (!down_read_trylock(&qp->state_lock)) {
+ *bad_wr = wr;
+ return -ENOTCONN;
+ }
+ if (!qp->kernel_verbs) {
+ siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n");
+ up_read(&qp->state_lock);
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+ if (qp->attrs.state > SIW_QP_STATE_RTS) {
+ up_read(&qp->state_lock);
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+ /*
+ * Serialize potentially multiple producers.
+ * Not needed for single threaded consumer side.
+ */
+ spin_lock_irqsave(&qp->rq_lock, flags);
+
+ while (wr) {
+ u32 idx = qp->rq_put % qp->attrs.rq_size;
+ struct siw_rqe *rqe = &qp->recvq[idx];
+
+ if (rqe->flags) {
+ siw_dbg_qp(qp, "RQ full\n");
+ rv = -ENOMEM;
+ break;
+ }
+ if (wr->num_sge > qp->attrs.rq_max_sges) {
+ siw_dbg_qp(qp, "too many sge's: %d\n", wr->num_sge);
+ rv = -EINVAL;
+ break;
+ }
+ rqe->id = wr->wr_id;
+ rqe->num_sge = wr->num_sge;
+ siw_copy_sgl(wr->sg_list, rqe->sge, wr->num_sge);
+
+ /* make sure RQE is completely written before valid */
+ smp_wmb();
+
+ rqe->flags = SIW_WQE_VALID;
+
+ qp->rq_put++;
+ wr = wr->next;
+ }
+ spin_unlock_irqrestore(&qp->rq_lock, flags);
+
+ up_read(&qp->state_lock);
+
+ if (rv < 0) {
+ siw_dbg_qp(qp, "error %d\n", rv);
+ *bad_wr = wr;
+ }
+ return rv > 0 ? 0 : rv;
+}
+
+void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata)
+{
+ struct siw_cq *cq = to_siw_cq(base_cq);
+ struct siw_device *sdev = to_siw_dev(base_cq->device);
+ struct siw_ucontext *ctx =
+ rdma_udata_to_drv_context(udata, struct siw_ucontext,
+ base_ucontext);
+
+ siw_dbg_cq(cq, "free CQ resources\n");
+
+ siw_cq_flush(cq);
+
+ if (ctx && cq->xa_cq_index != SIW_INVAL_UOBJ_KEY)
+ kfree(xa_erase(&ctx->xa, cq->xa_cq_index));
+
+ atomic_dec(&sdev->num_cq);
+
+ vfree(cq->queue);
+}
+
+/*
+ * siw_create_cq()
+ *
+ * Populate CQ of requested size
+ *
+ * @base_cq: CQ as allocated by RDMA midlayer
+ * @attr: Initial CQ attributes
+ * @udata: relates to user context
+ */
+
+int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct siw_device *sdev = to_siw_dev(base_cq->device);
+ struct siw_cq *cq = to_siw_cq(base_cq);
+ int rv, size = attr->cqe;
+
+ if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) {
+ siw_dbg(base_cq->device, "too many CQ's\n");
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ if (size < 1 || size > sdev->attrs.max_cqe) {
+ siw_dbg(base_cq->device, "CQ size error: %d\n", size);
+ rv = -EINVAL;
+ goto err_out;
+ }
+ size = roundup_pow_of_two(size);
+ cq->base_cq.cqe = size;
+ cq->num_cqe = size;
+ cq->xa_cq_index = SIW_INVAL_UOBJ_KEY;
+
+ if (!udata) {
+ cq->kernel_verbs = 1;
+ cq->queue = vzalloc(size * sizeof(struct siw_cqe) +
+ sizeof(struct siw_cq_ctrl));
+ } else {
+ cq->queue = vmalloc_user(size * sizeof(struct siw_cqe) +
+ sizeof(struct siw_cq_ctrl));
+ }
+ if (cq->queue == NULL) {
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ get_random_bytes(&cq->id, 4);
+ siw_dbg(base_cq->device, "new CQ [%u]\n", cq->id);
+
+ spin_lock_init(&cq->lock);
+
+ cq->notify = &((struct siw_cq_ctrl *)&cq->queue[size])->notify;
+
+ if (udata) {
+ struct siw_uresp_create_cq uresp = {};
+ struct siw_ucontext *ctx =
+ rdma_udata_to_drv_context(udata, struct siw_ucontext,
+ base_ucontext);
+
+ cq->xa_cq_index =
+ siw_create_uobj(ctx, cq->queue,
+ size * sizeof(struct siw_cqe) +
+ sizeof(struct siw_cq_ctrl));
+ if (cq->xa_cq_index == SIW_INVAL_UOBJ_KEY) {
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ uresp.cq_key = cq->xa_cq_index << PAGE_SHIFT;
+ uresp.cq_id = cq->id;
+ uresp.num_cqe = size;
+
+ if (udata->outlen < sizeof(uresp)) {
+ rv = -EINVAL;
+ goto err_out;
+ }
+ rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (rv)
+ goto err_out;
+ }
+ return 0;
+
+err_out:
+ siw_dbg(base_cq->device, "CQ creation failed: %d", rv);
+
+ if (cq && cq->queue) {
+ struct siw_ucontext *ctx =
+ rdma_udata_to_drv_context(udata, struct siw_ucontext,
+ base_ucontext);
+ if (cq->xa_cq_index != SIW_INVAL_UOBJ_KEY)
+ kfree(xa_erase(&ctx->xa, cq->xa_cq_index));
+ vfree(cq->queue);
+ }
+ atomic_dec(&sdev->num_cq);
+
+ return rv;
+}
+
+/*
+ * siw_poll_cq()
+ *
+ * Reap CQ entries if available and copy work completion status into
+ * array of WC's provided by caller. Returns number of reaped CQE's.
+ *
+ * @base_cq: Base CQ contained in siw CQ.
+ * @num_cqe: Maximum number of CQE's to reap.
+ * @wc: Array of work completions to be filled by siw.
+ */
+int siw_poll_cq(struct ib_cq *base_cq, int num_cqe, struct ib_wc *wc)
+{
+ struct siw_cq *cq = to_siw_cq(base_cq);
+ int i;
+
+ for (i = 0; i < num_cqe; i++) {
+ if (!siw_reap_cqe(cq, wc))
+ break;
+ wc++;
+ }
+ return i;
+}
+
+/*
+ * siw_req_notify_cq()
+ *
+ * Request notification for new CQE's added to that CQ.
+ * Defined flags:
+ * o SIW_CQ_NOTIFY_SOLICITED lets siw trigger a notification
+ * event if a WQE with notification flag set enters the CQ
+ * o SIW_CQ_NOTIFY_NEXT_COMP lets siw trigger a notification
+ * event if a WQE enters the CQ.
+ * o IB_CQ_REPORT_MISSED_EVENTS: return value will provide the
+ * number of not reaped CQE's regardless of its notification
+ * type and current or new CQ notification settings.
+ *
+ * @base_cq: Base CQ contained in siw CQ.
+ * @flags: Requested notification flags.
+ */
+int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags)
+{
+ struct siw_cq *cq = to_siw_cq(base_cq);
+
+ siw_dbg_cq(cq, "flags: 0x%02x\n", flags);
+
+ if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
+ /* CQ event for next solicited completion */
+ smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);
+ else
+ /* CQ event for any signalled completion */
+ smp_store_mb(*cq->notify, SIW_NOTIFY_ALL);
+
+ if (flags & IB_CQ_REPORT_MISSED_EVENTS)
+ return cq->cq_put - cq->cq_get;
+
+ return 0;
+}
+
+/*
+ * siw_dereg_mr()
+ *
+ * Release Memory Region.
+ *
+ * @base_mr: Base MR contained in siw MR.
+ * @udata: points to user context, unused.
+ */
+int siw_dereg_mr(struct ib_mr *base_mr, struct ib_udata *udata)
+{
+ struct siw_mr *mr = to_siw_mr(base_mr);
+ struct siw_device *sdev = to_siw_dev(base_mr->device);
+
+ siw_dbg_mem(mr->mem, "deregister MR\n");
+
+ atomic_dec(&sdev->num_mr);
+
+ siw_mr_drop_mem(mr);
+ kfree_rcu(mr, rcu);
+
+ return 0;
+}
+
+/*
+ * siw_reg_user_mr()
+ *
+ * Register Memory Region.
+ *
+ * @pd: Protection Domain
+ * @start: starting address of MR (virtual address)
+ * @len: len of MR
+ * @rnic_va: not used by siw
+ * @rights: MR access rights
+ * @udata: user buffer to communicate STag and Key.
+ */
+struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
+ u64 rnic_va, int rights, struct ib_udata *udata)
+{
+ struct siw_mr *mr = NULL;
+ struct siw_umem *umem = NULL;
+ struct siw_ureq_reg_mr ureq;
+ struct siw_device *sdev = to_siw_dev(pd->device);
+
+ unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK);
+ int rv;
+
+ siw_dbg_pd(pd, "start: 0x%016llx, va: 0x%016llx, len: %llu\n",
+ (unsigned long long)start, (unsigned long long)rnic_va,
+ (unsigned long long)len);
+
+ if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) {
+ siw_dbg_pd(pd, "too many mr's\n");
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ if (!len) {
+ rv = -EINVAL;
+ goto err_out;
+ }
+ if (mem_limit != RLIM_INFINITY) {
+ unsigned long num_pages =
+ (PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT;
+ mem_limit >>= PAGE_SHIFT;
+
+ if (num_pages > mem_limit - current->mm->locked_vm) {
+ siw_dbg_pd(pd, "pages req %lu, max %lu, lock %lu\n",
+ num_pages, mem_limit,
+ current->mm->locked_vm);
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ }
+ umem = siw_umem_get(start, len, ib_access_writable(rights));
+ if (IS_ERR(umem)) {
+ rv = PTR_ERR(umem);
+ siw_dbg_pd(pd, "getting user memory failed: %d\n", rv);
+ umem = NULL;
+ goto err_out;
+ }
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ rv = siw_mr_add_mem(mr, pd, umem, start, len, rights);
+ if (rv)
+ goto err_out;
+
+ if (udata) {
+ struct siw_uresp_reg_mr uresp = {};
+ struct siw_mem *mem = mr->mem;
+
+ if (udata->inlen < sizeof(ureq)) {
+ rv = -EINVAL;
+ goto err_out;
+ }
+ rv = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
+ if (rv)
+ goto err_out;
+
+ mr->base_mr.lkey |= ureq.stag_key;
+ mr->base_mr.rkey |= ureq.stag_key;
+ mem->stag |= ureq.stag_key;
+ uresp.stag = mem->stag;
+
+ if (udata->outlen < sizeof(uresp)) {
+ rv = -EINVAL;
+ goto err_out;
+ }
+ rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (rv)
+ goto err_out;
+ }
+ mr->mem->stag_valid = 1;
+
+ return &mr->base_mr;
+
+err_out:
+ atomic_dec(&sdev->num_mr);
+ if (mr) {
+ if (mr->mem)
+ siw_mr_drop_mem(mr);
+ kfree_rcu(mr, rcu);
+ } else {
+ if (umem)
+ siw_umem_release(umem, false);
+ }
+ return ERR_PTR(rv);
+}
+
+struct ib_mr *siw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_sge, struct ib_udata *udata)
+{
+ struct siw_device *sdev = to_siw_dev(pd->device);
+ struct siw_mr *mr = NULL;
+ struct siw_pbl *pbl = NULL;
+ int rv;
+
+ if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) {
+ siw_dbg_pd(pd, "too many mr's\n");
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ if (mr_type != IB_MR_TYPE_MEM_REG) {
+ siw_dbg_pd(pd, "mr type %d unsupported\n", mr_type);
+ rv = -EOPNOTSUPP;
+ goto err_out;
+ }
+ if (max_sge > SIW_MAX_SGE_PBL) {
+ siw_dbg_pd(pd, "too many sge's: %d\n", max_sge);
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ pbl = siw_pbl_alloc(max_sge);
+ if (IS_ERR(pbl)) {
+ rv = PTR_ERR(pbl);
+ siw_dbg_pd(pd, "pbl allocation failed: %d\n", rv);
+ pbl = NULL;
+ goto err_out;
+ }
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ rv = siw_mr_add_mem(mr, pd, pbl, 0, max_sge * PAGE_SIZE, 0);
+ if (rv)
+ goto err_out;
+
+ mr->mem->is_pbl = 1;
+
+ siw_dbg_pd(pd, "[MEM %u]: success\n", mr->mem->stag);
+
+ return &mr->base_mr;
+
+err_out:
+ atomic_dec(&sdev->num_mr);
+
+ if (!mr) {
+ kfree(pbl);
+ } else {
+ if (mr->mem)
+ siw_mr_drop_mem(mr);
+ kfree_rcu(mr, rcu);
+ }
+ siw_dbg_pd(pd, "failed: %d\n", rv);
+
+ return ERR_PTR(rv);
+}
+
+/* Just used to count number of pages being mapped */
+static int siw_set_pbl_page(struct ib_mr *base_mr, u64 buf_addr)
+{
+ return 0;
+}
+
+int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle,
+ unsigned int *sg_off)
+{
+ struct scatterlist *slp;
+ struct siw_mr *mr = to_siw_mr(base_mr);
+ struct siw_mem *mem = mr->mem;
+ struct siw_pbl *pbl = mem->pbl;
+ struct siw_pble *pble;
+ u64 pbl_size;
+ int i, rv;
+
+ if (!pbl) {
+ siw_dbg_mem(mem, "no PBL allocated\n");
+ return -EINVAL;
+ }
+ pble = pbl->pbe;
+
+ if (pbl->max_buf < num_sle) {
+ siw_dbg_mem(mem, "too many SGE's: %d > %d\n",
+ mem->pbl->max_buf, num_sle);
+ return -ENOMEM;
+ }
+ for_each_sg(sl, slp, num_sle, i) {
+ if (sg_dma_len(slp) == 0) {
+ siw_dbg_mem(mem, "empty SGE\n");
+ return -EINVAL;
+ }
+ if (i == 0) {
+ pble->addr = sg_dma_address(slp);
+ pble->size = sg_dma_len(slp);
+ pble->pbl_off = 0;
+ pbl_size = pble->size;
+ pbl->num_buf = 1;
+ } else {
+ /* Merge PBL entries if adjacent */
+ if (pble->addr + pble->size == sg_dma_address(slp)) {
+ pble->size += sg_dma_len(slp);
+ } else {
+ pble++;
+ pbl->num_buf++;
+ pble->addr = sg_dma_address(slp);
+ pble->size = sg_dma_len(slp);
+ pble->pbl_off = pbl_size;
+ }
+ pbl_size += sg_dma_len(slp);
+ }
+ siw_dbg_mem(mem,
+ "sge[%d], size %llu, addr 0x%016llx, total %llu\n",
+ i, pble->size, pble->addr, pbl_size);
+ }
+ rv = ib_sg_to_pages(base_mr, sl, num_sle, sg_off, siw_set_pbl_page);
+ if (rv > 0) {
+ mem->len = base_mr->length;
+ mem->va = base_mr->iova;
+ siw_dbg_mem(mem,
+ "%llu bytes, start 0x%016llx, %u SLE to %u entries\n",
+ mem->len, mem->va, num_sle, pbl->num_buf);
+ }
+ return rv;
+}
+
+/*
+ * siw_get_dma_mr()
+ *
+ * Create a (empty) DMA memory region, where no umem is attached.
+ */
+struct ib_mr *siw_get_dma_mr(struct ib_pd *pd, int rights)
+{
+ struct siw_device *sdev = to_siw_dev(pd->device);
+ struct siw_mr *mr = NULL;
+ int rv;
+
+ if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) {
+ siw_dbg_pd(pd, "too many mr's\n");
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ rv = siw_mr_add_mem(mr, pd, NULL, 0, ULONG_MAX, rights);
+ if (rv)
+ goto err_out;
+
+ mr->mem->stag_valid = 1;
+
+ siw_dbg_pd(pd, "[MEM %u]: success\n", mr->mem->stag);
+
+ return &mr->base_mr;
+
+err_out:
+ if (rv)
+ kfree(mr);
+
+ atomic_dec(&sdev->num_mr);
+
+ return ERR_PTR(rv);
+}
+
+/*
+ * siw_create_srq()
+ *
+ * Create Shared Receive Queue of attributes @init_attrs
+ * within protection domain given by @pd.
+ *
+ * @base_srq: Base SRQ contained in siw SRQ.
+ * @init_attrs: SRQ init attributes.
+ * @udata: points to user context
+ */
+int siw_create_srq(struct ib_srq *base_srq,
+ struct ib_srq_init_attr *init_attrs, struct ib_udata *udata)
+{
+ struct siw_srq *srq = to_siw_srq(base_srq);
+ struct ib_srq_attr *attrs = &init_attrs->attr;
+ struct siw_device *sdev = to_siw_dev(base_srq->device);
+ struct siw_ucontext *ctx =
+ rdma_udata_to_drv_context(udata, struct siw_ucontext,
+ base_ucontext);
+ int rv;
+
+ if (atomic_inc_return(&sdev->num_srq) > SIW_MAX_SRQ) {
+ siw_dbg_pd(base_srq->pd, "too many SRQ's\n");
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ if (attrs->max_wr == 0 || attrs->max_wr > SIW_MAX_SRQ_WR ||
+ attrs->max_sge > SIW_MAX_SGE || attrs->srq_limit > attrs->max_wr) {
+ rv = -EINVAL;
+ goto err_out;
+ }
+ srq->max_sge = attrs->max_sge;
+ srq->num_rqe = roundup_pow_of_two(attrs->max_wr);
+ srq->xa_srq_index = SIW_INVAL_UOBJ_KEY;
+ srq->limit = attrs->srq_limit;
+ if (srq->limit)
+ srq->armed = 1;
+
+ srq->kernel_verbs = !udata;
+
+ if (udata)
+ srq->recvq =
+ vmalloc_user(srq->num_rqe * sizeof(struct siw_rqe));
+ else
+ srq->recvq = vzalloc(srq->num_rqe * sizeof(struct siw_rqe));
+
+ if (srq->recvq == NULL) {
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ if (udata) {
+ struct siw_uresp_create_srq uresp = {};
+
+ srq->xa_srq_index = siw_create_uobj(
+ ctx, srq->recvq, srq->num_rqe * sizeof(struct siw_rqe));
+
+ if (srq->xa_srq_index == SIW_INVAL_UOBJ_KEY) {
+ rv = -ENOMEM;
+ goto err_out;
+ }
+ uresp.srq_key = srq->xa_srq_index;
+ uresp.num_rqe = srq->num_rqe;
+
+ if (udata->outlen < sizeof(uresp)) {
+ rv = -EINVAL;
+ goto err_out;
+ }
+ rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (rv)
+ goto err_out;
+ }
+ spin_lock_init(&srq->lock);
+
+ siw_dbg_pd(base_srq->pd, "[SRQ 0x%p]: success\n", srq);
+
+ return 0;
+
+err_out:
+ if (srq->recvq) {
+ if (ctx && srq->xa_srq_index != SIW_INVAL_UOBJ_KEY)
+ kfree(xa_erase(&ctx->xa, srq->xa_srq_index));
+ vfree(srq->recvq);
+ }
+ atomic_dec(&sdev->num_srq);
+
+ return rv;
+}
+
+/*
+ * siw_modify_srq()
+ *
+ * Modify SRQ. The caller may resize SRQ and/or set/reset notification
+ * limit and (re)arm IB_EVENT_SRQ_LIMIT_REACHED notification.
+ *
+ * NOTE: it is unclear if RDMA core allows for changing the MAX_SGE
+ * parameter. siw_modify_srq() does not check the attrs->max_sge param.
+ */
+int siw_modify_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
+{
+ struct siw_srq *srq = to_siw_srq(base_srq);
+ unsigned long flags;
+ int rv = 0;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ if (attr_mask & IB_SRQ_MAX_WR) {
+ /* resize request not yet supported */
+ rv = -EOPNOTSUPP;
+ goto out;
+ }
+ if (attr_mask & IB_SRQ_LIMIT) {
+ if (attrs->srq_limit) {
+ if (unlikely(attrs->srq_limit > srq->num_rqe)) {
+ rv = -EINVAL;
+ goto out;
+ }
+ srq->armed = 1;
+ } else {
+ srq->armed = 0;
+ }
+ srq->limit = attrs->srq_limit;
+ }
+out:
+ spin_unlock_irqrestore(&srq->lock, flags);
+
+ return rv;
+}
+
+/*
+ * siw_query_srq()
+ *
+ * Query SRQ attributes.
+ */
+int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs)
+{
+ struct siw_srq *srq = to_siw_srq(base_srq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ attrs->max_wr = srq->num_rqe;
+ attrs->max_sge = srq->max_sge;
+ attrs->srq_limit = srq->limit;
+
+ spin_unlock_irqrestore(&srq->lock, flags);
+
+ return 0;
+}
+
+/*
+ * siw_destroy_srq()
+ *
+ * Destroy SRQ.
+ * It is assumed that the SRQ is not referenced by any
+ * QP anymore - the code trusts the RDMA core environment to keep track
+ * of QP references.
+ */
+void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata)
+{
+ struct siw_srq *srq = to_siw_srq(base_srq);
+ struct siw_device *sdev = to_siw_dev(base_srq->device);
+ struct siw_ucontext *ctx =
+ rdma_udata_to_drv_context(udata, struct siw_ucontext,
+ base_ucontext);
+
+ if (ctx && srq->xa_srq_index != SIW_INVAL_UOBJ_KEY)
+ kfree(xa_erase(&ctx->xa, srq->xa_srq_index));
+
+ vfree(srq->recvq);
+ atomic_dec(&sdev->num_srq);
+}
+
+/*
+ * siw_post_srq_recv()
+ *
+ * Post a list of receive queue elements to SRQ.
+ * NOTE: The function does not check or lock a certain SRQ state
+ * during the post operation. The code simply trusts the
+ * RDMA core environment.
+ *
+ * @base_srq: Base SRQ contained in siw SRQ
+ * @wr: List of R-WR's
+ * @bad_wr: Updated to failing WR if posting fails.
+ */
+int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct siw_srq *srq = to_siw_srq(base_srq);
+ unsigned long flags;
+ int rv = 0;
+
+ if (unlikely(!srq->kernel_verbs)) {
+ siw_dbg_pd(base_srq->pd,
+ "[SRQ 0x%p]: no kernel post_recv for mapped srq\n",
+ srq);
+ rv = -EINVAL;
+ goto out;
+ }
+ /*
+ * Serialize potentially multiple producers.
+ * Also needed to serialize potentially multiple
+ * consumers.
+ */
+ spin_lock_irqsave(&srq->lock, flags);
+
+ while (wr) {
+ u32 idx = srq->rq_put % srq->num_rqe;
+ struct siw_rqe *rqe = &srq->recvq[idx];
+
+ if (rqe->flags) {
+ siw_dbg_pd(base_srq->pd, "SRQ full\n");
+ rv = -ENOMEM;
+ break;
+ }
+ if (unlikely(wr->num_sge > srq->max_sge)) {
+ siw_dbg_pd(base_srq->pd,
+ "[SRQ 0x%p]: too many sge's: %d\n", srq,
+ wr->num_sge);
+ rv = -EINVAL;
+ break;
+ }
+ rqe->id = wr->wr_id;
+ rqe->num_sge = wr->num_sge;
+ siw_copy_sgl(wr->sg_list, rqe->sge, wr->num_sge);
+
+ /* Make sure S-RQE is completely written before valid */
+ smp_wmb();
+
+ rqe->flags = SIW_WQE_VALID;
+
+ srq->rq_put++;
+ wr = wr->next;
+ }
+ spin_unlock_irqrestore(&srq->lock, flags);
+out:
+ if (unlikely(rv < 0)) {
+ siw_dbg_pd(base_srq->pd, "[SRQ 0x%p]: error %d\n", srq, rv);
+ *bad_wr = wr;
+ }
+ return rv;
+}
+
+void siw_qp_event(struct siw_qp *qp, enum ib_event_type etype)
+{
+ struct ib_event event;
+ struct ib_qp *base_qp = qp->ib_qp;
+
+ /*
+ * Do not report asynchronous errors on QP which gets
+ * destroyed via verbs interface (siw_destroy_qp())
+ */
+ if (qp->attrs.flags & SIW_QP_IN_DESTROY)
+ return;
+
+ event.event = etype;
+ event.device = base_qp->device;
+ event.element.qp = base_qp;
+
+ if (base_qp->event_handler) {
+ siw_dbg_qp(qp, "reporting event %d\n", etype);
+ base_qp->event_handler(&event, base_qp->qp_context);
+ }
+}
+
+void siw_cq_event(struct siw_cq *cq, enum ib_event_type etype)
+{
+ struct ib_event event;
+ struct ib_cq *base_cq = &cq->base_cq;
+
+ event.event = etype;
+ event.device = base_cq->device;
+ event.element.cq = base_cq;
+
+ if (base_cq->event_handler) {
+ siw_dbg_cq(cq, "reporting CQ event %d\n", etype);
+ base_cq->event_handler(&event, base_cq->cq_context);
+ }
+}
+
+void siw_srq_event(struct siw_srq *srq, enum ib_event_type etype)
+{
+ struct ib_event event;
+ struct ib_srq *base_srq = &srq->base_srq;
+
+ event.event = etype;
+ event.device = base_srq->device;
+ event.element.srq = base_srq;
+
+ if (base_srq->event_handler) {
+ siw_dbg_pd(srq->base_srq.pd,
+ "reporting SRQ event %d\n", etype);
+ base_srq->event_handler(&event, base_srq->srq_context);
+ }
+}
+
+void siw_port_event(struct siw_device *sdev, u8 port, enum ib_event_type etype)
+{
+ struct ib_event event;
+
+ event.event = etype;
+ event.device = &sdev->base_dev;
+ event.element.port_num = port;
+
+ siw_dbg(&sdev->base_dev, "reporting port event %d\n", etype);
+
+ ib_dispatch_event(&event);
+}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h
new file mode 100644
index 000000000000..1910869281cb
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_verbs.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+#ifndef _SIW_VERBS_H
+#define _SIW_VERBS_H
+
+#include <linux/errno.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "siw.h"
+#include "siw_cm.h"
+
+/*
+ * siw_copy_sgl()
+ *
+ * Copy SGL from RDMA core representation to local
+ * representation.
+ */
+static inline void siw_copy_sgl(struct ib_sge *sge, struct siw_sge *siw_sge,
+ int num_sge)
+{
+ while (num_sge--) {
+ siw_sge->laddr = sge->addr;
+ siw_sge->length = sge->length;
+ siw_sge->lkey = sge->lkey;
+
+ siw_sge++;
+ sge++;
+ }
+}
+
+int siw_alloc_ucontext(struct ib_ucontext *base_ctx, struct ib_udata *udata);
+void siw_dealloc_ucontext(struct ib_ucontext *base_ctx);
+int siw_query_port(struct ib_device *base_dev, u8 port,
+ struct ib_port_attr *attr);
+int siw_get_port_immutable(struct ib_device *base_dev, u8 port,
+ struct ib_port_immutable *port_immutable);
+int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
+ struct ib_udata *udata);
+int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+int siw_query_port(struct ib_device *base_dev, u8 port,
+ struct ib_port_attr *attr);
+int siw_query_pkey(struct ib_device *base_dev, u8 port, u16 idx, u16 *pkey);
+int siw_query_gid(struct ib_device *base_dev, u8 port, int idx,
+ union ib_gid *gid);
+int siw_alloc_pd(struct ib_pd *base_pd, struct ib_udata *udata);
+void siw_dealloc_pd(struct ib_pd *base_pd, struct ib_udata *udata);
+struct ib_qp *siw_create_qp(struct ib_pd *base_pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata);
+int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata);
+int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr);
+int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata);
+int siw_poll_cq(struct ib_cq *base_cq, int num_entries, struct ib_wc *wc);
+int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags);
+struct ib_mr *siw_reg_user_mr(struct ib_pd *base_pd, u64 start, u64 len,
+ u64 rnic_va, int rights, struct ib_udata *udata);
+struct ib_mr *siw_alloc_mr(struct ib_pd *base_pd, enum ib_mr_type mr_type,
+ u32 max_sge, struct ib_udata *udata);
+struct ib_mr *siw_get_dma_mr(struct ib_pd *base_pd, int rights);
+int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle,
+ unsigned int *sg_off);
+int siw_dereg_mr(struct ib_mr *base_mr, struct ib_udata *udata);
+int siw_create_srq(struct ib_srq *base_srq, struct ib_srq_init_attr *attr,
+ struct ib_udata *udata);
+int siw_modify_srq(struct ib_srq *base_srq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask mask, struct ib_udata *udata);
+int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attr);
+void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata);
+int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma);
+void siw_qp_event(struct siw_qp *qp, enum ib_event_type type);
+void siw_cq_event(struct siw_cq *cq, enum ib_event_type type);
+void siw_srq_event(struct siw_srq *srq, enum ib_event_type type);
+void siw_port_event(struct siw_device *dev, u8 port, enum ib_event_type type);
+
+#endif
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index 4760ce465d89..7af68604af77 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -7,7 +7,7 @@ config INFINIBAND_IPOIB
transports IP packets over InfiniBand so you can use your IB
device as a fancy NIC.
- See Documentation/infiniband/ipoib.txt for more information
+ See Documentation/infiniband/ipoib.rst for more information
config INFINIBAND_IPOIB_CM
bool "IP-over-InfiniBand Connected Mode support"
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index aa9dcfc36cd3..c59e00a0881f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1153,7 +1153,6 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
ret = -ENOMEM;
goto err_tx;
}
- memset(p->tx_ring, 0, ipoib_sendq_size * sizeof(*p->tx_ring));
p->qp = ipoib_cm_create_tx_qp(p->dev, p);
memalloc_noio_restore(noio_flag);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 83429925dfc6..63e4f9d15fd9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -138,7 +138,6 @@ static void ipoib_get_strings(struct net_device __always_unused *dev,
p += ETH_GSTRING_LEN;
}
break;
- case ETH_SS_TEST:
default:
break;
}
@@ -149,7 +148,6 @@ static int ipoib_get_sset_count(struct net_device __always_unused *dev,
switch (sset) {
case ETH_SS_STATS:
return IPOIB_GLOBAL_STATS_LEN;
- case ETH_SS_TEST:
default:
break;
}
@@ -222,6 +220,7 @@ static const struct ethtool_ops ipoib_ethtool_ops = {
.get_strings = ipoib_get_strings,
.get_ethtool_stats = ipoib_get_ethtool_stats,
.get_sset_count = ipoib_get_sset_count,
+ .get_link = ethtool_op_get_link,
};
void ipoib_set_ethtool_ops(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 04ea7db08e87..ac0583ff280d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1893,12 +1893,6 @@ static void ipoib_child_init(struct net_device *ndev)
struct ipoib_dev_priv *priv = ipoib_priv(ndev);
struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
- dev_hold(priv->parent);
-
- down_write(&ppriv->vlan_rwsem);
- list_add_tail(&priv->list, &ppriv->child_intfs);
- up_write(&ppriv->vlan_rwsem);
-
priv->max_ib_mtu = ppriv->max_ib_mtu;
set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
@@ -1941,6 +1935,17 @@ static int ipoib_ndo_init(struct net_device *ndev)
if (rc) {
pr_warn("%s: failed to initialize device: %s port %d (ret = %d)\n",
priv->ca->name, priv->dev->name, priv->port, rc);
+ return rc;
+ }
+
+ if (priv->parent) {
+ struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
+
+ dev_hold(priv->parent);
+
+ down_write(&ppriv->vlan_rwsem);
+ list_add_tail(&priv->list, &ppriv->child_intfs);
+ up_write(&ppriv->vlan_rwsem);
}
return 0;
@@ -1958,6 +1963,14 @@ static void ipoib_ndo_uninit(struct net_device *dev)
*/
WARN_ON(!list_empty(&priv->child_intfs));
+ if (priv->parent) {
+ struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
+
+ down_write(&ppriv->vlan_rwsem);
+ list_del(&priv->list);
+ up_write(&ppriv->vlan_rwsem);
+ }
+
ipoib_neigh_hash_uninit(dev);
ipoib_ib_dev_cleanup(dev);
@@ -1969,15 +1982,8 @@ static void ipoib_ndo_uninit(struct net_device *dev)
priv->wq = NULL;
}
- if (priv->parent) {
- struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
-
- down_write(&ppriv->vlan_rwsem);
- list_del(&priv->list);
- up_write(&ppriv->vlan_rwsem);
-
+ if (priv->parent)
dev_put(priv->parent);
- }
}
static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index ba09068f6200..b69304d28f06 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -260,11 +260,8 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
priv->qp = NULL;
}
- if (ib_destroy_cq(priv->send_cq))
- ipoib_warn(priv, "ib_cq_destroy (send) failed\n");
-
- if (ib_destroy_cq(priv->recv_cq))
- ipoib_warn(priv, "ib_cq_destroy (recv) failed\n");
+ ib_destroy_cq(priv->send_cq);
+ ib_destroy_cq(priv->recv_cq);
}
void ipoib_event(struct ib_event_handler *handler,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 9c185a8dabd3..c7a3d75fb308 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -205,7 +205,8 @@ iser_initialize_task_headers(struct iscsi_task *task,
goto out;
}
- tx_desc->wr_idx = 0;
+ tx_desc->inv_wr.next = NULL;
+ tx_desc->reg_wr.wr.next = NULL;
tx_desc->mapped = true;
tx_desc->dma_addr = dma_addr;
tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
@@ -406,13 +407,10 @@ static u8
iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
{
struct iscsi_iser_task *iser_task = task->dd_data;
+ enum iser_data_dir dir = iser_task->dir[ISER_DIR_IN] ?
+ ISER_DIR_IN : ISER_DIR_OUT;
- if (iser_task->dir[ISER_DIR_IN])
- return iser_check_task_pi_status(iser_task, ISER_DIR_IN,
- sector);
- else
- return iser_check_task_pi_status(iser_task, ISER_DIR_OUT,
- sector);
+ return iser_check_task_pi_status(iser_task, dir, sector);
}
/**
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 36d525110fd2..39bf213444cb 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -225,14 +225,6 @@ enum iser_desc_type {
ISCSI_TX_DATAOUT
};
-/* Maximum number of work requests per task:
- * Data memory region local invalidate + fast registration
- * Protection memory region local invalidate + fast registration
- * Signature memory region local invalidate + fast registration
- * PDU send
- */
-#define ISER_MAX_WRS 7
-
/**
* struct iser_tx_desc - iSER TX descriptor
*
@@ -245,11 +237,9 @@ enum iser_desc_type {
* unsolicited data-out or control
* @num_sge: number sges used on this TX task
* @mapped: Is the task header mapped
- * @wr_idx: Current WR index
- * @wrs: Array of WRs per task
- * @data_reg: Data buffer registration details
- * @prot_reg: Protection buffer registration details
- * @sig_attrs: Signature attributes
+ * reg_wr: registration WR
+ * send_wr: send WR
+ * inv_wr: invalidate WR
*/
struct iser_tx_desc {
struct iser_ctrl iser_header;
@@ -260,15 +250,9 @@ struct iser_tx_desc {
int num_sge;
struct ib_cqe cqe;
bool mapped;
- u8 wr_idx;
- union iser_wr {
- struct ib_send_wr send;
- struct ib_reg_wr fast_reg;
- struct ib_sig_handover_wr sig;
- } wrs[ISER_MAX_WRS];
- struct iser_mem_reg data_reg;
- struct iser_mem_reg prot_reg;
- struct ib_sig_attrs sig_attrs;
+ struct ib_reg_wr reg_wr;
+ struct ib_send_wr send_wr;
+ struct ib_send_wr inv_wr;
};
#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
@@ -388,6 +372,7 @@ struct iser_device {
*
* @mr: memory region
* @fmr_pool: pool of fmrs
+ * @sig_mr: signature memory region
* @page_vec: fast reg page list used by fmr pool
* @mr_valid: is mr valid indicator
*/
@@ -396,36 +381,22 @@ struct iser_reg_resources {
struct ib_mr *mr;
struct ib_fmr_pool *fmr_pool;
};
+ struct ib_mr *sig_mr;
struct iser_page_vec *page_vec;
u8 mr_valid:1;
};
/**
- * struct iser_pi_context - Protection information context
- *
- * @rsc: protection buffer registration resources
- * @sig_mr: signature enable memory region
- * @sig_mr_valid: is sig_mr valid indicator
- * @sig_protected: is region protected indicator
- */
-struct iser_pi_context {
- struct iser_reg_resources rsc;
- struct ib_mr *sig_mr;
- u8 sig_mr_valid:1;
- u8 sig_protected:1;
-};
-
-/**
* struct iser_fr_desc - Fast registration descriptor
*
* @list: entry in connection fastreg pool
* @rsc: data buffer registration resources
- * @pi_ctx: protection information context
+ * @sig_protected: is region protected indicator
*/
struct iser_fr_desc {
struct list_head list;
struct iser_reg_resources rsc;
- struct iser_pi_context *pi_ctx;
+ bool sig_protected;
struct list_head all_list;
};
@@ -674,21 +645,6 @@ void
iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
struct iser_fr_desc *desc);
-static inline struct ib_send_wr *
-iser_tx_next_wr(struct iser_tx_desc *tx_desc)
-{
- struct ib_send_wr *cur_wr = &tx_desc->wrs[tx_desc->wr_idx].send;
- struct ib_send_wr *last_wr;
-
- if (tx_desc->wr_idx) {
- last_wr = &tx_desc->wrs[tx_desc->wr_idx - 1].send;
- last_wr->next = cur_wr;
- }
- tx_desc->wr_idx++;
-
- return cur_wr;
-}
-
static inline struct iser_conn *
to_iser_conn(struct ib_conn *ib_conn)
{
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 96af06cfe0af..5cbb4b3a0566 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -592,15 +592,14 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
static inline int
iser_inv_desc(struct iser_fr_desc *desc, u32 rkey)
{
- if (likely(rkey == desc->rsc.mr->rkey)) {
- desc->rsc.mr_valid = 0;
- } else if (likely(desc->pi_ctx && rkey == desc->pi_ctx->sig_mr->rkey)) {
- desc->pi_ctx->sig_mr_valid = 0;
- } else {
+ if (unlikely((!desc->sig_protected && rkey != desc->rsc.mr->rkey) ||
+ (desc->sig_protected && rkey != desc->rsc.sig_mr->rkey))) {
iser_err("Bogus remote invalidation for rkey %#x\n", rkey);
return -EINVAL;
}
+ desc->rsc.mr_valid = 0;
+
return 0;
}
@@ -750,6 +749,9 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
iser_task->prot[ISER_DIR_IN].data_len = 0;
iser_task->prot[ISER_DIR_OUT].data_len = 0;
+ iser_task->prot[ISER_DIR_IN].dma_nents = 0;
+ iser_task->prot[ISER_DIR_OUT].dma_nents = 0;
+
memset(&iser_task->rdma_reg[ISER_DIR_IN], 0,
sizeof(struct iser_mem_reg));
memset(&iser_task->rdma_reg[ISER_DIR_OUT], 0,
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 2ba70729d7b0..2cc89a9b9e9b 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -302,8 +302,7 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
}
static void
-iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
- struct ib_sig_domain *domain)
+iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_domain *domain)
{
domain->sig_type = IB_SIG_TYPE_T10_DIF;
domain->sig.dif.pi_interval = scsi_prot_interval(sc);
@@ -326,21 +325,21 @@ iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
case SCSI_PROT_WRITE_INSERT:
case SCSI_PROT_READ_STRIP:
sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
- iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
+ iser_set_dif_domain(sc, &sig_attrs->wire);
sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
break;
case SCSI_PROT_READ_INSERT:
case SCSI_PROT_WRITE_STRIP:
sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
- iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
+ iser_set_dif_domain(sc, &sig_attrs->mem);
sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
IB_T10DIF_CSUM : IB_T10DIF_CRC;
break;
case SCSI_PROT_READ_PASS:
case SCSI_PROT_WRITE_PASS:
- iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
+ iser_set_dif_domain(sc, &sig_attrs->wire);
sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
- iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
+ iser_set_dif_domain(sc, &sig_attrs->mem);
sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
IB_T10DIF_CSUM : IB_T10DIF_CRC;
break;
@@ -366,27 +365,29 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
static inline void
iser_inv_rkey(struct ib_send_wr *inv_wr,
struct ib_mr *mr,
- struct ib_cqe *cqe)
+ struct ib_cqe *cqe,
+ struct ib_send_wr *next_wr)
{
inv_wr->opcode = IB_WR_LOCAL_INV;
inv_wr->wr_cqe = cqe;
inv_wr->ex.invalidate_rkey = mr->rkey;
inv_wr->send_flags = 0;
inv_wr->num_sge = 0;
+ inv_wr->next = next_wr;
}
static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
- struct iser_pi_context *pi_ctx,
- struct iser_mem_reg *data_reg,
- struct iser_mem_reg *prot_reg,
+ struct iser_data_buf *mem,
+ struct iser_data_buf *sig_mem,
+ struct iser_reg_resources *rsc,
struct iser_mem_reg *sig_reg)
{
struct iser_tx_desc *tx_desc = &iser_task->desc;
- struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
- struct ib_sig_handover_wr *wr;
- struct ib_mr *mr = pi_ctx->sig_mr;
+ struct ib_mr *mr = rsc->sig_mr;
+ struct ib_sig_attrs *sig_attrs = mr->sig_attrs;
+ struct ib_reg_wr *wr = &tx_desc->reg_wr;
int ret;
memset(sig_attrs, 0, sizeof(*sig_attrs));
@@ -396,33 +397,36 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
- if (pi_ctx->sig_mr_valid)
- iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
+ if (rsc->mr_valid)
+ iser_inv_rkey(&tx_desc->inv_wr, mr, cqe, &wr->wr);
ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
- wr = container_of(iser_tx_next_wr(tx_desc), struct ib_sig_handover_wr,
- wr);
- wr->wr.opcode = IB_WR_REG_SIG_MR;
+ ret = ib_map_mr_sg_pi(mr, mem->sg, mem->dma_nents, NULL,
+ sig_mem->sg, sig_mem->dma_nents, NULL, SZ_4K);
+ if (unlikely(ret)) {
+ iser_err("failed to map PI sg (%d)\n",
+ mem->dma_nents + sig_mem->dma_nents);
+ goto err;
+ }
+
+ memset(wr, 0, sizeof(*wr));
+ wr->wr.next = &tx_desc->send_wr;
+ wr->wr.opcode = IB_WR_REG_MR_INTEGRITY;
wr->wr.wr_cqe = cqe;
- wr->wr.sg_list = &data_reg->sge;
- wr->wr.num_sge = 1;
+ wr->wr.num_sge = 0;
wr->wr.send_flags = 0;
- wr->sig_attrs = sig_attrs;
- wr->sig_mr = mr;
- if (scsi_prot_sg_count(iser_task->sc))
- wr->prot = &prot_reg->sge;
- else
- wr->prot = NULL;
- wr->access_flags = IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_READ |
- IB_ACCESS_REMOTE_WRITE;
- pi_ctx->sig_mr_valid = 1;
+ wr->mr = mr;
+ wr->key = mr->rkey;
+ wr->access = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE;
+ rsc->mr_valid = 1;
sig_reg->sge.lkey = mr->lkey;
sig_reg->rkey = mr->rkey;
- sig_reg->sge.addr = 0;
- sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
+ sig_reg->sge.addr = mr->iova;
+ sig_reg->sge.length = mr->length;
iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=%u\n",
sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
@@ -439,11 +443,11 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct iser_tx_desc *tx_desc = &iser_task->desc;
struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
struct ib_mr *mr = rsc->mr;
- struct ib_reg_wr *wr;
+ struct ib_reg_wr *wr = &tx_desc->reg_wr;
int n;
if (rsc->mr_valid)
- iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
+ iser_inv_rkey(&tx_desc->inv_wr, mr, cqe, &wr->wr);
ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
@@ -454,7 +458,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
return n < 0 ? n : -EINVAL;
}
- wr = container_of(iser_tx_next_wr(tx_desc), struct ib_reg_wr, wr);
+ wr->wr.next = &tx_desc->send_wr;
wr->wr.opcode = IB_WR_REG_MR;
wr->wr.wr_cqe = cqe;
wr->wr.send_flags = 0;
@@ -479,21 +483,6 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
}
static int
-iser_reg_prot_sg(struct iscsi_iser_task *task,
- struct iser_data_buf *mem,
- struct iser_fr_desc *desc,
- bool use_dma_key,
- struct iser_mem_reg *reg)
-{
- struct iser_device *device = task->iser_conn->ib_conn.device;
-
- if (use_dma_key)
- return iser_reg_dma(device, mem, reg);
-
- return device->reg_ops->reg_mem(task, mem, &desc->pi_ctx->rsc, reg);
-}
-
-static int
iser_reg_data_sg(struct iscsi_iser_task *task,
struct iser_data_buf *mem,
struct iser_fr_desc *desc,
@@ -516,7 +505,6 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
struct iser_device *device = ib_conn->device;
struct iser_data_buf *mem = &task->data[dir];
struct iser_mem_reg *reg = &task->rdma_reg[dir];
- struct iser_mem_reg *data_reg;
struct iser_fr_desc *desc = NULL;
bool use_dma_key;
int err;
@@ -529,32 +517,17 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
reg->mem_h = desc;
}
- if (scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL)
- data_reg = reg;
- else
- data_reg = &task->desc.data_reg;
-
- err = iser_reg_data_sg(task, mem, desc, use_dma_key, data_reg);
- if (unlikely(err))
- goto err_reg;
-
- if (scsi_get_prot_op(task->sc) != SCSI_PROT_NORMAL) {
- struct iser_mem_reg *prot_reg = &task->desc.prot_reg;
-
- if (scsi_prot_sg_count(task->sc)) {
- mem = &task->prot[dir];
- err = iser_reg_prot_sg(task, mem, desc,
- use_dma_key, prot_reg);
- if (unlikely(err))
- goto err_reg;
- }
-
- err = iser_reg_sig_mr(task, desc->pi_ctx, data_reg,
- prot_reg, reg);
+ if (scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL) {
+ err = iser_reg_data_sg(task, mem, desc, use_dma_key, reg);
+ if (unlikely(err))
+ goto err_reg;
+ } else {
+ err = iser_reg_sig_mr(task, mem, &task->prot[dir],
+ &desc->rsc, reg);
if (unlikely(err))
goto err_reg;
- desc->pi_ctx->sig_protected = 1;
+ desc->sig_protected = 1;
}
return 0;
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 4ff3d98fa6a4..a6548de0e218 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -233,116 +233,63 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn)
kfree(desc);
}
-static int
-iser_alloc_reg_res(struct iser_device *device,
- struct ib_pd *pd,
- struct iser_reg_resources *res,
- unsigned int size)
+static struct iser_fr_desc *
+iser_create_fastreg_desc(struct iser_device *device,
+ struct ib_pd *pd,
+ bool pi_enable,
+ unsigned int size)
{
+ struct iser_fr_desc *desc;
struct ib_device *ib_dev = device->ib_device;
enum ib_mr_type mr_type;
int ret;
+ desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc)
+ return ERR_PTR(-ENOMEM);
+
if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
mr_type = IB_MR_TYPE_SG_GAPS;
else
mr_type = IB_MR_TYPE_MEM_REG;
- res->mr = ib_alloc_mr(pd, mr_type, size);
- if (IS_ERR(res->mr)) {
- ret = PTR_ERR(res->mr);
+ desc->rsc.mr = ib_alloc_mr(pd, mr_type, size);
+ if (IS_ERR(desc->rsc.mr)) {
+ ret = PTR_ERR(desc->rsc.mr);
iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
- return ret;
+ goto err_alloc_mr;
}
- res->mr_valid = 0;
-
- return 0;
-}
-static void
-iser_free_reg_res(struct iser_reg_resources *rsc)
-{
- ib_dereg_mr(rsc->mr);
-}
-
-static int
-iser_alloc_pi_ctx(struct iser_device *device,
- struct ib_pd *pd,
- struct iser_fr_desc *desc,
- unsigned int size)
-{
- struct iser_pi_context *pi_ctx = NULL;
- int ret;
-
- desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
- if (!desc->pi_ctx)
- return -ENOMEM;
-
- pi_ctx = desc->pi_ctx;
-
- ret = iser_alloc_reg_res(device, pd, &pi_ctx->rsc, size);
- if (ret) {
- iser_err("failed to allocate reg_resources\n");
- goto alloc_reg_res_err;
- }
-
- pi_ctx->sig_mr = ib_alloc_mr(pd, IB_MR_TYPE_SIGNATURE, 2);
- if (IS_ERR(pi_ctx->sig_mr)) {
- ret = PTR_ERR(pi_ctx->sig_mr);
- goto sig_mr_failure;
+ if (pi_enable) {
+ desc->rsc.sig_mr = ib_alloc_mr_integrity(pd, size, size);
+ if (IS_ERR(desc->rsc.sig_mr)) {
+ ret = PTR_ERR(desc->rsc.sig_mr);
+ iser_err("Failed to allocate sig_mr err=%d\n", ret);
+ goto err_alloc_mr_integrity;
+ }
}
- pi_ctx->sig_mr_valid = 0;
- desc->pi_ctx->sig_protected = 0;
-
- return 0;
+ desc->rsc.mr_valid = 0;
-sig_mr_failure:
- iser_free_reg_res(&pi_ctx->rsc);
-alloc_reg_res_err:
- kfree(desc->pi_ctx);
+ return desc;
- return ret;
-}
+err_alloc_mr_integrity:
+ ib_dereg_mr(desc->rsc.mr);
+err_alloc_mr:
+ kfree(desc);
-static void
-iser_free_pi_ctx(struct iser_pi_context *pi_ctx)
-{
- iser_free_reg_res(&pi_ctx->rsc);
- ib_dereg_mr(pi_ctx->sig_mr);
- kfree(pi_ctx);
+ return ERR_PTR(ret);
}
-static struct iser_fr_desc *
-iser_create_fastreg_desc(struct iser_device *device,
- struct ib_pd *pd,
- bool pi_enable,
- unsigned int size)
+static void iser_destroy_fastreg_desc(struct iser_fr_desc *desc)
{
- struct iser_fr_desc *desc;
- int ret;
-
- desc = kzalloc(sizeof(*desc), GFP_KERNEL);
- if (!desc)
- return ERR_PTR(-ENOMEM);
-
- ret = iser_alloc_reg_res(device, pd, &desc->rsc, size);
- if (ret)
- goto reg_res_alloc_failure;
+ struct iser_reg_resources *res = &desc->rsc;
- if (pi_enable) {
- ret = iser_alloc_pi_ctx(device, pd, desc, size);
- if (ret)
- goto pi_ctx_alloc_failure;
+ ib_dereg_mr(res->mr);
+ if (res->sig_mr) {
+ ib_dereg_mr(res->sig_mr);
+ res->sig_mr = NULL;
}
-
- return desc;
-
-pi_ctx_alloc_failure:
- iser_free_reg_res(&desc->rsc);
-reg_res_alloc_failure:
kfree(desc);
-
- return ERR_PTR(ret);
}
/**
@@ -399,10 +346,7 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) {
list_del(&desc->all_list);
- iser_free_reg_res(&desc->rsc);
- if (desc->pi_ctx)
- iser_free_pi_ctx(desc->pi_ctx);
- kfree(desc);
+ iser_destroy_fastreg_desc(desc);
++i;
}
@@ -455,7 +399,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
init_attr.qp_type = IB_QPT_RC;
if (ib_conn->pi_support) {
init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
- init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
+ init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
iser_conn->max_cmds =
ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
} else {
@@ -707,6 +651,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
struct ib_device_attr *attr = &device->ib_device->attrs;
unsigned short sg_tablesize, sup_sg_tablesize;
unsigned short reserved_mr_pages;
+ u32 max_num_sg;
/*
* FRs without SG_GAPS or FMRs can only map up to a (device) page per
@@ -720,12 +665,17 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
else
reserved_mr_pages = 1;
+ if (iser_conn->ib_conn.pi_support)
+ max_num_sg = attr->max_pi_fast_reg_page_list_len;
+ else
+ max_num_sg = attr->max_fast_reg_page_list_len;
+
sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
if (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)
sup_sg_tablesize =
min_t(
uint, ISCSI_ISER_MAX_SG_TABLESIZE,
- attr->max_fast_reg_page_list_len - reserved_mr_pages);
+ max_num_sg - reserved_mr_pages);
else
sup_sg_tablesize = ISCSI_ISER_MAX_SG_TABLESIZE;
@@ -762,7 +712,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
/* connection T10-PI support */
if (iser_pi_enable) {
if (!(device->ib_device->attrs.device_cap_flags &
- IB_DEVICE_SIGNATURE_HANDOVER)) {
+ IB_DEVICE_INTEGRITY_HANDOVER)) {
iser_warn("T10-PI requested but not supported on %s, "
"continue without T10-PI\n",
dev_name(&ib_conn->device->ib_device->dev));
@@ -1087,7 +1037,8 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count)
int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
bool signal)
{
- struct ib_send_wr *wr = iser_tx_next_wr(tx_desc);
+ struct ib_send_wr *wr = &tx_desc->send_wr;
+ struct ib_send_wr *first_wr;
int ib_ret;
ib_dma_sync_single_for_device(ib_conn->device->ib_device,
@@ -1101,7 +1052,14 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
wr->opcode = IB_WR_SEND;
wr->send_flags = signal ? IB_SEND_SIGNALED : 0;
- ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, NULL);
+ if (tx_desc->inv_wr.next)
+ first_wr = &tx_desc->inv_wr;
+ else if (tx_desc->reg_wr.wr.next)
+ first_wr = &tx_desc->reg_wr.wr;
+ else
+ first_wr = wr;
+
+ ib_ret = ib_post_send(ib_conn->qp, first_wr, NULL);
if (ib_ret)
iser_err("ib_post_send failed, ret:%d opcode:%d\n",
ib_ret, wr->opcode);
@@ -1118,9 +1076,9 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
struct ib_mr_status mr_status;
int ret;
- if (desc && desc->pi_ctx->sig_protected) {
- desc->pi_ctx->sig_protected = 0;
- ret = ib_check_mr_status(desc->pi_ctx->sig_mr,
+ if (desc && desc->sig_protected) {
+ desc->sig_protected = 0;
+ ret = ib_check_mr_status(desc->rsc.sig_mr,
IB_MR_CHECK_SIG_STATUS, &mr_status);
if (ret) {
pr_err("ib_check_mr_status failed, ret %d\n", ret);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 0205cf142b2f..a1a035270cab 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -133,7 +133,7 @@ isert_create_qp(struct isert_conn *isert_conn,
attr.sq_sig_type = IB_SIGNAL_REQ_WR;
attr.qp_type = IB_QPT_RC;
if (device->pi_capable)
- attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
+ attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
ret = rdma_create_qp(cma_id, device->pd, &attr);
if (ret) {
@@ -309,7 +309,7 @@ isert_create_device_ib_res(struct isert_device *device)
/* Check signature cap */
device->pi_capable = ib_dev->attrs.device_cap_flags &
- IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
+ IB_DEVICE_INTEGRITY_HANDOVER ? true : false;
return 0;
@@ -1669,7 +1669,7 @@ isert_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
isert_dbg("Cmd %p\n", isert_cmd);
- ret = isert_check_pi_status(cmd, isert_cmd->rw.sig->sig_mr);
+ ret = isert_check_pi_status(cmd, isert_cmd->rw.reg->mr);
isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
if (ret) {
@@ -1715,7 +1715,7 @@ isert_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
iscsit_stop_dataout_timer(cmd);
if (isert_prot_cmd(isert_conn, se_cmd))
- ret = isert_check_pi_status(se_cmd, isert_cmd->rw.sig->sig_mr);
+ ret = isert_check_pi_status(se_cmd, isert_cmd->rw.reg->mr);
isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
cmd->write_data_done = 0;
@@ -2059,8 +2059,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
}
static inline void
-isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs,
- struct ib_sig_domain *domain)
+isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_domain *domain)
{
domain->sig_type = IB_SIG_TYPE_T10_DIF;
domain->sig.dif.bg_type = IB_T10DIF_CRC;
@@ -2088,17 +2087,17 @@ isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
case TARGET_PROT_DIN_INSERT:
case TARGET_PROT_DOUT_STRIP:
sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
- isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->wire);
+ isert_set_dif_domain(se_cmd, &sig_attrs->wire);
break;
case TARGET_PROT_DOUT_INSERT:
case TARGET_PROT_DIN_STRIP:
sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
- isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem);
+ isert_set_dif_domain(se_cmd, &sig_attrs->mem);
break;
case TARGET_PROT_DIN_PASS:
case TARGET_PROT_DOUT_PASS:
- isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->wire);
- isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem);
+ isert_set_dif_domain(se_cmd, &sig_attrs->wire);
+ isert_set_dif_domain(se_cmd, &sig_attrs->mem);
break;
default:
isert_err("Unsupported PI operation %d\n", se_cmd->prot_op);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index d5cbad2c61e4..c7bd96edce80 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -3466,13 +3466,14 @@ static const match_table_t srp_opt_tokens = {
* @net: [in] Network namespace.
* @sa: [out] Address family, IP address and port number.
* @addr_port_str: [in] IP address and port number.
+ * @has_port: [out] Whether or not @addr_port_str includes a port number.
*
* Parse the following address formats:
* - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
* - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5.
*/
static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
- const char *addr_port_str)
+ const char *addr_port_str, bool *has_port)
{
char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL);
char *port_str;
@@ -3481,9 +3482,12 @@ static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
if (!addr)
return -ENOMEM;
port_str = strrchr(addr, ':');
- if (!port_str)
- return -EINVAL;
- *port_str++ = '\0';
+ if (port_str && strchr(port_str, ']'))
+ port_str = NULL;
+ if (port_str)
+ *port_str++ = '\0';
+ if (has_port)
+ *has_port = port_str != NULL;
ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa);
if (ret && addr[0]) {
addr_end = addr + strlen(addr) - 1;
@@ -3505,6 +3509,7 @@ static int srp_parse_options(struct net *net, const char *buf,
char *p;
substring_t args[MAX_OPT_ARGS];
unsigned long long ull;
+ bool has_port;
int opt_mask = 0;
int token;
int ret = -EINVAL;
@@ -3603,7 +3608,8 @@ static int srp_parse_options(struct net *net, const char *buf,
ret = -ENOMEM;
goto out;
}
- ret = srp_parse_in(net, &target->rdma_cm.src.ss, p);
+ ret = srp_parse_in(net, &target->rdma_cm.src.ss, p,
+ NULL);
if (ret < 0) {
pr_warn("bad source parameter '%s'\n", p);
kfree(p);
@@ -3619,7 +3625,10 @@ static int srp_parse_options(struct net *net, const char *buf,
ret = -ENOMEM;
goto out;
}
- ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p);
+ ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p,
+ &has_port);
+ if (!has_port)
+ ret = -EINVAL;
if (ret < 0) {
pr_warn("bad dest parameter '%s'\n", p);
kfree(p);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 676619c1454a..a249db528d54 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -482,7 +482,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
queue->queue_size,
IB_MR_TYPE_MEM_REG,
- nvme_rdma_get_max_fr_pages(ibdev));
+ nvme_rdma_get_max_fr_pages(ibdev), 0);
if (ret) {
dev_err(queue->ctrl->ctrl.device,
"failed to initialize MR pool sized %d for QID %d\n",