aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds2017-12-28 23:06:01 -0800
committerLinus Torvalds2017-12-28 23:06:01 -0800
commit19286e4a7a0ce0a7ac584be614c40513d6318ad6 (patch)
tree10b2f6defef19db7a49a66c940d4ba53221cee95
parent5f520fc318764df800789edd202b5e3b55130613 (diff)
parent45e6ae7ef21b907dacb18da62d5787d74a31d860 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma fixes from Jason Gunthorpe: "This is the next batch of for-rc patches from RDMA. It includes the fix for the ipoib regression I mentioned last time, and the result of a fairly major debugging effort to get iser working reliably on cxgb4 hardware - it turns out the cxgb4 driver was not handling QP error flushing properly causing iser to fail. - cxgb4 fix for an iser testing failure as debugged by Steve and Sagi. The problem was a driver bug in the handling of shutting down a QP. - Various vmw_pvrdma fixes for bogus WARN_ON, missed resource free on error unwind and a use after free bug - Improper congestion counter values on mlx5 when link aggregation is enabled - ipoib lockdep regression introduced in this merge window - hfi1 regression supporting the device in a VM introduced in a recent patch - Typo that breaks future uAPI compatibility in the verbs core - More SELinux related oops fixing - Fix an oops during error unwind in mlx5" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: IB/mlx5: Fix mlx5_ib_alloc_mr error flow IB/core: Verify that QP is security enabled in create and destroy IB/uverbs: Fix command checking as part of ib_uverbs_ex_modify_qp() IB/mlx5: Serialize access to the VMA list IB/hfi: Only read capability registers if the capability exists IB/ipoib: Fix lockdep issue found on ipoib_ib_dev_heavy_flush IB/mlx5: Fix congestion counters in LAG mode RDMA/vmw_pvrdma: Avoid use after free due to QP/CQ/SRQ destroy RDMA/vmw_pvrdma: Use refcount_dec_and_test to avoid warning RDMA/vmw_pvrdma: Call ib_umem_release on destroy QP path iw_cxgb4: when flushing, complete all wrs in a chain iw_cxgb4: reflect the original WR opcode in drain cqes iw_cxgb4: Only validate the MSN for successful completions
-rw-r--r--drivers/infiniband/core/security.c3
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c4
-rw-r--r--drivers/infiniband/core/verbs.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c13
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h2
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c72
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h6
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h1
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c30
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c11
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h2
-rw-r--r--drivers/infiniband/hw/mlx5/main.c43
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h4
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c1
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma.h6
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c7
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c17
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c14
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c56
-rw-r--r--include/linux/mlx5/driver.h4
22 files changed, 208 insertions, 105 deletions
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index feafdb961c48..59b2f96d986a 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -386,6 +386,9 @@ int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev)
if (ret)
return ret;
+ if (!qp->qp_sec)
+ return 0;
+
mutex_lock(&real_qp->qp_sec->mutex);
ret = check_qp_port_pkey_settings(real_qp->qp_sec->ports_pkeys,
qp->qp_sec);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index d0202bb176a4..840b24096690 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2074,8 +2074,8 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
return -EOPNOTSUPP;
if (ucore->inlen > sizeof(cmd)) {
- if (ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
+ if (!ib_is_udata_cleared(ucore, sizeof(cmd),
+ ucore->inlen - sizeof(cmd)))
return -EOPNOTSUPP;
}
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 3fb8fb6cc824..e36d27ed4daa 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1438,7 +1438,8 @@ int ib_close_qp(struct ib_qp *qp)
spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
atomic_dec(&real_qp->usecnt);
- ib_close_shared_qp_security(qp->qp_sec);
+ if (qp->qp_sec)
+ ib_close_shared_qp_security(qp->qp_sec);
kfree(qp);
return 0;
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index b7bfc536e00f..6f2b26126c64 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -395,7 +395,7 @@ next_cqe:
static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
{
- if (CQE_OPCODE(cqe) == C4IW_DRAIN_OPCODE) {
+ if (DRAIN_CQE(cqe)) {
WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid);
return 0;
}
@@ -494,7 +494,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
/*
* Special cqe for drain WR completions...
*/
- if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) {
+ if (DRAIN_CQE(hw_cqe)) {
*cookie = CQE_DRAIN_COOKIE(hw_cqe);
*cqe = *hw_cqe;
goto skip_cqe;
@@ -571,10 +571,10 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
ret = -EAGAIN;
goto skip_cqe;
}
- if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
+ if (unlikely(!CQE_STATUS(hw_cqe) &&
+ CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
t4_set_wq_in_error(wq);
- hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN));
- goto proc_cqe;
+ hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
}
goto proc_cqe;
}
@@ -748,9 +748,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
c4iw_invalidate_mr(qhp->rhp,
CQE_WRID_FR_STAG(&cqe));
break;
- case C4IW_DRAIN_OPCODE:
- wc->opcode = IB_WC_SEND;
- break;
default:
pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
CQE_OPCODE(&cqe), CQE_QPID(&cqe));
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 470f97a79ebb..65dd3726ca02 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -693,8 +693,6 @@ static inline int to_ib_qp_state(int c4iw_qp_state)
return IB_QPS_ERR;
}
-#define C4IW_DRAIN_OPCODE FW_RI_SGE_EC_CR_RETURN
-
static inline u32 c4iw_ib_to_tpt_access(int a)
{
return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) |
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 38bddd02a943..d5c92fc520d6 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -790,21 +790,57 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
return 0;
}
-static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
+static int ib_to_fw_opcode(int ib_opcode)
+{
+ int opcode;
+
+ switch (ib_opcode) {
+ case IB_WR_SEND_WITH_INV:
+ opcode = FW_RI_SEND_WITH_INV;
+ break;
+ case IB_WR_SEND:
+ opcode = FW_RI_SEND;
+ break;
+ case IB_WR_RDMA_WRITE:
+ opcode = FW_RI_RDMA_WRITE;
+ break;
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_READ_WITH_INV:
+ opcode = FW_RI_READ_REQ;
+ break;
+ case IB_WR_REG_MR:
+ opcode = FW_RI_FAST_REGISTER;
+ break;
+ case IB_WR_LOCAL_INV:
+ opcode = FW_RI_LOCAL_INV;
+ break;
+ default:
+ opcode = -EINVAL;
+ }
+ return opcode;
+}
+
+static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
{
struct t4_cqe cqe = {};
struct c4iw_cq *schp;
unsigned long flag;
struct t4_cq *cq;
+ int opcode;
schp = to_c4iw_cq(qhp->ibqp.send_cq);
cq = &schp->cq;
+ opcode = ib_to_fw_opcode(wr->opcode);
+ if (opcode < 0)
+ return opcode;
+
cqe.u.drain_cookie = wr->wr_id;
cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
- CQE_OPCODE_V(C4IW_DRAIN_OPCODE) |
+ CQE_OPCODE_V(opcode) |
CQE_TYPE_V(1) |
CQE_SWCQE_V(1) |
+ CQE_DRAIN_V(1) |
CQE_QPID_V(qhp->wq.sq.qid));
spin_lock_irqsave(&schp->lock, flag);
@@ -819,6 +855,23 @@ static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
schp->ibcq.cq_context);
spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
}
+ return 0;
+}
+
+static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ int ret = 0;
+
+ while (wr) {
+ ret = complete_sq_drain_wr(qhp, wr);
+ if (ret) {
+ *bad_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+ return ret;
}
static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
@@ -833,9 +886,10 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
cqe.u.drain_cookie = wr->wr_id;
cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
- CQE_OPCODE_V(C4IW_DRAIN_OPCODE) |
+ CQE_OPCODE_V(FW_RI_SEND) |
CQE_TYPE_V(0) |
CQE_SWCQE_V(1) |
+ CQE_DRAIN_V(1) |
CQE_QPID_V(qhp->wq.sq.qid));
spin_lock_irqsave(&rchp->lock, flag);
@@ -852,6 +906,14 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
}
}
+static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
+{
+ while (wr) {
+ complete_rq_drain_wr(qhp, wr);
+ wr = wr->next;
+ }
+}
+
int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
@@ -875,7 +937,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
*/
if (qhp->wq.flushed) {
spin_unlock_irqrestore(&qhp->lock, flag);
- complete_sq_drain_wr(qhp, wr);
+ err = complete_sq_drain_wrs(qhp, wr, bad_wr);
return err;
}
num_wrs = t4_sq_avail(&qhp->wq);
@@ -1023,7 +1085,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
*/
if (qhp->wq.flushed) {
spin_unlock_irqrestore(&qhp->lock, flag);
- complete_rq_drain_wr(qhp, wr);
+ complete_rq_drain_wrs(qhp, wr);
return err;
}
num_wrs = t4_rq_avail(&qhp->wq);
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index e9ea94268d51..79e8ee12c391 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -197,6 +197,11 @@ struct t4_cqe {
#define CQE_SWCQE_G(x) ((((x) >> CQE_SWCQE_S)) & CQE_SWCQE_M)
#define CQE_SWCQE_V(x) ((x)<<CQE_SWCQE_S)
+#define CQE_DRAIN_S 10
+#define CQE_DRAIN_M 0x1
+#define CQE_DRAIN_G(x) ((((x) >> CQE_DRAIN_S)) & CQE_DRAIN_M)
+#define CQE_DRAIN_V(x) ((x)<<CQE_DRAIN_S)
+
#define CQE_STATUS_S 5
#define CQE_STATUS_M 0x1F
#define CQE_STATUS_G(x) ((((x) >> CQE_STATUS_S)) & CQE_STATUS_M)
@@ -213,6 +218,7 @@ struct t4_cqe {
#define CQE_OPCODE_V(x) ((x)<<CQE_OPCODE_S)
#define SW_CQE(x) (CQE_SWCQE_G(be32_to_cpu((x)->header)))
+#define DRAIN_CQE(x) (CQE_DRAIN_G(be32_to_cpu((x)->header)))
#define CQE_QPID(x) (CQE_QPID_G(be32_to_cpu((x)->header)))
#define CQE_TYPE(x) (CQE_TYPE_G(be32_to_cpu((x)->header)))
#define SQ_TYPE(x) (CQE_TYPE((x)))
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 4a9b4d7efe63..8ce9118d4a7f 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1131,7 +1131,6 @@ struct hfi1_devdata {
u16 pcie_lnkctl;
u16 pcie_devctl2;
u32 pci_msix0;
- u32 pci_lnkctl3;
u32 pci_tph2;
/*
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 09e50fd2a08f..8c7e7a60b715 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -411,15 +411,12 @@ int restore_pci_variables(struct hfi1_devdata *dd)
if (ret)
goto error;
- ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
- dd->pci_lnkctl3);
- if (ret)
- goto error;
-
- ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2);
- if (ret)
- goto error;
-
+ if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
+ ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2,
+ dd->pci_tph2);
+ if (ret)
+ goto error;
+ }
return 0;
error:
@@ -469,15 +466,12 @@ int save_pci_variables(struct hfi1_devdata *dd)
if (ret)
goto error;
- ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
- &dd->pci_lnkctl3);
- if (ret)
- goto error;
-
- ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2);
- if (ret)
- goto error;
-
+ if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
+ ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2,
+ &dd->pci_tph2);
+ if (ret)
+ goto error;
+ }
return 0;
error:
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 470995fa38d2..6f6712f87a73 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -47,17 +47,6 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
return err;
}
-int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
- bool reset, void *out, int out_size)
-{
- u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
-
- MLX5_SET(query_cong_statistics_in, in, opcode,
- MLX5_CMD_OP_QUERY_CONG_STATISTICS);
- MLX5_SET(query_cong_statistics_in, in, clear, reset);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
-}
-
int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
void *out, int out_size)
{
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index af4c24596274..78ffded7cc2c 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -37,8 +37,6 @@
#include <linux/mlx5/driver.h>
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
-int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
- bool reset, void *out, int out_size);
int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
void *out, int out_size);
int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 543d0a4c8bf3..8ac50de2b242 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1463,6 +1463,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
}
INIT_LIST_HEAD(&context->vma_private_list);
+ mutex_init(&context->vma_private_list_mutex);
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
@@ -1624,7 +1625,9 @@ static void mlx5_ib_vma_close(struct vm_area_struct *area)
* mlx5_ib_disassociate_ucontext().
*/
mlx5_ib_vma_priv_data->vma = NULL;
+ mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
list_del(&mlx5_ib_vma_priv_data->list);
+ mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
kfree(mlx5_ib_vma_priv_data);
}
@@ -1644,10 +1647,13 @@ static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
return -ENOMEM;
vma_prv->vma = vma;
+ vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex;
vma->vm_private_data = vma_prv;
vma->vm_ops = &mlx5_ib_vm_ops;
+ mutex_lock(&ctx->vma_private_list_mutex);
list_add(&vma_prv->list, vma_head);
+ mutex_unlock(&ctx->vma_private_list_mutex);
return 0;
}
@@ -1690,6 +1696,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
* mlx5_ib_vma_close.
*/
down_write(&owning_mm->mmap_sem);
+ mutex_lock(&context->vma_private_list_mutex);
list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
list) {
vma = vma_private->vma;
@@ -1704,6 +1711,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
list_del(&vma_private->list);
kfree(vma_private);
}
+ mutex_unlock(&context->vma_private_list_mutex);
up_write(&owning_mm->mmap_sem);
mmput(owning_mm);
put_task_struct(owning_process);
@@ -3737,34 +3745,6 @@ free:
return ret;
}
-static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev,
- struct mlx5_ib_port *port,
- struct rdma_hw_stats *stats)
-{
- int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
- void *out;
- int ret, i;
- int offset = port->cnts.num_q_counters;
-
- out = kvzalloc(outlen, GFP_KERNEL);
- if (!out)
- return -ENOMEM;
-
- ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen);
- if (ret)
- goto free;
-
- for (i = 0; i < port->cnts.num_cong_counters; i++) {
- stats->value[i + offset] =
- be64_to_cpup((__be64 *)(out +
- port->cnts.offsets[i + offset]));
- }
-
-free:
- kvfree(out);
- return ret;
-}
-
static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
u8 port_num, int index)
@@ -3782,7 +3762,12 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
num_counters = port->cnts.num_q_counters;
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- ret = mlx5_ib_query_cong_counters(dev, port, stats);
+ ret = mlx5_lag_query_cong_counters(dev->mdev,
+ stats->value +
+ port->cnts.num_q_counters,
+ port->cnts.num_cong_counters,
+ port->cnts.offsets +
+ port->cnts.num_q_counters);
if (ret)
return ret;
num_counters += port->cnts.num_cong_counters;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 6dd8cac78de2..2c5f3533bbc9 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -115,6 +115,8 @@ enum {
struct mlx5_ib_vma_private_data {
struct list_head list;
struct vm_area_struct *vma;
+ /* protect vma_private_list add/del */
+ struct mutex *vma_private_list_mutex;
};
struct mlx5_ib_ucontext {
@@ -129,6 +131,8 @@ struct mlx5_ib_ucontext {
/* Transport Domain number */
u32 tdn;
struct list_head vma_private_list;
+ /* protect vma_private_list add/del */
+ struct mutex vma_private_list_mutex;
unsigned long upd_xlt_page;
/* protect ODP/KSM */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index ee0ee1f9994b..d109fe8290a7 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1637,6 +1637,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
MLX5_SET(mkc, mkc, umr_en, 1);
+ mr->ibmr.device = pd->device;
err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
if (err)
goto err_destroy_psv;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 63bc2efc34eb..4f7bd3b6a315 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -94,7 +94,7 @@ struct pvrdma_cq {
u32 cq_handle;
bool is_kernel;
atomic_t refcnt;
- wait_queue_head_t wait;
+ struct completion free;
};
struct pvrdma_id_table {
@@ -175,7 +175,7 @@ struct pvrdma_srq {
u32 srq_handle;
int npages;
refcount_t refcnt;
- wait_queue_head_t wait;
+ struct completion free;
};
struct pvrdma_qp {
@@ -197,7 +197,7 @@ struct pvrdma_qp {
bool is_kernel;
struct mutex mutex; /* QP state mutex. */
atomic_t refcnt;
- wait_queue_head_t wait;
+ struct completion free;
};
struct pvrdma_dev {
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index 3562c0c30492..e529622cefad 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -179,7 +179,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);
atomic_set(&cq->refcnt, 1);
- init_waitqueue_head(&cq->wait);
+ init_completion(&cq->free);
spin_lock_init(&cq->cq_lock);
memset(cmd, 0, sizeof(*cmd));
@@ -230,8 +230,9 @@ err_cq:
static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
{
- atomic_dec(&cq->refcnt);
- wait_event(cq->wait, !atomic_read(&cq->refcnt));
+ if (atomic_dec_and_test(&cq->refcnt))
+ complete(&cq->free);
+ wait_for_completion(&cq->free);
if (!cq->is_kernel)
ib_umem_release(cq->umem);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 1f4e18717a00..e92681878c93 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -346,9 +346,8 @@ static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
ibqp->event_handler(&e, ibqp->qp_context);
}
if (qp) {
- atomic_dec(&qp->refcnt);
- if (atomic_read(&qp->refcnt) == 0)
- wake_up(&qp->wait);
+ if (atomic_dec_and_test(&qp->refcnt))
+ complete(&qp->free);
}
}
@@ -373,9 +372,8 @@ static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
ibcq->event_handler(&e, ibcq->cq_context);
}
if (cq) {
- atomic_dec(&cq->refcnt);
- if (atomic_read(&cq->refcnt) == 0)
- wake_up(&cq->wait);
+ if (atomic_dec_and_test(&cq->refcnt))
+ complete(&cq->free);
}
}
@@ -404,7 +402,7 @@ static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type)
}
if (srq) {
if (refcount_dec_and_test(&srq->refcnt))
- wake_up(&srq->wait);
+ complete(&srq->free);
}
}
@@ -539,9 +537,8 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
if (cq && cq->ibcq.comp_handler)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
if (cq) {
- atomic_dec(&cq->refcnt);
- if (atomic_read(&cq->refcnt))
- wake_up(&cq->wait);
+ if (atomic_dec_and_test(&cq->refcnt))
+ complete(&cq->free);
}
pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 10420a18d02f..4059308e1454 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -246,7 +246,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
spin_lock_init(&qp->rq.lock);
mutex_init(&qp->mutex);
atomic_set(&qp->refcnt, 1);
- init_waitqueue_head(&qp->wait);
+ init_completion(&qp->free);
qp->state = IB_QPS_RESET;
@@ -428,8 +428,16 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp)
pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
- atomic_dec(&qp->refcnt);
- wait_event(qp->wait, !atomic_read(&qp->refcnt));
+ if (atomic_dec_and_test(&qp->refcnt))
+ complete(&qp->free);
+ wait_for_completion(&qp->free);
+
+ if (!qp->is_kernel) {
+ if (qp->rumem)
+ ib_umem_release(qp->rumem);
+ if (qp->sumem)
+ ib_umem_release(qp->sumem);
+ }
pvrdma_page_dir_cleanup(dev, &qp->pdir);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index 826ccb864596..5acebb1ef631 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -149,7 +149,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd,
spin_lock_init(&srq->lock);
refcount_set(&srq->refcnt, 1);
- init_waitqueue_head(&srq->wait);
+ init_completion(&srq->free);
dev_dbg(&dev->pdev->dev,
"create shared receive queue from user space\n");
@@ -236,8 +236,9 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq)
dev->srq_tbl[srq->srq_handle] = NULL;
spin_unlock_irqrestore(&dev->srq_tbl_lock, flags);
- refcount_dec(&srq->refcnt);
- wait_event(srq->wait, !refcount_read(&srq->refcnt));
+ if (refcount_dec_and_test(&srq->refcnt))
+ complete(&srq->free);
+ wait_for_completion(&srq->free);
/* There is no support for kernel clients, so this is safe. */
ib_umem_release(srq->umem);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 3b96cdaf9a83..e6151a29c412 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1236,13 +1236,10 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
ipoib_ib_dev_down(dev);
if (level == IPOIB_FLUSH_HEAVY) {
- rtnl_lock();
if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
ipoib_ib_dev_stop(dev);
- result = ipoib_ib_dev_open(dev);
- rtnl_unlock();
- if (result)
+ if (ipoib_ib_dev_open(dev))
return;
if (netif_queue_stopped(dev))
@@ -1282,7 +1279,9 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work)
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, flush_heavy);
+ rtnl_lock();
__ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0);
+ rtnl_unlock();
}
void ipoib_ib_dev_cleanup(struct net_device *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index f26f97fe4666..582b2f18010a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -137,6 +137,17 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
+static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
+ bool reset, void *out, int out_size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
+
+ MLX5_SET(query_cong_statistics_in, in, opcode,
+ MLX5_CMD_OP_QUERY_CONG_STATISTICS);
+ MLX5_SET(query_cong_statistics_in, in, clear, reset);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
+}
+
static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev)
{
return dev->priv.lag;
@@ -633,3 +644,48 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
/* If bonded, we do not add an IB device for PF1. */
return false;
}
+
+int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
+ u64 *values,
+ int num_counters,
+ size_t *offsets)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
+ struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
+ struct mlx5_lag *ldev;
+ int num_ports;
+ int ret, i, j;
+ void *out;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ memset(values, 0, sizeof(*values) * num_counters);
+
+ mutex_lock(&lag_mutex);
+ ldev = mlx5_lag_dev_get(dev);
+ if (ldev && mlx5_lag_is_bonded(ldev)) {
+ num_ports = MLX5_MAX_PORTS;
+ mdev[0] = ldev->pf[0].dev;
+ mdev[1] = ldev->pf[1].dev;
+ } else {
+ num_ports = 1;
+ mdev[0] = dev;
+ }
+
+ for (i = 0; i < num_ports; ++i) {
+ ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen);
+ if (ret)
+ goto unlock;
+
+ for (j = 0; j < num_counters; ++j)
+ values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
+ }
+
+unlock:
+ mutex_unlock(&lag_mutex);
+ kvfree(out);
+ return ret;
+}
+EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 57b109c6e422..1f509d072026 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1165,6 +1165,10 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
+int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
+ u64 *values,
+ int num_counters,
+ size_t *offsets);
struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);