aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaike Wan2019-01-23 21:51:49 -0800
committerDoug Ledford2019-02-05 18:07:44 -0500
commitc6c231175ccdf188d443c27e5456b9e2f65e44d4 (patch)
treecc918e4b107e56d44cfc9a99ff1fa032e0abb006
parent3c6cb20a0d17d7a75778fb0935d6fa427c8177af (diff)
IB/hfi1: Add interlock between TID RDMA WRITE and other requests
This locking mechanism is designed to prevent various memory corruption scenarios from occurring when requests are pipelined, especially when RDMA WRITE requests are interleaved with TID RDMA READ requests: 1. READ-AFTER-READ; 2. READ-AFTER-WRITE; 3. WRITE-AFTER-READ; 4. WRITE-AFTER-WRITE. When memory corruption is likely, a request will be held back until previous requests have been completed. Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com> Signed-off-by: Kaike Wan <kaike.wan@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c6
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.c46
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.h9
3 files changed, 59 insertions, 2 deletions
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 6d2abea896e5..cfb863364f50 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -173,6 +173,12 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
}
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+ /* Check for tid write fence */
+ if ((qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK) ||
+ hfi1_tid_rdma_ack_interlock(qp, e)) {
+ iowait_set_flag(&qpriv->s_iowait, IOWAIT_PENDING_IB);
+ goto bail;
+ }
if (e->opcode == OP(RDMA_READ_REQUEST)) {
/*
* If a RDMA read response is being resent and
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 490e47a0f68b..286752011f25 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -2179,6 +2179,7 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
req->state = TID_REQUEST_RESEND;
req->cur_seg = req->comp_seg;
}
+ qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
}
/* Re-process old requests.*/
if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
@@ -3229,6 +3230,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
struct rvt_swqe *prev;
struct hfi1_qp_priv *priv = qp->priv;
u32 s_prev;
+ struct tid_rdma_request *req;
s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
prev = rvt_get_swqe_ptr(qp, s_prev);
@@ -3240,14 +3242,28 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
case IB_WR_RDMA_WRITE:
+ switch (prev->wr.opcode) {
+ case IB_WR_TID_RDMA_WRITE:
+ req = wqe_to_tid_req(prev);
+ if (req->ack_seg != req->total_segs)
+ goto interlock;
+ default:
+ break;
+ }
case IB_WR_RDMA_READ:
- break;
+ if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
+ break;
+ /* fall through */
case IB_WR_TID_RDMA_READ:
switch (prev->wr.opcode) {
case IB_WR_RDMA_READ:
if (qp->s_acked != qp->s_cur)
goto interlock;
break;
+ case IB_WR_TID_RDMA_WRITE:
+ req = wqe_to_tid_req(prev);
+ if (req->ack_seg != req->total_segs)
+ goto interlock;
default:
break;
}
@@ -5157,7 +5173,9 @@ static int make_tid_rdma_ack(struct rvt_qp *qp,
e = &qp->s_ack_queue[qpriv->r_tid_ack];
req = ack_to_tid_req(e);
flow = req->acked_tail;
- }
+ } else if (req->ack_seg == req->total_segs &&
+ qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK)
+ qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
hwords += hfi1_build_tid_rdma_write_ack(qp, e, ohdr, flow, &bth1,
&bth2);
@@ -5310,3 +5328,27 @@ bool hfi1_schedule_tid_send(struct rvt_qp *qp)
IOWAIT_PENDING_TID);
return false;
}
+
+bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e)
+{
+ struct rvt_ack_entry *prev;
+ struct tid_rdma_request *req;
+ struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
+ struct hfi1_qp_priv *priv = qp->priv;
+ u32 s_prev;
+
+ s_prev = qp->s_tail_ack_queue == 0 ? rvt_size_atomic(&dev->rdi) :
+ (qp->s_tail_ack_queue - 1);
+ prev = &qp->s_ack_queue[s_prev];
+
+ if ((e->opcode == TID_OP(READ_REQ) ||
+ e->opcode == OP(RDMA_READ_REQUEST)) &&
+ prev->opcode == TID_OP(WRITE_REQ)) {
+ req = ack_to_tid_req(prev);
+ if (req->ack_seg != req->total_segs) {
+ priv->s_flags |= HFI1_R_TID_WAIT_INTERLCK;
+ return true;
+ }
+ }
+ return false;
+}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index 7f8f17ba6c14..44468188a374 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -25,6 +25,7 @@
* s_flags, there are no collisions.
*
* HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
+ * HFI1_R_TID_WAIT_INTERLCK - QP is waiting for responder interlock
*/
#define HFI1_S_TID_BUSY_SET BIT(0)
/* BIT(1) reserved for RVT_S_BUSY. */
@@ -32,9 +33,15 @@
/* BIT(3) reserved for RVT_S_RESP_PENDING. */
/* BIT(4) reserved for RVT_S_ACK_PENDING. */
#define HFI1_S_TID_WAIT_INTERLCK BIT(5)
+#define HFI1_R_TID_WAIT_INTERLCK BIT(6)
/* BIT(7) - BIT(15) reserved for RVT_S_WAIT_*. */
+/* BIT(16) reserved for RVT_S_SEND_ONE */
#define HFI1_S_TID_RETRY_TIMER BIT(17)
+/* BIT(18) reserved for RVT_S_ECN. */
#define HFI1_R_TID_SW_PSN BIT(19)
+/* BIT(26) reserved for HFI1_S_WAIT_HALT */
+/* BIT(27) reserved for HFI1_S_WAIT_TID_RESP */
+/* BIT(28) reserved for HFI1_S_WAIT_TID_SPACE */
/*
* Unlike regular IB RDMA VERBS, which do not require an entry
@@ -309,4 +316,6 @@ void _hfi1_do_tid_send(struct work_struct *work);
bool hfi1_schedule_tid_send(struct rvt_qp *qp);
+bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e);
+
#endif /* HFI1_TID_RDMA_H */