From e463c7b197dbe64b8a99b0612c65f286937e5bf1 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 29 Apr 2008 13:46:50 -0700 Subject: mlx4_core: Add a way to set the "collapsed" CQ flag Extend the mlx4_cq_resize() API with a way to set the "collapsed" flag for the CQ being created. Signed-off-by: Yevgeny Petrilin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/cq.c | 2 +- drivers/net/mlx4/cq.c | 4 +++- include/linux/mlx4/device.h | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index e3dddfc687f9..2f199c5c4a72 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -221,7 +221,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector } err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, - cq->db.dma, &cq->mcq); + cq->db.dma, &cq->mcq, 0); if (err) goto err_dbmap; diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c index 6fda0af9d0a6..95e87a2f8896 100644 --- a/drivers/net/mlx4/cq.c +++ b/drivers/net/mlx4/cq.c @@ -188,7 +188,8 @@ int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq, EXPORT_SYMBOL_GPL(mlx4_cq_resize); int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, - struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq) + struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, + int collapsed) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cq_table *cq_table = &priv->cq_table; @@ -224,6 +225,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, cq_context = mailbox->buf; memset(cq_context, 0, sizeof *cq_context); + cq_context->flags = cpu_to_be32(!!collapsed << 18); cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index); cq_context->comp_eqn = priv->eq_table.eq[MLX4_EQ_COMP].eqn; cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 9fa1a8002ce2..a744383d16e9 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -382,7 +382,8 @@ void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres, int size); int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, - struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq); + struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, + int collapsed); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp); -- cgit v1.2.3 From 989a1780698c65dfe093a6aa89ceeff84c31f528 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 29 Apr 2008 13:46:51 -0700 Subject: RDMA/cxgb3: Correctly serialize peer abort path Open MPI and other stress testing exposed a few bad bugs in handling aborts in the middle of a normal close. Fix these by: - serializing abort reply and peer abort processing with disconnect processing - warning (and ignoring) if ep timer is stopped when it wasn't running - cleaning up disconnect path to correctly deal with aborting and dead endpoints - in iwch_modify_qp(), taking a ref on the ep before releasing the qp lock if iwch_ep_disconnect() will be called. The ref is dropped after calling disconnect. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 100 ++++++++++++++++++++++------------ drivers/infiniband/hw/cxgb3/iwch_cm.h | 1 + drivers/infiniband/hw/cxgb3/iwch_qp.c | 6 +- 3 files changed, 72 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 72ca360c3dbc..0b515d899f6c 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -125,6 +125,12 @@ static void start_ep_timer(struct iwch_ep *ep) static void stop_ep_timer(struct iwch_ep *ep) { PDBG("%s ep %p\n", __func__, ep); + if (!timer_pending(&ep->timer)) { + printk(KERN_ERR "%s timer stopped when its not running! ep %p state %u\n", + __func__, ep, ep->com.state); + WARN_ON(1); + return; + } del_timer_sync(&ep->timer); put_ep(&ep->com); } @@ -1083,8 +1089,11 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) { struct iwch_ep *ep = ctx; + unsigned long flags; + int release = 0; PDBG("%s ep %p\n", __func__, ep); + BUG_ON(!ep); /* * We get 2 abort replies from the HW. The first one must @@ -1095,9 +1104,22 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return CPL_RET_BUF_DONE; } - close_complete_upcall(ep); - state_set(&ep->com, DEAD); - release_ep_resources(ep); + spin_lock_irqsave(&ep->com.lock, flags); + switch (ep->com.state) { + case ABORTING: + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + break; + default: + printk(KERN_ERR "%s ep %p state %d\n", + __func__, ep, ep->com.state); + break; + } + spin_unlock_irqrestore(&ep->com.lock, flags); + + if (release) + release_ep_resources(ep); return CPL_RET_BUF_DONE; } @@ -1470,7 +1492,8 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct sk_buff *rpl_skb; struct iwch_qp_attributes attrs; int ret; - int state; + int release = 0; + unsigned long flags; if (is_neg_adv_abort(req->status)) { PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep, @@ -1488,9 +1511,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return CPL_RET_BUF_DONE; } - state = state_read(&ep->com); - PDBG("%s ep %p state %u\n", __func__, ep, state); - switch (state) { + spin_lock_irqsave(&ep->com.lock, flags); + PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state); + switch (ep->com.state) { case CONNECTING: break; case MPA_REQ_WAIT: @@ -1536,21 +1559,25 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) break; case DEAD: PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); + spin_unlock_irqrestore(&ep->com.lock, flags); return CPL_RET_BUF_DONE; default: BUG_ON(1); break; } dst_confirm(ep->dst); + if (ep->com.state != ABORTING) { + __state_set(&ep->com, DEAD); + release = 1; + } + spin_unlock_irqrestore(&ep->com.lock, flags); rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); if (!rpl_skb) { printk(KERN_ERR MOD "%s - cannot allocate skb!\n", __func__); - dst_release(ep->dst); - l2t_release(L2DATA(ep->com.tdev), ep->l2t); - put_ep(&ep->com); - return CPL_RET_BUF_DONE; + release = 1; + goto out; } rpl_skb->priority = CPL_PRIORITY_DATA; rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); @@ -1559,10 +1586,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); rpl->cmd = CPL_ABORT_NO_RST; cxgb3_ofld_send(ep->com.tdev, rpl_skb); - if (state != ABORTING) { - state_set(&ep->com, DEAD); +out: + if (release) release_ep_resources(ep); - } return CPL_RET_BUF_DONE; } @@ -1661,15 +1687,18 @@ static void ep_timeout(unsigned long arg) struct iwch_ep *ep = (struct iwch_ep *)arg; struct iwch_qp_attributes attrs; unsigned long flags; + int abort = 1; spin_lock_irqsave(&ep->com.lock, flags); PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, ep->com.state); switch (ep->com.state) { case MPA_REQ_SENT: + __state_set(&ep->com, ABORTING); connect_reply_upcall(ep, -ETIMEDOUT); break; case MPA_REQ_WAIT: + __state_set(&ep->com, ABORTING); break; case CLOSING: case MORIBUND: @@ -1679,13 +1708,17 @@ static void ep_timeout(unsigned long arg) ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); } + __state_set(&ep->com, ABORTING); break; default: - BUG(); + printk(KERN_ERR "%s unexpected state ep %p state %u\n", + __func__, ep, ep->com.state); + WARN_ON(1); + abort = 0; } - __state_set(&ep->com, CLOSING); spin_unlock_irqrestore(&ep->com.lock, flags); - abort_connection(ep, NULL, GFP_ATOMIC); + if (abort) + abort_connection(ep, NULL, GFP_ATOMIC); put_ep(&ep->com); } @@ -1968,40 +2001,39 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, states[ep->com.state], abrupt); - if (ep->com.state == DEAD) { - PDBG("%s already dead ep %p\n", __func__, ep); - goto out; - } - - if (abrupt) { - if (ep->com.state != ABORTING) { - ep->com.state = ABORTING; - close = 1; - } - goto out; - } - switch (ep->com.state) { case MPA_REQ_WAIT: case MPA_REQ_SENT: case MPA_REQ_RCVD: case MPA_REP_SENT: case FPDU_MODE: - start_ep_timer(ep); - ep->com.state = CLOSING; close = 1; + if (abrupt) + ep->com.state = ABORTING; + else { + ep->com.state = CLOSING; + start_ep_timer(ep); + } break; case CLOSING: - ep->com.state = MORIBUND; close = 1; + if (abrupt) { + stop_ep_timer(ep); + ep->com.state = ABORTING; + } else + ep->com.state = MORIBUND; break; case MORIBUND: + case ABORTING: + case DEAD: + PDBG("%s ignoring disconnect ep %p state %u\n", + __func__, ep, ep->com.state); break; default: BUG(); break; } -out: + spin_unlock_irqrestore(&ep->com.lock, flags); if (close) { if (abrupt) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h index 2bb7fbdb3ff4..a2f1b787d970 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h @@ -56,6 +56,7 @@ #define put_ep(ep) { \ PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ ep, atomic_read(&((ep)->kref.refcount))); \ + WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \ kref_put(&((ep)->kref), __free_ep); \ } diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 8891c3b0a3d5..6cd484e11c11 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -832,6 +832,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, abort=0; disconnect = 1; ep = qhp->ep; + get_ep(&ep->com); } flush_qp(qhp, &flag); break; @@ -848,6 +849,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, abort=1; disconnect = 1; ep = qhp->ep; + get_ep(&ep->com); } goto err; break; @@ -929,8 +931,10 @@ out: * on the EP. This can be a normal close (RTS->CLOSING) or * an abnormal close (RTS/CLOSING->ERROR). */ - if (disconnect) + if (disconnect) { iwch_ep_disconnect(ep, abort, GFP_KERNEL); + put_ep(&ep->com); + } /* * If free is 1, then we've disassociated the EP from the QP -- cgit v1.2.3 From ccaf10d0ad17bf755750160ebe594de7261a893e Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 29 Apr 2008 13:46:52 -0700 Subject: RDMA/cxgb3: Set the max_mr_size device attribute correctly cxgb3 only supports 4GB memory regions. The lustre RDMA code uses this attribute and currently has to code around our bad setting. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_hal.h | 1 + drivers/infiniband/hw/cxgb3/iwch.c | 1 + drivers/infiniband/hw/cxgb3/iwch.h | 1 + drivers/infiniband/hw/cxgb3/iwch_provider.c | 2 +- 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index 99543d634704..2bcff7f5046e 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -53,6 +53,7 @@ #define T3_MAX_PBL_SIZE 256 #define T3_MAX_RQ_SIZE 1024 #define T3_MAX_NUM_STAG (1<<15) +#define T3_MAX_MR_SIZE 0x100000000ULL #define T3_STAG_UNSET 0xffffffff diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 6ba4138c8ec3..71554eacb13c 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -83,6 +83,7 @@ static void rnic_init(struct iwch_dev *rnicp) rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE; rnicp->attr.max_pds = T3_MAX_NUM_PD - 1; rnicp->attr.mem_pgsizes_bitmask = 0x7FFF; /* 4KB-128MB */ + rnicp->attr.max_mr_size = T3_MAX_MR_SIZE; rnicp->attr.can_resize_wq = 0; rnicp->attr.max_rdma_reads_per_qp = 8; rnicp->attr.max_rdma_read_resources = diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index 9ad9b1e7c8c1..d2409a505e8d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -66,6 +66,7 @@ struct iwch_rnic_attributes { * size (4k)^i. Phys block list mode unsupported. */ u32 mem_pgsizes_bitmask; + u64 max_mr_size; u8 can_resize_wq; /* diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index e343e9e64844..d07d3a377b5f 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -998,7 +998,7 @@ static int iwch_query_device(struct ib_device *ibdev, props->device_cap_flags = dev->device_cap_flags; props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor; props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device; - props->max_mr_size = ~0ull; + props->max_mr_size = dev->attr.max_mr_size; props->max_qp = dev->attr.max_qps; props->max_qp_wr = dev->attr.max_wrs; props->max_sge = dev->attr.max_sge_per_wr; -- cgit v1.2.3 From f8b0dfd15277974b5c9f3ff17f9e3ab6fdbe45ee Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 29 Apr 2008 13:46:52 -0700 Subject: RDMA/cxgb3: Support peer-2-peer connection setup Open MPI, Intel MPI and other applications don't respect the iWARP requirement that the client (active) side of the connection send the first RDMA message. This class of application connection setup is called peer-to-peer. Typically once the connection is setup, _both_ sides want to send data. This patch enables supporting peer-to-peer over the chelsio RNIC by enforcing this iWARP requirement in the driver itself as part of RDMA connection setup. Connection setup is extended, when the peer2peer module option is 1, such that the MPA initiator will send a 0B Read (the RTR) just after connection setup. The MPA responder will suspend SQ processing until the RTR message is received and reply-to. In the longer term, this will be handled in a standardized way by enhancing the MPA negotiation so peers can indicate whether they want/need the RTR and what type of RTR (0B read, 0B write, or 0B send) should be sent. This will be done by standardizing a few bits of the private data in order to negotiate all this. However this patch enables peer-to-peer applications now and allows most of the required firmware and driver changes to be done and tested now. Design: - Add a module option, peer2peer, to enable this mode. - New firmware support for peer-to-peer mode: - a new bit in the rdma_init WR to tell it to do peer-2-peer and what form of RTR message to send or expect. - process _all_ preposted recvs before moving the connection into rdma mode. - passive side: defer completing the rdma_init WR until all pre-posted recvs are processed. Suspend SQ processing until the RTR is received. - active side: expect and process the 0B read WR on offload TX queue. Defer completing the rdma_init WR until all pre-posted recvs are processed. Suspend SQ processing until the 0B read WR is processed from the offload TX queue. - If peer2peer is set, driver posts 0B read request on offload TX queue just after posting the rdma_init WR to the offload TX queue. - Add CQ poll logic to ignore unsolicitied read responses. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_hal.c | 18 +++++++- drivers/infiniband/hw/cxgb3/cxio_wr.h | 21 +++++++-- drivers/infiniband/hw/cxgb3/iwch_cm.c | 67 +++++++++++++++++++++-------- drivers/infiniband/hw/cxgb3/iwch_cm.h | 1 + drivers/infiniband/hw/cxgb3/iwch_provider.h | 3 ++ drivers/infiniband/hw/cxgb3/iwch_qp.c | 54 +++++++++++++++++++++-- drivers/net/cxgb3/version.h | 2 +- 7 files changed, 137 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 66eb7030aea8..ed2ee4ba4b7c 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -456,7 +456,8 @@ void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count) ptr = cq->sw_rptr; while (!Q_EMPTY(ptr, cq->sw_wptr)) { cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); - if ((SQ_TYPE(*cqe) || (CQE_OPCODE(*cqe) == T3_READ_RESP)) && + if ((SQ_TYPE(*cqe) || + ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) && (CQE_QPID(*cqe) == wq->qpid)) (*count)++; ptr++; @@ -829,7 +830,8 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) wqe->mpaattrs = attr->mpaattrs; wqe->qpcaps = attr->qpcaps; wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss); - wqe->flags = cpu_to_be32(attr->flags); + wqe->rqe_count = cpu_to_be16(attr->rqe_count); + wqe->flags_rtr_type = cpu_to_be16(attr->flags|V_RTR_TYPE(attr->rtr_type)); wqe->ord = cpu_to_be32(attr->ord); wqe->ird = cpu_to_be32(attr->ird); wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr); @@ -1134,6 +1136,18 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, */ if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) { + /* + * If this is an unsolicited read response, then the read + * was generated by the kernel driver as part of peer-2-peer + * connection setup. So ignore the completion. + */ + if (!wq->oldest_read) { + if (CQE_STATUS(*hw_cqe)) + wq->error = 1; + ret = -1; + goto skip_cqe; + } + /* * Don't write to the HWCQ, so create a new read req CQE * in local memory. diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index 969d4d928455..f1a25a821a45 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -278,6 +278,17 @@ enum t3_qp_caps { uP_RI_QP_STAG0_ENABLE = 0x10 } __attribute__ ((packed)); +enum rdma_init_rtr_types { + RTR_READ = 1, + RTR_WRITE = 2, + RTR_SEND = 3, +}; + +#define S_RTR_TYPE 2 +#define M_RTR_TYPE 0x3 +#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE) +#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE) + struct t3_rdma_init_attr { u32 tid; u32 qpid; @@ -293,7 +304,9 @@ struct t3_rdma_init_attr { u32 ird; u64 qp_dma_addr; u32 qp_dma_size; - u32 flags; + enum rdma_init_rtr_types rtr_type; + u16 flags; + u16 rqe_count; u32 irs; }; @@ -309,8 +322,8 @@ struct t3_rdma_init_wr { u8 mpaattrs; /* 5 */ u8 qpcaps; __be16 ulpdu_size; - __be32 flags; /* bits 31-1 - reservered */ - /* bit 0 - set if RECV posted */ + __be16 flags_rtr_type; + __be16 rqe_count; __be32 ord; /* 6 */ __be32 ird; __be64 qp_dma_addr; /* 7 */ @@ -324,7 +337,7 @@ struct t3_genbit { }; enum rdma_init_wr_flags { - RECVS_POSTED = (1<<0), + MPA_INITIATOR = (1<<0), PRIV_QP = (1<<1), }; diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 0b515d899f6c..d44a6df9ad8c 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -63,6 +63,10 @@ static char *states[] = { NULL, }; +int peer2peer = 0; +module_param(peer2peer, int, 0644); +MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); + static int ep_timeout_secs = 10; module_param(ep_timeout_secs, int, 0644); MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " @@ -514,7 +518,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) skb_reset_transport_header(skb); len = skb->len; req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); + req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); req->wr_lo = htonl(V_WR_TID(ep->hwtid)); req->len = htonl(len); req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | @@ -565,7 +569,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) set_arp_failure_handler(skb, arp_failure_discard); skb_reset_transport_header(skb); req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); + req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); req->wr_lo = htonl(V_WR_TID(ep->hwtid)); req->len = htonl(mpalen); req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | @@ -617,7 +621,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) skb_reset_transport_header(skb); len = skb->len; req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); + req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); req->wr_lo = htonl(V_WR_TID(ep->hwtid)); req->len = htonl(len); req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | @@ -885,6 +889,7 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) * the MPA header is valid. */ state_set(&ep->com, FPDU_MODE); + ep->mpa_attr.initiator = 1; ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; @@ -907,8 +912,14 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) /* bind QP and TID with INIT_WR */ err = iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); - if (!err) - goto out; + if (err) + goto err; + + if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) { + iwch_post_zb_read(ep->com.qp); + } + + goto out; err: abort_connection(ep, skb, GFP_KERNEL); out: @@ -1001,6 +1012,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb) * If we get here we have accumulated the entire mpa * start reply message including private data. */ + ep->mpa_attr.initiator = 0; ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; @@ -1071,17 +1083,33 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) PDBG("%s ep %p credits %u\n", __func__, ep, credits); - if (credits == 0) + if (credits == 0) { + PDBG(KERN_ERR "%s 0 credit ack ep %p state %u\n", + __func__, ep, state_read(&ep->com)); return CPL_RET_BUF_DONE; + } + BUG_ON(credits != 1); - BUG_ON(ep->mpa_skb == NULL); - kfree_skb(ep->mpa_skb); - ep->mpa_skb = NULL; dst_confirm(ep->dst); - if (state_read(&ep->com) == MPA_REP_SENT) { - ep->com.rpl_done = 1; - PDBG("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); + if (!ep->mpa_skb) { + PDBG("%s rdma_init wr_ack ep %p state %u\n", + __func__, ep, state_read(&ep->com)); + if (ep->mpa_attr.initiator) { + PDBG("%s initiator ep %p state %u\n", + __func__, ep, state_read(&ep->com)); + if (peer2peer) + iwch_post_zb_read(ep->com.qp); + } else { + PDBG("%s responder ep %p state %u\n", + __func__, ep, state_read(&ep->com)); + ep->com.rpl_done = 1; + wake_up(&ep->com.waitq); + } + } else { + PDBG("%s lsm ack ep %p state %u freeing skb\n", + __func__, ep, state_read(&ep->com)); + kfree_skb(ep->mpa_skb); + ep->mpa_skb = NULL; } return CPL_RET_BUF_DONE; } @@ -1795,16 +1823,19 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (err) goto err; + /* if needed, wait for wr_ack */ + if (iwch_rqes_posted(qp)) { + wait_event(ep->com.waitq, ep->com.rpl_done); + err = ep->com.rpl_err; + if (err) + goto err; + } + err = send_mpa_reply(ep, conn_param->private_data, conn_param->private_data_len); if (err) goto err; - /* wait for wr_ack */ - wait_event(ep->com.waitq, ep->com.rpl_done); - err = ep->com.rpl_err; - if (err) - goto err; state_set(&ep->com, FPDU_MODE); established_upcall(ep); diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h index a2f1b787d970..d7c7e09f0996 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h @@ -226,5 +226,6 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, st int __init iwch_cm_init(void); void __exit iwch_cm_term(void); +extern int peer2peer; #endif /* _IWCH_CM_H_ */ diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index 61356f91109d..db5100d27ca2 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -118,6 +118,7 @@ enum IWCH_QP_FLAGS { }; struct iwch_mpa_attributes { + u8 initiator; u8 recv_marker_enabled; u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. */ u8 crc_enabled; @@ -322,6 +323,7 @@ enum iwch_qp_query_flags { IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */ }; +u16 iwch_rqes_posted(struct iwch_qp *qhp); int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, @@ -331,6 +333,7 @@ int iwch_bind_mw(struct ib_qp *qp, struct ib_mw_bind *mw_bind); int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); +int iwch_post_zb_read(struct iwch_qp *qhp); int iwch_register_device(struct iwch_dev *dev); void iwch_unregister_device(struct iwch_dev *dev); int iwch_quiesce_qps(struct iwch_cq *chp); diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 6cd484e11c11..9b4be889c58e 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -586,6 +586,36 @@ static inline void build_term_codes(struct respQ_msg_t *rsp_msg, } } +int iwch_post_zb_read(struct iwch_qp *qhp) +{ + union t3_wr *wqe; + struct sk_buff *skb; + u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; + + PDBG("%s enter\n", __func__); + skb = alloc_skb(40, GFP_KERNEL); + if (!skb) { + printk(KERN_ERR "%s cannot send zb_read!!\n", __func__); + return -ENOMEM; + } + wqe = (union t3_wr *)skb_put(skb, sizeof(struct t3_rdma_read_wr)); + memset(wqe, 0, sizeof(struct t3_rdma_read_wr)); + wqe->read.rdmaop = T3_READ_REQ; + wqe->read.reserved[0] = 0; + wqe->read.reserved[1] = 0; + wqe->read.reserved[2] = 0; + wqe->read.rem_stag = cpu_to_be32(1); + wqe->read.rem_to = cpu_to_be64(1); + wqe->read.local_stag = cpu_to_be32(1); + wqe->read.local_len = cpu_to_be32(0); + wqe->read.local_to = cpu_to_be64(1); + wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ)); + wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)| + V_FW_RIWR_LEN(flit_cnt)); + skb->priority = CPL_PRIORITY_DATA; + return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); +} + /* * This posts a TERMINATE with layer=RDMA, type=catastrophic. */ @@ -671,11 +701,18 @@ static void flush_qp(struct iwch_qp *qhp, unsigned long *flag) /* - * Return non zero if at least one RECV was pre-posted. + * Return count of RECV WRs posted */ -static int rqes_posted(struct iwch_qp *qhp) +u16 iwch_rqes_posted(struct iwch_qp *qhp) { - return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV; + union t3_wr *wqe = qhp->wq.queue; + u16 count = 0; + while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { + count++; + wqe++; + } + PDBG("%s qhp %p count %u\n", __func__, qhp, count); + return count; } static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, @@ -716,8 +753,17 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, init_attr.ird = qhp->attr.max_ird; init_attr.qp_dma_addr = qhp->wq.dma_addr; init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); - init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0; + init_attr.rqe_count = iwch_rqes_posted(qhp); + init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0; init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0; + if (peer2peer) { + init_attr.rtr_type = RTR_READ; + if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) + init_attr.ord = 1; + if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator) + init_attr.ird = 1; + } else + init_attr.rtr_type = 0; init_attr.irs = qhp->ep->rcv_seq; PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d " "flags 0x%x qpcaps 0x%x\n", __func__, diff --git a/drivers/net/cxgb3/version.h b/drivers/net/cxgb3/version.h index 229303ff6a39..a0177fc55e28 100644 --- a/drivers/net/cxgb3/version.h +++ b/drivers/net/cxgb3/version.h @@ -38,7 +38,7 @@ #define DRV_VERSION "1.0-ko" /* Firmware version */ -#define FW_VERSION_MAJOR 5 +#define FW_VERSION_MAJOR 6 #define FW_VERSION_MINOR 0 #define FW_VERSION_MICRO 0 #endif /* __CHELSIO_VERSION_H */ -- cgit v1.2.3 From 7df109d917e85d3da2e25bd495c4997e87ed2a4e Mon Sep 17 00:00:00 2001 From: Hoang-Nam Nguyen Date: Tue, 29 Apr 2008 13:46:52 -0700 Subject: IB/ehca: handle negative return value from ibmebus_request_irq() properly ehca_create_eq() was assigning a signed return value to an unsiged local variable and then checking if the variable was < 0, which meant that errors were always ignored. Fix this by using one variable for signed integer return values and another for u64 hcall return values. Bug originally found by Roel Kluin <12o3l@tiscali.nl>. Signed-off-by: Hoang-Nam Nguyen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_eq.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index b4ac617a70e6..49660dfa1867 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -54,7 +54,8 @@ int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq, const enum ehca_eq_type type, const u32 length) { - u64 ret; + int ret; + u64 h_ret; u32 nr_pages; u32 i; void *vpage; @@ -73,15 +74,15 @@ int ehca_create_eq(struct ehca_shca *shca, return -EINVAL; } - ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, - &eq->pf, - type, - length, - &eq->ipz_eq_handle, - &eq->length, - &nr_pages, &eq->ist); + h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, + &eq->pf, + type, + length, + &eq->ipz_eq_handle, + &eq->length, + &nr_pages, &eq->ist); - if (ret != H_SUCCESS) { + if (h_ret != H_SUCCESS) { ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq); return -EINVAL; } @@ -97,24 +98,22 @@ int ehca_create_eq(struct ehca_shca *shca, u64 rpage; vpage = ipz_qpageit_get_inc(&eq->ipz_queue); - if (!vpage) { - ret = H_RESOURCE; + if (!vpage) goto create_eq_exit2; - } rpage = virt_to_abs(vpage); - ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, - eq->ipz_eq_handle, - &eq->pf, - 0, 0, rpage, 1); + h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, + eq->ipz_eq_handle, + &eq->pf, + 0, 0, rpage, 1); if (i == (nr_pages - 1)) { /* last page */ vpage = ipz_qpageit_get_inc(&eq->ipz_queue); - if (ret != H_SUCCESS || vpage) + if (h_ret != H_SUCCESS || vpage) goto create_eq_exit2; } else { - if (ret != H_PAGE_REGISTERED || !vpage) + if (h_ret != H_PAGE_REGISTERED || !vpage) goto create_eq_exit2; } } -- cgit v1.2.3 From 6f735e36bad6fa4949271b3c3d0f331aad812313 Mon Sep 17 00:00:00 2001 From: Eli Dorfman Date: Tue, 29 Apr 2008 13:46:52 -0700 Subject: IB/iser: Move high-volume debug output to higher debug level Add another level for debug. Signed-off-by: Eli Dorfman Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 7 +++++++ drivers/infiniband/ulp/iser/iser_memory.c | 7 +++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 1ee867b1b341..a8c1b300e34d 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -70,6 +70,13 @@ #define DRV_DATE "May 7th, 2006" #define iser_dbg(fmt, arg...) \ + do { \ + if (iser_debug_level > 1) \ + printk(KERN_DEBUG PFX "%s:" fmt,\ + __func__ , ## arg); \ + } while (0) + +#define iser_warn(fmt, arg...) \ do { \ if (iser_debug_level > 0) \ printk(KERN_DEBUG PFX "%s:" fmt,\ diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 4a17743a639f..ee58199136a8 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -334,8 +334,11 @@ static void iser_data_buf_dump(struct iser_data_buf *data, struct scatterlist *sg; int i; + if (iser_debug_level == 0) + return; + for_each_sg(sgl, sg, data->dma_nents, i) - iser_err("sg[%d] dma_addr:0x%lX page:0x%p " + iser_warn("sg[%d] dma_addr:0x%lX page:0x%p " "off:0x%x sz:0x%x dma_len:0x%x\n", i, (unsigned long)ib_sg_dma_address(ibdev, sg), sg_page(sg), sg->offset, @@ -434,7 +437,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem->dma_nents) { - iser_err("rdma alignment violation %d/%d aligned\n", + iser_warn("rdma alignment violation %d/%d aligned\n", aligned_len, mem->size); iser_data_buf_dump(mem, ibdev); -- cgit v1.2.3 From 87528227dfa8776d12779d073c217f0835fd6d20 Mon Sep 17 00:00:00 2001 From: Eli Dorfman Date: Tue, 29 Apr 2008 13:46:52 -0700 Subject: IB/iser: Count FMR alignment violations per session Count FMR alignment violations per session as part of the iscsi statistics. Signed-off-by: Eli Dorfman Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 4 +++- drivers/infiniband/ulp/iser/iser_memory.c | 2 ++ include/scsi/libiscsi.h | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index be1b9fbd416d..aeb58cae9a3f 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -473,13 +473,15 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */ stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt; stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt; - stats->custom_length = 3; + stats->custom_length = 4; strcpy(stats->custom[0].desc, "qp_tx_queue_full"); stats->custom[0].value = 0; /* TB iser_conn->qp_tx_queue_full; */ strcpy(stats->custom[1].desc, "fmr_map_not_avail"); stats->custom[1].value = 0; /* TB iser_conn->fmr_map_not_avail */; strcpy(stats->custom[2].desc, "eh_abort_cnt"); stats->custom[2].value = conn->eh_abort_cnt; + strcpy(stats->custom[3].desc, "fmr_unalign_cnt"); + stats->custom[3].value = conn->fmr_unalign_cnt; } static int diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index ee58199136a8..cac50c4dc159 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -423,6 +423,7 @@ void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, enum iser_data_dir cmd_dir) { + struct iscsi_conn *iscsi_conn = iser_ctask->iser_conn->iscsi_conn; struct iser_conn *ib_conn = iser_ctask->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; @@ -437,6 +438,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem->dma_nents) { + iscsi_conn->fmr_unalign_cnt++; iser_warn("rdma alignment violation %d/%d aligned\n", aligned_len, mem->size); iser_data_buf_dump(mem, ibdev); diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 7b90b63fb5c7..cd3ca63d4fb1 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -225,6 +225,7 @@ struct iscsi_conn { /* custom statistics */ uint32_t eh_abort_cnt; + uint32_t fmr_unalign_cnt; }; struct iscsi_pool { -- cgit v1.2.3 From f56bcd8013566d4ad4759ae5fc85a6660e4655c7 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 29 Apr 2008 13:46:53 -0700 Subject: IPoIB: Use separate CQ for UD send completions Use a dedicated CQ for UD send completions. Also, do not arm the UD send CQ, which reduces the number of interrupts generated. This patch farther reduces overhead by not calling poll CQ for every posted send WR -- it does polls only when there 16 or more outstanding work requests. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 7 +++-- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 8 ++--- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 45 ++++++++++++++++------------ drivers/infiniband/ulp/ipoib/ipoib_main.c | 3 +- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 39 ++++++++++++++++-------- 6 files changed, 64 insertions(+), 40 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index f1f142dc64b1..9044f8803532 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -95,6 +95,8 @@ enum { IPOIB_MCAST_FLAG_SENDONLY = 1, IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ IPOIB_MCAST_FLAG_ATTACHED = 3, + + MAX_SEND_CQE = 16, }; #define IPOIB_OP_RECV (1ul << 31) @@ -285,7 +287,8 @@ struct ipoib_dev_priv { u16 pkey_index; struct ib_pd *pd; struct ib_mr *mr; - struct ib_cq *cq; + struct ib_cq *recv_cq; + struct ib_cq *send_cq; struct ib_qp *qp; u32 qkey; @@ -305,6 +308,7 @@ struct ipoib_dev_priv { struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; struct ib_send_wr tx_wr; unsigned tx_outstanding; + struct ib_wc send_wc[MAX_SEND_CQE]; struct ib_recv_wr rx_wr; struct ib_sge rx_sge[IPOIB_UD_RX_SG]; @@ -662,7 +666,6 @@ static inline int ipoib_register_debugfs(void) { return 0; } static inline void ipoib_unregister_debugfs(void) { } #endif - #define ipoib_printk(level, priv, format, arg...) \ printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg) #define ipoib_warn(priv, format, arg...) \ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 9db7b0bd9134..97e67d36378f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -249,8 +249,8 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_init_attr attr = { .event_handler = ipoib_cm_rx_event_handler, - .send_cq = priv->cq, /* For drain WR */ - .recv_cq = priv->cq, + .send_cq = priv->recv_cq, /* For drain WR */ + .recv_cq = priv->recv_cq, .srq = priv->cm.srq, .cap.max_send_wr = 1, /* For drain WR */ .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */ @@ -951,8 +951,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_init_attr attr = { - .send_cq = priv->cq, - .recv_cq = priv->cq, + .send_cq = priv->recv_cq, + .recv_cq = priv->recv_cq, .srq = priv->cm.srq, .cap.max_send_wr = ipoib_sendq_size, .cap.max_send_sge = 1, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 9a47428366c9..10279b79c44d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -71,7 +71,7 @@ static int ipoib_set_coalesce(struct net_device *dev, coal->rx_max_coalesced_frames > 0xffff) return -EINVAL; - ret = ib_modify_cq(priv->cq, coal->rx_max_coalesced_frames, + ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames, coal->rx_coalesce_usecs); if (ret && ret != -ENOSYS) { ipoib_warn(priv, "failed modifying CQ (%d)\n", ret); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 7cf1fa7074ab..97b815c1a3fc 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -364,7 +364,6 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) struct ipoib_dev_priv *priv = netdev_priv(dev); unsigned int wr_id = wc->wr_id; struct ipoib_tx_buf *tx_req; - unsigned long flags; ipoib_dbg_data(priv, "send completion: id %d, status: %d\n", wr_id, wc->status); @@ -384,13 +383,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) dev_kfree_skb_any(tx_req->skb); - spin_lock_irqsave(&priv->tx_lock, flags); ++priv->tx_tail; if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && netif_queue_stopped(dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) netif_wake_queue(dev); - spin_unlock_irqrestore(&priv->tx_lock, flags); if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) @@ -399,6 +396,17 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) wc->status, wr_id, wc->vendor_err); } +static int poll_tx(struct ipoib_dev_priv *priv) +{ + int n, i; + + n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc); + for (i = 0; i < n; ++i) + ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i); + + return n == MAX_SEND_CQE; +} + int ipoib_poll(struct napi_struct *napi, int budget) { struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi); @@ -414,7 +422,7 @@ poll_more: int max = (budget - done); t = min(IPOIB_NUM_WC, max); - n = ib_poll_cq(priv->cq, t, priv->ibwc); + n = ib_poll_cq(priv->recv_cq, t, priv->ibwc); for (i = 0; i < n; i++) { struct ib_wc *wc = priv->ibwc + i; @@ -425,12 +433,8 @@ poll_more: ipoib_cm_handle_rx_wc(dev, wc); else ipoib_ib_handle_rx_wc(dev, wc); - } else { - if (wc->wr_id & IPOIB_OP_CM) - ipoib_cm_handle_tx_wc(dev, wc); - else - ipoib_ib_handle_tx_wc(dev, wc); - } + } else + ipoib_cm_handle_tx_wc(priv->dev, wc); } if (n != t) @@ -439,7 +443,7 @@ poll_more: if (done < budget) { netif_rx_complete(dev, napi); - if (unlikely(ib_req_notify_cq(priv->cq, + if (unlikely(ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)) && netif_rx_reschedule(dev, napi)) @@ -562,12 +566,16 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, address->last_send = priv->tx_head; ++priv->tx_head; + skb_orphan(skb); if (++priv->tx_outstanding == ipoib_sendq_size) { ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); netif_stop_queue(dev); } } + + if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) + poll_tx(priv); } static void __ipoib_reap_ah(struct net_device *dev) @@ -714,7 +722,7 @@ void ipoib_drain_cq(struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); int i, n; do { - n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc); + n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc); for (i = 0; i < n; ++i) { /* * Convert any successful completions to flush @@ -729,14 +737,13 @@ void ipoib_drain_cq(struct net_device *dev) ipoib_cm_handle_rx_wc(dev, priv->ibwc + i); else ipoib_ib_handle_rx_wc(dev, priv->ibwc + i); - } else { - if (priv->ibwc[i].wr_id & IPOIB_OP_CM) - ipoib_cm_handle_tx_wc(dev, priv->ibwc + i); - else - ipoib_ib_handle_tx_wc(dev, priv->ibwc + i); - } + } else + ipoib_cm_handle_tx_wc(dev, priv->ibwc + i); } } while (n == IPOIB_NUM_WC); + + while (poll_tx(priv)) + ; /* nothing */ } int ipoib_ib_dev_stop(struct net_device *dev, int flush) @@ -826,7 +833,7 @@ timeout: msleep(1); } - ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP); + ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP); return 0; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 7a4ed9d3d844..2442090ac8d1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1298,7 +1298,8 @@ static int __init ipoib_init_module(void) ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); - ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE); + ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE, + IPOIB_MIN_QUEUE_SIZE)); #ifdef CONFIG_INFINIBAND_IPOIB_CM ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); #endif diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 07c03f178a49..c1e7ece1fd44 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -171,26 +171,33 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) goto out_free_pd; } - size = ipoib_sendq_size + ipoib_recvq_size + 1; + size = ipoib_recvq_size + 1; ret = ipoib_cm_dev_init(dev); if (!ret) { + size += ipoib_sendq_size; if (ipoib_cm_has_srq(dev)) size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */ else size += ipoib_recvq_size * ipoib_max_conn_qp; } - priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); - if (IS_ERR(priv->cq)) { - printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); + priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); + if (IS_ERR(priv->recv_cq)) { + printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name); goto out_free_mr; } - if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP)) - goto out_free_cq; + priv->send_cq = ib_create_cq(priv->ca, NULL, NULL, dev, ipoib_sendq_size, 0); + if (IS_ERR(priv->send_cq)) { + printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name); + goto out_free_recv_cq; + } + + if (ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP)) + goto out_free_send_cq; - init_attr.send_cq = priv->cq; - init_attr.recv_cq = priv->cq; + init_attr.send_cq = priv->send_cq; + init_attr.recv_cq = priv->recv_cq; if (priv->hca_caps & IB_DEVICE_UD_TSO) init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO; @@ -201,7 +208,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { printk(KERN_WARNING "%s: failed to create QP\n", ca->name); - goto out_free_cq; + goto out_free_send_cq; } priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; @@ -230,8 +237,11 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) return 0; -out_free_cq: - ib_destroy_cq(priv->cq); +out_free_send_cq: + ib_destroy_cq(priv->send_cq); + +out_free_recv_cq: + ib_destroy_cq(priv->recv_cq); out_free_mr: ib_dereg_mr(priv->mr); @@ -254,8 +264,11 @@ void ipoib_transport_dev_cleanup(struct net_device *dev) clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); } - if (ib_destroy_cq(priv->cq)) - ipoib_warn(priv, "ib_cq_destroy failed\n"); + if (ib_destroy_cq(priv->send_cq)) + ipoib_warn(priv, "ib_cq_destroy (send) failed\n"); + + if (ib_destroy_cq(priv->recv_cq)) + ipoib_warn(priv, "ib_cq_destroy (recv) failed\n"); ipoib_cm_dev_cleanup(dev); -- cgit v1.2.3 From d227fa7288adebe5ba37fa8e4a589c977d4e4a34 Mon Sep 17 00:00:00 2001 From: Stefan Roscher Date: Tue, 29 Apr 2008 13:46:53 -0700 Subject: IB/ehca: Allocate event queue size depending on max number of CQs and QPs If a lot of QPs fall into Error state at once and the EQ of the respective HCA is too small, it might overrun, causing the eHCA driver to stop processing completion events and calling the application's completion handlers, effectively causing traffic to stop. Fix this by limiting available QPs and CQs to a customizable max count, and determining EQ size based on these counts and a worst-case assumption. Signed-off-by: Stefan Roscher Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_classes.h | 5 +++++ drivers/infiniband/hw/ehca/ehca_cq.c | 11 ++++++++++ drivers/infiniband/hw/ehca/ehca_main.c | 36 +++++++++++++++++++++++++++++-- drivers/infiniband/hw/ehca/ehca_qp.c | 26 ++++++++++++++++++++-- 4 files changed, 74 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 3d6d9461c31d..00bab60f6de4 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -66,6 +66,7 @@ struct ehca_av; #include "ehca_irq.h" #define EHCA_EQE_CACHE_SIZE 20 +#define EHCA_MAX_NUM_QUEUES 0xffff struct ehca_eqe_cache_entry { struct ehca_eqe *eqe; @@ -127,6 +128,8 @@ struct ehca_shca { /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ u32 hca_cap_mr_pgsize; int max_mtu; + atomic_t num_cqs; + atomic_t num_qps; }; struct ehca_pd { @@ -344,6 +347,8 @@ extern int ehca_use_hp_mr; extern int ehca_scaling_code; extern int ehca_lock_hcalls; extern int ehca_nr_ports; +extern int ehca_max_cq; +extern int ehca_max_qp; struct ipzu_queue_resp { u32 qe_size; /* queue entry size */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index ec0cfcf3073f..5540b276a33c 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -132,10 +132,19 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) return ERR_PTR(-EINVAL); + if (!atomic_add_unless(&shca->num_cqs, 1, ehca_max_cq)) { + ehca_err(device, "Unable to create CQ, max number of %i " + "CQs reached.", ehca_max_cq); + ehca_err(device, "To increase the maximum number of CQs " + "use the number_of_cqs module parameter.\n"); + return ERR_PTR(-ENOSPC); + } + my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL); if (!my_cq) { ehca_err(device, "Out of memory for ehca_cq struct device=%p", device); + atomic_dec(&shca->num_cqs); return ERR_PTR(-ENOMEM); } @@ -305,6 +314,7 @@ create_cq_exit2: create_cq_exit1: kmem_cache_free(cq_cache, my_cq); + atomic_dec(&shca->num_cqs); return cq; } @@ -359,6 +369,7 @@ int ehca_destroy_cq(struct ib_cq *cq) ipz_queue_dtor(NULL, &my_cq->ipz_queue); kmem_cache_free(cq_cache, my_cq); + atomic_dec(&shca->num_cqs); return 0; } diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 65048976198c..482103eb6eac 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -68,6 +68,8 @@ int ehca_port_act_time = 30; int ehca_static_rate = -1; int ehca_scaling_code = 0; int ehca_lock_hcalls = -1; +int ehca_max_cq = -1; +int ehca_max_qp = -1; module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO); module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); @@ -79,6 +81,8 @@ module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO); +module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); +module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); MODULE_PARM_DESC(open_aqp1, "Open AQP1 on startup (default: no)"); @@ -104,6 +108,12 @@ MODULE_PARM_DESC(scaling_code, MODULE_PARM_DESC(lock_hcalls, "Serialize all hCalls made by the driver " "(default: autodetect)"); +MODULE_PARM_DESC(number_of_cqs, + "Max number of CQs which can be allocated " + "(default: autodetect)"); +MODULE_PARM_DESC(number_of_qps, + "Max number of QPs which can be allocated " + "(default: autodetect)"); DEFINE_RWLOCK(ehca_qp_idr_lock); DEFINE_RWLOCK(ehca_cq_idr_lock); @@ -355,6 +365,25 @@ static int ehca_sense_attributes(struct ehca_shca *shca) if (rblock->memory_page_size_supported & pgsize_map[i]) shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; + /* Set maximum number of CQs and QPs to calculate EQ size */ + if (ehca_max_qp == -1) + ehca_max_qp = min_t(int, rblock->max_qp, EHCA_MAX_NUM_QUEUES); + else if (ehca_max_qp < 1 || ehca_max_qp > rblock->max_qp) { + ehca_gen_err("Requested number of QPs is out of range (1 - %i) " + "specified by HW", rblock->max_qp); + ret = -EINVAL; + goto sense_attributes1; + } + + if (ehca_max_cq == -1) + ehca_max_cq = min_t(int, rblock->max_cq, EHCA_MAX_NUM_QUEUES); + else if (ehca_max_cq < 1 || ehca_max_cq > rblock->max_cq) { + ehca_gen_err("Requested number of CQs is out of range (1 - %i) " + "specified by HW", rblock->max_cq); + ret = -EINVAL; + goto sense_attributes1; + } + /* query max MTU from first port -- it's the same for all ports */ port = (struct hipz_query_port *)rblock; h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); @@ -684,7 +713,7 @@ static int __devinit ehca_probe(struct of_device *dev, struct ehca_shca *shca; const u64 *handle; struct ib_pd *ibpd; - int ret, i; + int ret, i, eq_size; handle = of_get_property(dev->node, "ibm,hca-handle", NULL); if (!handle) { @@ -705,6 +734,8 @@ static int __devinit ehca_probe(struct of_device *dev, return -ENOMEM; } mutex_init(&shca->modify_mutex); + atomic_set(&shca->num_cqs, 0); + atomic_set(&shca->num_qps, 0); for (i = 0; i < ARRAY_SIZE(shca->sport); i++) spin_lock_init(&shca->sport[i].mod_sqp_lock); @@ -724,8 +755,9 @@ static int __devinit ehca_probe(struct of_device *dev, goto probe1; } + eq_size = 2 * ehca_max_cq + 4 * ehca_max_qp; /* create event queues */ - ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048); + ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size); if (ret) { ehca_err(&shca->ib_device, "Cannot create EQ."); goto probe1; diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 57bef1152cc2..18fba92fa7ae 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -421,8 +421,18 @@ static struct ehca_qp *internal_create_qp( u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; unsigned long flags; - if (init_attr->create_flags) + if (!atomic_add_unless(&shca->num_qps, 1, ehca_max_qp)) { + ehca_err(pd->device, "Unable to create QP, max number of %i " + "QPs reached.", ehca_max_qp); + ehca_err(pd->device, "To increase the maximum number of QPs " + "use the number_of_qps module parameter.\n"); + return ERR_PTR(-ENOSPC); + } + + if (init_attr->create_flags) { + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); + } memset(&parms, 0, sizeof(parms)); qp_type = init_attr->qp_type; @@ -431,6 +441,7 @@ static struct ehca_qp *internal_create_qp( init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", init_attr->sq_sig_type); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } @@ -455,6 +466,7 @@ static struct ehca_qp *internal_create_qp( if (is_llqp && has_srq) { ehca_err(pd->device, "LLQPs can't have an SRQ"); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } @@ -466,6 +478,7 @@ static struct ehca_qp *internal_create_qp( ehca_err(pd->device, "no more than three SGEs " "supported for SRQ pd=%p max_sge=%x", pd, init_attr->cap.max_recv_sge); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } } @@ -477,6 +490,7 @@ static struct ehca_qp *internal_create_qp( qp_type != IB_QPT_SMI && qp_type != IB_QPT_GSI) { ehca_err(pd->device, "wrong QP Type=%x", qp_type); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } @@ -490,6 +504,7 @@ static struct ehca_qp *internal_create_qp( "or max_rq_wr=%x for RC LLQP", init_attr->cap.max_send_wr, init_attr->cap.max_recv_wr); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } break; @@ -497,6 +512,7 @@ static struct ehca_qp *internal_create_qp( if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { ehca_err(pd->device, "UD LLQP not supported " "by this adapter"); + atomic_dec(&shca->num_qps); return ERR_PTR(-ENOSYS); } if (!(init_attr->cap.max_send_sge <= 5 @@ -508,20 +524,22 @@ static struct ehca_qp *internal_create_qp( "or max_recv_sge=%x for UD LLQP", init_attr->cap.max_send_sge, init_attr->cap.max_recv_sge); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } else if (init_attr->cap.max_send_wr > 255) { ehca_err(pd->device, "Invalid Number of " "max_send_wr=%x for UD QP_TYPE=%x", init_attr->cap.max_send_wr, qp_type); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } break; default: ehca_err(pd->device, "unsupported LL QP Type=%x", qp_type); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); - break; } } else { int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI @@ -533,6 +551,7 @@ static struct ehca_qp *internal_create_qp( "send_sge=%x recv_sge=%x max_sge=%x", init_attr->cap.max_send_sge, init_attr->cap.max_recv_sge, max_sge); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } } @@ -543,6 +562,7 @@ static struct ehca_qp *internal_create_qp( my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); if (!my_qp) { ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); + atomic_dec(&shca->num_qps); return ERR_PTR(-ENOMEM); } @@ -823,6 +843,7 @@ create_qp_exit1: create_qp_exit0: kmem_cache_free(qp_cache, my_qp); + atomic_dec(&shca->num_qps); return ERR_PTR(ret); } @@ -1948,6 +1969,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, if (HAS_SQ(my_qp)) ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); kmem_cache_free(qp_cache, my_qp); + atomic_dec(&shca->num_qps); return 0; } -- cgit v1.2.3 From bbdc2821db041fb07ffa52e4a0e1ebb5410790e9 Mon Sep 17 00:00:00 2001 From: Olaf Kirch Date: Tue, 29 Apr 2008 13:46:53 -0700 Subject: mlx4_core: Avoid recycling old FMR R_Keys too soon When a FMR is unmapped, mlx4 resets the map count to 0, and clears the upper part of the R_Key which is used as the sequence counter. This poses a problem for RDS, which uses ib_fmr_unmap as a fence operation. RDS assumes that after issuing an unmap, the old R_Keys will be invalid for a "reasonable" period of time. For instance, Oracle processes uses shared memory buffers allocated from a pool of buffers. When a process dies, we want to reclaim these buffers -- but we must make sure there are no pending RDMA operations to/from those buffers. The only way to achieve that is by using unmap and sync the TPT. However, when the sequence count is reset on unmap, there is a high likelihood that a new mapping will be given the same R_Key that was issued a few milliseconds ago. To prevent this, don't reset the sequence count when unmapping a FMR. Signed-off-by: Olaf Kirch Signed-off-by: Roland Dreier --- drivers/net/mlx4/mr.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c index 79b317b88c86..cb46446b2691 100644 --- a/drivers/net/mlx4/mr.c +++ b/drivers/net/mlx4/mr.c @@ -607,15 +607,9 @@ EXPORT_SYMBOL_GPL(mlx4_fmr_enable); void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u32 *lkey, u32 *rkey) { - u32 key; - if (!fmr->maps) return; - key = key_to_hw_index(fmr->mr.key); - key &= dev->caps.num_mpts - 1; - *lkey = *rkey = fmr->mr.key = hw_index_to_key(key); - fmr->maps = 0; *(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW; -- cgit v1.2.3 From 0bfe151cc4049f3f304adf28b37ea5437d02ad96 Mon Sep 17 00:00:00 2001 From: Olaf Kirch Date: Tue, 29 Apr 2008 13:46:53 -0700 Subject: IB/mthca: Avoid recycling old FMR R_Keys too soon When a FMR is unmapped, mthca resets the map count to 0, and clears the upper part of the R_Key which is used as the sequence counter. This poses a problem for RDS, which uses ib_fmr_unmap as a fence operation. RDS assumes that after issuing an unmap, the old R_Keys will be invalid for a "reasonable" period of time. For instance, Oracle processes uses shared memory buffers allocated from a pool of buffers. When a process dies, we want to reclaim these buffers -- but we must make sure there are no pending RDMA operations to/from those buffers. The only way to achieve that is by using unmap and sync the TPT. However, when the sequence count is reset on unmap, there is a high likelihood that a new mapping will be given the same R_Key that was issued a few milliseconds ago. To prevent this, don't reset the sequence count when unmapping a FMR. Signed-off-by: Olaf Kirch Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_mr.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 3538da16e3fe..820205dec560 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -818,15 +818,9 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) { - u32 key; - if (!fmr->maps) return; - key = tavor_key_to_hw_index(fmr->ibmr.lkey); - key &= dev->limits.num_mpts - 1; - fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key); - fmr->maps = 0; writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt); @@ -834,16 +828,9 @@ void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) { - u32 key; - if (!fmr->maps) return; - key = arbel_key_to_hw_index(fmr->ibmr.lkey); - key &= dev->limits.num_mpts - 1; - key = adjust_key(dev, key); - fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key); - fmr->maps = 0; *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; -- cgit v1.2.3 From baaad380c0aa955f7d62e846467316c94067f1a5 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 29 Apr 2008 13:46:53 -0700 Subject: IB/mthca: Avoid changing userspace ABI to handle DMA write barrier attribute Commit cb9fbc5c ("IB: expand ib_umem_get() prototype") changed the mthca userspace ABI to provide a way for userspace to indicate which memory regions need the DMA write barrier attribute. However, it is possible to handle this without breaking existing userspace, by having the mthca kernel driver recognize whether it is talking to old or new userspace, depending on the size of the register MR structure passed in. The only potential drawback of this is that is allows old userspace (which has a bug with DMA ordering on large SGI Altix systems) to continue to run on new kernels, but the advantage of allowing old userspace to continue to work on unaffected systems seems to outweigh this, and we can print a warning to push people to upgrade their userspace. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_provider.c | 14 +++++++++++++- drivers/infiniband/hw/mthca/mthca_provider.h | 1 + drivers/infiniband/hw/mthca/mthca_user.h | 10 ++++++---- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 2a9f460cf061..be34f99ca625 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -39,6 +39,8 @@ #include #include #include + +#include #include #include "mthca_dev.h" @@ -367,6 +369,8 @@ static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, return ERR_PTR(-EFAULT); } + context->reg_mr_warned = 0; + return &context->ibucontext; } @@ -1013,7 +1017,15 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int err = 0; int write_mtt_size; - if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + if (udata->inlen - sizeof (struct ib_uverbs_cmd_hdr) < sizeof ucmd) { + if (!to_mucontext(pd->uobject->context)->reg_mr_warned) { + mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n", + current->comm); + mthca_warn(dev, " Update libmthca to fix this.\n"); + } + ++to_mucontext(pd->uobject->context)->reg_mr_warned; + ucmd.mr_attrs = 0; + } else if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) return ERR_PTR(-EFAULT); mr = kmalloc(sizeof *mr, GFP_KERNEL); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 262616c8ebb6..934bf9544037 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -67,6 +67,7 @@ struct mthca_ucontext { struct ib_ucontext ibucontext; struct mthca_uar uar; struct mthca_user_db_table *db_tab; + int reg_mr_warned; }; struct mthca_mtt; diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h index f8cb3b664d37..e1262c942db8 100644 --- a/drivers/infiniband/hw/mthca/mthca_user.h +++ b/drivers/infiniband/hw/mthca/mthca_user.h @@ -41,7 +41,7 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define MTHCA_UVERBS_ABI_VERSION 2 +#define MTHCA_UVERBS_ABI_VERSION 1 /* * Make sure that all structs defined in this file remain laid out so @@ -62,10 +62,12 @@ struct mthca_alloc_pd_resp { }; struct mthca_reg_mr { +/* + * Mark the memory region with a DMA attribute that causes + * in-flight DMA to be flushed when the region is written to: + */ +#define MTHCA_MR_DMASYNC 0x1 __u32 mr_attrs; -#define MTHCA_MR_DMASYNC 0x1 -/* mark the memory region with a DMA attribute that causes - * in-flight DMA to be flushed when the region is written to */ __u32 reserved; }; -- cgit v1.2.3 From b4132efa1a47858d741ecb05b8735e6fcb603bc8 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 29 Apr 2008 13:46:53 -0700 Subject: IPoIB: Copy child MTU from parent When creating a child interface, copy the MTU information from the parent. Otherwise when the child's multicast join completes, the MTU will not be updated since the code does dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); and priv->admin_mtu will be set to 0. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 431fdeaa2dc4..1cdb5cfb0ff1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -90,6 +90,9 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) } priv->max_ib_mtu = ppriv->max_ib_mtu; + /* MTU will be reset when mcast join happens */ + priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); + priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); priv->pkey = pkey; -- cgit v1.2.3 From 37dab4112d7b53c3574426ef7bdd92a78d32ac3e Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Tue, 29 Apr 2008 13:46:54 -0700 Subject: RDMA/nes: Use LRO Signed-off-by: Faisal Latif Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/Kconfig | 1 + drivers/infiniband/hw/nes/nes.c | 4 +++ drivers/infiniband/hw/nes/nes.h | 1 + drivers/infiniband/hw/nes/nes_hw.c | 53 +++++++++++++++++++++++++++++++------ drivers/infiniband/hw/nes/nes_hw.h | 11 ++++++-- drivers/infiniband/hw/nes/nes_nic.c | 12 +++++++-- 6 files changed, 70 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig index 2aeb7ac972a9..d449eb6ec78e 100644 --- a/drivers/infiniband/hw/nes/Kconfig +++ b/drivers/infiniband/hw/nes/Kconfig @@ -2,6 +2,7 @@ config INFINIBAND_NES tristate "NetEffect RNIC Driver" depends on PCI && INET && INFINIBAND select LIBCRC32C + select INET_LRO ---help--- This is a low-level driver for NetEffect RDMA enabled Network Interface Cards (RNIC). diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index a4e9269a29bd..9f7364a9096d 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -91,6 +91,10 @@ unsigned int nes_debug_level = 0; module_param_named(debug_level, nes_debug_level, uint, 0644); MODULE_PARM_DESC(debug_level, "Enable debug output level"); +unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR; +module_param(nes_lro_max_aggr, int, NES_LRO_MAX_AGGR); +MODULE_PARM_DESC(nes_mro_max_aggr, " nic LRO MAX packet aggregation"); + LIST_HEAD(nes_adapter_list); static LIST_HEAD(nes_dev_list); diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index cdf2e9ad62f7..484b5e30badd 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -173,6 +173,7 @@ extern int disable_mpa_crc; extern unsigned int send_first; extern unsigned int nes_drv_opt; extern unsigned int nes_debug_level; +extern unsigned int nes_lro_max_aggr; extern struct list_head nes_adapter_list; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 08964cc7e98a..f824ecb9f879 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "nes.h" @@ -1375,6 +1376,25 @@ static void nes_rq_wqes_timeout(unsigned long parm) } +static int nes_lro_get_skb_hdr(struct sk_buff *skb, void **iphdr, + void **tcph, u64 *hdr_flags, void *priv) +{ + unsigned int ip_len; + struct iphdr *iph; + skb_reset_network_header(skb); + iph = ip_hdr(skb); + if (iph->protocol != IPPROTO_TCP) + return -1; + ip_len = ip_hdrlen(skb); + skb_set_transport_header(skb, ip_len); + *tcph = tcp_hdr(skb); + + *hdr_flags = LRO_IPV4 | LRO_TCP; + *iphdr = iph; + return 0; +} + + /** * nes_init_nic_qp */ @@ -1592,15 +1612,21 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) nesvnic->rq_wqes_timer.function = nes_rq_wqes_timeout; nesvnic->rq_wqes_timer.data = (unsigned long)nesvnic; nes_debug(NES_DBG_INIT, "NAPI support Enabled\n"); - if (nesdev->nesadapter->et_use_adaptive_rx_coalesce) { nes_nic_init_timer(nesdev); if (netdev->mtu > 1500) jumbomode = 1; - nes_nic_init_timer_defaults(nesdev, jumbomode); - } - + nes_nic_init_timer_defaults(nesdev, jumbomode); + } + nesvnic->lro_mgr.max_aggr = NES_LRO_MAX_AGGR; + nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS; + nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc; + nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr; + nesvnic->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID; + nesvnic->lro_mgr.dev = netdev; + nesvnic->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY; + nesvnic->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; return 0; } @@ -2254,10 +2280,13 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) u16 pkt_type; u16 rqes_processed = 0; u8 sq_cqes = 0; + u8 nes_use_lro = 0; head = cq->cq_head; cq_size = cq->cq_size; cq->cqes_pending = 1; + if (nesvnic->netdev->features & NETIF_F_LRO) + nes_use_lro = 1; do { if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) & NES_NIC_CQE_VALID) { @@ -2379,9 +2408,16 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) >> 16); nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n", nesvnic->netdev->name, vlan_tag); - nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag); + if (nes_use_lro) + lro_vlan_hwaccel_receive_skb(&nesvnic->lro_mgr, rx_skb, + nesvnic->vlan_grp, vlan_tag, NULL); + else + nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag); } else { - nes_netif_rx(rx_skb); + if (nes_use_lro) + lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL); + else + nes_netif_rx(rx_skb); } } @@ -2413,13 +2449,14 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) } while (1); + if (nes_use_lro) + lro_flush_all(&nesvnic->lro_mgr); if (sq_cqes) { barrier(); /* restart the queue if it had been stopped */ if (netif_queue_stopped(nesvnic->netdev)) netif_wake_queue(nesvnic->netdev); } - cq->cq_head = head; /* nes_debug(NES_DBG_CQ, "CQ%u Processed = %u cqes, new head = %u.\n", cq->cq_number, cqe_count, cq->cq_head); */ @@ -2432,7 +2469,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) } if (atomic_read(&nesvnic->rx_skbs_needed)) nes_replenish_nic_rq(nesvnic); - } +} /** diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 8f36e231bdf5..6a0f94cc9f0c 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -33,6 +33,8 @@ #ifndef __NES_HW_H #define __NES_HW_H +#include + #define NES_PHY_TYPE_1G 2 #define NES_PHY_TYPE_IRIS 3 #define NES_PHY_TYPE_PUMA_10G 6 @@ -982,8 +984,10 @@ struct nes_hw_tune_timer { #define NES_TIMER_INT_LIMIT 2 #define NES_TIMER_INT_LIMIT_DYNAMIC 10 #define NES_TIMER_ENABLE_LIMIT 4 -#define NES_MAX_LINK_INTERRUPTS 128 -#define NES_MAX_LINK_CHECK 200 +#define NES_MAX_LINK_INTERRUPTS 128 +#define NES_MAX_LINK_CHECK 200 +#define NES_MAX_LRO_DESCRIPTORS 32 +#define NES_LRO_MAX_AGGR 64 struct nes_adapter { u64 fw_ver; @@ -1183,6 +1187,9 @@ struct nes_vnic { u8 of_device_registered; u8 rdma_enabled; u8 rx_checksum_disabled; + u32 lro_max_aggr; + struct net_lro_mgr lro_mgr; + struct net_lro_desc lro_desc[NES_MAX_LRO_DESCRIPTORS]; }; struct nes_ib_device { diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index e5366b013c1a..6998af0172ab 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -936,8 +936,7 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) return ret; } -#define NES_ETHTOOL_STAT_COUNT 55 -static const char nes_ethtool_stringset[NES_ETHTOOL_STAT_COUNT][ETH_GSTRING_LEN] = { +static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = { "Link Change Interrupts", "Linearized SKBs", "T/GSO Requests", @@ -993,8 +992,12 @@ static const char nes_ethtool_stringset[NES_ETHTOOL_STAT_COUNT][ETH_GSTRING_LEN] "CQ Depth 32", "CQ Depth 128", "CQ Depth 256", + "LRO aggregated", + "LRO flushed", + "LRO no_desc", }; +#define NES_ETHTOOL_STAT_COUNT ARRAY_SIZE(nes_ethtool_stringset) /** * nes_netdev_get_rx_csum @@ -1189,6 +1192,9 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev, target_stat_values[52] = int_mod_cq_depth_32; target_stat_values[53] = int_mod_cq_depth_128; target_stat_values[54] = int_mod_cq_depth_256; + target_stat_values[55] = nesvnic->lro_mgr.stats.aggregated; + target_stat_values[56] = nesvnic->lro_mgr.stats.flushed; + target_stat_values[57] = nesvnic->lro_mgr.stats.no_desc; } @@ -1454,6 +1460,8 @@ static struct ethtool_ops nes_ethtool_ops = { .set_sg = ethtool_op_set_sg, .get_tso = ethtool_op_get_tso, .set_tso = ethtool_op_set_tso, + .get_flags = ethtool_op_get_flags, + .set_flags = ethtool_op_set_flags, }; -- cgit v1.2.3 From 0e1de5d62e751ca9c589d8dfabfc1e5074e62724 Mon Sep 17 00:00:00 2001 From: Eric Schneider Date: Tue, 29 Apr 2008 13:46:54 -0700 Subject: RDMA/nes: Add support for SFP+ PHY This patch enables the iw_nes module for NetEffect RNICs to support additional PHYs including SFP+ (referred to as ARGUS in the code). Signed-off-by: Eric Schneider Signed-off-by: Glenn Streiff Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes.h | 4 +- drivers/infiniband/hw/nes/nes_hw.c | 231 +++++++++++++++++++++++++++++----- drivers/infiniband/hw/nes/nes_hw.h | 6 +- drivers/infiniband/hw/nes/nes_nic.c | 72 +++++++---- drivers/infiniband/hw/nes/nes_utils.c | 10 +- 5 files changed, 259 insertions(+), 64 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 484b5e30badd..1f9f7bf73862 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -536,8 +536,8 @@ int nes_register_ofa_device(struct nes_ib_device *); int nes_read_eeprom_values(struct nes_device *, struct nes_adapter *); void nes_write_1G_phy_reg(struct nes_device *, u8, u8, u16); void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *); -void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16); -void nes_read_10G_phy_reg(struct nes_device *, u16, u8); +void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16); +void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16); struct nes_cqp_request *nes_get_cqp_request(struct nes_device *); void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *, int); int nes_arp_table(struct nes_device *, u32, u8 *, u32); diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index f824ecb9f879..e9db6ef04ad8 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -1208,11 +1208,16 @@ int nes_init_phy(struct nes_device *nesdev) { struct nes_adapter *nesadapter = nesdev->nesadapter; u32 counter = 0; + u32 sds_common_control0; u32 mac_index = nesdev->mac_index; - u32 tx_config; + u32 tx_config = 0; u16 phy_data; + u32 temp_phy_data = 0; + u32 temp_phy_data2 = 0; + u32 i = 0; - if (nesadapter->OneG_Mode) { + if ((nesadapter->OneG_Mode) && + (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) { nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index); if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) { printk(PFX "%s: Programming mdc config for 1G\n", __func__); @@ -1278,12 +1283,126 @@ int nes_init_phy(struct nes_device *nesdev) nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data); nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data | 0x0300); } else { - if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) { + if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) || + (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) { /* setup 10G MDIO operation */ tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); tx_config |= 0x14; nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); } + if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) { + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee); + + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + mdelay(10); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee); + temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + + /* + * if firmware is already running (like from a + * driver un-load/load, don't do anything. + */ + if (temp_phy_data == temp_phy_data2) { + /* configure QT2505 AMCC PHY */ + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0x0000, 0x8000); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0000); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc302, 0x0044); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc318, 0x0052); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc319, 0x0008); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc31a, 0x0098); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0026, 0x0E00); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0027, 0x0000); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0028, 0xA528); + + /* + * remove micro from reset; chip boots from ROM, + * uploads EEPROM f/w image, uC executes f/w + */ + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0002); + + /* + * wait for heart beat to start to + * know loading is done + */ + counter = 0; + do { + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + if (counter++ > 1000) { + nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from heartbeat check \n"); + break; + } + mdelay(100); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee); + temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + } while ((temp_phy_data2 == temp_phy_data)); + + /* + * wait for tracking to start to know + * f/w is good to go + */ + counter = 0; + do { + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7fd); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + if (counter++ > 1000) { + nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from status check \n"); + break; + } + mdelay(1000); + /* + * nes_debug(NES_DBG_PHY, "AMCC PHY- phy_status not ready yet = 0x%02X\n", + * temp_phy_data); + */ + } while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70)); + + /* set LOS Control invert RXLOSB_I_PADINV */ + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd003, 0x0000); + /* set LOS Control to mask of RXLOSB_I */ + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc314, 0x0042); + /* set LED1 to input mode (LED1 and LED2 share same LED) */ + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd006, 0x0007); + /* set LED2 to RX link_status and activity */ + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd007, 0x000A); + /* set LED3 to RX link_status */ + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd008, 0x0009); + + /* + * reset the res-calibration on t2 + * serdes; ensures it is stable after + * the amcc phy is stable + */ + + sds_common_control0 = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0); + sds_common_control0 |= 0x1; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0); + + /* release the res-calibration reset */ + sds_common_control0 &= 0xfffffffe; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0); + + i = 0; + while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040) + && (i++ < 5000)) { + /* mdelay(1); */ + } + + /* + * wait for link train done before moving on, + * or will get an interupt storm + */ + counter = 0; + do { + temp_phy_data = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + + (0x200 * (nesdev->mac_index & 1))); + if (counter++ > 1000) { + nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from link train wait \n"); + break; + } + mdelay(1); + } while (((temp_phy_data & 0x0f1f0000) != 0x0f0f0000)); + } + } } return 0; } @@ -2107,6 +2226,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) u32 u32temp; u16 phy_data; u16 temp_phy_data; + u32 pcs_val = 0x0f0f0000; + u32 pcs_mask = 0x0f1f0000; spin_lock_irqsave(&nesadapter->phy_lock, flags); if (nesadapter->mac_sw_state[mac_number] != NES_MAC_SW_IDLE) { @@ -2170,13 +2291,30 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) nes_debug(NES_DBG_PHY, "Eth SERDES Common Status: 0=0x%08X, 1=0x%08X\n", nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0), nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0+0x200)); - pcs_control_status = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index&1)*0x200)); - pcs_control_status = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index&1)*0x200)); + + if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_PUMA_1G) { + switch (mac_index) { + case 1: + case 3: + pcs_control_status = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); + break; + default: + pcs_control_status = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0); + break; + } + } else { + pcs_control_status = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200)); + pcs_control_status = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200)); + } + nes_debug(NES_DBG_PHY, "PCS PHY Control/Status%u: 0x%08X\n", mac_index, pcs_control_status); - if (nesadapter->OneG_Mode) { + if ((nesadapter->OneG_Mode) && + (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) { u32temp = 0x01010000; if (nesadapter->port_count > 2) { u32temp |= 0x02020000; @@ -2185,24 +2323,59 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) phy_data = 0; nes_debug(NES_DBG_PHY, "PCS says the link is down\n"); } - } else if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) { - nes_read_10G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index]); - temp_phy_data = (u16)nes_read_indexed(nesdev, - NES_IDX_MAC_MDIO_CONTROL); - u32temp = 20; - do { - nes_read_10G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index]); - phy_data = (u16)nes_read_indexed(nesdev, - NES_IDX_MAC_MDIO_CONTROL); - if ((phy_data == temp_phy_data) || (!(--u32temp))) - break; - temp_phy_data = phy_data; - } while (1); - nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", - __func__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP"); - } else { - phy_data = (0x0f0f0000 == (pcs_control_status & 0x0f1f0000)) ? 4 : 0; + switch (nesadapter->phy_type[mac_index]) { + case NES_PHY_TYPE_IRIS: + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + u32temp = 20; + do { + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1); + phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + if ((phy_data == temp_phy_data) || (!(--u32temp))) + break; + temp_phy_data = phy_data; + } while (1); + nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", + __func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP"); + break; + + case NES_PHY_TYPE_ARGUS: + /* clear the alarms */ + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc002); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc005); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc006); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9004); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9005); + /* check link status */ + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + u32temp = 100; + do { + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1); + + phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + if ((phy_data == temp_phy_data) || (!(--u32temp))) + break; + temp_phy_data = phy_data; + } while (1); + nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", + __func__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP"); + break; + + case NES_PHY_TYPE_PUMA_1G: + if (mac_index < 2) + pcs_val = pcs_mask = 0x01010000; + else + pcs_val = pcs_mask = 0x02020000; + /* fall through */ + default: + phy_data = (pcs_val == (pcs_control_status & pcs_mask)) ? 0x4 : 0x0; + break; + } } if (phy_data & 0x0004) { @@ -2211,8 +2384,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) nes_debug(NES_DBG_PHY, "The Link is UP!!. linkup was %d\n", nesvnic->linkup); if (nesvnic->linkup == 0) { - printk(PFX "The Link is now up for port %u, netdev %p.\n", - mac_index, nesvnic->netdev); + printk(PFX "The Link is now up for port %s, netdev %p.\n", + nesvnic->netdev->name, nesvnic->netdev); if (netif_queue_stopped(nesvnic->netdev)) netif_start_queue(nesvnic->netdev); nesvnic->linkup = 1; @@ -2225,8 +2398,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) nes_debug(NES_DBG_PHY, "The Link is Down!!. linkup was %d\n", nesvnic->linkup); if (nesvnic->linkup == 1) { - printk(PFX "The Link is now down for port %u, netdev %p.\n", - mac_index, nesvnic->netdev); + printk(PFX "The Link is now down for port %s, netdev %p.\n", + nesvnic->netdev->name, nesvnic->netdev); if (!(netif_queue_stopped(nesvnic->netdev))) netif_stop_queue(nesvnic->netdev); nesvnic->linkup = 0; diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 6a0f94cc9f0c..405b32ec3127 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -35,8 +35,10 @@ #include -#define NES_PHY_TYPE_1G 2 -#define NES_PHY_TYPE_IRIS 3 +#define NES_PHY_TYPE_1G 2 +#define NES_PHY_TYPE_IRIS 3 +#define NES_PHY_TYPE_ARGUS 4 +#define NES_PHY_TYPE_PUMA_1G 5 #define NES_PHY_TYPE_PUMA_10G 6 #define NES_MULTICAST_PF_MAX 8 diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 6998af0172ab..d65a846f04be 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1377,21 +1377,29 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd et_cmd->duplex = DUPLEX_FULL; et_cmd->port = PORT_MII; + if (nesadapter->OneG_Mode) { - et_cmd->supported = SUPPORTED_1000baseT_Full|SUPPORTED_Autoneg; - et_cmd->advertising = ADVERTISED_1000baseT_Full|ADVERTISED_Autoneg; et_cmd->speed = SPEED_1000; - nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], - &phy_data); - if (phy_data&0x1000) { - et_cmd->autoneg = AUTONEG_ENABLE; + if (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) { + et_cmd->supported = SUPPORTED_1000baseT_Full; + et_cmd->advertising = ADVERTISED_1000baseT_Full; + et_cmd->autoneg = AUTONEG_DISABLE; + et_cmd->transceiver = XCVR_INTERNAL; + et_cmd->phy_address = nesdev->mac_index; } else { - et_cmd->autoneg = AUTONEG_DISABLE; + et_cmd->supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg; + et_cmd->advertising = ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg; + nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], &phy_data); + if (phy_data & 0x1000) + et_cmd->autoneg = AUTONEG_ENABLE; + else + et_cmd->autoneg = AUTONEG_DISABLE; + et_cmd->transceiver = XCVR_EXTERNAL; + et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index]; } - et_cmd->transceiver = XCVR_EXTERNAL; - et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index]; } else { - if (nesadapter->phy_type[nesvnic->logical_port] == NES_PHY_TYPE_IRIS) { + if ((nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) || + (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_ARGUS)) { et_cmd->transceiver = XCVR_EXTERNAL; et_cmd->port = PORT_FIBRE; et_cmd->supported = SUPPORTED_FIBRE; @@ -1422,7 +1430,8 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd struct nes_adapter *nesadapter = nesdev->nesadapter; u16 phy_data; - if (nesadapter->OneG_Mode) { + if ((nesadapter->OneG_Mode) && + (nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G)) { nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], &phy_data); if (et_cmd->autoneg) { @@ -1615,27 +1624,34 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, list_add_tail(&nesvnic->list, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]); if ((nesdev->netdev_count == 0) && - (PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index)) { - nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n", - NES_IDX_PHY_PCS_CONTROL_STATUS0+(0x200*(nesvnic->logical_port&1))); + ((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) || + ((nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) && + (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) || + ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) { + /* + * nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n", + * NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesvnic->logical_port & 1))); + */ u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + - (0x200*(nesvnic->logical_port&1))); - u32temp |= 0x00200000; - nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + - (0x200*(nesvnic->logical_port&1)), u32temp); + (0x200 * (nesdev->mac_index & 1))); + if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G) { + u32temp |= 0x00200000; + nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + + (0x200 * (nesdev->mac_index & 1)), u32temp); + } + u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + - (0x200*(nesvnic->logical_port&1)) ); + (0x200 * (nesdev->mac_index & 1))); + if ((u32temp&0x0f1f0000) == 0x0f0f0000) { - if (nesdev->nesadapter->phy_type[nesvnic->logical_port] == NES_PHY_TYPE_IRIS) { + if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) { nes_init_phy(nesdev); - nes_read_10G_phy_reg(nesdev, 1, - nesdev->nesadapter->phy_index[nesvnic->logical_port]); + nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1); temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); u32temp = 20; do { - nes_read_10G_phy_reg(nesdev, 1, - nesdev->nesadapter->phy_index[nesvnic->logical_port]); + nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1); phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); if ((phy_data == temp_phy_data) || (!(--u32temp))) @@ -1652,6 +1668,14 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, nes_debug(NES_DBG_INIT, "The Link is UP!!.\n"); nesvnic->linkup = 1; } + } else if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) { + nes_debug(NES_DBG_INIT, "mac_index=%d, logical_port=%d, u32temp=0x%04X, PCI_FUNC=%d\n", + nesdev->mac_index, nesvnic->logical_port, u32temp, PCI_FUNC(nesdev->pcidev->devfn)); + if (((nesdev->mac_index < 2) && ((u32temp&0x01010000) == 0x01010000)) || + ((nesdev->mac_index > 1) && ((u32temp&0x02020000) == 0x02020000))) { + nes_debug(NES_DBG_INIT, "The Link is UP!!.\n"); + nesvnic->linkup = 1; + } } /* clear the MAC interrupt status, assumes direct logical to physical mapping */ u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index)); diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index c6d5631a6995..fe83d1b2b177 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -444,15 +444,13 @@ void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 /** * nes_write_10G_phy_reg */ -void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, - u8 phy_addr, u16 data) +void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_addr, u8 dev_addr, u16 phy_reg, + u16 data) { - u32 dev_addr; u32 port_addr; u32 u32temp; u32 counter; - dev_addr = 1; port_addr = phy_addr; /* set address */ @@ -492,14 +490,12 @@ void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, * This routine only issues the read, the data must be read * separately. */ -void nes_read_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, u8 phy_addr) +void nes_read_10G_phy_reg(struct nes_device *nesdev, u8 phy_addr, u8 dev_addr, u16 phy_reg) { - u32 dev_addr; u32 port_addr; u32 u32temp; u32 counter; - dev_addr = 1; port_addr = phy_addr; /* set address */ -- cgit v1.2.3 From 7495ab6837ea4660f5e14ad49e5bfc558d6862e7 Mon Sep 17 00:00:00 2001 From: Glenn Streiff Date: Tue, 29 Apr 2008 13:46:54 -0700 Subject: RDMA/nes: Formatting cleanup Various cleanups: - Change // to /* .. */ - Place whitespace around binary operators. - Trim down a few long lines. - Some minor alignment formatting for better readability. - Remove some silly tabs. Signed-off-by: Glenn Streiff Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_cm.c | 8 +-- drivers/infiniband/hw/nes/nes_hw.c | 103 ++++++++++++++++++---------------- drivers/infiniband/hw/nes/nes_hw.h | 2 +- drivers/infiniband/hw/nes/nes_nic.c | 96 +++++++++++++++---------------- drivers/infiniband/hw/nes/nes_verbs.c | 2 +- 5 files changed, 109 insertions(+), 102 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index d940fc27129a..9a4b40fae40d 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -594,7 +594,7 @@ static void nes_cm_timer_tick(unsigned long pass) continue; } /* this seems like the correct place, but leave send entry unprotected */ - // spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + /* spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); */ atomic_inc(&send_entry->skb->users); cm_packets_retrans++; nes_debug(NES_DBG_CM, "Retransmitting send_entry %p for node %p," @@ -1335,7 +1335,7 @@ static int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, cm_node->loc_addr, cm_node->loc_port, cm_node->rem_addr, cm_node->rem_port, cm_node->state, atomic_read(&cm_node->ref_count)); - // create event + /* create event */ cm_node->state = NES_CM_STATE_CLOSED; create_event(cm_node, NES_CM_EVENT_ABORTED); @@ -1669,7 +1669,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, if (!cm_node) return NULL; - // set our node side to client (active) side + /* set our node side to client (active) side */ cm_node->tcp_cntxt.client = 1; cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; @@ -1694,7 +1694,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, loopbackremotenode->mpa_frame_size = mpa_frame_size - sizeof(struct ietf_mpa_frame); - // we are done handling this state, set node to a TSA state + /* we are done handling this state, set node to a TSA state */ cm_node->state = NES_CM_STATE_TSA; cm_node->tcp_cntxt.rcv_nxt = loopbackremotenode->tcp_cntxt.loc_seq_num; loopbackremotenode->tcp_cntxt.rcv_nxt = cm_node->tcp_cntxt.loc_seq_num; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index e9db6ef04ad8..8dc70f9bad2f 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -833,7 +833,7 @@ static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_cou nes_write_indexed(nesdev, 0x00000900, 0x20000001); nes_write_indexed(nesdev, 0x000060C0, 0x0000028e); nes_write_indexed(nesdev, 0x000060C8, 0x00000020); - // + nes_write_indexed(nesdev, 0x000001EC, 0x7b2625a0); /* nes_write_indexed(nesdev, 0x000001EC, 0x5f2625a0); */ @@ -1229,7 +1229,7 @@ int nes_init_phy(struct nes_device *nesdev) nes_read_1G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index], &phy_data); nes_debug(NES_DBG_PHY, "Phy data from register 1 phy address %u = 0x%X.\n", nesadapter->phy_index[mac_index], phy_data); - nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index], 0xb000); + nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index], 0xb000); /* Reset the PHY */ nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], 0x8000); @@ -1373,7 +1373,7 @@ int nes_init_phy(struct nes_device *nesdev) * the amcc phy is stable */ - sds_common_control0 = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0); + sds_common_control0 = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0); sds_common_control0 |= 0x1; nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0); @@ -1382,7 +1382,7 @@ int nes_init_phy(struct nes_device *nesdev) nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0); i = 0; - while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040) + while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040) && (i++ < 5000)) { /* mdelay(1); */ } @@ -1659,10 +1659,10 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) } u64temp = (u64)nesvnic->nic.sq_pbase; - nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp); + nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp); nic_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); u64temp = (u64)nesvnic->nic.rq_pbase; - nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp); + nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp); nic_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP | @@ -1714,7 +1714,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) nic_rqe = &nesvnic->nic.rq_vbase[counter]; nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size); nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0; - nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem); + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem); nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32)); nesvnic->nic.rx_skb[counter] = skb; } @@ -1738,13 +1738,13 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) jumbomode = 1; nes_nic_init_timer_defaults(nesdev, jumbomode); } - nesvnic->lro_mgr.max_aggr = NES_LRO_MAX_AGGR; - nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS; - nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc; + nesvnic->lro_mgr.max_aggr = NES_LRO_MAX_AGGR; + nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS; + nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc; nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr; - nesvnic->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID; - nesvnic->lro_mgr.dev = netdev; - nesvnic->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY; + nesvnic->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID; + nesvnic->lro_mgr.dev = netdev; + nesvnic->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY; nesvnic->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; return 0; } @@ -1765,8 +1765,8 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic) /* Free remaining NIC receive buffers */ while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) { - nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail]; - wqe_frag = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]); + nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail]; + wqe_frag = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]); wqe_frag |= ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32; pci_unmap_single(nesdev->pcidev, (dma_addr_t)wqe_frag, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); @@ -1849,17 +1849,17 @@ int nes_napi_isr(struct nes_device *nesdev) /* iff NIC, process here, else wait for DPC */ if ((int_stat) && ((int_stat & 0x0000ff00) == int_stat)) { nesdev->napi_isr_ran = 0; - nes_write32(nesdev->regs+NES_INT_STAT, - (int_stat & - ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3))); + nes_write32(nesdev->regs + NES_INT_STAT, + (int_stat & + ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 | NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3))); /* Process the CEQs */ nes_process_ceq(nesdev, &nesdev->nesadapter->ceq[nesdev->nic_ceq_index]); if (unlikely((((nesadapter->et_rx_coalesce_usecs_irq) && - (!nesadapter->et_use_adaptive_rx_coalesce)) || - ((nesadapter->et_use_adaptive_rx_coalesce) && - (nesdev->deepcq_count > nesadapter->et_pkt_rate_low)))) ) { + (!nesadapter->et_use_adaptive_rx_coalesce)) || + ((nesadapter->et_use_adaptive_rx_coalesce) && + (nesdev->deepcq_count > nesadapter->et_pkt_rate_low))))) { if ((nesdev->int_req & NES_INT_TIMER) == 0) { /* Enable Periodic timer interrupts */ nesdev->int_req |= NES_INT_TIMER; @@ -1937,12 +1937,12 @@ void nes_dpc(unsigned long param) } if (int_stat) { - if (int_stat & ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0| - NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3)) { + if (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0| + NES_INT_MAC1|NES_INT_MAC2 | NES_INT_MAC3)) { /* Ack the interrupts */ nes_write32(nesdev->regs+NES_INT_STAT, - (int_stat & ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0| - NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3))); + (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0| + NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3))); } temp_int_stat = int_stat; @@ -2007,8 +2007,8 @@ void nes_dpc(unsigned long param) } } /* Don't use the interface interrupt bit stay in loop */ - int_stat &= ~NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0| - NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3; + int_stat &= ~NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 | + NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3; } while ((int_stat != 0) && (loop_counter++ < MAX_DPC_ITERATIONS)); if (timer_ints == 1) { @@ -2019,9 +2019,9 @@ void nes_dpc(unsigned long param) nesdev->timer_only_int_count = 0; nesdev->int_req &= ~NES_INT_TIMER; nes_write32(nesdev->regs + NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); - nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); + nes_write32(nesdev->regs + NES_INT_MASK, ~nesdev->int_req); } else { - nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req)); + nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req)); } } else { if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) @@ -2029,7 +2029,7 @@ void nes_dpc(unsigned long param) nes_nic_init_timer(nesdev); } nesdev->timer_only_int_count = 0; - nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req)); + nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req)); } } else { nesdev->timer_only_int_count = 0; @@ -2078,7 +2078,7 @@ static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq) do { if (le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]) & NES_CEQE_VALID) { - u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX])))<<32) | + u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]))) << 32) | ((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_LOW_IDX]))); u64temp <<= 1; cq = *((struct nes_hw_cq **)&u64temp); @@ -2106,7 +2106,7 @@ static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq) */ static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq) { -// u64 u64temp; + /* u64 u64temp; */ u32 head; u32 aeq_size; u32 aeqe_misc; @@ -2125,8 +2125,10 @@ static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq) if (aeqe_misc & (NES_AEQE_QP|NES_AEQE_CQ)) { if (aeqe_cq_id >= NES_FIRST_QPN) { /* dealing with an accelerated QP related AE */ -// u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])))<<32) | -// ((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]))); + /* + * u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX]))) << 32) | + * ((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]))); + */ nes_process_iwarp_aeqe(nesdev, (struct nes_hw_aeqe *)aeqe); } else { /* TODO: dealing with a CQP related AE */ @@ -2474,8 +2476,10 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) /* bump past the vlan tag */ wqe_fragment_length++; if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) { - u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+wqe_fragment_index*2]); - u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+wqe_fragment_index*2]))<<32; + u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX + + wqe_fragment_index * 2]); + u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX + + wqe_fragment_index * 2])) << 32; bus_address = (dma_addr_t)u64temp; if (test_and_clear_bit(nesnic->sq_tail, nesnic->first_frag_overflow)) { pci_unmap_single(nesdev->pcidev, @@ -2485,8 +2489,10 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) } for (; wqe_fragment_index < 5; wqe_fragment_index++) { if (wqe_fragment_length[wqe_fragment_index]) { - u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+wqe_fragment_index*2]); - u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+wqe_fragment_index*2]))<<32; + u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX + + wqe_fragment_index * 2]); + u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX + + wqe_fragment_index * 2])) <<32; bus_address = (dma_addr_t)u64temp; pci_unmap_page(nesdev->pcidev, bus_address, @@ -2533,7 +2539,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) if (atomic_read(&nesvnic->rx_skbs_needed) > (nesvnic->nic.rq_size>>1)) { nes_write32(nesdev->regs+NES_CQE_ALLOC, cq->cq_number | (cqe_count << 16)); -// nesadapter->tune_timer.cq_count += cqe_count; + /* nesadapter->tune_timer.cq_count += cqe_count; */ nesdev->currcq_count += cqe_count; cqe_count = 0; nes_replenish_nic_rq(nesvnic); @@ -2608,7 +2614,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) /* Replenish Nic CQ */ nes_write32(nesdev->regs+NES_CQE_ALLOC, cq->cq_number | (cqe_count << 16)); -// nesdev->nesadapter->tune_timer.cq_count += cqe_count; + /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */ nesdev->currcq_count += cqe_count; cqe_count = 0; } @@ -2636,7 +2642,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) cq->cqe_allocs_pending = cqe_count; if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) { -// nesdev->nesadapter->tune_timer.cq_count += cqe_count; + /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */ nesdev->currcq_count += cqe_count; nes_nic_tune_timer(nesdev); } @@ -2671,7 +2677,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) { u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head]. - cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) | + cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) | ((u64)(le32_to_cpu(cq->cq_vbase[head]. cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]))); cqp = *((struct nes_hw_cqp **)&u64temp); @@ -2688,7 +2694,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) } u64temp = (((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail]. - wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX])))<<32) | + wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) | ((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail]. wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX]))); cqp_request = *((struct nes_cqp_request **)&u64temp); @@ -2725,7 +2731,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) } else { nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n", cqp_request, - le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f); + le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f); if (cqp_request->dynamic) { kfree(cqp_request); } else { @@ -2739,7 +2745,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) } cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0; - nes_write32(nesdev->regs+NES_CQE_ALLOC, cq->cq_number | (1 << 16)); + nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (1 << 16)); if (++cqp->sq_tail >= cqp->sq_size) cqp->sq_tail = 0; @@ -2808,13 +2814,13 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nes_debug(NES_DBG_AEQ, "\n"); aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); if ((NES_AEQE_INBOUND_RDMA&aeq_info) || (!(NES_AEQE_QP&aeq_info))) { - context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]); + context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]); context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32; } else { aeqe_context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]); aeqe_context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32; context = (unsigned long)nesadapter->qp_table[le32_to_cpu( - aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN]; + aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN]; BUG_ON(!context); } @@ -2827,7 +2833,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe, nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]); - switch (async_event_id) { case NES_AEQE_AEID_LLP_FIN_RECEIVED: nesqp = *((struct nes_qp **)&context); @@ -3231,7 +3236,7 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_ARP_VALID); cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = cpu_to_le32( (((u32)mac_addr[2]) << 24) | (((u32)mac_addr[3]) << 16) | - (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]); + (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]); cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = cpu_to_le32( (((u32)mac_addr[0]) << 16) | (u32)mac_addr[1]); } else { diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 405b32ec3127..745bf94f3f07 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -969,7 +969,7 @@ struct nes_arp_entry { #define NES_NIC_CQ_DOWNWARD_TREND 16 struct nes_hw_tune_timer { - //u16 cq_count; + /* u16 cq_count; */ u16 threshold_low; u16 threshold_target; u16 threshold_high; diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index d65a846f04be..1b0938c87774 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -185,12 +185,13 @@ static int nes_netdev_open(struct net_device *netdev) nic_active |= nic_active_bit; nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active); - macaddr_high = ((u16)netdev->dev_addr[0]) << 8; + macaddr_high = ((u16)netdev->dev_addr[0]) << 8; macaddr_high += (u16)netdev->dev_addr[1]; - macaddr_low = ((u32)netdev->dev_addr[2]) << 24; - macaddr_low += ((u32)netdev->dev_addr[3]) << 16; - macaddr_low += ((u32)netdev->dev_addr[4]) << 8; - macaddr_low += (u32)netdev->dev_addr[5]; + + macaddr_low = ((u32)netdev->dev_addr[2]) << 24; + macaddr_low += ((u32)netdev->dev_addr[3]) << 16; + macaddr_low += ((u32)netdev->dev_addr[4]) << 8; + macaddr_low += (u32)netdev->dev_addr[5]; /* Program the various MAC regs */ for (i = 0; i < NES_MAX_PORT_COUNT; i++) { @@ -451,7 +452,7 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) __le16 *wqe_fragment_length; u32 nr_frags; u32 original_first_length; -// u64 *wqe_fragment_address; + /* u64 *wqe_fragment_address; */ /* first fragment (0) is used by copy buffer */ u16 wqe_fragment_index=1; u16 hoffset; @@ -461,11 +462,12 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) u32 old_head; u32 wqe_misc; - /* nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u," - " (%u frags), tso_size=%u\n", - netdev->name, skb->len, skb_headlen(skb), - skb_shinfo(skb)->nr_frags, skb_is_gso(skb)); - */ + /* + * nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u," + * " (%u frags), tso_size=%u\n", + * netdev->name, skb->len, skb_headlen(skb), + * skb_shinfo(skb)->nr_frags, skb_is_gso(skb)); + */ if (!netif_carrier_ok(netdev)) return NETDEV_TX_OK; @@ -795,12 +797,12 @@ static int nes_netdev_set_mac_address(struct net_device *netdev, void *p) memcpy(netdev->dev_addr, mac_addr->sa_data, netdev->addr_len); printk(PFX "%s: Address length = %d, Address = %s\n", __func__, netdev->addr_len, print_mac(mac, mac_addr->sa_data)); - macaddr_high = ((u16)netdev->dev_addr[0]) << 8; + macaddr_high = ((u16)netdev->dev_addr[0]) << 8; macaddr_high += (u16)netdev->dev_addr[1]; - macaddr_low = ((u32)netdev->dev_addr[2]) << 24; - macaddr_low += ((u32)netdev->dev_addr[3]) << 16; - macaddr_low += ((u32)netdev->dev_addr[4]) << 8; - macaddr_low += (u32)netdev->dev_addr[5]; + macaddr_low = ((u32)netdev->dev_addr[2]) << 24; + macaddr_low += ((u32)netdev->dev_addr[3]) << 16; + macaddr_low += ((u32)netdev->dev_addr[4]) << 8; + macaddr_low += (u32)netdev->dev_addr[5]; for (i = 0; i < NES_MAX_PORT_COUNT; i++) { if (nesvnic->qp_nic_index[i] == 0xf) { @@ -881,12 +883,12 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) print_mac(mac, multicast_addr->dmi_addr), perfect_filter_register_address+(mc_index * 8), mc_nic_index); - macaddr_high = ((u16)multicast_addr->dmi_addr[0]) << 8; + macaddr_high = ((u16)multicast_addr->dmi_addr[0]) << 8; macaddr_high += (u16)multicast_addr->dmi_addr[1]; - macaddr_low = ((u32)multicast_addr->dmi_addr[2]) << 24; - macaddr_low += ((u32)multicast_addr->dmi_addr[3]) << 16; - macaddr_low += ((u32)multicast_addr->dmi_addr[4]) << 8; - macaddr_low += (u32)multicast_addr->dmi_addr[5]; + macaddr_low = ((u32)multicast_addr->dmi_addr[2]) << 24; + macaddr_low += ((u32)multicast_addr->dmi_addr[3]) << 16; + macaddr_low += ((u32)multicast_addr->dmi_addr[4]) << 8; + macaddr_low += (u32)multicast_addr->dmi_addr[5]; nes_write_indexed(nesdev, perfect_filter_register_address+(mc_index * 8), macaddr_low); @@ -910,23 +912,23 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) /** * nes_netdev_change_mtu */ -static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) +static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) { struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - int ret = 0; - u8 jumbomode=0; + struct nes_device *nesdev = nesvnic->nesdev; + int ret = 0; + u8 jumbomode = 0; - if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu)) + if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu)) return -EINVAL; - netdev->mtu = new_mtu; + netdev->mtu = new_mtu; nesvnic->max_frame_size = new_mtu + VLAN_ETH_HLEN; if (netdev->mtu > 1500) { jumbomode=1; } - nes_nic_init_timer_defaults(nesdev, jumbomode); + nes_nic_init_timer_defaults(nesdev, jumbomode); if (netif_running(netdev)) { nes_netdev_stop(netdev); @@ -1225,14 +1227,14 @@ static int nes_netdev_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *et_coalesce) { struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; + struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; unsigned long flags; - spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); if (et_coalesce->rx_max_coalesced_frames_low) { - shared_timer->threshold_low = et_coalesce->rx_max_coalesced_frames_low; + shared_timer->threshold_low = et_coalesce->rx_max_coalesced_frames_low; } if (et_coalesce->rx_max_coalesced_frames_irq) { shared_timer->threshold_target = et_coalesce->rx_max_coalesced_frames_irq; @@ -1252,14 +1254,14 @@ static int nes_netdev_set_coalesce(struct net_device *netdev, nesadapter->et_rx_coalesce_usecs_irq = et_coalesce->rx_coalesce_usecs_irq; if (et_coalesce->use_adaptive_rx_coalesce) { nesadapter->et_use_adaptive_rx_coalesce = 1; - nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC; + nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC; nesadapter->et_rx_coalesce_usecs_irq = 0; if (et_coalesce->pkt_rate_low) { - nesadapter->et_pkt_rate_low = et_coalesce->pkt_rate_low; + nesadapter->et_pkt_rate_low = et_coalesce->pkt_rate_low; } } else { nesadapter->et_use_adaptive_rx_coalesce = 0; - nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT; + nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT; if (nesadapter->et_rx_coalesce_usecs_irq) { nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x80000000 | ((u32)(nesadapter->et_rx_coalesce_usecs_irq*8))); @@ -1276,28 +1278,28 @@ static int nes_netdev_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *et_coalesce) { struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; + struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; struct ethtool_coalesce temp_et_coalesce; struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; unsigned long flags; memset(&temp_et_coalesce, 0, sizeof(temp_et_coalesce)); - temp_et_coalesce.rx_coalesce_usecs_irq = nesadapter->et_rx_coalesce_usecs_irq; - temp_et_coalesce.use_adaptive_rx_coalesce = nesadapter->et_use_adaptive_rx_coalesce; - temp_et_coalesce.rate_sample_interval = nesadapter->et_rate_sample_interval; + temp_et_coalesce.rx_coalesce_usecs_irq = nesadapter->et_rx_coalesce_usecs_irq; + temp_et_coalesce.use_adaptive_rx_coalesce = nesadapter->et_use_adaptive_rx_coalesce; + temp_et_coalesce.rate_sample_interval = nesadapter->et_rate_sample_interval; temp_et_coalesce.pkt_rate_low = nesadapter->et_pkt_rate_low; spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); - temp_et_coalesce.rx_max_coalesced_frames_low = shared_timer->threshold_low; - temp_et_coalesce.rx_max_coalesced_frames_irq = shared_timer->threshold_target; + temp_et_coalesce.rx_max_coalesced_frames_low = shared_timer->threshold_low; + temp_et_coalesce.rx_max_coalesced_frames_irq = shared_timer->threshold_target; temp_et_coalesce.rx_max_coalesced_frames_high = shared_timer->threshold_high; - temp_et_coalesce.rx_coalesce_usecs_low = shared_timer->timer_in_use_min; + temp_et_coalesce.rx_coalesce_usecs_low = shared_timer->timer_in_use_min; temp_et_coalesce.rx_coalesce_usecs_high = shared_timer->timer_in_use_max; if (nesadapter->et_use_adaptive_rx_coalesce) { temp_et_coalesce.rx_coalesce_usecs_irq = shared_timer->timer_in_use; } spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); - memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce)); + memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce)); return 0; } @@ -1376,7 +1378,7 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd u16 phy_data; et_cmd->duplex = DUPLEX_FULL; - et_cmd->port = PORT_MII; + et_cmd->port = PORT_MII; if (nesadapter->OneG_Mode) { et_cmd->speed = SPEED_1000; @@ -1401,13 +1403,13 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd if ((nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) || (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_ARGUS)) { et_cmd->transceiver = XCVR_EXTERNAL; - et_cmd->port = PORT_FIBRE; - et_cmd->supported = SUPPORTED_FIBRE; + et_cmd->port = PORT_FIBRE; + et_cmd->supported = SUPPORTED_FIBRE; et_cmd->advertising = ADVERTISED_FIBRE; et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index]; } else { et_cmd->transceiver = XCVR_INTERNAL; - et_cmd->supported = SUPPORTED_10000baseT_Full; + et_cmd->supported = SUPPORTED_10000baseT_Full; et_cmd->advertising = ADVERTISED_10000baseT_Full; et_cmd->phy_address = nesdev->mac_index; } @@ -1438,7 +1440,7 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd /* Turn on Full duplex, Autoneg, and restart autonegotiation */ phy_data |= 0x1300; } else { - // Turn off autoneg + /* Turn off autoneg */ phy_data &= ~0x1000; } nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 9ae397a0ff7e..99b3c4ae86eb 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1266,7 +1266,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, sq_size = init_attr->cap.max_send_wr; rq_size = init_attr->cap.max_recv_wr; - // check if the encoded sizes are OK or not... + /* check if the encoded sizes are OK or not... */ sq_encoded_size = nes_get_encoded_size(&sq_size); rq_encoded_size = nes_get_encoded_size(&rq_size); -- cgit v1.2.3