aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBart Van Assche2019-04-17 14:44:35 -0700
committerMartin K. Petersen2019-04-29 17:24:51 -0400
commit219d27d7147e07fe899a781bd72f9180b78c3852 (patch)
tree6e5335018bb6da9bdaf457a810ca5bfd65dbf2a6
parent982cc4be05d6d0d8b15b1340416737ad60bddcae (diff)
scsi: qla2xxx: Fix race conditions in the code for aborting SCSI commands
In the *_done() functions, instead of returning early if sp->ref_count >= 2, only decrement sp->ref_count. In qla2xxx_eh_abort(), instead of deciding what to do based on the value of sp->ref_count, decide which action to take depending on the completion status of the firmware abort. Remove srb.cwaitq and use srb.comp instead. In qla2x00_abort_srb(), call isp_ops->abort_command() directly instead of calling qla2xxx_eh_abort(). Cc: Himanshu Madhani <hmadhani@marvell.com> Cc: Giridhar Malavali <gmalavali@marvell.com> Signed-off-by: Bart Van Assche <bvanassche@acm.org> Acked-by: Himanshu Madhani <hmadhani@marvell.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
-rw-r--r--drivers/scsi/qla2xxx/qla_def.h1
-rw-r--r--drivers/scsi/qla2xxx/qla_nvme.c34
-rw-r--r--drivers/scsi/qla2xxx/qla_nvme.h1
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c150
4 files changed, 55 insertions, 131 deletions
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 6dd2d41713c9..8acaeba98da1 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -546,7 +546,6 @@ typedef struct srb {
int rc;
int retry_count;
struct completion *comp;
- wait_queue_head_t *cwaitq;
union {
struct srb_iocb iocb_cmd;
struct bsg_job *bsg_job;
diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
index 5d9191278f41..0829ab7f0d54 100644
--- a/drivers/scsi/qla2xxx/qla_nvme.c
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
@@ -137,8 +137,7 @@ static void qla_nvme_sp_ls_done(void *ptr, int res)
return;
}
- if (!atomic_dec_and_test(&sp->ref_count))
- return;
+ atomic_dec(&sp->ref_count);
if (res)
res = -EINVAL;
@@ -161,8 +160,7 @@ static void qla_nvme_sp_done(void *ptr, int res)
nvme = &sp->u.iocb_cmd;
fd = nvme->u.nvme.desc;
- if (!atomic_dec_and_test(&sp->ref_count))
- return;
+ atomic_dec(&sp->ref_count);
if (res == QLA_SUCCESS) {
fd->rcv_rsplen = nvme->u.nvme.rsp_pyld_len;
@@ -599,34 +597,6 @@ static struct nvme_fc_port_template qla_nvme_fc_transport = {
.fcprqst_priv_sz = sizeof(struct nvme_private),
};
-#define NVME_ABORT_POLLING_PERIOD 2
-static int qla_nvme_wait_on_command(srb_t *sp)
-{
- int ret = QLA_SUCCESS;
-
- wait_event_timeout(sp->nvme_ls_waitq, (atomic_read(&sp->ref_count) > 1),
- NVME_ABORT_POLLING_PERIOD*HZ);
-
- if (atomic_read(&sp->ref_count) > 1)
- ret = QLA_FUNCTION_FAILED;
-
- return ret;
-}
-
-void qla_nvme_abort(struct qla_hw_data *ha, struct srb *sp, int res)
-{
- int rval;
-
- if (ha->flags.fw_started) {
- rval = ha->isp_ops->abort_command(sp);
- if (!rval && !qla_nvme_wait_on_command(sp))
- ql_log(ql_log_warn, NULL, 0x2112,
- "timed out waiting on sp=%p\n", sp);
- } else {
- sp->done(sp, res);
- }
-}
-
static void qla_nvme_unregister_remote_port(struct work_struct *work)
{
struct fc_port *fcport = container_of(work, struct fc_port,
diff --git a/drivers/scsi/qla2xxx/qla_nvme.h b/drivers/scsi/qla2xxx/qla_nvme.h
index da8dad5ad693..0db04f0a4d5d 100644
--- a/drivers/scsi/qla2xxx/qla_nvme.h
+++ b/drivers/scsi/qla2xxx/qla_nvme.h
@@ -145,7 +145,6 @@ struct pt_ls4_rx_unsol {
int qla_nvme_register_hba(struct scsi_qla_host *);
int qla_nvme_register_remote(struct scsi_qla_host *, struct fc_port *);
void qla_nvme_delete(struct scsi_qla_host *);
-void qla_nvme_abort(struct qla_hw_data *, struct srb *sp, int res);
void qla24xx_nvme_ls4_iocb(struct scsi_qla_host *, struct pt_ls4_request *,
struct req_que *);
void qla24xx_async_gffid_sp_done(void *, int);
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index a41aaf071b52..35f62f171b20 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -714,7 +714,7 @@ qla2x00_sp_compl(void *ptr, int res)
{
srb_t *sp = ptr;
struct scsi_cmnd *cmd = GET_CMD_SP(sp);
- wait_queue_head_t *cwaitq = sp->cwaitq;
+ struct completion *comp = sp->comp;
if (atomic_read(&sp->ref_count) == 0) {
ql_dbg(ql_dbg_io, sp->vha, 0x3015,
@@ -724,15 +724,15 @@ qla2x00_sp_compl(void *ptr, int res)
WARN_ON(atomic_read(&sp->ref_count) == 0);
return;
}
- if (!atomic_dec_and_test(&sp->ref_count))
- return;
+
+ atomic_dec(&sp->ref_count);
sp->free(sp);
cmd->result = res;
CMD_SP(cmd) = NULL;
cmd->scsi_done(cmd);
- if (cwaitq)
- wake_up(cwaitq);
+ if (comp)
+ complete(comp);
qla2x00_rel_sp(sp);
}
@@ -825,7 +825,7 @@ qla2xxx_qpair_sp_compl(void *ptr, int res)
{
srb_t *sp = ptr;
struct scsi_cmnd *cmd = GET_CMD_SP(sp);
- wait_queue_head_t *cwaitq = sp->cwaitq;
+ struct completion *comp = sp->comp;
if (atomic_read(&sp->ref_count) == 0) {
ql_dbg(ql_dbg_io, sp->fcport->vha, 0x3079,
@@ -835,15 +835,15 @@ qla2xxx_qpair_sp_compl(void *ptr, int res)
WARN_ON(atomic_read(&sp->ref_count) == 0);
return;
}
- if (!atomic_dec_and_test(&sp->ref_count))
- return;
+
+ atomic_dec(&sp->ref_count);
sp->free(sp);
cmd->result = res;
CMD_SP(cmd) = NULL;
cmd->scsi_done(cmd);
- if (cwaitq)
- wake_up(cwaitq);
+ if (comp)
+ complete(comp);
qla2xxx_rel_qpair_sp(sp->qpair, sp);
}
@@ -1286,7 +1286,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
unsigned int id;
uint64_t lun;
unsigned long flags;
- int rval, wait = 0;
+ int rval;
struct qla_hw_data *ha = vha->hw;
struct qla_qpair *qpair;
@@ -1299,7 +1299,6 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
ret = fc_block_scsi_eh(cmd);
if (ret != 0)
return ret;
- ret = SUCCESS;
sp = (srb_t *) CMD_SP(cmd);
if (!sp)
@@ -1310,7 +1309,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
return SUCCESS;
spin_lock_irqsave(qpair->qp_lock_ptr, flags);
- if (!CMD_SP(cmd)) {
+ if (sp->type != SRB_SCSI_CMD || GET_CMD_SP(sp) != cmd) {
/* there's a chance an interrupt could clear
the ptr as part of done & free */
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
@@ -1331,66 +1330,31 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
"Aborting from RISC nexus=%ld:%d:%llu sp=%p cmd=%p handle=%x\n",
vha->host_no, id, lun, sp, cmd, sp->handle);
- /* Get a reference to the sp and drop the lock.*/
rval = ha->isp_ops->abort_command(sp);
- if (rval) {
- if (rval == QLA_FUNCTION_PARAMETER_ERROR)
- ret = SUCCESS;
- else
- ret = FAILED;
-
- ql_dbg(ql_dbg_taskm, vha, 0x8003,
- "Abort command mbx failed cmd=%p, rval=%x.\n", cmd, rval);
- } else {
- ql_dbg(ql_dbg_taskm, vha, 0x8004,
- "Abort command mbx success cmd=%p.\n", cmd);
- wait = 1;
- }
+ ql_dbg(ql_dbg_taskm, vha, 0x8003,
+ "Abort command mbx cmd=%p, rval=%x.\n", cmd, rval);
- spin_lock_irqsave(qpair->qp_lock_ptr, flags);
-
- /*
- * Releasing of the SRB and associated command resources
- * is managed through ref_count.
- * Whether we need to wait for the abort completion or complete
- * the abort handler should be based on the ref_count.
- */
- if (atomic_read(&sp->ref_count) > 1) {
+ switch (rval) {
+ case QLA_SUCCESS:
/*
- * The command is not yet completed. We need to wait for either
- * command completion or abort completion.
+ * The command has been aborted. That means that the firmware
+ * won't report a completion.
*/
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(eh_waitq);
- uint32_t ratov = ha->r_a_tov/10;
-
- /* Go ahead and release the extra ref_count obtained earlier */
- sp->done(sp, DID_RESET << 16);
- sp->cwaitq = &eh_waitq;
-
- if (!wait_event_lock_irq_timeout(eh_waitq,
- CMD_SP(cmd) == NULL, *qpair->qp_lock_ptr,
- msecs_to_jiffies(4 * ratov * 1000))) {
- /*
- * The abort got dropped, LOGO will be sent and the
- * original command will be completed with CS_TIMEOUT
- * completion
- */
- ql_dbg(ql_dbg_taskm, vha, 0xffff,
- "%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n",
- __func__, ha->r_a_tov);
- sp->cwaitq = NULL;
- ret = FAILED;
- goto end;
- }
- } else {
- /* Command completed while processing the abort */
- sp->done(sp, DID_RESET << 16);
+ sp->done(sp, DID_ABORT << 16);
+ ret = SUCCESS;
+ break;
+ default:
+ /*
+ * Either abort failed or abort and completion raced. Let
+ * the SCSI core retry the abort in the former case.
+ */
+ ret = FAILED;
+ break;
}
-end:
- spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
+
ql_log(ql_log_info, vha, 0x801c,
- "Abort command issued nexus=%ld:%d:%llu -- %d %x.\n",
- vha->host_no, id, lun, wait, ret);
+ "Abort command issued nexus=%ld:%d:%llu -- %x.\n",
+ vha->host_no, id, lun, ret);
return ret;
}
@@ -1766,42 +1730,34 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
__releases(qp->qp_lock_ptr)
__acquires(qp->qp_lock_ptr)
{
+ DECLARE_COMPLETION_ONSTACK(comp);
scsi_qla_host_t *vha = qp->vha;
struct qla_hw_data *ha = vha->hw;
+ int rval;
- if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS) {
- if (!sp_get(sp)) {
- /* got sp */
- spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
- qla_nvme_abort(ha, sp, res);
- spin_lock_irqsave(qp->qp_lock_ptr, *flags);
- }
- } else if (GET_CMD_SP(sp) && !ha->flags.eeh_busy &&
- !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
- !qla2x00_isp_reg_stat(ha) && sp->type == SRB_SCSI_CMD) {
- /*
- * Don't abort commands in adapter during EEH recovery as it's
- * not accessible/responding.
- *
- * Get a reference to the sp and drop the lock. The reference
- * ensures this sp->done() call and not the call in
- * qla2xxx_eh_abort() ends the SCSI cmd (with result 'res').
- */
- if (!sp_get(sp)) {
- int status;
+ if (sp_get(sp))
+ return;
- spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
- status = qla2xxx_eh_abort(GET_CMD_SP(sp));
- spin_lock_irqsave(qp->qp_lock_ptr, *flags);
- /*
- * Get rid of extra reference caused
- * by early exit from qla2xxx_eh_abort
- */
- if (status == FAST_IO_FAIL)
- atomic_dec(&sp->ref_count);
+ if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS ||
+ (sp->type == SRB_SCSI_CMD && !ha->flags.eeh_busy &&
+ !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
+ !qla2x00_isp_reg_stat(ha))) {
+ sp->comp = &comp;
+ rval = ha->isp_ops->abort_command(sp);
+ spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
+
+ switch (rval) {
+ case QLA_SUCCESS:
+ sp->done(sp, res);
+ break;
+ case QLA_FUNCTION_PARAMETER_ERROR:
+ wait_for_completion(&comp);
+ break;
}
+
+ spin_lock_irqsave(qp->qp_lock_ptr, *flags);
+ sp->comp = NULL;
}
- sp->done(sp, res);
}
static void