From d22019778cd9ea04c1dadf7bf453920d5288f8d9 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 4 Sep 2012 15:23:29 +0200 Subject: [SCSI] zfcp: Adapt to new FC_PORTSPEED semantics Commit a9277e7783651d4e0a849f7988340b1c1cf748a4 "[SCSI] scsi_transport_fc: Getting FC Port Speed in sync with FC-GS" changed the semantics of FC_PORTSPEED defines to FDMI port attributes of FC-HBA/SM-HBA which is different from the previous bit reversed Report Port Speed Capabilities (RPSC) ELS of FC-GS/FC-LS. Zfcp showed "10 Gbit" instead of "4 Gbit" for supported_speeds. It now uses explicit bit conversion as the other LLDs already do, in order to be independent of the kernel bit semantics. See also http://marc.info/?l=linux-scsi&m=134452926830730&w=2 Signed-off-by: Steffen Maier Reviewed-by: Martin Peschke Cc: #3.4+ Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_fsf.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index e1c1efc2c5a0..30436d53d358 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -437,6 +437,34 @@ void zfcp_fsf_req_dismiss_all(struct zfcp_adapter *adapter) } } +#define ZFCP_FSF_PORTSPEED_1GBIT (1 << 0) +#define ZFCP_FSF_PORTSPEED_2GBIT (1 << 1) +#define ZFCP_FSF_PORTSPEED_4GBIT (1 << 2) +#define ZFCP_FSF_PORTSPEED_10GBIT (1 << 3) +#define ZFCP_FSF_PORTSPEED_8GBIT (1 << 4) +#define ZFCP_FSF_PORTSPEED_16GBIT (1 << 5) +#define ZFCP_FSF_PORTSPEED_NOT_NEGOTIATED (1 << 15) + +static u32 zfcp_fsf_convert_portspeed(u32 fsf_speed) +{ + u32 fdmi_speed = 0; + if (fsf_speed & ZFCP_FSF_PORTSPEED_1GBIT) + fdmi_speed |= FC_PORTSPEED_1GBIT; + if (fsf_speed & ZFCP_FSF_PORTSPEED_2GBIT) + fdmi_speed |= FC_PORTSPEED_2GBIT; + if (fsf_speed & ZFCP_FSF_PORTSPEED_4GBIT) + fdmi_speed |= FC_PORTSPEED_4GBIT; + if (fsf_speed & ZFCP_FSF_PORTSPEED_10GBIT) + fdmi_speed |= FC_PORTSPEED_10GBIT; + if (fsf_speed & ZFCP_FSF_PORTSPEED_8GBIT) + fdmi_speed |= FC_PORTSPEED_8GBIT; + if (fsf_speed & ZFCP_FSF_PORTSPEED_16GBIT) + fdmi_speed |= FC_PORTSPEED_16GBIT; + if (fsf_speed & ZFCP_FSF_PORTSPEED_NOT_NEGOTIATED) + fdmi_speed |= FC_PORTSPEED_NOT_NEGOTIATED; + return fdmi_speed; +} + static int zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *req) { struct fsf_qtcb_bottom_config *bottom = &req->qtcb->bottom.config; @@ -456,7 +484,8 @@ static int zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *req) fc_host_port_name(shost) = nsp->fl_wwpn; fc_host_node_name(shost) = nsp->fl_wwnn; fc_host_port_id(shost) = ntoh24(bottom->s_id); - fc_host_speed(shost) = bottom->fc_link_speed; + fc_host_speed(shost) = + zfcp_fsf_convert_portspeed(bottom->fc_link_speed); fc_host_supported_classes(shost) = FC_COS_CLASS2 | FC_COS_CLASS3; adapter->hydra_version = bottom->adapter_type; @@ -580,7 +609,8 @@ static void zfcp_fsf_exchange_port_evaluate(struct zfcp_fsf_req *req) } else fc_host_permanent_port_name(shost) = fc_host_port_name(shost); fc_host_maxframe_size(shost) = bottom->maximum_frame_size; - fc_host_supported_speeds(shost) = bottom->supported_speed; + fc_host_supported_speeds(shost) = + zfcp_fsf_convert_portspeed(bottom->supported_speed); memcpy(fc_host_supported_fc4s(shost), bottom->supported_fc4_types, FC_FC4_LIST_SIZE); memcpy(fc_host_active_fc4s(shost), bottom->active_fc4_types, -- cgit v1.2.3 From 0100998dbfe6dfcd90a6e912ca7ed6f255d48f25 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 4 Sep 2012 15:23:30 +0200 Subject: [SCSI] zfcp: Make trace record tags unique Duplicate fssrh_2 from a54ca0f62f953898b05549391ac2a8a4dad6482b "[SCSI] zfcp: Redesign of the debug tracing for HBA records." complicates distinction of generic status read response from local link up. Duplicate fsscth1 from 2c55b750a884b86dea8b4cc5f15e1484cc47a25c "[SCSI] zfcp: Redesign of the debug tracing for SAN records." complicates distinction of good common transport response from invalid port handle. Signed-off-by: Steffen Maier Reviewed-by: Martin Peschke Cc: #2.6.38+ Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_fsf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 30436d53d358..6e2892e7c4be 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -219,7 +219,7 @@ static void zfcp_fsf_status_read_handler(struct zfcp_fsf_req *req) return; } - zfcp_dbf_hba_fsf_uss("fssrh_2", req); + zfcp_dbf_hba_fsf_uss("fssrh_4", req); switch (sr_buf->status_type) { case FSF_STATUS_READ_PORT_CLOSED: @@ -915,7 +915,7 @@ static void zfcp_fsf_send_ct_handler(struct zfcp_fsf_req *req) switch (header->fsf_status) { case FSF_GOOD: - zfcp_dbf_san_res("fsscth1", req); + zfcp_dbf_san_res("fsscth2", req); ct->status = 0; break; case FSF_SERVICE_CLASS_NOT_SUPPORTED: -- cgit v1.2.3 From 01e60527f0a49b3d7df603010bd6079bb4b6cf07 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 4 Sep 2012 15:23:31 +0200 Subject: [SCSI] zfcp: Bounds checking for deferred error trace The pl vector has scount elements, i.e. pl[scount-1] is the last valid element. For maximum sized requests, payload->counter == scount after the last loop iteration. Therefore, do bounds checking first (with boolean shortcut) to not access the invalid element pl[scount]. Do not trust the maximum sbale->scount value from the HBA but ensure we won't access the pl vector out of our allocated bounds. While at it, clean up scoping and prevent unnecessary memset. Minor fix for 86a9668a8d29ea711613e1cb37efa68e7c4db564 "[SCSI] zfcp: support for hardware data router" Signed-off-by: Steffen Maier Reviewed-by: Martin Peschke Cc: #3.2+ Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_dbf.c | 2 +- drivers/s390/scsi/zfcp_qdio.c | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index 3c1d22097ad0..c6e47d553ad9 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -191,7 +191,7 @@ void zfcp_dbf_hba_def_err(struct zfcp_adapter *adapter, u64 req_id, u16 scount, length = min((u16)sizeof(struct qdio_buffer), (u16)ZFCP_DBF_PAY_MAX_REC); - while ((char *)pl[payload->counter] && payload->counter < scount) { + while (payload->counter < scount && (char *)pl[payload->counter]) { memcpy(payload->data, (char *)pl[payload->counter], length); debug_event(dbf->pay, 1, payload, zfcp_dbf_plen(length)); payload->counter++; diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index b9fffc8d94a7..50b5615848f6 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -102,18 +102,22 @@ static void zfcp_qdio_int_resp(struct ccw_device *cdev, unsigned int qdio_err, { struct zfcp_qdio *qdio = (struct zfcp_qdio *) parm; struct zfcp_adapter *adapter = qdio->adapter; - struct qdio_buffer_element *sbale; int sbal_no, sbal_idx; - void *pl[ZFCP_QDIO_MAX_SBALS_PER_REQ + 1]; - u64 req_id; - u8 scount; if (unlikely(qdio_err)) { - memset(pl, 0, ZFCP_QDIO_MAX_SBALS_PER_REQ * sizeof(void *)); if (zfcp_adapter_multi_buffer_active(adapter)) { + void *pl[ZFCP_QDIO_MAX_SBALS_PER_REQ + 1]; + struct qdio_buffer_element *sbale; + u64 req_id; + u8 scount; + + memset(pl, 0, + ZFCP_QDIO_MAX_SBALS_PER_REQ * sizeof(void *)); sbale = qdio->res_q[idx]->element; req_id = (u64) sbale->addr; - scount = sbale->scount + 1; /* incl. signaling SBAL */ + scount = min(sbale->scount + 1, + ZFCP_QDIO_MAX_SBALS_PER_REQ + 1); + /* incl. signaling SBAL */ for (sbal_no = 0; sbal_no < scount; sbal_no++) { sbal_idx = (idx + sbal_no) % -- cgit v1.2.3 From cb45214960bc989af8b911ebd77da541c797717d Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 4 Sep 2012 15:23:32 +0200 Subject: [SCSI] zfcp: Do not wakeup while suspended If the mapping of FCP device bus ID and corresponding subchannel is modified while the Linux image is suspended, the resume of FCP devices can fail. During resume, zfcp gets callbacks from cio regarding the modified subchannels but they can be arbitrarily mixed with the restore/resume callback. Since the cio callbacks would trigger adapter recovery, zfcp could wakeup before the resume callback. Therefore, ignore the cio callbacks regarding subchannels while being suspended. We can safely do so, since zfcp does not deal itself with subchannels. For problem determination purposes, we still trace the ignored callback events. The following kernel messages could be seen on resume: kernel: : parent should not be sleeping As part of adapter reopen recovery, zfcp performs auto port scanning which can erroneously try to register new remote ports with scsi_transport_fc and the device core code complains about the parent (adapter) still sleeping. kernel: zfcp.3dff9c: :\ Setting up the QDIO connection to the FCP adapter failed kernel: zfcp.574d43: :\ ERP cannot recover an error on the FCP device In such cases, the adapter gave up recovery and remained blocked along with its child objects: remote ports and LUNs/scsi devices. Even the adapter shutdown as part of giving up recovery failed because the ccw device state remained disconnected. Later, the corresponding remote ports ran into dev_loss_tmo. As a result, the LUNs were erroneously not available again after resume. Even a manually triggered adapter recovery (e.g. sysfs attribute failed, or device offline/online via sysfs) could not recover the adapter due to the remaining disconnected state of the corresponding ccw device. Signed-off-by: Steffen Maier Cc: #2.6.32+ Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_ccw.c | 73 ++++++++++++++++++++++++++++++++++++++------ drivers/s390/scsi/zfcp_dbf.c | 20 ++++++++++++ drivers/s390/scsi/zfcp_dbf.h | 1 + drivers/s390/scsi/zfcp_def.h | 1 + drivers/s390/scsi/zfcp_ext.h | 1 + 5 files changed, 86 insertions(+), 10 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c index e37f04551948..9646766360d3 100644 --- a/drivers/s390/scsi/zfcp_ccw.c +++ b/drivers/s390/scsi/zfcp_ccw.c @@ -39,17 +39,23 @@ void zfcp_ccw_adapter_put(struct zfcp_adapter *adapter) spin_unlock_irqrestore(&zfcp_ccw_adapter_ref_lock, flags); } -static int zfcp_ccw_activate(struct ccw_device *cdev) - +/** + * zfcp_ccw_activate - activate adapter and wait for it to finish + * @cdev: pointer to belonging ccw device + * @clear: Status flags to clear. + * @tag: s390dbf trace record tag + */ +static int zfcp_ccw_activate(struct ccw_device *cdev, int clear, char *tag) { struct zfcp_adapter *adapter = zfcp_ccw_adapter_by_cdev(cdev); if (!adapter) return 0; + zfcp_erp_clear_adapter_status(adapter, clear); zfcp_erp_set_adapter_status(adapter, ZFCP_STATUS_COMMON_RUNNING); zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED, - "ccresu2"); + tag); zfcp_erp_wait(adapter); flush_work(&adapter->scan_work); @@ -164,32 +170,47 @@ static int zfcp_ccw_set_online(struct ccw_device *cdev) BUG_ON(!zfcp_reqlist_isempty(adapter->req_list)); adapter->req_no = 0; - zfcp_ccw_activate(cdev); + zfcp_ccw_activate(cdev, 0, "ccsonl1"); zfcp_ccw_adapter_put(adapter); return 0; } /** - * zfcp_ccw_set_offline - set_offline function of zfcp driver + * zfcp_ccw_offline_sync - shut down adapter and wait for it to finish * @cdev: pointer to belonging ccw device + * @set: Status flags to set. + * @tag: s390dbf trace record tag * * This function gets called by the common i/o layer and sets an adapter * into state offline. */ -static int zfcp_ccw_set_offline(struct ccw_device *cdev) +static int zfcp_ccw_offline_sync(struct ccw_device *cdev, int set, char *tag) { struct zfcp_adapter *adapter = zfcp_ccw_adapter_by_cdev(cdev); if (!adapter) return 0; - zfcp_erp_adapter_shutdown(adapter, 0, "ccsoff1"); + zfcp_erp_set_adapter_status(adapter, set); + zfcp_erp_adapter_shutdown(adapter, 0, tag); zfcp_erp_wait(adapter); zfcp_ccw_adapter_put(adapter); return 0; } +/** + * zfcp_ccw_set_offline - set_offline function of zfcp driver + * @cdev: pointer to belonging ccw device + * + * This function gets called by the common i/o layer and sets an adapter + * into state offline. + */ +static int zfcp_ccw_set_offline(struct ccw_device *cdev) +{ + return zfcp_ccw_offline_sync(cdev, 0, "ccsoff1"); +} + /** * zfcp_ccw_notify - ccw notify function * @cdev: pointer to belonging ccw device @@ -207,6 +228,11 @@ static int zfcp_ccw_notify(struct ccw_device *cdev, int event) switch (event) { case CIO_GONE: + if (atomic_read(&adapter->status) & + ZFCP_STATUS_ADAPTER_SUSPENDED) { /* notification ignore */ + zfcp_dbf_hba_basic("ccnigo1", adapter); + break; + } dev_warn(&cdev->dev, "The FCP device has been detached\n"); zfcp_erp_adapter_shutdown(adapter, 0, "ccnoti1"); break; @@ -216,6 +242,11 @@ static int zfcp_ccw_notify(struct ccw_device *cdev, int event) zfcp_erp_adapter_shutdown(adapter, 0, "ccnoti2"); break; case CIO_OPER: + if (atomic_read(&adapter->status) & + ZFCP_STATUS_ADAPTER_SUSPENDED) { /* notification ignore */ + zfcp_dbf_hba_basic("ccniop1", adapter); + break; + } dev_info(&cdev->dev, "The FCP device is operational again\n"); zfcp_erp_set_adapter_status(adapter, ZFCP_STATUS_COMMON_RUNNING); @@ -251,6 +282,28 @@ static void zfcp_ccw_shutdown(struct ccw_device *cdev) zfcp_ccw_adapter_put(adapter); } +static int zfcp_ccw_suspend(struct ccw_device *cdev) +{ + zfcp_ccw_offline_sync(cdev, ZFCP_STATUS_ADAPTER_SUSPENDED, "ccsusp1"); + return 0; +} + +static int zfcp_ccw_thaw(struct ccw_device *cdev) +{ + /* trace records for thaw and final shutdown during suspend + can only be found in system dump until the end of suspend + but not after resume because it's based on the memory image + right after the very first suspend (freeze) callback */ + zfcp_ccw_activate(cdev, 0, "ccthaw1"); + return 0; +} + +static int zfcp_ccw_resume(struct ccw_device *cdev) +{ + zfcp_ccw_activate(cdev, ZFCP_STATUS_ADAPTER_SUSPENDED, "ccresu1"); + return 0; +} + struct ccw_driver zfcp_ccw_driver = { .driver = { .owner = THIS_MODULE, @@ -263,7 +316,7 @@ struct ccw_driver zfcp_ccw_driver = { .set_offline = zfcp_ccw_set_offline, .notify = zfcp_ccw_notify, .shutdown = zfcp_ccw_shutdown, - .freeze = zfcp_ccw_set_offline, - .thaw = zfcp_ccw_activate, - .restore = zfcp_ccw_activate, + .freeze = zfcp_ccw_suspend, + .thaw = zfcp_ccw_thaw, + .restore = zfcp_ccw_resume, }; diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index c6e47d553ad9..e1a8cc2526e7 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -200,6 +200,26 @@ void zfcp_dbf_hba_def_err(struct zfcp_adapter *adapter, u64 req_id, u16 scount, spin_unlock_irqrestore(&dbf->pay_lock, flags); } +/** + * zfcp_dbf_hba_basic - trace event for basic adapter events + * @adapter: pointer to struct zfcp_adapter + */ +void zfcp_dbf_hba_basic(char *tag, struct zfcp_adapter *adapter) +{ + struct zfcp_dbf *dbf = adapter->dbf; + struct zfcp_dbf_hba *rec = &dbf->hba_buf; + unsigned long flags; + + spin_lock_irqsave(&dbf->hba_lock, flags); + memset(rec, 0, sizeof(*rec)); + + memcpy(rec->tag, tag, ZFCP_DBF_TAG_LEN); + rec->id = ZFCP_DBF_HBA_BASIC; + + debug_event(dbf->hba, 1, rec, sizeof(*rec)); + spin_unlock_irqrestore(&dbf->hba_lock, flags); +} + static void zfcp_dbf_set_common(struct zfcp_dbf_rec *rec, struct zfcp_adapter *adapter, struct zfcp_port *port, diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h index 714f087eb7a9..3ac7a4b30dd9 100644 --- a/drivers/s390/scsi/zfcp_dbf.h +++ b/drivers/s390/scsi/zfcp_dbf.h @@ -154,6 +154,7 @@ enum zfcp_dbf_hba_id { ZFCP_DBF_HBA_RES = 1, ZFCP_DBF_HBA_USS = 2, ZFCP_DBF_HBA_BIT = 3, + ZFCP_DBF_HBA_BASIC = 4, }; /** diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h index 2955e1a3deaf..5bc4afba8b09 100644 --- a/drivers/s390/scsi/zfcp_def.h +++ b/drivers/s390/scsi/zfcp_def.h @@ -77,6 +77,7 @@ struct zfcp_reqlist; #define ZFCP_STATUS_ADAPTER_SIOSL_ISSUED 0x00000004 #define ZFCP_STATUS_ADAPTER_XCONFIG_OK 0x00000008 #define ZFCP_STATUS_ADAPTER_HOST_CON_INIT 0x00000010 +#define ZFCP_STATUS_ADAPTER_SUSPENDED 0x00000040 #define ZFCP_STATUS_ADAPTER_ERP_PENDING 0x00000100 #define ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED 0x00000200 #define ZFCP_STATUS_ADAPTER_DATA_DIV_ENABLED 0x00000400 diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 36f422770ff5..4eeef78447f2 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -54,6 +54,7 @@ extern void zfcp_dbf_hba_fsf_res(char *, struct zfcp_fsf_req *); extern void zfcp_dbf_hba_bit_err(char *, struct zfcp_fsf_req *); extern void zfcp_dbf_hba_berr(struct zfcp_dbf *, struct zfcp_fsf_req *); extern void zfcp_dbf_hba_def_err(struct zfcp_adapter *, u64, u16, void **); +extern void zfcp_dbf_hba_basic(char *, struct zfcp_adapter *); extern void zfcp_dbf_san_req(char *, struct zfcp_fsf_req *, u32); extern void zfcp_dbf_san_res(char *, struct zfcp_fsf_req *); extern void zfcp_dbf_san_in_els(char *, struct zfcp_fsf_req *); -- cgit v1.2.3 From ca579c9f136af4274ccfd1bcaee7f38a29a0e2e9 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 4 Sep 2012 15:23:33 +0200 Subject: [SCSI] zfcp: remove invalid reference to list iterator variable If list_for_each_entry, etc complete a traversal of the list, the iterator variable ends up pointing to an address at an offset from the list head, and not a meaningful structure. Thus this value should not be used after the end of the iterator. Replace port->adapter->scsi_host by adapter->scsi_host. This problem was found using Coccinelle (http://coccinelle.lip6.fr/). Oversight in upsteam commit of v2.6.37 a1ca48319a9aa1c5b57ce142f538e76050bb8972 "[SCSI] zfcp: Move ACL/CFDC code to zfcp_cfdc.c" which merged the content of zfcp_erp_port_access_changed(). Signed-off-by: Julia Lawall Signed-off-by: Steffen Maier Reviewed-by: Martin Peschke Cc: #2.6.37+ Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_cfdc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/s390') diff --git a/drivers/s390/scsi/zfcp_cfdc.c b/drivers/s390/scsi/zfcp_cfdc.c index fbd8b4db6025..49b82e46629e 100644 --- a/drivers/s390/scsi/zfcp_cfdc.c +++ b/drivers/s390/scsi/zfcp_cfdc.c @@ -293,7 +293,7 @@ void zfcp_cfdc_adapter_access_changed(struct zfcp_adapter *adapter) } read_unlock_irqrestore(&adapter->port_list_lock, flags); - shost_for_each_device(sdev, port->adapter->scsi_host) { + shost_for_each_device(sdev, adapter->scsi_host) { zfcp_sdev = sdev_to_zfcp(sdev); status = atomic_read(&zfcp_sdev->status); if ((status & ZFCP_STATUS_COMMON_ACCESS_DENIED) || -- cgit v1.2.3 From d99b601b63386f3395dc26a699ae703a273d9982 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 4 Sep 2012 15:23:34 +0200 Subject: [SCSI] zfcp: restore refcount check on port_remove Upstream commit f3450c7b917201bb49d67032e9f60d5125675d6a "[SCSI] zfcp: Replace local reference counting with common kref" accidentally dropped a reference count check before tearing down zfcp_ports that are potentially in use by zfcp_units. Even remote ports in use can be removed causing unreachable garbage objects zfcp_ports with zfcp_units. Thus units won't come back even after a manual port_rescan. The kref of zfcp_port->dev.kobj is already used by the driver core. We cannot re-use it to track the number of zfcp_units. Re-introduce our own counter for units per port and check on port_remove. Signed-off-by: Steffen Maier Reviewed-by: Heiko Carstens Cc: #2.6.33+ Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_aux.c | 1 + drivers/s390/scsi/zfcp_def.h | 1 + drivers/s390/scsi/zfcp_ext.h | 1 + drivers/s390/scsi/zfcp_sysfs.c | 18 ++++++++++++++++-- drivers/s390/scsi/zfcp_unit.c | 36 ++++++++++++++++++++++++++---------- 5 files changed, 45 insertions(+), 12 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index aff8621de806..f6adde44f226 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -519,6 +519,7 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn, rwlock_init(&port->unit_list_lock); INIT_LIST_HEAD(&port->unit_list); + atomic_set(&port->units, 0); INIT_WORK(&port->gid_pn_work, zfcp_fc_port_did_lookup); INIT_WORK(&port->test_link_work, zfcp_fc_link_test_work); diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h index 5bc4afba8b09..1305955cbf59 100644 --- a/drivers/s390/scsi/zfcp_def.h +++ b/drivers/s390/scsi/zfcp_def.h @@ -205,6 +205,7 @@ struct zfcp_port { struct zfcp_adapter *adapter; /* adapter used to access port */ struct list_head unit_list; /* head of logical unit list */ rwlock_t unit_list_lock; /* unit list lock */ + atomic_t units; /* zfcp_unit count */ atomic_t status; /* status of this remote port */ u64 wwnn; /* WWNN if known */ u64 wwpn; /* WWPN */ diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 4eeef78447f2..03441a7fb463 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -159,6 +159,7 @@ extern void zfcp_scsi_dif_sense_error(struct scsi_cmnd *, int); extern struct attribute_group zfcp_sysfs_unit_attrs; extern struct attribute_group zfcp_sysfs_adapter_attrs; extern struct attribute_group zfcp_sysfs_port_attrs; +extern struct mutex zfcp_sysfs_port_units_mutex; extern struct device_attribute *zfcp_sysfs_sdev_attrs[]; extern struct device_attribute *zfcp_sysfs_shost_attrs[]; diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c index c66af27b230b..1e0eb089dfba 100644 --- a/drivers/s390/scsi/zfcp_sysfs.c +++ b/drivers/s390/scsi/zfcp_sysfs.c @@ -227,6 +227,8 @@ static ssize_t zfcp_sysfs_port_rescan_store(struct device *dev, static ZFCP_DEV_ATTR(adapter, port_rescan, S_IWUSR, NULL, zfcp_sysfs_port_rescan_store); +DEFINE_MUTEX(zfcp_sysfs_port_units_mutex); + static ssize_t zfcp_sysfs_port_remove_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -249,6 +251,16 @@ static ssize_t zfcp_sysfs_port_remove_store(struct device *dev, else retval = 0; + mutex_lock(&zfcp_sysfs_port_units_mutex); + if (atomic_read(&port->units) > 0) { + retval = -EBUSY; + mutex_unlock(&zfcp_sysfs_port_units_mutex); + goto out; + } + /* port is about to be removed, so no more unit_add */ + atomic_set(&port->units, -1); + mutex_unlock(&zfcp_sysfs_port_units_mutex); + write_lock_irq(&adapter->port_list_lock); list_del(&port->list); write_unlock_irq(&adapter->port_list_lock); @@ -289,12 +301,14 @@ static ssize_t zfcp_sysfs_unit_add_store(struct device *dev, { struct zfcp_port *port = container_of(dev, struct zfcp_port, dev); u64 fcp_lun; + int retval; if (strict_strtoull(buf, 0, (unsigned long long *) &fcp_lun)) return -EINVAL; - if (zfcp_unit_add(port, fcp_lun)) - return -EINVAL; + retval = zfcp_unit_add(port, fcp_lun); + if (retval) + return retval; return count; } diff --git a/drivers/s390/scsi/zfcp_unit.c b/drivers/s390/scsi/zfcp_unit.c index 3f2bff0d3aa2..1cd2b99ab256 100644 --- a/drivers/s390/scsi/zfcp_unit.c +++ b/drivers/s390/scsi/zfcp_unit.c @@ -104,7 +104,7 @@ static void zfcp_unit_release(struct device *dev) { struct zfcp_unit *unit = container_of(dev, struct zfcp_unit, dev); - put_device(&unit->port->dev); + atomic_dec(&unit->port->units); kfree(unit); } @@ -119,16 +119,27 @@ static void zfcp_unit_release(struct device *dev) int zfcp_unit_add(struct zfcp_port *port, u64 fcp_lun) { struct zfcp_unit *unit; + int retval = 0; + + mutex_lock(&zfcp_sysfs_port_units_mutex); + if (atomic_read(&port->units) == -1) { + /* port is already gone */ + retval = -ENODEV; + goto out; + } unit = zfcp_unit_find(port, fcp_lun); if (unit) { put_device(&unit->dev); - return -EEXIST; + retval = -EEXIST; + goto out; } unit = kzalloc(sizeof(struct zfcp_unit), GFP_KERNEL); - if (!unit) - return -ENOMEM; + if (!unit) { + retval = -ENOMEM; + goto out; + } unit->port = port; unit->fcp_lun = fcp_lun; @@ -139,28 +150,33 @@ int zfcp_unit_add(struct zfcp_port *port, u64 fcp_lun) if (dev_set_name(&unit->dev, "0x%016llx", (unsigned long long) fcp_lun)) { kfree(unit); - return -ENOMEM; + retval = -ENOMEM; + goto out; } - get_device(&port->dev); - if (device_register(&unit->dev)) { put_device(&unit->dev); - return -ENOMEM; + retval = -ENOMEM; + goto out; } if (sysfs_create_group(&unit->dev.kobj, &zfcp_sysfs_unit_attrs)) { device_unregister(&unit->dev); - return -EINVAL; + retval = -EINVAL; + goto out; } + atomic_inc(&port->units); /* under zfcp_sysfs_port_units_mutex ! */ + write_lock_irq(&port->unit_list_lock); list_add_tail(&unit->list, &port->unit_list); write_unlock_irq(&port->unit_list_lock); zfcp_unit_scsi_scan(unit); - return 0; +out: + mutex_unlock(&zfcp_sysfs_port_units_mutex); + return retval; } /** -- cgit v1.2.3 From 43f60cbd56c4a3a8f7fb009ac52d6d57ac864921 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 4 Sep 2012 15:23:35 +0200 Subject: [SCSI] zfcp: No automatic port_rescan on events In FC fabrics with large zones, the automatic port_rescan on incoming ELS and any adapter recovery can cause quite some traffic at the very same time, especially if lots of Linux images share an HBA, which is common on s390. This can cause trouble and failures. Fix this by making such port rescans dependent on a user configurable module parameter. The following unconditional automatic port rescans remain as is: On setting an adapter online and on manual user-triggered writes to the sysfs attribute port_rescan. Signed-off-by: Steffen Maier Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_ccw.c | 7 ++++++- drivers/s390/scsi/zfcp_erp.c | 2 +- drivers/s390/scsi/zfcp_ext.h | 2 ++ drivers/s390/scsi/zfcp_fc.c | 23 ++++++++++++++++++++++- drivers/s390/scsi/zfcp_fsf.c | 2 +- 5 files changed, 32 insertions(+), 4 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c index 9646766360d3..f2dd3a0a39eb 100644 --- a/drivers/s390/scsi/zfcp_ccw.c +++ b/drivers/s390/scsi/zfcp_ccw.c @@ -57,7 +57,7 @@ static int zfcp_ccw_activate(struct ccw_device *cdev, int clear, char *tag) zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED, tag); zfcp_erp_wait(adapter); - flush_work(&adapter->scan_work); + flush_work(&adapter->scan_work); /* ok to call even if nothing queued */ zfcp_ccw_adapter_put(adapter); @@ -171,6 +171,11 @@ static int zfcp_ccw_set_online(struct ccw_device *cdev) adapter->req_no = 0; zfcp_ccw_activate(cdev, 0, "ccsonl1"); + /* scan for remote ports + either at the end of any successful adapter recovery + or only after the adapter recovery for setting a device online */ + zfcp_fc_inverse_conditional_port_scan(adapter); + flush_work(&adapter->scan_work); /* ok to call even if nothing queued */ zfcp_ccw_adapter_put(adapter); return 0; } diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 92d3df6ac8ba..4133ab6e20f1 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -1230,7 +1230,7 @@ static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) case ZFCP_ERP_ACTION_REOPEN_ADAPTER: if (result == ZFCP_ERP_SUCCEEDED) { register_service_level(&adapter->service_level); - queue_work(adapter->work_queue, &adapter->scan_work); + zfcp_fc_conditional_port_scan(adapter); queue_work(adapter->work_queue, &adapter->ns_up_work); } else unregister_service_level(&adapter->service_level); diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 03441a7fb463..1d3dd3f7d699 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -99,6 +99,8 @@ extern void zfcp_fc_gs_destroy(struct zfcp_adapter *); extern int zfcp_fc_exec_bsg_job(struct fc_bsg_job *); extern int zfcp_fc_timeout_bsg_job(struct fc_bsg_job *); extern void zfcp_fc_sym_name_update(struct work_struct *); +extern void zfcp_fc_conditional_port_scan(struct zfcp_adapter *); +extern void zfcp_fc_inverse_conditional_port_scan(struct zfcp_adapter *); /* zfcp_fsf.c */ extern struct kmem_cache *zfcp_fsf_qtcb_cache; diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index 88688a80b2c1..ff598cd68b2d 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -26,6 +26,27 @@ static u32 zfcp_fc_rscn_range_mask[] = { [ELS_ADDR_FMT_FAB] = 0x000000, }; +static bool no_auto_port_rescan; +module_param_named(no_auto_port_rescan, no_auto_port_rescan, bool, 0600); +MODULE_PARM_DESC(no_auto_port_rescan, + "no automatic port_rescan (default off)"); + +void zfcp_fc_conditional_port_scan(struct zfcp_adapter *adapter) +{ + if (no_auto_port_rescan) + return; + + queue_work(adapter->work_queue, &adapter->scan_work); +} + +void zfcp_fc_inverse_conditional_port_scan(struct zfcp_adapter *adapter) +{ + if (!no_auto_port_rescan) + return; + + queue_work(adapter->work_queue, &adapter->scan_work); +} + /** * zfcp_fc_post_event - post event to userspace via fc_transport * @work: work struct with enqueued events @@ -206,7 +227,7 @@ static void zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req) zfcp_fc_enqueue_event(fsf_req->adapter, FCH_EVT_RSCN, *(u32 *)page); } - queue_work(fsf_req->adapter->work_queue, &fsf_req->adapter->scan_work); + zfcp_fc_conditional_port_scan(fsf_req->adapter); } static void zfcp_fc_incoming_wwpn(struct zfcp_fsf_req *req, u64 wwpn) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 6e2892e7c4be..9ffdc335429c 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -257,7 +257,7 @@ static void zfcp_fsf_status_read_handler(struct zfcp_fsf_req *req) if (sr_buf->status_subtype & FSF_STATUS_READ_SUB_ACT_UPDATED) zfcp_cfdc_adapter_access_changed(adapter); if (sr_buf->status_subtype & FSF_STATUS_READ_SUB_INCOMING_ELS) - queue_work(adapter->work_queue, &adapter->scan_work); + zfcp_fc_conditional_port_scan(adapter); break; case FSF_STATUS_READ_CFDC_UPDATED: zfcp_cfdc_adapter_access_changed(adapter); -- cgit v1.2.3 From d436de8ce25f53a8a880a931886821f632247943 Mon Sep 17 00:00:00 2001 From: Martin Peschke Date: Tue, 4 Sep 2012 15:23:36 +0200 Subject: [SCSI] zfcp: only access zfcp_scsi_dev for valid scsi_device __scsi_remove_device (e.g. due to dev_loss_tmo) calls zfcp_scsi_slave_destroy which in turn sends a close LUN FSF request to the adapter. After 30 seconds without response, zfcp_erp_timeout_handler kicks the ERP thread failing the close LUN ERP action. zfcp_erp_wait in zfcp_erp_lun_shutdown_wait and thus zfcp_scsi_slave_destroy returns and then scsi_device is no longer valid. Sometime later the response to the close LUN FSF request may finally come in. However, commit b62a8d9b45b971a67a0f8413338c230e3117dff5 "[SCSI] zfcp: Use SCSI device data zfcp_scsi_dev instead of zfcp_unit" introduced a number of attempts to unconditionally access struct zfcp_scsi_dev through struct scsi_device causing a use-after-free. This leads to an Oops due to kernel page fault in one of: zfcp_fsf_abort_fcp_command_handler, zfcp_fsf_open_lun_handler, zfcp_fsf_close_lun_handler, zfcp_fsf_req_trace, zfcp_fsf_fcp_handler_common. Move dereferencing of zfcp private data zfcp_scsi_dev allocated in scsi_device via scsi_transport_reserve_device after the check for potentially aborted FSF request and thus no longer valid scsi_device. Only then assign sdev_to_zfcp(sdev) to the local auto variable struct zfcp_scsi_dev *zfcp_sdev. Signed-off-by: Martin Peschke Signed-off-by: Steffen Maier Cc: #2.6.37+ Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_fsf.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 9ffdc335429c..c96320d79fbc 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -801,12 +801,14 @@ out: static void zfcp_fsf_abort_fcp_command_handler(struct zfcp_fsf_req *req) { struct scsi_device *sdev = req->data; - struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev); + struct zfcp_scsi_dev *zfcp_sdev; union fsf_status_qual *fsq = &req->qtcb->header.fsf_status_qual; if (req->status & ZFCP_STATUS_FSFREQ_ERROR) return; + zfcp_sdev = sdev_to_zfcp(sdev); + switch (req->qtcb->header.fsf_status) { case FSF_PORT_HANDLE_NOT_VALID: if (fsq->word[0] == fsq->word[1]) { @@ -1769,13 +1771,15 @@ static void zfcp_fsf_open_lun_handler(struct zfcp_fsf_req *req) { struct zfcp_adapter *adapter = req->adapter; struct scsi_device *sdev = req->data; - struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev); + struct zfcp_scsi_dev *zfcp_sdev; struct fsf_qtcb_header *header = &req->qtcb->header; struct fsf_qtcb_bottom_support *bottom = &req->qtcb->bottom.support; if (req->status & ZFCP_STATUS_FSFREQ_ERROR) return; + zfcp_sdev = sdev_to_zfcp(sdev); + atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_DENIED | ZFCP_STATUS_COMMON_ACCESS_BOXED | ZFCP_STATUS_LUN_SHARED | @@ -1886,11 +1890,13 @@ out: static void zfcp_fsf_close_lun_handler(struct zfcp_fsf_req *req) { struct scsi_device *sdev = req->data; - struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev); + struct zfcp_scsi_dev *zfcp_sdev; if (req->status & ZFCP_STATUS_FSFREQ_ERROR) return; + zfcp_sdev = sdev_to_zfcp(sdev); + switch (req->qtcb->header.fsf_status) { case FSF_PORT_HANDLE_NOT_VALID: zfcp_erp_adapter_reopen(zfcp_sdev->port->adapter, 0, "fscuh_1"); @@ -1980,7 +1986,7 @@ static void zfcp_fsf_req_trace(struct zfcp_fsf_req *req, struct scsi_cmnd *scsi) { struct fsf_qual_latency_info *lat_in; struct latency_cont *lat = NULL; - struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(scsi->device); + struct zfcp_scsi_dev *zfcp_sdev; struct zfcp_blk_drv_data blktrc; int ticks = req->adapter->timer_ticks; @@ -1995,6 +2001,7 @@ static void zfcp_fsf_req_trace(struct zfcp_fsf_req *req, struct scsi_cmnd *scsi) if (req->adapter->adapter_features & FSF_FEATURE_MEASUREMENT_DATA && !(req->status & ZFCP_STATUS_FSFREQ_ERROR)) { + zfcp_sdev = sdev_to_zfcp(scsi->device); blktrc.flags |= ZFCP_BLK_LAT_VALID; blktrc.channel_lat = lat_in->channel_lat * ticks; blktrc.fabric_lat = lat_in->fabric_lat * ticks; @@ -2032,12 +2039,14 @@ static void zfcp_fsf_fcp_handler_common(struct zfcp_fsf_req *req) { struct scsi_cmnd *scmnd = req->data; struct scsi_device *sdev = scmnd->device; - struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev); + struct zfcp_scsi_dev *zfcp_sdev; struct fsf_qtcb_header *header = &req->qtcb->header; if (unlikely(req->status & ZFCP_STATUS_FSFREQ_ERROR)) return; + zfcp_sdev = sdev_to_zfcp(sdev); + switch (header->fsf_status) { case FSF_HANDLE_MISMATCH: case FSF_PORT_HANDLE_NOT_VALID: -- cgit v1.2.3