Diffstat (limited to 'drivers/nvme/host')
-rw-r--r-- | drivers/nvme/host/core.c      |  44
-rw-r--r-- | drivers/nvme/host/fabrics.c   |   1
-rw-r--r-- | drivers/nvme/host/multipath.c |  10
-rw-r--r-- | drivers/nvme/host/nvme.h      |   3
-rw-r--r-- | drivers/nvme/host/pci.c       | 300
-rw-r--r-- | drivers/nvme/host/rdma.c      |  10
-rw-r--r-- | drivers/nvme/host/tcp.c       |  21
7 files changed, 208 insertions(+), 181 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 6265d9225ec8..a6644a2c3ef7 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1105,7 +1105,7 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
 
 	error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
 	if (error) {
-		dev_warn(ctrl->device, "Identify namespace failed\n");
+		dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error);
 		kfree(id);
 		return NULL;
 	}
@@ -1588,9 +1588,13 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
 static void nvme_update_disk_info(struct gendisk *disk,
 		struct nvme_ns *ns, struct nvme_id_ns *id)
 {
-	sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9);
+	sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
 	unsigned short bs = 1 << ns->lba_shift;
 
+	if (ns->lba_shift > PAGE_SHIFT) {
+		/* unsupported block size, set capacity to 0 later */
+		bs = (1 << 9);
+	}
 	blk_mq_freeze_queue(disk->queue);
 	blk_integrity_unregister(disk);
 
@@ -1601,7 +1605,8 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	if (ns->ms && !ns->ext &&
 	    (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
 		nvme_init_integrity(disk, ns->ms, ns->pi_type);
-	if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk))
+	if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) ||
+	    ns->lba_shift > PAGE_SHIFT)
 		capacity = 0;
 
 	set_capacity(disk, capacity);
@@ -2549,7 +2554,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	ctrl->crdt[2] = le16_to_cpu(id->crdt3);
 
 	ctrl->oacs = le16_to_cpu(id->oacs);
-	ctrl->oncs = le16_to_cpup(&id->oncs);
+	ctrl->oncs = le16_to_cpu(id->oncs);
 	ctrl->oaes = le32_to_cpu(id->oaes);
 	atomic_set(&ctrl->abort_limit, id->acl + 1);
 	ctrl->vwc = id->vwc;
@@ -3874,10 +3879,37 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
 
-int __init nvme_core_init(void)
+/*
+ * Check we didn't inadvertently grow the command structure sizes:
+ */
+static inline void _nvme_check_size(void)
+{
+	BUILD_BUG_ON(sizeof(struct nvme_common_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_identify) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
+	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
+	BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
+}
+
+
+static int __init nvme_core_init(void)
 {
 	int result = -ENOMEM;
 
+	_nvme_check_size();
+
 	nvme_wq = alloc_workqueue("nvme-wq",
 			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
 	if (!nvme_wq)
@@ -3924,7 +3956,7 @@ out:
 	return result;
 }
 
-void __exit nvme_core_exit(void)
+static void __exit nvme_core_exit(void)
 {
 	ida_destroy(&nvme_subsystems_ida);
 	class_destroy(nvme_subsys_class);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index d4cb826f58ff..592d1e61ef7e 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -1188,6 +1188,7 @@ static void __exit nvmf_exit(void)
 	class_destroy(nvmf_class);
 	nvmf_host_put(nvmf_default_host);
 
+	BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvmf_connect_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvmf_property_get_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvmf_property_set_command) != 64);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index f0716f6ce41f..5c9429d41120 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -232,6 +232,14 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
 	blk_qc_t ret = BLK_QC_T_NONE;
 	int srcu_idx;
 
+	/*
+	 * The namespace might be going away and the bio might
+	 * be moved to a different queue via blk_steal_bios(),
+	 * so we need to use the bio_split pool from the original
+	 * queue to allocate the bvecs from.
+	 */
+	blk_queue_split(q, &bio);
+
 	srcu_idx = srcu_read_lock(&head->srcu);
 	ns = nvme_find_path(head);
 	if (likely(ns)) {
@@ -421,7 +429,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
 	unsigned *nr_change_groups = data;
 	struct nvme_ns *ns;
 
-	dev_info(ctrl->device, "ANA group %d: %s.\n",
+	dev_dbg(ctrl->device, "ANA group %d: %s.\n",
 			le32_to_cpu(desc->grpid),
 			nvme_ana_state_names[desc->state]);
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 527d64545023..5ee75b5ff83f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -577,7 +577,4 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
 	return dev_to_disk(dev)->private_data;
 }
 
-int __init nvme_core_init(void);
-void __exit nvme_core_exit(void);
-
 #endif /* _NVME_H */
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a90cf5d63aac..3e4fb891a95a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -146,7 +146,7 @@ static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
 
 static int queue_count_set(const char *val, const struct kernel_param *kp)
 {
-	int n = 0, ret;
+	int n, ret;
 
 	ret = kstrtoint(val, 10, &n);
 	if (ret)
@@ -177,7 +177,6 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
  * commands and one for I/O commands).
  */
 struct nvme_queue {
-	struct device *q_dmadev;
 	struct nvme_dev *dev;
 	spinlock_t sq_lock;
 	struct nvme_command *sq_cmds;
@@ -189,7 +188,7 @@ struct nvme_queue {
 	dma_addr_t cq_dma_addr;
 	u32 __iomem *q_db;
 	u16 q_depth;
-	s16 cq_vector;
+	u16 cq_vector;
 	u16 sq_tail;
 	u16 last_sq_tail;
 	u16 cq_head;
@@ -200,6 +199,7 @@ struct nvme_queue {
 #define NVMEQ_ENABLED		0
 #define NVMEQ_SQ_CMB		1
 #define NVMEQ_DELETE_ERROR	2
+#define NVMEQ_POLLED		3
 	u32 *dbbuf_sq_db;
 	u32 *dbbuf_cq_db;
 	u32 *dbbuf_sq_ei;
@@ -208,10 +208,10 @@ struct nvme_queue {
 };
 
 /*
- * The nvme_iod describes the data in an I/O, including the list of PRP
- * entries.  You can't see it in this data structure because C doesn't let
- * me express that.  Use nvme_init_iod to ensure there's enough space
- * allocated to store the PRP list.
+ * The nvme_iod describes the data in an I/O.
+ *
+ * The sg pointer contains the list of PRP/SGL chunk allocations in addition
+ * to the actual struct scatterlist.
  */
 struct nvme_iod {
 	struct nvme_request req;
@@ -220,33 +220,12 @@ struct nvme_iod {
 	int aborted;
 	int npages;		/* In the PRP list. 0 means small pool in use */
 	int nents;		/* Used in scatterlist */
-	int length;		/* Of data, in bytes */
 	dma_addr_t first_dma;
-	struct scatterlist meta_sg; /* metadata requires single contiguous buffer */
+	unsigned int dma_len;	/* length of single DMA segment mapping */
+	dma_addr_t meta_dma;
 	struct scatterlist *sg;
-	struct scatterlist inline_sg[0];
 };
 
-/*
- * Check we didin't inadvertently grow the command struct
- */
-static inline void _nvme_check_size(void)
-{
-	BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
-	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
-	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
-	BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
-}
-
 static unsigned int max_io_queues(void)
 {
 	return num_possible_cpus() + write_queues + poll_queues;
@@ -372,12 +351,6 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
 }
 
 /*
- * Max size of iod being embedded in the request payload
- */
-#define NVME_INT_PAGES		2
-#define NVME_INT_BYTES(dev)	(NVME_INT_PAGES * (dev)->ctrl.page_size)
-
-/*
  * Will slightly overestimate the number of pages needed.  This is OK
  * as it only leads to a small amount of wasted memory for the lifetime of
  * the I/O.
@@ -411,15 +384,6 @@ static unsigned int nvme_pci_iod_alloc_size(struct nvme_dev *dev,
 	return alloc_size + sizeof(struct scatterlist) * nseg;
 }
 
-static unsigned int nvme_pci_cmd_size(struct nvme_dev *dev, bool use_sgl)
-{
-	unsigned int alloc_size = nvme_pci_iod_alloc_size(dev,
-			NVME_INT_BYTES(dev), NVME_INT_PAGES,
-			use_sgl);
-
-	return sizeof(struct nvme_iod) + alloc_size;
-}
-
 static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 				unsigned int hctx_idx)
 {
@@ -584,37 +548,26 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
 	return true;
 }
 
-static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
+static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 {
-	struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
-	int nseg = blk_rq_nr_phys_segments(rq);
-	unsigned int size = blk_rq_payload_bytes(rq);
-
-	iod->use_sgl = nvme_pci_use_sgls(dev, rq);
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	enum dma_data_direction dma_dir = rq_data_dir(req) ?
+			DMA_TO_DEVICE : DMA_FROM_DEVICE;
+	const int last_prp = dev->ctrl.page_size / sizeof(__le64) - 1;
+	dma_addr_t dma_addr = iod->first_dma, next_dma_addr;
+	int i;
 
-	if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
-		iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
-		if (!iod->sg)
-			return BLK_STS_RESOURCE;
-	} else {
-		iod->sg = iod->inline_sg;
+	if (iod->dma_len) {
+		dma_unmap_page(dev->dev, dma_addr, iod->dma_len, dma_dir);
+		return;
 	}
 
-	iod->aborted = 0;
-	iod->npages = -1;
-	iod->nents = 0;
-	iod->length = size;
-
-	return BLK_STS_OK;
-}
+	WARN_ON_ONCE(!iod->nents);
 
-static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
-{
-	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	const int last_prp = dev->ctrl.page_size / sizeof(__le64) - 1;
-	dma_addr_t dma_addr = iod->first_dma, next_dma_addr;
+	/* P2PDMA requests do not need to be unmapped */
+	if (!is_pci_p2pdma_page(sg_page(iod->sg)))
+		dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req));
 
-	int i;
 
 	if (iod->npages == 0)
 		dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
@@ -638,8 +591,7 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
 		dma_addr = next_dma_addr;
 	}
 
-	if (iod->sg != iod->inline_sg)
-		mempool_free(iod->sg, dev->iod_mempool);
+	mempool_free(iod->sg, dev->iod_mempool);
 }
 
 static void nvme_print_sgl(struct scatterlist *sgl, int nents)
@@ -829,80 +781,104 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
 	return BLK_STS_OK;
 }
 
+static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
+		struct request *req, struct nvme_rw_command *cmnd,
+		struct bio_vec *bv)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	unsigned int first_prp_len = dev->ctrl.page_size - bv->bv_offset;
+
+	iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
+	if (dma_mapping_error(dev->dev, iod->first_dma))
+		return BLK_STS_RESOURCE;
+	iod->dma_len = bv->bv_len;
+
+	cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma);
+	if (bv->bv_len > first_prp_len)
+		cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len);
+	return 0;
+}
+
+static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev,
+		struct request *req, struct nvme_rw_command *cmnd,
+		struct bio_vec *bv)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+
+	iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
+	if (dma_mapping_error(dev->dev, iod->first_dma))
+		return BLK_STS_RESOURCE;
+	iod->dma_len = bv->bv_len;
+
+	cmnd->flags = NVME_CMD_SGL_METABUF;
+	cmnd->dptr.sgl.addr = cpu_to_le64(iod->first_dma);
+	cmnd->dptr.sgl.length = cpu_to_le32(iod->dma_len);
+	cmnd->dptr.sgl.type = NVME_SGL_FMT_DATA_DESC << 4;
+	return 0;
+}
+
 static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		struct nvme_command *cmnd)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	struct request_queue *q = req->q;
-	enum dma_data_direction dma_dir = rq_data_dir(req) ?
-			DMA_TO_DEVICE : DMA_FROM_DEVICE;
-	blk_status_t ret = BLK_STS_IOERR;
+	blk_status_t ret = BLK_STS_RESOURCE;
 	int nr_mapped;
 
+	if (blk_rq_nr_phys_segments(req) == 1) {
+		struct bio_vec bv = req_bvec(req);
+
+		if (!is_pci_p2pdma_page(bv.bv_page)) {
+			if (bv.bv_offset + bv.bv_len <= dev->ctrl.page_size * 2)
+				return nvme_setup_prp_simple(dev, req,
+							     &cmnd->rw, &bv);
+
+			if (iod->nvmeq->qid &&
+			    dev->ctrl.sgls & ((1 << 0) | (1 << 1)))
+				return nvme_setup_sgl_simple(dev, req,
+							     &cmnd->rw, &bv);
+		}
+	}
+
+	iod->dma_len = 0;
+	iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
+	if (!iod->sg)
+		return BLK_STS_RESOURCE;
 	sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
-	iod->nents = blk_rq_map_sg(q, req, iod->sg);
+	iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
 	if (!iod->nents)
 		goto out;
 
-	ret = BLK_STS_RESOURCE;
-
 	if (is_pci_p2pdma_page(sg_page(iod->sg)))
 		nr_mapped = pci_p2pdma_map_sg(dev->dev, iod->sg, iod->nents,
-					  dma_dir);
+					      rq_dma_dir(req));
 	else
 		nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
-					     dma_dir,  DMA_ATTR_NO_WARN);
+					     rq_dma_dir(req), DMA_ATTR_NO_WARN);
 	if (!nr_mapped)
 		goto out;
 
+	iod->use_sgl = nvme_pci_use_sgls(dev, req);
 	if (iod->use_sgl)
 		ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped);
 	else
 		ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
-
-	if (ret != BLK_STS_OK)
-		goto out_unmap;
-
-	ret = BLK_STS_IOERR;
-	if (blk_integrity_rq(req)) {
-		if (blk_rq_count_integrity_sg(q, req->bio) != 1)
-			goto out_unmap;
-
-		sg_init_table(&iod->meta_sg, 1);
-		if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1)
-			goto out_unmap;
-
-		if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir))
-			goto out_unmap;
-
-		cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
-	}
-
-	return BLK_STS_OK;
-
-out_unmap:
-	dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
 out:
+	if (ret != BLK_STS_OK)
+		nvme_unmap_data(dev, req);
 	return ret;
 }
 
-static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
+static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
+		struct nvme_command *cmnd)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	enum dma_data_direction dma_dir = rq_data_dir(req) ?
-			DMA_TO_DEVICE : DMA_FROM_DEVICE;
-
-	if (iod->nents) {
-		/* P2PDMA requests do not need to be unmapped */
-		if (!is_pci_p2pdma_page(sg_page(iod->sg)))
-			dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
-
-		if (blk_integrity_rq(req))
-			dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
-	}
 
-	nvme_cleanup_cmd(req);
-	nvme_free_iod(dev, req);
+	iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req),
+			rq_dma_dir(req), 0);
+	if (dma_mapping_error(dev->dev, iod->meta_dma))
+		return BLK_STS_IOERR;
+	cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
+	return 0;
 }
 
 /*
@@ -915,9 +891,14 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_queue *nvmeq = hctx->driver_data;
 	struct nvme_dev *dev = nvmeq->dev;
 	struct request *req = bd->rq;
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct nvme_command cmnd;
 	blk_status_t ret;
 
+	iod->aborted = 0;
+	iod->npages = -1;
+	iod->nents = 0;
+
 	/*
 	 * We should not need to do this, but we're still using this to
 	 * ensure we can drain requests on a dying queue.
@@ -929,21 +910,23 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (ret)
 		return ret;
 
-	ret = nvme_init_iod(req, dev);
-	if (ret)
-		goto out_free_cmd;
-
 	if (blk_rq_nr_phys_segments(req)) {
 		ret = nvme_map_data(dev, req, &cmnd);
 		if (ret)
-			goto out_cleanup_iod;
+			goto out_free_cmd;
+	}
+
+	if (blk_integrity_rq(req)) {
+		ret = nvme_map_metadata(dev, req, &cmnd);
+		if (ret)
+			goto out_unmap_data;
 	}
 
 	blk_mq_start_request(req);
 	nvme_submit_cmd(nvmeq, &cmnd, bd->last);
 	return BLK_STS_OK;
-out_cleanup_iod:
-	nvme_free_iod(dev, req);
+out_unmap_data:
+	nvme_unmap_data(dev, req);
 out_free_cmd:
 	nvme_cleanup_cmd(req);
 	return ret;
@@ -952,8 +935,14 @@ out_free_cmd:
 static void nvme_pci_complete_rq(struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct nvme_dev *dev = iod->nvmeq->dev;
 
-	nvme_unmap_data(iod->nvmeq->dev, req);
+	nvme_cleanup_cmd(req);
+	if (blk_integrity_rq(req))
+		dma_unmap_page(dev->dev, iod->meta_dma,
+			       rq_integrity_vec(req)->bv_len, rq_data_dir(req));
+	if (blk_rq_nr_phys_segments(req))
+		nvme_unmap_data(dev, req);
 	nvme_complete_rq(req);
 }
 
@@ -1088,7 +1077,7 @@ static int nvme_poll_irqdisable(struct nvme_queue *nvmeq, unsigned int tag)
 	 * using the CQ lock.  For normal interrupt driven threads we have
 	 * to disable the interrupt to avoid racing with it.
 	 */
-	if (nvmeq->cq_vector == -1) {
+	if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) {
 		spin_lock(&nvmeq->cq_poll_lock);
 		found = nvme_process_cq(nvmeq, &start, &end, tag);
 		spin_unlock(&nvmeq->cq_poll_lock);
@@ -1148,7 +1137,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 	struct nvme_command c;
 	int flags = NVME_QUEUE_PHYS_CONTIG;
 
-	if (vector != -1)
+	if (!test_bit(NVMEQ_POLLED, &nvmeq->flags))
 		flags |= NVME_CQ_IRQ_ENABLED;
 
 	/*
@@ -1161,10 +1150,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 	c.create_cq.cqid = cpu_to_le16(qid);
 	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
 	c.create_cq.cq_flags = cpu_to_le16(flags);
-	if (vector != -1)
-		c.create_cq.irq_vector = cpu_to_le16(vector);
-	else
-		c.create_cq.irq_vector = 0;
+	c.create_cq.irq_vector = cpu_to_le16(vector);
 
 	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
 }
@@ -1271,6 +1257,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_dev *dev = nvmeq->dev;
 	struct request *abort_req;
 	struct nvme_command cmd;
+	bool shutdown = false;
 	u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
 	/* If PCI error recovery process is happening, we cannot reset or
@@ -1307,12 +1294,14 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	 * shutdown, so we return BLK_EH_DONE.
 	 */
 	switch (dev->ctrl.state) {
+	case NVME_CTRL_DELETING:
+		shutdown = true;
 	case NVME_CTRL_CONNECTING:
 	case NVME_CTRL_RESETTING:
 		dev_warn_ratelimited(dev->ctrl.device,
			 "I/O %d QID %d timeout, disable controller\n",
			 req->tag, nvmeq->qid);
-		nvme_dev_disable(dev, false);
+		nvme_dev_disable(dev, shutdown);
 		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
 		return BLK_EH_DONE;
 	default:
@@ -1371,16 +1360,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 
 static void nvme_free_queue(struct nvme_queue *nvmeq)
 {
-	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
+	dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq->q_depth),
 				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
 	if (!nvmeq->sq_cmds)
 		return;
 
 	if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) {
-		pci_free_p2pmem(to_pci_dev(nvmeq->q_dmadev),
+		pci_free_p2pmem(to_pci_dev(nvmeq->dev->dev),
 				nvmeq->sq_cmds, SQ_SIZE(nvmeq->q_depth));
 	} else {
-		dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+		dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq->q_depth),
 				nvmeq->sq_cmds, nvmeq->sq_dma_addr);
 	}
 }
@@ -1410,10 +1399,8 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 	nvmeq->dev->online_queues--;
 	if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
 		blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
-	if (nvmeq->cq_vector == -1)
-		return 0;
-	pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq);
-	nvmeq->cq_vector = -1;
+	if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags))
+		pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq);
 	return 0;
 }
 
@@ -1498,7 +1485,6 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
 	if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth))
 		goto free_cqdma;
 
-	nvmeq->q_dmadev = dev->dev;
 	nvmeq->dev = dev;
 	spin_lock_init(&nvmeq->sq_lock);
 	spin_lock_init(&nvmeq->cq_poll_lock);
@@ -1507,7 +1493,6 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
 	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
 	nvmeq->q_depth = depth;
 	nvmeq->qid = qid;
-	nvmeq->cq_vector = -1;
 	dev->ctrl.queue_count++;
 
 	return 0;
@@ -1552,7 +1537,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
 {
 	struct nvme_dev *dev = nvmeq->dev;
 	int result;
-	s16 vector;
+	u16 vector = 0;
 
 	clear_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags);
 
@@ -1563,7 +1548,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
 	if (!polled)
 		vector = dev->num_vecs == 1 ? 0 : qid;
 	else
-		vector = -1;
+		set_bit(NVMEQ_POLLED, &nvmeq->flags);
 
 	result = adapter_alloc_cq(dev, qid, nvmeq, vector);
 	if (result)
@@ -1578,7 +1563,8 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
 	nvmeq->cq_vector = vector;
 	nvme_init_queue(nvmeq, qid);
 
-	if (vector != -1) {
+	if (!polled) {
+		nvmeq->cq_vector = vector;
 		result = queue_request_irq(nvmeq);
 		if (result < 0)
 			goto release_sq;
@@ -1588,7 +1574,6 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
 	return result;
 
 release_sq:
-	nvmeq->cq_vector = -1;
 	dev->online_queues--;
 	adapter_delete_sq(dev, qid);
 release_cq:
@@ -1639,7 +1624,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 		dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
 		dev->admin_tagset.timeout = ADMIN_TIMEOUT;
 		dev->admin_tagset.numa_node = dev_to_node(dev->dev);
-		dev->admin_tagset.cmd_size = nvme_pci_cmd_size(dev, false);
+		dev->admin_tagset.cmd_size = sizeof(struct nvme_iod);
 		dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
 		dev->admin_tagset.driver_data = dev;
 
@@ -1730,7 +1715,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
 	nvme_init_queue(nvmeq, 0);
 	result = queue_request_irq(nvmeq);
 	if (result) {
-		nvmeq->cq_vector = -1;
+		dev->online_queues--;
 		return result;
 	}
 
@@ -2171,10 +2156,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	 * number of interrupts.
 	 */
 	result = queue_request_irq(adminq);
-	if (result) {
-		adminq->cq_vector = -1;
+	if (result)
 		return result;
-	}
 	set_bit(NVMEQ_ENABLED, &adminq->flags);
 
 	result = nvme_create_io_queues(dev);
@@ -2286,11 +2269,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 		dev->tagset.numa_node = dev_to_node(dev->dev);
 		dev->tagset.queue_depth =
				min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
-		dev->tagset.cmd_size = nvme_pci_cmd_size(dev, false);
-		if ((dev->ctrl.sgls & ((1 << 0) | (1 << 1))) && sgl_threshold) {
-			dev->tagset.cmd_size = max(dev->tagset.cmd_size,
-					nvme_pci_cmd_size(dev, true));
-		}
+		dev->tagset.cmd_size = sizeof(struct nvme_iod);
 		dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
 		dev->tagset.driver_data = dev;
 
@@ -2438,8 +2417,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	 * must flush all entered requests to their failed completion to avoid
 	 * deadlocking blk-mq hot-cpu notifier.
 	 */
-	if (shutdown)
+	if (shutdown) {
 		nvme_start_queues(&dev->ctrl);
+		if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q))
+			blk_mq_unquiesce_queue(dev->ctrl.admin_q);
+	}
 
 	mutex_unlock(&dev->shutdown_lock);
 }
@@ -2979,6 +2961,9 @@ static struct pci_driver nvme_driver = {
 
 static int __init nvme_init(void)
 {
+	BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
 	BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
 	return pci_register_driver(&nvme_driver);
 }
@@ -2987,7 +2972,6 @@ static void __exit nvme_exit(void)
 {
 	pci_unregister_driver(&nvme_driver);
 	flush_workqueue(nvme_wq);
-	_nvme_check_size();
 }
 
 MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 11a5ecae78c8..e1824c2e0a1c 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -914,8 +914,9 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
 {
 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_stop_queue(&ctrl->queues[0]);
-	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request,
-			&ctrl->ctrl);
+	if (ctrl->ctrl.admin_tagset)
+		blk_mq_tagset_busy_iter(ctrl->ctrl.admin_tagset,
+			nvme_cancel_request, &ctrl->ctrl);
 	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_destroy_admin_queue(ctrl, remove);
 }
@@ -926,8 +927,9 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
 		nvme_rdma_stop_io_queues(ctrl);
-		blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request,
-				&ctrl->ctrl);
+		if (ctrl->ctrl.tagset)
+			blk_mq_tagset_busy_iter(ctrl->ctrl.tagset,
+				nvme_cancel_request, &ctrl->ctrl);
 		if (remove)
 			nvme_start_queues(&ctrl->ctrl);
 		nvme_rdma_destroy_io_queues(ctrl, remove);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 68c49dd67210..2b107a1d152b 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -473,7 +473,6 @@ static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
 	}
 
 	return 0;
-
 }
 
 static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
@@ -634,7 +633,6 @@ static inline void nvme_tcp_end_request(struct request *rq, u16 status)
 	nvme_end_request(rq, cpu_to_le16(status << 1), res);
 }
 
-
 static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 			      unsigned int *offset, size_t *len)
 {
@@ -1425,7 +1423,8 @@ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
 	if (!ret) {
 		set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags);
 	} else {
-		__nvme_tcp_stop_queue(&ctrl->queues[idx]);
+		if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags))
+			__nvme_tcp_stop_queue(&ctrl->queues[idx]);
 		dev_err(nctrl->device,
 			"failed to connect queue: %d ret=%d\n", idx, ret);
 	}
@@ -1535,7 +1534,7 @@ out_free_queue:
 	return ret;
 }
 
-static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
+static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
 {
 	int i, ret;
 
@@ -1565,7 +1564,7 @@ static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
 	return nr_io_queues;
 }
 
-static int nvme_alloc_io_queues(struct nvme_ctrl *ctrl)
+static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
 {
 	unsigned int nr_io_queues;
 	int ret;
@@ -1582,7 +1581,7 @@ static int nvme_alloc_io_queues(struct nvme_ctrl *ctrl)
 	dev_info(ctrl->device,
 		"creating %d I/O queues.\n", nr_io_queues);
 
-	return nvme_tcp_alloc_io_queues(ctrl);
+	return __nvme_tcp_alloc_io_queues(ctrl);
 }
 
 static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)
@@ -1599,7 +1598,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
 {
 	int ret;
 
-	ret = nvme_alloc_io_queues(ctrl);
+	ret = nvme_tcp_alloc_io_queues(ctrl);
 	if (ret)
 		return ret;
 
@@ -1710,7 +1709,9 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
 {
 	blk_mq_quiesce_queue(ctrl->admin_q);
 	nvme_tcp_stop_queue(ctrl, 0);
-	blk_mq_tagset_busy_iter(ctrl->admin_tagset, nvme_cancel_request, ctrl);
+	if (ctrl->admin_tagset)
+		blk_mq_tagset_busy_iter(ctrl->admin_tagset,
+			nvme_cancel_request, ctrl);
 	blk_mq_unquiesce_queue(ctrl->admin_q);
 	nvme_tcp_destroy_admin_queue(ctrl, remove);
 }
@@ -1722,7 +1723,9 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
 		return;
 	nvme_stop_queues(ctrl);
 	nvme_tcp_stop_io_queues(ctrl);
-	blk_mq_tagset_busy_iter(ctrl->tagset, nvme_cancel_request, ctrl);
+	if (ctrl->tagset)
+		blk_mq_tagset_busy_iter(ctrl->tagset,
+			nvme_cancel_request, ctrl);
 	if (remove)
 		nvme_start_queues(ctrl);
 	nvme_tcp_destroy_io_queues(ctrl, remove);