From 03ea83e9a37e41d436f8348e6eee3d8281bfff3a Mon Sep 17 00:00:00 2001 From: Tushar Behera Date: Mon, 10 Jun 2013 10:20:55 +0530 Subject: NVMe: Use kzalloc instead of kmalloc+memset Use kzalloc instead of kmalloc and a susbsequent memset. Signed-off-by: Tushar Behera Signed-off-by: Vishal Verma Signed-off-by: Matthew Wilcox --- drivers/block/nvme-scsi.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 102de2f52b5c..4a4ff4eb8e23 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -933,13 +933,12 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, int res = SNTI_TRANSLATION_SUCCESS; int xfer_len; - inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL); + inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL); if (inq_response == NULL) { res = -ENOMEM; goto out_mem; } - memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH); inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE; /* Page Code */ inq_response[2] = 0x00; /* Page Length MSB */ inq_response[3] = 0x3C; /* Page Length LSB */ @@ -964,12 +963,11 @@ static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr, int xfer_len; u8 *log_response; - log_response = kmalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL); + log_response = kzalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL); if (log_response == NULL) { res = -ENOMEM; goto out_mem; } - memset(log_response, 0, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH); log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE; /* Subpage=0x00, Page Length MSB=0 */ @@ -1000,12 +998,11 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, u8 temp_c; u16 temp_k; - log_response = kmalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL); + log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL); if (log_response == NULL) { res = -ENOMEM; goto out_mem; } - memset(log_response, 0, LOG_INFO_EXCP_PAGE_LENGTH); mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log), @@ -1069,12 +1066,11 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 temp_c_cur, temp_c_thresh; u16 temp_k; - log_response = kmalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL); + log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL); if (log_response == NULL) { res = -ENOMEM; goto out_mem; } - memset(log_response, 0, LOG_TEMP_PAGE_LENGTH); mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log), @@ -1380,12 +1376,11 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns, blk_desc_offset = mph_size; mode_pages_offset_1 = blk_desc_offset + blk_desc_len; - response = kmalloc(resp_size, GFP_KERNEL); + response = kzalloc(resp_size, GFP_KERNEL); if (response == NULL) { res = -ENOMEM; goto out_mem; } - memset(response, 0, resp_size); res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10, llbaa, mode_data_length, blk_desc_len); @@ -2480,12 +2475,11 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, } id_ns = mem; - response = kmalloc(resp_size, GFP_KERNEL); + response = kzalloc(resp_size, GFP_KERNEL); if (response == NULL) { res = -ENOMEM; goto out_dma; } - memset(response, 0, resp_size); nvme_trans_fill_read_cap(response, id_ns, cdb16); xfer_len = min(alloc_len, resp_size); @@ -2554,12 +2548,11 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out_dma; } - response = kmalloc(resp_size, GFP_KERNEL); + response = kzalloc(resp_size, GFP_KERNEL); if (response == NULL) { res = -ENOMEM; goto out_dma; } - memset(response, 0, resp_size); /* The first LUN ID will always be 0 per the SAM spec */ for (lun_id = 0; lun_id < le32_to_cpu(id_ctrl->nn); lun_id++) { @@ -2600,12 +2593,11 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) : (FIXED_FMT_SENSE_DATA_SIZE)); - response = kmalloc(resp_size, GFP_KERNEL); + response = kzalloc(resp_size, GFP_KERNEL); if (response == NULL) { res = -ENOMEM; goto out; } - memset(response, 0, resp_size); if (desc_format == DESCRIPTOR_FORMAT_SENSE_DATA_TYPE) { /* Descriptor Format Sense Data */ -- cgit v1.2.3 From 063a8096f3dbca7521d5918b3aea7ab46c5d2fe9 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 20 Jun 2013 10:53:48 -0400 Subject: NVMe: Restructure MSI / MSI-X setup The current code copies 'nr_io_queues' into 'q_count', modifies 'nr_io_queues' during MSI-X setup, then resets 'nr_io_queues' for MSI setup. Instead, copy 'nr_io_queues' into 'vecs' and modify 'vecs' during both MSI-X and MSI setup. This lets us simplify the for-loops that set up MSI-X and MSI, and opens the possibility of using more I/O queues than we have interrupt vectors, should future benchmarking prove that to be a useful feature. Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index ce79a590b45b..de3a75978c56 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1638,7 +1638,7 @@ static int set_queue_count(struct nvme_dev *dev, int count) static int nvme_setup_io_queues(struct nvme_dev *dev) { struct pci_dev *pdev = dev->pci_dev; - int result, cpu, i, nr_io_queues, db_bar_size, q_depth, q_count; + int result, cpu, i, vecs, nr_io_queues, db_bar_size, q_depth; nr_io_queues = num_online_cpus(); result = set_queue_count(dev, nr_io_queues); @@ -1647,7 +1647,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) if (result < nr_io_queues) nr_io_queues = result; - q_count = nr_io_queues; /* Deregister the admin queue's interrupt */ free_irq(dev->entry[0].vector, dev->queues[0]); @@ -1659,39 +1658,42 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) dev->queues[0]->q_db = dev->dbs; } - for (i = 0; i < nr_io_queues; i++) + vecs = nr_io_queues; + for (i = 0; i < vecs; i++) dev->entry[i].entry = i; for (;;) { - result = pci_enable_msix(pdev, dev->entry, nr_io_queues); - if (result == 0) { + result = pci_enable_msix(pdev, dev->entry, vecs); + if (result <= 0) break; - } else if (result > 0) { - nr_io_queues = result; - continue; - } else { - nr_io_queues = 0; - break; - } + vecs = result; } - if (nr_io_queues == 0) { - nr_io_queues = q_count; + if (result < 0) { + vecs = nr_io_queues; + if (vecs > 32) + vecs = 32; for (;;) { - result = pci_enable_msi_block(pdev, nr_io_queues); + result = pci_enable_msi_block(pdev, vecs); if (result == 0) { - for (i = 0; i < nr_io_queues; i++) + for (i = 0; i < vecs; i++) dev->entry[i].vector = i + pdev->irq; break; - } else if (result > 0) { - nr_io_queues = result; - continue; - } else { - nr_io_queues = 1; + } else if (result < 0) { + vecs = 1; break; } + vecs = result; } } + /* + * Should investigate if there's a performance win from allocating + * more queues than interrupt vectors; it might allow the submission + * path to scale better, even if the receive path is limited by the + * number of interrupts. + */ + nr_io_queues = vecs; + result = queue_request_irq(dev, dev->queues[0], "nvme admin"); /* XXX: handle failure here */ -- cgit v1.2.3 From 6198221fa0df0298513b35796f63f242ea97134e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 29 May 2013 15:59:39 -0600 Subject: NVMe: Disk IO statistics Add io stats accounting for bio requests so nvme block devices show useful disk stats. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 28 ++++++++++++++++++++++++++++ include/linux/nvme.h | 1 + 2 files changed, 29 insertions(+) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index de3a75978c56..4e71b075d3b4 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -285,6 +285,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp) iod->npages = -1; iod->length = nbytes; iod->nents = 0; + iod->start_time = jiffies; } return iod; @@ -308,6 +309,30 @@ void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) kfree(iod); } +static void nvme_start_io_acct(struct bio *bio) +{ + struct gendisk *disk = bio->bi_bdev->bd_disk; + const int rw = bio_data_dir(bio); + int cpu = part_stat_lock(); + part_round_stats(cpu, &disk->part0); + part_stat_inc(cpu, &disk->part0, ios[rw]); + part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio)); + part_inc_in_flight(&disk->part0, rw); + part_stat_unlock(); +} + +static void nvme_end_io_acct(struct bio *bio, unsigned long start_time) +{ + struct gendisk *disk = bio->bi_bdev->bd_disk; + const int rw = bio_data_dir(bio); + unsigned long duration = jiffies - start_time; + int cpu = part_stat_lock(); + part_stat_add(cpu, &disk->part0, ticks[rw], duration); + part_round_stats(cpu, &disk->part0); + part_dec_in_flight(&disk->part0, rw); + part_stat_unlock(); +} + static void bio_completion(struct nvme_dev *dev, void *ctx, struct nvme_completion *cqe) { @@ -318,6 +343,8 @@ static void bio_completion(struct nvme_dev *dev, void *ctx, if (iod->nents) dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); + + nvme_end_io_acct(bio, iod->start_time); nvme_free_iod(dev, iod); if (status) bio_endio(bio, -EIO); @@ -695,6 +722,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, cmnd->rw.control = cpu_to_le16(control); cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); + nvme_start_io_acct(bio); if (++nvmeq->sq_tail == nvmeq->q_depth) nvmeq->sq_tail = 0; writel(nvmeq->sq_tail, nvmeq->q_db); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index f451c8d6e231..5d7c07946fbe 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -572,6 +572,7 @@ struct nvme_iod { int offset; /* Of PRP list */ int nents; /* Used in scatterlist */ int length; /* Of data, in bytes */ + unsigned long start_time; dma_addr_t first_dma; struct scatterlist sg[0]; }; -- cgit v1.2.3 From e9539f47525ecee05c9f22c3565885f3e9492c52 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 24 Jun 2013 11:47:34 -0400 Subject: NVMe: Return correct value from interrupt handler The interrupt handler currently reports whether it found any new completion queue entries. If the completion queue is primarily being processed by a method other than the interrupt handler, it may return IRQ_NONE so often that Linux thinks that the interrupt is being falsely triggered. To solve this problem, report whether any completion queue entries have been seen since the last interrupt was received for this queue. Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 4e71b075d3b4..88a95747494c 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -79,7 +79,8 @@ struct nvme_queue { u16 sq_head; u16 sq_tail; u16 cq_head; - u16 cq_phase; + u8 cq_phase; + u8 cqe_seen; unsigned long cmdid_data[]; }; @@ -756,7 +757,7 @@ static void nvme_make_request(struct request_queue *q, struct bio *bio) put_nvmeq(nvmeq); } -static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq) +static int nvme_process_cq(struct nvme_queue *nvmeq) { u16 head, phase; @@ -786,13 +787,14 @@ static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq) * a big problem. */ if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) - return IRQ_NONE; + return 0; writel(head, nvmeq->q_db + (1 << nvmeq->dev->db_stride)); nvmeq->cq_head = head; nvmeq->cq_phase = phase; - return IRQ_HANDLED; + nvmeq->cqe_seen = 1; + return 1; } static irqreturn_t nvme_irq(int irq, void *data) @@ -800,7 +802,9 @@ static irqreturn_t nvme_irq(int irq, void *data) irqreturn_t result; struct nvme_queue *nvmeq = data; spin_lock(&nvmeq->q_lock); - result = nvme_process_cq(nvmeq); + nvme_process_cq(nvmeq); + result = nvmeq->cqe_seen ? IRQ_HANDLED : IRQ_NONE; + nvmeq->cqe_seen = 0; spin_unlock(&nvmeq->q_lock); return result; } -- cgit v1.2.3 From bc57a0f7a44cfcf3e9873f6c6b8dcecdca486b1f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 24 Jun 2013 11:56:42 -0400 Subject: NVMe: Remove "process_cq did something" message I was originally intending to log the fact that the kthread had done some work since it might help us find interrupt handling problems, but that hasn't been done yet, and spamming the logs with this message is just rude. Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 88a95747494c..eb4a91f3bf41 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1538,8 +1538,7 @@ static int nvme_kthread(void *data) if (!nvmeq) continue; spin_lock_irq(&nvmeq->q_lock); - if (nvme_process_cq(nvmeq)) - printk("process_cq did something\n"); + nvme_process_cq(nvmeq); nvme_cancel_ios(nvmeq, true); nvme_resubmit_bios(nvmeq); spin_unlock_irq(&nvmeq->q_lock); -- cgit v1.2.3 From 7d8224574cbd2326a6be00f319f5f7597abec3f6 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 24 Jun 2013 12:03:57 -0400 Subject: NVMe: Call nvme_process_cq from submission path Since we have the queue locked, it makes sense to check if there are any completion queue entries on the queue before we release the lock. If there are, it may save an interrupt and reduce latency for the I/Os that happened to complete. This happens fairly often for some workloads. Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index eb4a91f3bf41..07d527c66eb4 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -738,25 +738,6 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, return result; } -static void nvme_make_request(struct request_queue *q, struct bio *bio) -{ - struct nvme_ns *ns = q->queuedata; - struct nvme_queue *nvmeq = get_nvmeq(ns->dev); - int result = -EBUSY; - - spin_lock_irq(&nvmeq->q_lock); - if (bio_list_empty(&nvmeq->sq_cong)) - result = nvme_submit_bio_queue(nvmeq, ns, bio); - if (unlikely(result)) { - if (bio_list_empty(&nvmeq->sq_cong)) - add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); - bio_list_add(&nvmeq->sq_cong, bio); - } - - spin_unlock_irq(&nvmeq->q_lock); - put_nvmeq(nvmeq); -} - static int nvme_process_cq(struct nvme_queue *nvmeq) { u16 head, phase; @@ -797,6 +778,26 @@ static int nvme_process_cq(struct nvme_queue *nvmeq) return 1; } +static void nvme_make_request(struct request_queue *q, struct bio *bio) +{ + struct nvme_ns *ns = q->queuedata; + struct nvme_queue *nvmeq = get_nvmeq(ns->dev); + int result = -EBUSY; + + spin_lock_irq(&nvmeq->q_lock); + if (bio_list_empty(&nvmeq->sq_cong)) + result = nvme_submit_bio_queue(nvmeq, ns, bio); + if (unlikely(result)) { + if (bio_list_empty(&nvmeq->sq_cong)) + add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); + bio_list_add(&nvmeq->sq_cong, bio); + } + + nvme_process_cq(nvmeq); + spin_unlock_irq(&nvmeq->q_lock); + put_nvmeq(nvmeq); +} + static irqreturn_t nvme_irq(int irq, void *data) { irqreturn_t result; -- cgit v1.2.3 From c3bfe7176c035a0a2c70bc79180fb13a6c57142a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 8 Jul 2013 17:26:25 -0400 Subject: NVMe: Namespace IDs are unsigned The 'Number of Namespaces' read from the device was being treated as signed, which would cause us to not scan any namespaces for a device with more than 2 billion namespaces. That led to noticing that the namespace ID was also being treated as signed, which could lead to the result from NVME_IOCTL_ID being treated as an error code. Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 7 +++++-- include/linux/nvme.h | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 07d527c66eb4..56d1fa472d06 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -1486,6 +1487,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, switch (cmd) { case NVME_IOCTL_ID: + force_successful_syscall_return(); return ns->ns_id; case NVME_IOCTL_ADMIN_CMD: return nvme_user_admin_cmd(ns->dev, (void __user *)arg); @@ -1588,7 +1590,7 @@ static void nvme_config_discard(struct nvme_ns *ns) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); } -static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid, +static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, struct nvme_id_ns *id, struct nvme_lba_range_type *rt) { struct nvme_ns *ns; @@ -1768,7 +1770,8 @@ static void nvme_free_queues(struct nvme_dev *dev) */ static int nvme_dev_add(struct nvme_dev *dev) { - int res, nn, i; + int res; + unsigned nn, i; struct nvme_ns *ns; struct nvme_id_ctrl *ctrl; struct nvme_id_ns *id_ns; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 8d0041513e1a..3403c8f06fa0 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -104,7 +104,7 @@ struct nvme_ns { struct request_queue *queue; struct gendisk *disk; - int ns_id; + unsigned ns_id; int lba_shift; int ms; u64 mode_select_num_blocks; -- cgit v1.2.3 From 1b56749e541ad59068582f2a28297843e243b856 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 18 Jul 2013 12:13:51 -0600 Subject: NVMe: Fix checkpatch issues Signed-off-by: Keith Busch --- drivers/block/nvme-core.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 56d1fa472d06..f9244131b88a 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -451,10 +451,8 @@ static void nvme_bio_pair_endio(struct bio *bio, int err) if (atomic_dec_and_test(&bp->cnt)) { bio_endio(bp->parent, bp->err); - if (bp->bv1) - kfree(bp->bv1); - if (bp->bv2) - kfree(bp->bv2); + kfree(bp->bv1); + kfree(bp->bv2); kfree(bp); } } @@ -1348,7 +1346,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.appmask = cpu_to_le16(io.appmask); if (meta_len) { - meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata, meta_len); + meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata, + meta_len); if (IS_ERR(meta_iod)) { status = PTR_ERR(meta_iod); meta_iod = NULL; -- cgit v1.2.3 From 7e03b124065507e72008ef294c30001eca74a031 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 29 Jul 2013 16:20:56 -0600 Subject: NVMe: Bring up cdev on set feature failure This patch creates the character device as long as a device's admin queues are usable so a user has an opprotunity to perform administration tasks. A device may be in a state that does not allow IO and setting the queue count feature in such a state returns an error. Previously the driver would bail and the controller would be unusable. Signed-off-by: Keith Busch --- drivers/block/nvme-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index f9244131b88a..8cfa4576d424 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1664,7 +1664,7 @@ static int set_queue_count(struct nvme_dev *dev, int count) status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0, &result); if (status) - return -EIO; + return status < 0 ? -EIO : -EBUSY; return min(result & 0xffff, result >> 16) + 1; } @@ -2018,7 +2018,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) spin_unlock(&dev_list_lock); result = nvme_dev_add(dev); - if (result) + if (result && result != -EBUSY) goto delete; scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance); -- cgit v1.2.3 From 9e59d091b0eb04f223ed037348e3d9e36f30e72b Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 8 Aug 2013 10:25:38 -0600 Subject: NVMe: Disk stats for read/write commands only Flush and discard requests would previously mess up the accounting. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 8cfa4576d424..360ac5d32d26 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -342,11 +342,11 @@ static void bio_completion(struct nvme_dev *dev, void *ctx, struct bio *bio = iod->private; u16 status = le16_to_cpup(&cqe->status) >> 1; - if (iod->nents) + if (iod->nents) { dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - - nvme_end_io_acct(bio, iod->start_time); + nvme_end_io_acct(bio, iod->start_time); + } nvme_free_iod(dev, iod); if (status) bio_endio(bio, -EIO); -- cgit v1.2.3 From 0877cb0d285c7f1d53d0b84b360bdea4be4f3f59 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 15 Jul 2013 15:02:19 -0600 Subject: NVMe: Group pci related actions in functions This will make it easier to reuse these outside probe/remove. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 112 +++++++++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 46 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 360ac5d32d26..a93f52c48036 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1191,9 +1191,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) u64 cap = readq(&dev->bar->cap); struct nvme_queue *nvmeq; - dev->dbs = ((void __iomem *)dev->bar) + 4096; - dev->db_stride = NVME_CAP_STRIDE(cap); - result = nvme_disable_ctrl(dev, cap); if (result < 0) return result; @@ -1832,6 +1829,61 @@ static int nvme_dev_add(struct nvme_dev *dev) return res; } +static int nvme_dev_map(struct nvme_dev *dev) +{ + int bars, result = -ENOMEM; + struct pci_dev *pdev = dev->pci_dev; + + if (pci_enable_device_mem(pdev)) + return result; + + dev->entry[0].vector = pdev->irq; + pci_set_master(pdev); + bars = pci_select_bars(pdev, IORESOURCE_MEM); + if (pci_request_selected_regions(pdev, bars, "nvme")) + goto disable_pci; + + if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) + dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); + else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) + dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); + else + goto disable_pci; + + pci_set_drvdata(pdev, dev); + dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); + if (!dev->bar) + goto disable; + + dev->db_stride = NVME_CAP_STRIDE(readq(&dev->bar->cap)); + dev->dbs = ((void __iomem *)dev->bar) + 4096; + + return 0; + + disable: + pci_release_regions(pdev); + disable_pci: + pci_disable_device(pdev); + return result; +} + +static void nvme_dev_unmap(struct nvme_dev *dev) +{ + if (dev->pci_dev->msi_enabled) + pci_disable_msi(dev->pci_dev); + else if (dev->pci_dev->msix_enabled) + pci_disable_msix(dev->pci_dev); + + if (dev->bar) { + iounmap(dev->bar); + dev->bar = NULL; + } + + pci_release_regions(dev->pci_dev); + if (pci_is_enabled(dev->pci_dev)) + pci_disable_device(dev->pci_dev); +} + static int nvme_dev_remove(struct nvme_dev *dev) { struct nvme_ns *ns, *next; @@ -1908,15 +1960,9 @@ static void nvme_free_dev(struct kref *kref) { struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); nvme_dev_remove(dev); - if (dev->pci_dev->msi_enabled) - pci_disable_msi(dev->pci_dev); - else if (dev->pci_dev->msix_enabled) - pci_disable_msix(dev->pci_dev); - iounmap(dev->bar); + nvme_dev_unmap(dev); nvme_release_instance(dev); nvme_release_prp_pools(dev); - pci_disable_device(dev->pci_dev); - pci_release_regions(dev->pci_dev); kfree(dev->queues); kfree(dev->entry); kfree(dev); @@ -1959,7 +2005,7 @@ static const struct file_operations nvme_dev_fops = { static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - int bars, result = -ENOMEM; + int result = -ENOMEM; struct nvme_dev *dev; dev = kzalloc(sizeof(*dev), GFP_KERNEL); @@ -1974,39 +2020,19 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!dev->queues) goto free; - if (pci_enable_device_mem(pdev)) - goto free; - pci_set_master(pdev); - bars = pci_select_bars(pdev, IORESOURCE_MEM); - if (pci_request_selected_regions(pdev, bars, "nvme")) - goto disable; - INIT_LIST_HEAD(&dev->namespaces); dev->pci_dev = pdev; - pci_set_drvdata(pdev, dev); - - if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) - dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); - else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) - dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); - else - goto disable; - result = nvme_set_instance(dev); if (result) - goto disable; - - dev->entry[0].vector = pdev->irq; + goto free; result = nvme_setup_prp_pools(dev); if (result) - goto disable_msix; + goto release; - dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); - if (!dev->bar) { - result = -ENOMEM; - goto disable_msix; - } + result = nvme_dev_map(dev); + if (result) + goto release_pools; result = nvme_configure_admin_queue(dev); if (result) @@ -2042,17 +2068,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) nvme_free_queues(dev); unmap: - iounmap(dev->bar); - disable_msix: - if (dev->pci_dev->msi_enabled) - pci_disable_msi(dev->pci_dev); - else if (dev->pci_dev->msix_enabled) - pci_disable_msix(dev->pci_dev); - nvme_release_instance(dev); + nvme_dev_unmap(dev); + release_pools: nvme_release_prp_pools(dev); - disable: - pci_disable_device(pdev); - pci_release_regions(pdev); + release: + nvme_release_instance(dev); free: kfree(dev->queues); kfree(dev->entry); -- cgit v1.2.3 From 224042742582c9938788b81165180c876e997a07 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 15 Jul 2013 15:02:20 -0600 Subject: NVMe: Separate queue alloc/free from create/delete This separates nvme queue allocation from creation, and queue deletion from freeing. This is so that we may in the future temporarily disable queues and reuse the same memory when bringing them back online, like coming back from suspend state. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 133 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 94 insertions(+), 39 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index a93f52c48036..95e28b6bcd09 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -82,6 +82,7 @@ struct nvme_queue { u16 cq_head; u8 cq_phase; u8 cqe_seen; + u8 q_suspended; unsigned long cmdid_data[]; }; @@ -117,6 +118,11 @@ static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq) return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)]; } +static unsigned nvme_queue_extra(int depth) +{ + return DIV_ROUND_UP(depth, 8) + (depth * sizeof(struct nvme_cmd_info)); +} + /** * alloc_cmdid() - Allocate a Command ID * @nvmeq: The queue that will be used for this command @@ -784,7 +790,7 @@ static void nvme_make_request(struct request_queue *q, struct bio *bio) int result = -EBUSY; spin_lock_irq(&nvmeq->q_lock); - if (bio_list_empty(&nvmeq->sq_cong)) + if (!nvmeq->q_suspended && bio_list_empty(&nvmeq->sq_cong)) result = nvme_submit_bio_queue(nvmeq, ns, bio); if (unlikely(result)) { if (bio_list_empty(&nvmeq->sq_cong)) @@ -1018,8 +1024,15 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout) } } -static void nvme_free_queue_mem(struct nvme_queue *nvmeq) +static void nvme_free_queue(struct nvme_queue *nvmeq) { + spin_lock_irq(&nvmeq->q_lock); + while (bio_list_peek(&nvmeq->sq_cong)) { + struct bio *bio = bio_list_pop(&nvmeq->sq_cong); + bio_endio(bio, -EIO); + } + spin_unlock_irq(&nvmeq->q_lock); + dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), (void *)nvmeq->cqes, nvmeq->cq_dma_addr); dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), @@ -1027,17 +1040,28 @@ static void nvme_free_queue_mem(struct nvme_queue *nvmeq) kfree(nvmeq); } -static void nvme_free_queue(struct nvme_dev *dev, int qid) +static void nvme_free_queues(struct nvme_dev *dev) +{ + int i; + + for (i = dev->queue_count - 1; i >= 0; i--) { + nvme_free_queue(dev->queues[i]); + dev->queue_count--; + dev->queues[i] = NULL; + } +} + +static void nvme_disable_queue(struct nvme_dev *dev, int qid) { struct nvme_queue *nvmeq = dev->queues[qid]; int vector = dev->entry[nvmeq->cq_vector].vector; spin_lock_irq(&nvmeq->q_lock); - nvme_cancel_ios(nvmeq, false); - while (bio_list_peek(&nvmeq->sq_cong)) { - struct bio *bio = bio_list_pop(&nvmeq->sq_cong); - bio_endio(bio, -EIO); + if (nvmeq->q_suspended) { + spin_unlock_irq(&nvmeq->q_lock); + return; } + nvmeq->q_suspended = 1; spin_unlock_irq(&nvmeq->q_lock); irq_set_affinity_hint(vector, NULL); @@ -1049,15 +1073,17 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid) adapter_delete_cq(dev, qid); } - nvme_free_queue_mem(nvmeq); + spin_lock_irq(&nvmeq->q_lock); + nvme_process_cq(nvmeq); + nvme_cancel_ios(nvmeq, false); + spin_unlock_irq(&nvmeq->q_lock); } static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth, int vector) { struct device *dmadev = &dev->pci_dev->dev; - unsigned extra = DIV_ROUND_UP(depth, 8) + (depth * - sizeof(struct nvme_cmd_info)); + unsigned extra = nvme_queue_extra(depth); struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL); if (!nvmeq) return NULL; @@ -1084,6 +1110,8 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)]; nvmeq->q_depth = depth; nvmeq->cq_vector = vector; + nvmeq->q_suspended = 1; + dev->queue_count++; return nvmeq; @@ -1107,18 +1135,29 @@ static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq, IRQF_DISABLED | IRQF_SHARED, name, nvmeq); } -static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid, - int cq_size, int vector) +static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) { - int result; - struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector); + struct nvme_dev *dev = nvmeq->dev; + unsigned extra = nvme_queue_extra(nvmeq->q_depth); - if (!nvmeq) - return ERR_PTR(-ENOMEM); + nvmeq->sq_tail = 0; + nvmeq->cq_head = 0; + nvmeq->cq_phase = 1; + nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)]; + memset(nvmeq->cmdid_data, 0, extra); + memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); + nvme_cancel_ios(nvmeq, false); + nvmeq->q_suspended = 0; +} + +static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) +{ + struct nvme_dev *dev = nvmeq->dev; + int result; result = adapter_alloc_cq(dev, qid, nvmeq); if (result < 0) - goto free_nvmeq; + return result; result = adapter_alloc_sq(dev, qid, nvmeq); if (result < 0) @@ -1128,19 +1167,17 @@ static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid, if (result < 0) goto release_sq; - return nvmeq; + spin_lock(&nvmeq->q_lock); + nvme_init_queue(nvmeq, qid); + spin_unlock(&nvmeq->q_lock); + + return result; release_sq: adapter_delete_sq(dev, qid); release_cq: adapter_delete_cq(dev, qid); - free_nvmeq: - dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), - (void *)nvmeq->cqes, nvmeq->cq_dma_addr); - dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), - nvmeq->sq_cmds, nvmeq->sq_dma_addr); - kfree(nvmeq); - return ERR_PTR(result); + return result; } static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled) @@ -1221,10 +1258,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) goto free_q; dev->queues[0] = nvmeq; + spin_lock(&nvmeq->q_lock); + nvme_init_queue(nvmeq, 0); + spin_unlock(&nvmeq->q_lock); return result; free_q: - nvme_free_queue_mem(nvmeq); + nvme_free_queue(nvmeq); return result; } @@ -1386,6 +1426,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) put_nvmeq(nvmeq); if (length != (io.nblocks + 1) << ns->lba_shift) status = -ENOMEM; + else if (!nvmeq || nvmeq->q_suspended) + status = -EBUSY; else status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT); @@ -1537,9 +1579,12 @@ static int nvme_kthread(void *data) if (!nvmeq) continue; spin_lock_irq(&nvmeq->q_lock); + if (nvmeq->q_suspended) + goto unlock; nvme_process_cq(nvmeq); nvme_cancel_ios(nvmeq, true); nvme_resubmit_bios(nvmeq); + unlock: spin_unlock_irq(&nvmeq->q_lock); } } @@ -1725,7 +1770,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) nr_io_queues = vecs; result = queue_request_irq(dev, dev->queues[0], "nvme admin"); - /* XXX: handle failure here */ + if (result) + goto free_queues; cpu = cpumask_first(cpu_online_mask); for (i = 0; i < nr_io_queues; i++) { @@ -1736,10 +1782,11 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1, NVME_Q_DEPTH); for (i = 0; i < nr_io_queues; i++) { - dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i); - if (IS_ERR(dev->queues[i + 1])) - return PTR_ERR(dev->queues[i + 1]); - dev->queue_count++; + dev->queues[i + 1] = nvme_alloc_queue(dev, i + 1, q_depth, i); + if (!dev->queues[i + 1]) { + result = -ENOMEM; + goto free_queues; + } } for (; i < num_possible_cpus(); i++) { @@ -1747,15 +1794,20 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) dev->queues[i + 1] = dev->queues[target + 1]; } - return 0; -} + for (i = 1; i < dev->queue_count; i++) { + result = nvme_create_queue(dev->queues[i], i); + if (result) { + for (--i; i > 0; i--) + nvme_disable_queue(dev, i); + goto free_queues; + } + } -static void nvme_free_queues(struct nvme_dev *dev) -{ - int i; + return 0; - for (i = dev->queue_count - 1; i >= 0; i--) - nvme_free_queue(dev, i); + free_queues: + nvme_free_queues(dev); + return result; } /* @@ -1887,6 +1939,10 @@ static void nvme_dev_unmap(struct nvme_dev *dev) static int nvme_dev_remove(struct nvme_dev *dev) { struct nvme_ns *ns, *next; + int i; + + for (i = dev->queue_count - 1; i >= 0; i--) + nvme_disable_queue(dev, i); spin_lock(&dev_list_lock); list_del(&dev->node); @@ -2037,7 +2093,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) result = nvme_configure_admin_queue(dev); if (result) goto unmap; - dev->queue_count++; spin_lock(&dev_list_lock); list_add(&dev->node, &dev_list); -- cgit v1.2.3 From f0b50732a979c55c2d15fe8ec4503fa5b3260c53 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 15 Jul 2013 15:02:21 -0600 Subject: NVMe: Separate controller init from disk discovery This combines the controller initialization into one function, removing IO queue setup from namespace discovery, and creates symetric functions for device removal. The controller start and shutdown functions can now be called from resume/suspend context as well as probe/remove. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 77 +++++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 30 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 95e28b6bcd09..1595ffba8a66 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1827,10 +1827,6 @@ static int nvme_dev_add(struct nvme_dev *dev) dma_addr_t dma_addr; int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; - res = nvme_setup_io_queues(dev); - if (res) - return res; - mem = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr, GFP_KERNEL); if (!mem) @@ -1936,27 +1932,29 @@ static void nvme_dev_unmap(struct nvme_dev *dev) pci_disable_device(dev->pci_dev); } -static int nvme_dev_remove(struct nvme_dev *dev) +static void nvme_dev_shutdown(struct nvme_dev *dev) { - struct nvme_ns *ns, *next; int i; for (i = dev->queue_count - 1; i >= 0; i--) nvme_disable_queue(dev, i); spin_lock(&dev_list_lock); - list_del(&dev->node); + list_del_init(&dev->node); spin_unlock(&dev_list_lock); + nvme_dev_unmap(dev); +} + +static void nvme_dev_remove(struct nvme_dev *dev) +{ + struct nvme_ns *ns, *next; + list_for_each_entry_safe(ns, next, &dev->namespaces, list) { list_del(&ns->list); del_gendisk(ns->disk); nvme_ns_free(ns); } - - nvme_free_queues(dev); - - return 0; } static int nvme_setup_prp_pools(struct nvme_dev *dev) @@ -2016,7 +2014,8 @@ static void nvme_free_dev(struct kref *kref) { struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); nvme_dev_remove(dev); - nvme_dev_unmap(dev); + nvme_dev_shutdown(dev); + nvme_free_queues(dev); nvme_release_instance(dev); nvme_release_prp_pools(dev); kfree(dev->queues); @@ -2059,6 +2058,37 @@ static const struct file_operations nvme_dev_fops = { .compat_ioctl = nvme_dev_ioctl, }; +static int nvme_dev_start(struct nvme_dev *dev) +{ + int result; + + result = nvme_dev_map(dev); + if (result) + return result; + + result = nvme_configure_admin_queue(dev); + if (result) + goto unmap; + + spin_lock(&dev_list_lock); + list_add(&dev->node, &dev_list); + spin_unlock(&dev_list_lock); + + result = nvme_setup_io_queues(dev); + if (result) + goto disable; + + return 0; + + disable: + spin_lock(&dev_list_lock); + list_del_init(&dev->node); + spin_unlock(&dev_list_lock); + unmap: + nvme_dev_unmap(dev); + return result; +} + static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int result = -ENOMEM; @@ -2086,21 +2116,13 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto release; - result = nvme_dev_map(dev); + result = nvme_dev_start(dev); if (result) goto release_pools; - result = nvme_configure_admin_queue(dev); - if (result) - goto unmap; - - spin_lock(&dev_list_lock); - list_add(&dev->node, &dev_list); - spin_unlock(&dev_list_lock); - result = nvme_dev_add(dev); if (result && result != -EBUSY) - goto delete; + goto shutdown; scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance); dev->miscdev.minor = MISC_DYNAMIC_MINOR; @@ -2116,15 +2138,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) remove: nvme_dev_remove(dev); - delete: - spin_lock(&dev_list_lock); - list_del(&dev->node); - spin_unlock(&dev_list_lock); - - nvme_free_queues(dev); - unmap: - nvme_dev_unmap(dev); + shutdown: + nvme_dev_shutdown(dev); release_pools: + nvme_free_queues(dev); nvme_release_prp_pools(dev); release: nvme_release_instance(dev); -- cgit v1.2.3 From 1894d8f16afe5ad54b732f0fa6c4e80bd4d40b91 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 15 Jul 2013 15:02:22 -0600 Subject: NVMe: Use normal shutdown The NVMe spec recommends using the shutdown normal sequence when safely taking the controller offline instead of hitting CC.EN on the next start-up to reset the controller. The spec recommends a minimum of 1 second for the shutdown complete. This patch waits 2 seconds to be on the safe side. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 26 ++++++++++++++++++++++++++ include/linux/nvme.h | 2 ++ 2 files changed, 28 insertions(+) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 1595ffba8a66..23bb5a70d810 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1221,6 +1221,30 @@ static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap) return nvme_wait_ready(dev, cap, true); } +static int nvme_shutdown_ctrl(struct nvme_dev *dev) +{ + unsigned long timeout; + u32 cc; + + cc = (readl(&dev->bar->cc) & ~NVME_CC_SHN_MASK) | NVME_CC_SHN_NORMAL; + writel(cc, &dev->bar->cc); + + timeout = 2 * HZ + jiffies; + while ((readl(&dev->bar->csts) & NVME_CSTS_SHST_MASK) != + NVME_CSTS_SHST_CMPLT) { + msleep(100); + if (fatal_signal_pending(current)) + return -EINTR; + if (time_after(jiffies, timeout)) { + dev_err(&dev->pci_dev->dev, + "Device shutdown incomplete; abort shutdown\n"); + return -ENODEV; + } + } + + return 0; +} + static int nvme_configure_admin_queue(struct nvme_dev *dev) { int result; @@ -1943,6 +1967,8 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) list_del_init(&dev->node); spin_unlock(&dev_list_lock); + if (dev->bar) + nvme_shutdown_ctrl(dev); nvme_dev_unmap(dev); } diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 3403c8f06fa0..26ebcf41c213 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -53,6 +53,7 @@ enum { NVME_CC_SHN_NONE = 0 << 14, NVME_CC_SHN_NORMAL = 1 << 14, NVME_CC_SHN_ABRUPT = 2 << 14, + NVME_CC_SHN_MASK = 3 << 14, NVME_CC_IOSQES = 6 << 16, NVME_CC_IOCQES = 4 << 20, NVME_CSTS_RDY = 1 << 0, @@ -60,6 +61,7 @@ enum { NVME_CSTS_SHST_NORMAL = 0 << 2, NVME_CSTS_SHST_OCCUR = 1 << 2, NVME_CSTS_SHST_CMPLT = 2 << 2, + NVME_CSTS_SHST_MASK = 3 << 2, }; #define NVME_VS(major, minor) (major << 16 | minor) -- cgit v1.2.3 From cd63894630ab17a192bf97427d16dbec10710a6a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 15 Jul 2013 15:02:23 -0600 Subject: NVMe: Add pci suspend/resume driver callbacks Used for going in and out of low power states. Resuming reuses the IO queues from the previous initialization, freeing any allocated queues that are no longer usable. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 73 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 58 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 23bb5a70d810..8efa728f1eac 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -789,6 +789,12 @@ static void nvme_make_request(struct request_queue *q, struct bio *bio) struct nvme_queue *nvmeq = get_nvmeq(ns->dev); int result = -EBUSY; + if (!nvmeq) { + put_nvmeq(NULL); + bio_endio(bio, -EIO); + return; + } + spin_lock_irq(&nvmeq->q_lock); if (!nvmeq->q_suspended && bio_list_empty(&nvmeq->sq_cong)) result = nvme_submit_bio_queue(nvmeq, ns, bio); @@ -1256,9 +1262,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) if (result < 0) return result; - nvmeq = nvme_alloc_queue(dev, 0, 64, 0); - if (!nvmeq) - return -ENOMEM; + nvmeq = dev->queues[0]; + if (!nvmeq) { + nvmeq = nvme_alloc_queue(dev, 0, 64, 0); + if (!nvmeq) + return -ENOMEM; + dev->queues[0] = nvmeq; + } aqa = nvmeq->q_depth - 1; aqa |= aqa << 16; @@ -1275,21 +1285,16 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) result = nvme_enable_ctrl(dev, cap); if (result) - goto free_q; + return result; result = queue_request_irq(dev, nvmeq, "nvme admin"); if (result) - goto free_q; + return result; - dev->queues[0] = nvmeq; spin_lock(&nvmeq->q_lock); nvme_init_queue(nvmeq, 0); spin_unlock(&nvmeq->q_lock); return result; - - free_q: - nvme_free_queue(nvmeq); - return result; } struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, @@ -1797,6 +1802,21 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) if (result) goto free_queues; + /* Free previously allocated queues that are no longer usable */ + spin_lock(&dev_list_lock); + for (i = dev->queue_count - 1; i > nr_io_queues; i--) { + struct nvme_queue *nvmeq = dev->queues[i]; + + spin_lock(&nvmeq->q_lock); + nvme_cancel_ios(nvmeq, false); + spin_unlock(&nvmeq->q_lock); + + nvme_free_queue(nvmeq); + dev->queue_count--; + dev->queues[i] = NULL; + } + spin_unlock(&dev_list_lock); + cpu = cpumask_first(cpu_online_mask); for (i = 0; i < nr_io_queues; i++) { irq_set_affinity_hint(dev->entry[i].vector, get_cpu_mask(cpu)); @@ -1805,7 +1825,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1, NVME_Q_DEPTH); - for (i = 0; i < nr_io_queues; i++) { + for (i = dev->queue_count - 1; i < nr_io_queues; i++) { dev->queues[i + 1] = nvme_alloc_queue(dev, i + 1, q_depth, i); if (!dev->queues[i + 1]) { result = -ENOMEM; @@ -2191,8 +2211,30 @@ static void nvme_remove(struct pci_dev *pdev) #define nvme_link_reset NULL #define nvme_slot_reset NULL #define nvme_error_resume NULL -#define nvme_suspend NULL -#define nvme_resume NULL + +static int nvme_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct nvme_dev *ndev = pci_get_drvdata(pdev); + + nvme_dev_shutdown(ndev); + return 0; +} + +static int nvme_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct nvme_dev *ndev = pci_get_drvdata(pdev); + int ret; + + ret = nvme_dev_start(ndev); + /* XXX: should remove gendisks if resume fails */ + if (ret) + nvme_free_queues(ndev); + return ret; +} + +static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume); static const struct pci_error_handlers nvme_err_handler = { .error_detected = nvme_error_detected, @@ -2216,8 +2258,9 @@ static struct pci_driver nvme_driver = { .id_table = nvme_id_table, .probe = nvme_probe, .remove = nvme_remove, - .suspend = nvme_suspend, - .resume = nvme_resume, + .driver = { + .pm = &nvme_dev_pm_ops, + }, .err_handler = &nvme_err_handler, }; -- cgit v1.2.3 From 9d713c2bfb5e1d6abb18a8b12293631f9fcdc708 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 15 Jul 2013 15:02:24 -0600 Subject: NVMe: Handle ioremap failure Decrement the number of queues required for doorbell remapping until the memory is successfully mapped for that size. Additional checks are done so that we don't call free_irq if it has already been freed. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 8efa728f1eac..9f2b424c445e 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1739,10 +1739,15 @@ static int set_queue_count(struct nvme_dev *dev, int count) return min(result & 0xffff, result >> 16) + 1; } +static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) +{ + return 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3)); +} + static int nvme_setup_io_queues(struct nvme_dev *dev) { struct pci_dev *pdev = dev->pci_dev; - int result, cpu, i, vecs, nr_io_queues, db_bar_size, q_depth; + int result, cpu, i, vecs, nr_io_queues, size, q_depth; nr_io_queues = num_online_cpus(); result = set_queue_count(dev, nr_io_queues); @@ -1751,17 +1756,24 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) if (result < nr_io_queues) nr_io_queues = result; - /* Deregister the admin queue's interrupt */ - free_irq(dev->entry[0].vector, dev->queues[0]); - - db_bar_size = 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3)); - if (db_bar_size > 8192) { + size = db_bar_size(dev, nr_io_queues); + if (size > 8192) { iounmap(dev->bar); - dev->bar = ioremap(pci_resource_start(pdev, 0), db_bar_size); + do { + dev->bar = ioremap(pci_resource_start(pdev, 0), size); + if (dev->bar) + break; + if (!--nr_io_queues) + return -ENOMEM; + size = db_bar_size(dev, nr_io_queues); + } while (1); dev->dbs = ((void __iomem *)dev->bar) + 4096; dev->queues[0]->q_db = dev->dbs; } + /* Deregister the admin queue's interrupt */ + free_irq(dev->entry[0].vector, dev->queues[0]); + vecs = nr_io_queues; for (i = 0; i < vecs; i++) dev->entry[i].entry = i; @@ -1799,8 +1811,10 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) nr_io_queues = vecs; result = queue_request_irq(dev, dev->queues[0], "nvme admin"); - if (result) + if (result) { + dev->queues[0]->q_suspended = 1; goto free_queues; + } /* Free previously allocated queues that are no longer usable */ spin_lock(&dev_list_lock); -- cgit v1.2.3 From d82e8bfdef9afae83b894be49af4644d9ac3c359 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 5 Sep 2013 14:45:07 -0600 Subject: NVMe: Merge issue on character device bring-up A recent patch made it possible to bring up the character handle when the device is responsive but not accepting a set-features command. Another recent patch moved the initialization that requires we move where the checks for this condition occur. This patch merges these two ideas so it works much as before. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 9f2b424c445e..da52092980e2 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -2135,10 +2135,10 @@ static int nvme_dev_start(struct nvme_dev *dev) spin_unlock(&dev_list_lock); result = nvme_setup_io_queues(dev); - if (result) + if (result && result != -EBUSY) goto disable; - return 0; + return result; disable: spin_lock(&dev_list_lock); @@ -2177,13 +2177,17 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto release; result = nvme_dev_start(dev); - if (result) + if (result) { + if (result == -EBUSY) + goto create_cdev; goto release_pools; + } result = nvme_dev_add(dev); - if (result && result != -EBUSY) + if (result) goto shutdown; + create_cdev: scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance); dev->miscdev.minor = MISC_DYNAMIC_MINOR; dev->miscdev.parent = &pdev->dev; -- cgit v1.2.3