39 files changed, 966 insertions, 798 deletions
diff --git a/Documentation/block/writeback_cache_control.txt b/Documentation/block/writeback_cache_control.txt index 83407d36630a..59e0516cbf6b 100644 --- a/Documentation/block/writeback_cache_control.txt +++ b/Documentation/block/writeback_cache_control.txt @@ -71,7 +71,7 @@ requests that have a payload. For devices with volatile write caches the driver needs to tell the block layer that it supports flushing caches by doing: - blk_queue_flush(sdkp->disk->queue, REQ_FLUSH); + blk_queue_write_cache(sdkp->disk->queue, true, false); and handle empty REQ_FLUSH requests in its prep_fn/request_fn. Note that REQ_FLUSH requests with a payload are automatically turned into a sequence @@ -79,7 +79,7 @@ of an empty REQ_FLUSH request followed by the actual write by the block layer. For devices that also support the FUA bit the block layer needs to be told to pass through the REQ_FUA bit using: - blk_queue_flush(sdkp->disk->queue, REQ_FLUSH | REQ_FUA); + blk_queue_write_cache(sdkp->disk->queue, true, true); and the driver must handle write requests that have the REQ_FUA bit set in prep_fn/request_fn. If the FUA bit is not natively supported the block diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 39ba20755e03..17e96dc29596 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -862,7 +862,7 @@ static int ubd_add(int n, char **error_out) goto out; } ubd_dev->queue->queuedata = ubd_dev; - blk_queue_flush(ubd_dev->queue, REQ_FLUSH); + blk_queue_write_cache(ubd_dev->queue, true, false); blk_queue_max_segments(ubd_dev->queue, MAX_SG); err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); diff --git a/block/blk-core.c b/block/blk-core.c index c50227796a26..2475b1c72773 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1964,7 +1964,8 @@ generic_make_request_checks(struct bio *bio) * drivers without flush support don't have to worry * about them. */ - if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { + if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && + !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); if (!nr_sectors) { err = 0; diff --git a/block/blk-flush.c b/block/blk-flush.c index 9c423e53324a..b1c91d229e5e 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -95,17 +95,18 @@ enum { static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq); -static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq) +static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq) { unsigned int policy = 0; if (blk_rq_sectors(rq)) policy |= REQ_FSEQ_DATA; - if (fflags & REQ_FLUSH) { + if (fflags & (1UL << QUEUE_FLAG_WC)) { if (rq->cmd_flags & REQ_FLUSH) policy |= REQ_FSEQ_PREFLUSH; - if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA)) + if (!(fflags & (1UL << QUEUE_FLAG_FUA)) && + (rq->cmd_flags & REQ_FUA)) policy |= REQ_FSEQ_POSTFLUSH; } return policy; @@ -384,7 +385,7 @@ static void mq_flush_data_end_io(struct request *rq, int error) void blk_insert_flush(struct request *rq) { struct request_queue *q = rq->q; - unsigned int fflags = q->flush_flags; /* may change, cache */ + unsigned long fflags = q->queue_flags; /* may change, cache */ unsigned int policy = blk_flush_policy(fflags, rq); struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx); @@ -393,7 +394,7 @@ void blk_insert_flush(struct request *rq) * REQ_FLUSH and FUA for the driver. 
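The hunks above retire blk_queue_flush() in favour of blk_queue_write_cache(q, wc, fua): the write-cache and FUA capabilities move out of q->flush_flags and into the QUEUE_FLAG_WC and QUEUE_FLAG_FUA queue flags, which blk_flush_policy() and generic_make_request_checks() now test. A minimal sketch of a converted driver's setup path under this interface (the mydrv_* name is hypothetical, not part of the patch):

#include <linux/blkdev.h>

/* Hypothetical probe-path helper advertising the device's cache mode. */
static void mydrv_init_cache(struct request_queue *q, bool has_wcache,
			     bool has_fua)
{
	/*
	 * First argument: device has a volatile write cache and wants
	 * empty REQ_FLUSH requests. Second argument: device honours
	 * REQ_FUA on individual writes. Passing (false, false), as the
	 * nbd hunk does, disables both.
	 */
	blk_queue_write_cache(q, has_wcache, has_fua);
}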
*/ rq->cmd_flags &= ~REQ_FLUSH; - if (!(fflags & REQ_FUA)) + if (!(fflags & (1UL << QUEUE_FLAG_FUA))) rq->cmd_flags &= ~REQ_FUA; /* diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 2fd04286f103..56a0c37a3d06 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -464,15 +464,14 @@ static void bt_tags_for_each(struct blk_mq_tags *tags, } } -void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, - void *priv) +static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, + busy_tag_iter_fn *fn, void *priv) { if (tags->nr_reserved_tags) bt_tags_for_each(tags, &tags->breserved_tags, 0, fn, priv, true); bt_tags_for_each(tags, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv, false); } -EXPORT_SYMBOL(blk_mq_all_tag_busy_iter); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv) diff --git a/block/blk-settings.c b/block/blk-settings.c index c903bee43cf8..f679ae122843 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -820,29 +820,14 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask) } EXPORT_SYMBOL(blk_queue_update_dma_alignment); -/** - * blk_queue_flush - configure queue's cache flush capability - * @q: the request queue for the device - * @flush: 0, REQ_FLUSH or REQ_FLUSH | REQ_FUA - * - * Tell block layer cache flush capability of @q. If it supports - * flushing, REQ_FLUSH should be set. If it supports bypassing - * write cache for individual writes, REQ_FUA should be set. - */ -void blk_queue_flush(struct request_queue *q, unsigned int flush) -{ - WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA)); - - if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA))) - flush &= ~REQ_FUA; - - q->flush_flags = flush & (REQ_FLUSH | REQ_FUA); -} -EXPORT_SYMBOL_GPL(blk_queue_flush); - void blk_queue_flush_queueable(struct request_queue *q, bool queueable) { - q->flush_not_queueable = !queueable; + spin_lock_irq(q->queue_lock); + if (queueable) + clear_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags); + else + set_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags); + spin_unlock_irq(q->queue_lock); } EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); @@ -857,16 +842,13 @@ EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua) { spin_lock_irq(q->queue_lock); - if (wc) { + if (wc) queue_flag_set(QUEUE_FLAG_WC, q); - q->flush_flags = REQ_FLUSH; - } else + else queue_flag_clear(QUEUE_FLAG_WC, q); - if (fua) { - if (wc) - q->flush_flags |= REQ_FUA; + if (fua) queue_flag_set(QUEUE_FLAG_FUA, q); - } else + else queue_flag_clear(QUEUE_FLAG_FUA, q); spin_unlock_irq(q->queue_lock); } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index fa209773d494..2ba1494b2799 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2761,7 +2761,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig q->backing_dev_info.congested_data = device; blk_queue_make_request(q, drbd_make_request); - blk_queue_flush(q, REQ_FLUSH | REQ_FUA); + blk_queue_write_cache(q, true, true); /* Setting the max_hw_sectors to an odd value of 8kibyte here This triggers a max_bio_size message upon first attach or connect */ blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 80cf8add46ff..1fa8cc235977 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -943,7 +943,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, 
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) - blk_queue_flush(lo->lo_queue, REQ_FLUSH); + blk_queue_write_cache(lo->lo_queue, true, false); loop_update_dio(lo); set_capacity(lo->lo_disk, size); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 25824c1697c5..6053e4659fa2 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3000,14 +3000,14 @@ restart_eh: "Completion workers still active!"); spin_lock(dd->queue->queue_lock); - blk_mq_all_tag_busy_iter(*dd->tags.tags, + blk_mq_tagset_busy_iter(&dd->tags, mtip_queue_cmd, dd); spin_unlock(dd->queue->queue_lock); set_bit(MTIP_PF_ISSUE_CMDS_BIT, &dd->port->flags); if (mtip_device_reset(dd)) - blk_mq_all_tag_busy_iter(*dd->tags.tags, + blk_mq_tagset_busy_iter(&dd->tags, mtip_abort_cmd, dd); clear_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags); @@ -4023,12 +4023,6 @@ skip_create_disk: blk_queue_io_min(dd->queue, 4096); blk_queue_bounce_limit(dd->queue, dd->pdev->dma_mask); - /* - * write back cache is not supported in the device. FUA depends on - * write back cache support, hence setting flush support to zero. - */ - blk_queue_flush(dd->queue, 0); - /* Signal trim support */ if (dd->trim_supp == true) { set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags); @@ -4174,7 +4168,7 @@ static int mtip_block_remove(struct driver_data *dd) blk_mq_freeze_queue_start(dd->queue); blk_mq_stop_hw_queues(dd->queue); - blk_mq_all_tag_busy_iter(dd->tags.tags[0], mtip_no_dev_cleanup, dd); + blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd); /* * Delete our gendisk structure. This also removes the device diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 08afbc7a2bb8..31e73a7a40f2 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -693,9 +693,9 @@ static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev) if (nbd->flags & NBD_FLAG_SEND_TRIM) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); if (nbd->flags & NBD_FLAG_SEND_FLUSH) - blk_queue_flush(nbd->disk->queue, REQ_FLUSH); + blk_queue_write_cache(nbd->disk->queue, true, false); else - blk_queue_flush(nbd->disk->queue, 0); + blk_queue_write_cache(nbd->disk->queue, false, false); } static int nbd_dev_dbg_init(struct nbd_device *nbd); diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 1b709a4e3b5e..c2854a2bfdb0 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -437,7 +437,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev) blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); blk_queue_prep_rq(q, blk_queue_start_tag); - blk_queue_flush(q, REQ_FLUSH); + blk_queue_write_cache(q, true, false); disk->queue = q; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index c120d70d3fb3..4b7e405830d7 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -468,7 +468,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) blk_queue_dma_alignment(queue, dev->blk_size-1); blk_queue_logical_block_size(queue, dev->blk_size); - blk_queue_flush(queue, REQ_FLUSH); + blk_queue_write_cache(queue, true, false); blk_queue_max_segments(queue, -1); blk_queue_max_segment_size(queue, dev->bounce_size); diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 9a9ec212fab8..910e065918af 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -133,7 +133,6 @@ MODULE_VERSION(DRV_VERSION "-" DRV_BUILD_ID); #define 
SKD_TIMER_MINUTES(minutes) ((minutes) * (60)) #define INQ_STD_NBYTES 36 -#define SKD_DISCARD_CDB_LENGTH 24 enum skd_drvr_state { SKD_DRVR_STATE_LOAD, @@ -212,7 +211,6 @@ struct skd_request_context { struct request *req; u8 flush_cmd; - u8 discard_page; u32 timeout_stamp; u8 sg_data_dir; @@ -230,7 +228,6 @@ struct skd_request_context { }; #define SKD_DATA_DIR_HOST_TO_CARD 1 #define SKD_DATA_DIR_CARD_TO_HOST 2 -#define SKD_DATA_DIR_NONE 3 /* especially for DISCARD requests. */ struct skd_special_context { struct skd_request_context req; @@ -540,31 +537,6 @@ skd_prep_zerosize_flush_cdb(struct skd_scsi_request *scsi_req, scsi_req->cdb[9] = 0; } -static void -skd_prep_discard_cdb(struct skd_scsi_request *scsi_req, - struct skd_request_context *skreq, - struct page *page, - u32 lba, u32 count) -{ - char *buf; - unsigned long len; - struct request *req; - - buf = page_address(page); - len = SKD_DISCARD_CDB_LENGTH; - - scsi_req->cdb[0] = UNMAP; - scsi_req->cdb[8] = len; - - put_unaligned_be16(6 + 16, &buf[0]); - put_unaligned_be16(16, &buf[2]); - put_unaligned_be64(lba, &buf[8]); - put_unaligned_be32(count, &buf[16]); - - req = skreq->req; - blk_add_request_payload(req, page, 0, len); -} - static void skd_request_fn_not_online(struct request_queue *q); static void skd_request_fn(struct request_queue *q) @@ -575,7 +547,6 @@ static void skd_request_fn(struct request_queue *q) struct skd_request_context *skreq; struct request *req = NULL; struct skd_scsi_request *scsi_req; - struct page *page; unsigned long io_flags; int error; u32 lba; @@ -669,7 +640,6 @@ static void skd_request_fn(struct request_queue *q) skreq->flush_cmd = 0; skreq->n_sg = 0; skreq->sg_byte_count = 0; - skreq->discard_page = 0; /* * OK to now dequeue request from q. @@ -735,18 +705,7 @@ static void skd_request_fn(struct request_queue *q) else skreq->sg_data_dir = SKD_DATA_DIR_HOST_TO_CARD; - if (io_flags & REQ_DISCARD) { - page = alloc_page(GFP_ATOMIC | __GFP_ZERO); - if (!page) { - pr_err("request_fn:Page allocation failed.\n"); - skd_end_request(skdev, skreq, -ENOMEM); - break; - } - skreq->discard_page = 1; - req->completion_data = page; - skd_prep_discard_cdb(scsi_req, skreq, page, lba, count); - - } else if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) { + if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) { skd_prep_zerosize_flush_cdb(scsi_req, skreq); SKD_ASSERT(skreq->flush_cmd == 1); @@ -851,16 +810,6 @@ skip_sg: static void skd_end_request(struct skd_device *skdev, struct skd_request_context *skreq, int error) { - struct request *req = skreq->req; - unsigned int io_flags = req->cmd_flags; - - if ((io_flags & REQ_DISCARD) && - (skreq->discard_page == 1)) { - pr_debug("%s:%s:%d, free the page!", - skdev->name, __func__, __LINE__); - __free_page(req->completion_data); - } - if (unlikely(error)) { struct request *req = skreq->req; char *cmd = (rq_data_dir(req) == READ) ? "read" : "write"; @@ -4412,19 +4361,13 @@ static int skd_cons_disk(struct skd_device *skdev) disk->queue = q; q->queuedata = skdev; - blk_queue_flush(q, REQ_FLUSH | REQ_FUA); + blk_queue_write_cache(q, true, true); blk_queue_max_segments(q, skdev->sgs_per_request); blk_queue_max_hw_sectors(q, SKD_N_MAX_SECTORS); /* set sysfs ptimal_io_size to 8K */ blk_queue_io_opt(q, 8192); - /* DISCARD Flag initialization. 
*/ - q->limits.discard_granularity = 8192; - q->limits.discard_alignment = 0; - blk_queue_max_discard_sectors(q, UINT_MAX >> 9); - q->limits.discard_zeroes_data = 1; - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 28cff0d23d82..42758b52768c 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -493,11 +493,7 @@ static void virtblk_update_cache_mode(struct virtio_device *vdev) u8 writeback = virtblk_get_cache_mode(vdev); struct virtio_blk *vblk = vdev->priv; - if (writeback) - blk_queue_flush(vblk->disk->queue, REQ_FLUSH); - else - blk_queue_flush(vblk->disk->queue, 0); - + blk_queue_write_cache(vblk->disk->queue, writeback, false); revalidate_disk(vblk->disk); } diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 26aa080e243c..3355f1cdd4e5 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -477,7 +477,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, vbd->type |= VDISK_REMOVABLE; q = bdev_get_queue(bdev); - if (q && q->flush_flags) + if (q && test_bit(QUEUE_FLAG_WC, &q->queue_flags)) vbd->flush_support = true; if (q && blk_queue_secdiscard(q)) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 6405b6557792..ca13df854639 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -998,7 +998,8 @@ static const char *flush_info(unsigned int feature_flush) static void xlvbd_flush(struct blkfront_info *info) { - blk_queue_flush(info->rq, info->feature_flush); + blk_queue_write_cache(info->rq, info->feature_flush & REQ_FLUSH, + info->feature_flush & REQ_FUA); pr_info("blkfront: %s: %s %s %s %s %s\n", info->gd->disk_name, flush_info(info->feature_flush), "persistent grants:", info->feature_persistent ? diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 37a8a907febe..05dbcce70b0e 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -522,7 +522,7 @@ static int ide_do_setfeature(ide_drive_t *drive, u8 feature, u8 nsect) static void update_flush(ide_drive_t *drive) { u16 *id = drive->id; - unsigned flush = 0; + bool wc = false; if (drive->dev_flags & IDE_DFLAG_WCACHE) { unsigned long long capacity; @@ -546,12 +546,12 @@ static void update_flush(ide_drive_t *drive) drive->name, barrier ? 
"" : "not "); if (barrier) { - flush = REQ_FLUSH; + wc = true; blk_queue_prep_rq(drive->queue, idedisk_prep_fn); } } - blk_queue_flush(drive->queue, flush); + blk_queue_write_cache(drive->queue, wc, false); } ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE); diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 0dc9a80adb94..160c1a6838e1 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -30,23 +30,35 @@ #include <linux/sched/sysctl.h> #include <uapi/linux/lightnvm.h> -static LIST_HEAD(nvm_targets); +static LIST_HEAD(nvm_tgt_types); static LIST_HEAD(nvm_mgrs); static LIST_HEAD(nvm_devices); +static LIST_HEAD(nvm_targets); static DECLARE_RWSEM(nvm_lock); +static struct nvm_target *nvm_find_target(const char *name) +{ + struct nvm_target *tgt; + + list_for_each_entry(tgt, &nvm_targets, list) + if (!strcmp(name, tgt->disk->disk_name)) + return tgt; + + return NULL; +} + static struct nvm_tgt_type *nvm_find_target_type(const char *name) { struct nvm_tgt_type *tt; - list_for_each_entry(tt, &nvm_targets, list) + list_for_each_entry(tt, &nvm_tgt_types, list) if (!strcmp(name, tt->name)) return tt; return NULL; } -int nvm_register_target(struct nvm_tgt_type *tt) +int nvm_register_tgt_type(struct nvm_tgt_type *tt) { int ret = 0; @@ -54,14 +66,14 @@ int nvm_register_target(struct nvm_tgt_type *tt) if (nvm_find_target_type(tt->name)) ret = -EEXIST; else - list_add(&tt->list, &nvm_targets); + list_add(&tt->list, &nvm_tgt_types); up_write(&nvm_lock); return ret; } -EXPORT_SYMBOL(nvm_register_target); +EXPORT_SYMBOL(nvm_register_tgt_type); -void nvm_unregister_target(struct nvm_tgt_type *tt) +void nvm_unregister_tgt_type(struct nvm_tgt_type *tt) { if (!tt) return; @@ -70,20 +82,20 @@ void nvm_unregister_target(struct nvm_tgt_type *tt) list_del(&tt->list); up_write(&nvm_lock); } -EXPORT_SYMBOL(nvm_unregister_target); +EXPORT_SYMBOL(nvm_unregister_tgt_type); void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags, dma_addr_t *dma_handler) { - return dev->ops->dev_dma_alloc(dev, dev->ppalist_pool, mem_flags, + return dev->ops->dev_dma_alloc(dev, dev->dma_pool, mem_flags, dma_handler); } EXPORT_SYMBOL(nvm_dev_dma_alloc); -void nvm_dev_dma_free(struct nvm_dev *dev, void *ppa_list, +void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler) { - dev->ops->dev_dma_free(dev->ppalist_pool, ppa_list, dma_handler); + dev->ops->dev_dma_free(dev->dma_pool, addr, dma_handler); } EXPORT_SYMBOL(nvm_dev_dma_free); @@ -214,8 +226,8 @@ void nvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd) { int i; - if (rqd->nr_pages > 1) { - for (i = 0; i < rqd->nr_pages; i++) + if (rqd->nr_ppas > 1) { + for (i = 0; i < rqd->nr_ppas; i++) rqd->ppa_list[i] = dev_to_generic_addr(dev, rqd->ppa_list[i]); } else { @@ -228,8 +240,8 @@ void nvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd) { int i; - if (rqd->nr_pages > 1) { - for (i = 0; i < rqd->nr_pages; i++) + if (rqd->nr_ppas > 1) { + for (i = 0; i < rqd->nr_ppas; i++) rqd->ppa_list[i] = generic_to_dev_addr(dev, rqd->ppa_list[i]); } else { @@ -239,33 +251,36 @@ void nvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd) EXPORT_SYMBOL(nvm_generic_to_addr_mode); int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd, - struct ppa_addr *ppas, int nr_ppas) + struct ppa_addr *ppas, int nr_ppas, int vblk) { int i, plane_cnt, pl_idx; - if (dev->plane_mode == NVM_PLANE_SINGLE && nr_ppas == 1) { - rqd->nr_pages = 1; + if ((!vblk || dev->plane_mode == NVM_PLANE_SINGLE) && nr_ppas == 1) { + 
rqd->nr_ppas = nr_ppas; rqd->ppa_addr = ppas[0]; return 0; } - plane_cnt = dev->plane_mode; - rqd->nr_pages = plane_cnt * nr_ppas; - - if (dev->ops->max_phys_sect < rqd->nr_pages) - return -EINVAL; - + rqd->nr_ppas = nr_ppas; rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list); if (!rqd->ppa_list) { pr_err("nvm: failed to allocate dma memory\n"); return -ENOMEM; } - for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) { + if (!vblk) { + for (i = 0; i < nr_ppas; i++) + rqd->ppa_list[i] = ppas[i]; + } else { + plane_cnt = dev->plane_mode; + rqd->nr_ppas *= plane_cnt; + for (i = 0; i < nr_ppas; i++) { - ppas[i].g.pl = pl_idx; - rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppas[i]; + for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) { + ppas[i].g.pl = pl_idx; + rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppas[i]; + } } } @@ -292,7 +307,7 @@ int nvm_erase_ppa(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas) memset(&rqd, 0, sizeof(struct nvm_rq)); - ret = nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas); + ret = nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1); if (ret) return ret; @@ -322,11 +337,10 @@ static void nvm_end_io_sync(struct nvm_rq *rqd) complete(waiting); } -int nvm_submit_ppa(struct nvm_dev *dev, struct ppa_addr *ppa, int nr_ppas, - int opcode, int flags, void *buf, int len) +int __nvm_submit_ppa(struct nvm_dev *dev, struct nvm_rq *rqd, int opcode, + int flags, void *buf, int len) { DECLARE_COMPLETION_ONSTACK(wait); - struct nvm_rq rqd; struct bio *bio; int ret; unsigned long hang_check; @@ -335,23 +349,21 @@ int nvm_submit_ppa(struct nvm_dev *dev, struct ppa_addr *ppa, int nr_ppas, if (IS_ERR_OR_NULL(bio)) return -ENOMEM; - memset(&rqd, 0, sizeof(struct nvm_rq)); - ret = nvm_set_rqd_ppalist(dev, &rqd, ppa, nr_ppas); + nvm_generic_to_addr_mode(dev, rqd); + + rqd->dev = dev; + rqd->opcode = opcode; + rqd->flags = flags; + rqd->bio = bio; + rqd->wait = &wait; + rqd->end_io = nvm_end_io_sync; + + ret = dev->ops->submit_io(dev, rqd); if (ret) { bio_put(bio); return ret; } - rqd.opcode = opcode; - rqd.bio = bio; - rqd.wait = &wait; - rqd.dev = dev; - rqd.end_io = nvm_end_io_sync; - rqd.flags = flags; - nvm_generic_to_addr_mode(dev, &rqd); - - ret = dev->ops->submit_io(dev, &rqd); - /* Prevent hang_check timer from firing at us during very long I/O */ hang_check = sysctl_hung_task_timeout_secs; if (hang_check) @@ -359,12 +371,113 @@ int nvm_submit_ppa(struct nvm_dev *dev, struct ppa_addr *ppa, int nr_ppas, else wait_for_completion_io(&wait); + return rqd->error; +} + +/** + * nvm_submit_ppa_list - submit user-defined ppa list to device. The user must + * take to free ppa list if necessary. + * @dev: device + * @ppa_list: user created ppa_list + * @nr_ppas: length of ppa_list + * @opcode: device opcode + * @flags: device flags + * @buf: data buffer + * @len: data buffer length + */ +int nvm_submit_ppa_list(struct nvm_dev *dev, struct ppa_addr *ppa_list, + int nr_ppas, int opcode, int flags, void *buf, int len) +{ + struct nvm_rq rqd; + + if (dev->ops->max_phys_sect < nr_ppas) + return -EINVAL; + + memset(&rqd, 0, sizeof(struct nvm_rq)); + + rqd.nr_ppas = nr_ppas; + if (nr_ppas > 1) + rqd.ppa_list = ppa_list; + else + rqd.ppa_addr = ppa_list[0]; + + return __nvm_submit_ppa(dev, &rqd, opcode, flags, buf, len); +} +EXPORT_SYMBOL(nvm_submit_ppa_list); + +/** + * nvm_submit_ppa - submit PPAs to device. PPAs will automatically be unfolded + * as single, dual, quad plane PPAs depending on device type. 
+ * @dev: device + * @ppa: user created ppa_list + * @nr_ppas: length of ppa_list + * @opcode: device opcode + * @flags: device flags + * @buf: data buffer + * @len: data buffer length + */ +int nvm_submit_ppa(struct nvm_dev *dev, struct ppa_addr *ppa, int nr_ppas, + int opcode, int flags, void *buf, int len) +{ + struct nvm_rq rqd; + int ret; + + memset(&rqd, 0, sizeof(struct nvm_rq)); + ret = nvm_set_rqd_ppalist(dev, &rqd, ppa, nr_ppas, 1); + if (ret) + return ret; + + ret = __nvm_submit_ppa(dev, &rqd, opcode, flags, buf, len); + nvm_free_rqd_ppalist(dev, &rqd); - return rqd.error; + return ret; } EXPORT_SYMBOL(nvm_submit_ppa); +/* + * folds a bad block list from its plane representation to its virtual + * block representation. The fold is done in place and reduced size is + * returned. + * + * If any of the planes status are bad or grown bad block, the virtual block + * is marked bad. If not bad, the first plane state acts as the block state. + */ +int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks) +{ + int blk, offset, pl, blktype; + + if (nr_blks != dev->blks_per_lun * dev->plane_mode) + return -EINVAL; + + for (blk = 0; blk < dev->blks_per_lun; blk++) { + offset = blk * dev->plane_mode; + blktype = blks[offset]; + + /* Bad blocks on any planes take precedence over other types */ + for (pl = 0; pl < dev->plane_mode; pl++) { + if (blks[offset + pl] & + (NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) { + blktype = blks[offset + pl]; + break; + } + } + + blks[blk] = blktype; + } + + return dev->blks_per_lun; +} +EXPORT_SYMBOL(nvm_bb_tbl_fold); + +int nvm_get_bb_tbl(struct nvm_dev *dev, struct ppa_addr ppa, u8 *blks) +{ + ppa = generic_to_dev_addr(dev, ppa); + + return dev->ops->get_bb_tbl(dev, ppa, blks); +} +EXPORT_SYMBOL(nvm_get_bb_tbl); + static int nvm_init_slc_tbl(struct nvm_dev *dev, struct nvm_id_group *grp) { int i; @@ -414,6 +527,7 @@ static int nvm_core_init(struct nvm_dev *dev) { struct nvm_id *id = &dev->identity; struct nvm_id_group *grp = &id->groups[0]; + int ret; /* device values */ dev->nr_chnls = grp->num_ch; @@ -421,6 +535,8 @@ static int nvm_core_init(struct nvm_dev *dev) dev->pgs_per_blk = grp->num_pg; dev->blks_per_lun = grp->num_blk; dev->nr_planes = grp->num_pln; + dev->fpg_size = grp->fpg_sz; + dev->pfpg_size = grp->fpg_sz * grp->num_pln; dev->sec_size = grp->csecs; dev->oob_size = grp->sos; dev->sec_per_pg = grp->fpg_sz / grp->csecs; @@ -430,33 +546,16 @@ static int nvm_core_init(struct nvm_dev *dev) dev->plane_mode = NVM_PLANE_SINGLE; dev->max_rq_size = dev->ops->max_phys_sect * dev->sec_size; - if (grp->mtype != 0) { - pr_err("nvm: memory type not supported\n"); - return -EINVAL; - } - - switch (grp->fmtype) { - case NVM_ID_FMTYPE_SLC: - if (nvm_init_slc_tbl(dev, grp)) - return -ENOMEM; - break; - case NVM_ID_FMTYPE_MLC: - if (nvm_init_mlc_tbl(dev, grp)) - return -ENOMEM; - break; - default: - pr_err("nvm: flash type not supported\n"); - return -EINVAL; - } - - if (!dev->lps_per_blk) - pr_info("nvm: lower page programming table missing\n"); - if (grp->mpos & 0x020202) dev->plane_mode = NVM_PLANE_DOUBLE; if (grp->mpos & 0x040404) dev->plane_mode = NVM_PLANE_QUAD; + if (grp->mtype != 0) { + pr_err("nvm: memory type not supported\n"); + return -EINVAL; + } + /* calculated values */ dev->sec_per_pl = dev->sec_per_pg * dev->nr_planes; dev->sec_per_blk = dev->sec_per_pl * dev->pgs_per_blk; @@ -468,11 +567,73 @@ static int nvm_core_init(struct nvm_dev *dev) sizeof(unsigned long), GFP_KERNEL); if (!dev->lun_map) return -ENOMEM; - INIT_LIST_HEAD(&dev->online_targets); 
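The bad-block interface above changes from a per-driver callback to a flat table: callers allocate blks_per_lun * plane_mode bytes, fetch the table with nvm_get_bb_tbl(), and collapse the per-plane entries with nvm_bb_tbl_fold(), which marks a virtual block bad if any plane reports it bad. A hedged sketch of that pattern, mirroring the gennvm and sysblk hunks (the example_* name and the pr_debug loop are illustrative only):

#include <linux/lightnvm.h>
#include <linux/slab.h>

static int example_read_bb(struct nvm_dev *dev, struct ppa_addr ppa)
{
	int nr_blks = dev->blks_per_lun * dev->plane_mode;
	u8 *blks;
	int i, ret;

	blks = kmalloc(nr_blks, GFP_KERNEL);
	if (!blks)
		return -ENOMEM;

	/* device-format table: one byte per (block, plane) pair */
	ret = nvm_get_bb_tbl(dev, ppa, blks);
	if (ret)
		goto out;

	/* fold plane entries; bad on any plane marks the block bad */
	nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks);
	if (nr_blks < 0) {
		ret = nr_blks;
		goto out;
	}

	for (i = 0; i < nr_blks; i++)
		if (blks[i] != NVM_BLK_T_FREE)
			pr_debug("blk %d not free: %u\n", i, blks[i]);
out:
	kfree(blks);
	return ret;
}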
+ + switch (grp->fmtype) { + case NVM_ID_FMTYPE_SLC: + if (nvm_init_slc_tbl(dev, grp)) { + ret = -ENOMEM; + goto err_fmtype; + } + break; + case NVM_ID_FMTYPE_MLC: + if (nvm_init_mlc_tbl(dev, grp)) { + ret = -ENOMEM; + goto err_fmtype; + } + break; + default: + pr_err("nvm: flash type not supported\n"); + ret = -EINVAL; + goto err_fmtype; + } + mutex_init(&dev->mlock); spin_lock_init(&dev->lock); return 0; +err_fmtype: + kfree(dev->lun_map); + return ret; +} + +static void nvm_remove_target(struct nvm_target *t) +{ + struct nvm_tgt_type *tt = t->type; + struct gendisk *tdisk = t->disk; + struct request_queue *q = tdisk->queue; + + lockdep_assert_held(&nvm_lock); + + del_gendisk(tdisk); + blk_cleanup_queue(q); + + if (tt->exit) + tt->exit(tdisk->private_data); + + put_disk(tdisk); + + list_del(&t->list); + kfree(t); +} + +static void nvm_free_mgr(struct nvm_dev *dev) +{ + struct nvm_target *tgt, *tmp; + + if (!dev->mt) + return; + + down_write(&nvm_lock); + list_for_each_entry_safe(tgt, tmp, &nvm_targets, list) { + if (tgt->dev != dev) + continue; + + nvm_remove_target(tgt); + } + up_write(&nvm_lock); + + dev->mt->unregister_mgr(dev); + dev->mt = NULL; } static void nvm_free(struct nvm_dev *dev) @@ -480,10 +641,10 @@ static void nvm_free(struct nvm_dev *dev) if (!dev) return; - if (dev->mt) - dev->mt->unregister_mgr(dev); + nvm_free_mgr(dev); kfree(dev->lptbl); + kfree(dev->lun_map); } static int nvm_init(struct nvm_dev *dev) @@ -530,8 +691,8 @@ err: static void nvm_exit(struct nvm_dev *dev) { - if (dev->ppalist_pool) - dev->ops->destroy_dma_pool(dev->ppalist_pool); + if (dev->dma_pool) + dev->ops->destroy_dma_pool(dev->dma_pool); nvm_free(dev); pr_info("nvm: successfully unloaded\n"); @@ -565,9 +726,9 @@ int nvm_register(struct request_queue *q, char *disk_name, } if (dev->ops->max_phys_sect > 1) { - dev->ppalist_pool = dev->ops->create_dma_pool(dev, "ppalist"); - if (!dev->ppalist_pool) { - pr_err("nvm: could not create ppa pool\n"); + dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist"); + if (!dev->dma_pool) { + pr_err("nvm: could not create dma pool\n"); ret = -ENOMEM; goto err_init; } @@ -613,7 +774,6 @@ void nvm_unregister(char *disk_name) up_write(&nvm_lock); nvm_exit(dev); - kfree(dev->lun_map); kfree(dev); } EXPORT_SYMBOL(nvm_unregister); @@ -645,12 +805,11 @@ static int nvm_create_target(struct nvm_dev *dev, return -EINVAL; } - list_for_each_entry(t, &dev->online_targets, list) { - if (!strcmp(create->tgtname, t->disk->disk_name)) { - pr_err("nvm: target name already exists.\n"); - up_write(&nvm_lock); - return -EINVAL; - } + t = nvm_find_target(create->tgtname); + if (t) { + pr_err("nvm: target name already exists.\n"); + up_write(&nvm_lock); + return -EINVAL; } up_write(&nvm_lock); @@ -688,9 +847,10 @@ static int nvm_create_target(struct nvm_dev *dev, t->type = tt; t->disk = tdisk; + t->dev = dev; down_write(&nvm_lock); - list_add_tail(&t->list, &dev->online_targets); + list_add_tail(&t->list, &nvm_targets); up_write(&nvm_lock); return 0; @@ -703,26 +863,6 @@ err_t: return -ENOMEM; } -static void nvm_remove_target(struct nvm_target *t) -{ - struct nvm_tgt_type *tt = t->type; - struct gendisk *tdisk = t->disk; - struct request_queue *q = tdisk->queue; - - lockdep_assert_held(&nvm_lock); - - del_gendisk(tdisk); - blk_cleanup_queue(q); - - if (tt->exit) - tt->exit(tdisk->private_data); - - put_disk(tdisk); - - list_del(&t->list); - kfree(t); -} - static int __nvm_configure_create(struct nvm_ioctl_create *create) { struct nvm_dev *dev; @@ -753,26 +893,19 @@ static int 
__nvm_configure_create(struct nvm_ioctl_create *create) static int __nvm_configure_remove(struct nvm_ioctl_remove *remove) { - struct nvm_target *t = NULL; - struct nvm_dev *dev; - int ret = -1; + struct nvm_target *t; down_write(&nvm_lock); - list_for_each_entry(dev, &nvm_devices, devices) - list_for_each_entry(t, &dev->online_targets, list) { - if (!strcmp(remove->tgtname, t->disk->disk_name)) { - nvm_remove_target(t); - ret = 0; - break; - } - } - up_write(&nvm_lock); - - if (ret) { + t = nvm_find_target(remove->tgtname); + if (!t) { pr_err("nvm: target \"%s\" doesn't exist.\n", remove->tgtname); + up_write(&nvm_lock); return -EINVAL; } + nvm_remove_target(t); + up_write(&nvm_lock); + return 0; } @@ -921,7 +1054,7 @@ static long nvm_ioctl_info(struct file *file, void __user *arg) info->version[2] = NVM_VERSION_PATCH; down_write(&nvm_lock); - list_for_each_entry(tt, &nvm_targets, list) { + list_for_each_entry(tt, &nvm_tgt_types, list) { struct nvm_ioctl_info_tgt *tgt = &info->tgts[tgt_iter]; tgt->version[0] = tt->version[0]; @@ -1118,10 +1251,7 @@ static long nvm_ioctl_dev_factory(struct file *file, void __user *arg) return -EINVAL; } - if (dev->mt) { - dev->mt->unregister_mgr(dev); - dev->mt = NULL; - } + nvm_free_mgr(dev); if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) return nvm_dev_factory(dev, fact.flags); diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c index 72e124a3927d..ec9fb6876e38 100644 --- a/drivers/lightnvm/gennvm.c +++ b/drivers/lightnvm/gennvm.c @@ -129,27 +129,25 @@ static int gennvm_luns_init(struct nvm_dev *dev, struct gen_nvm *gn) return 0; } -static int gennvm_block_bb(struct ppa_addr ppa, int nr_blocks, u8 *blks, - void *private) +static int gennvm_block_bb(struct gen_nvm *gn, struct ppa_addr ppa, + u8 *blks, int nr_blks) { - struct gen_nvm *gn = private; struct nvm_dev *dev = gn->dev; struct gen_lun *lun; struct nvm_block *blk; int i; + nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks); + if (nr_blks < 0) + return nr_blks; + lun = &gn->luns[(dev->luns_per_chnl * ppa.g.ch) + ppa.g.lun]; - for (i = 0; i < nr_blocks; i++) { + for (i = 0; i < nr_blks; i++) { if (blks[i] == 0) continue; blk = &lun->vlun.blocks[i]; - if (!blk) { - pr_err("gennvm: BB data is out of bounds.\n"); - return -EINVAL; - } - list_move_tail(&blk->list, &lun->bb_list); lun->vlun.nr_bad_blocks++; lun->vlun.nr_free_blocks--; @@ -216,13 +214,21 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn) struct gen_lun *lun; struct nvm_block *block; sector_t lun_iter, blk_iter, cur_block_id = 0; - int ret; + int ret, nr_blks; + u8 *blks; + + nr_blks = dev->blks_per_lun * dev->plane_mode; + blks = kmalloc(nr_blks, GFP_KERNEL); + if (!blks) + return -ENOMEM; gennvm_for_each_lun(gn, lun, lun_iter) { lun->vlun.blocks = vzalloc(sizeof(struct nvm_block) * dev->blks_per_lun); - if (!lun->vlun.blocks) + if (!lun->vlun.blocks) { + kfree(blks); return -ENOMEM; + } for (blk_iter = 0; blk_iter < dev->blks_per_lun; blk_iter++) { block = &lun->vlun.blocks[blk_iter]; @@ -246,14 +252,15 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn) ppa.ppa = 0; ppa.g.ch = lun->vlun.chnl_id; - ppa.g.lun = lun->vlun.id; - ppa = generic_to_dev_addr(dev, ppa); + ppa.g.lun = lun->vlun.lun_id; + + ret = nvm_get_bb_tbl(dev, ppa, blks); + if (ret) + pr_err("gennvm: could not get BB table\n"); - ret = dev->ops->get_bb_tbl(dev, ppa, - dev->blks_per_lun, - gennvm_block_bb, gn); + ret = gennvm_block_bb(gn, ppa, blks, nr_blks); if (ret) - pr_err("gennvm: could not read BB table\n"); + 
pr_err("gennvm: BB table map failed\n"); } } @@ -266,6 +273,7 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn) } } + kfree(blks); return 0; } @@ -399,64 +407,60 @@ static void gennvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk) spin_unlock(&vlun->lock); } -static void gennvm_blk_set_type(struct nvm_dev *dev, struct ppa_addr *ppa, - int type) +static void gennvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type) { struct gen_nvm *gn = dev->mp; struct gen_lun *lun; struct nvm_block *blk; - if (unlikely(ppa->g.ch > dev->nr_chnls || - ppa->g.lun > dev->luns_per_chnl || - ppa->g.blk > dev->blks_per_lun)) { + pr_debug("gennvm: ppa (ch: %u lun: %u blk: %u pg: %u) -> %u\n", + ppa.g.ch, ppa.g.lun, ppa.g.blk, ppa.g.pg, type); + + if (unlikely(ppa.g.ch > dev->nr_chnls || + ppa.g.lun > dev->luns_per_chnl || + ppa.g.blk > dev->blks_per_lun)) { WARN_ON_ONCE(1); pr_err("gennvm: ppa broken (ch: %u > %u lun: %u > %u blk: %u > %u", - ppa->g.ch, dev->nr_chnls, - ppa->g.lun, dev->luns_per_chnl, - ppa->g.blk, dev->blks_per_lun); + ppa.g.ch, dev->nr_chnls, + ppa.g.lun, dev->luns_per_chnl, + ppa.g.blk, dev->blks_per_lun); return; } - lun = &gn->luns[ppa->g.lun * ppa->g.ch]; - blk = &lun->vlun.blocks[ppa->g.blk]; + lun = &gn->luns[ppa.g.lun * ppa.g.ch]; + blk = &lun->vlun.blocks[ppa.g.blk]; /* will be moved to bb list on put_blk from target */ blk->state = type; } -/* mark block bad. It is expected the target recover from the error. */ +/* + * mark block bad in gennvm. It is expected that the target recovers separately + */ static void gennvm_mark_blk_bad(struct nvm_dev *dev, struct nvm_rq *rqd) { - int i; - - if (!dev->ops->set_bb_tbl) - return; - - if (dev->ops->set_bb_tbl(dev, rqd, 1)) - return; + int bit = -1; + int max_secs = dev->ops->max_phys_sect; + void *comp_bits = &rqd->ppa_status; nvm_addr_to_generic_mode(dev, rqd); /* look up blocks and mark them as bad */ - if (rqd->nr_pages > 1) - for (i = 0; i < rqd->nr_pages; i++) - gennvm_blk_set_type(dev, &rqd->ppa_list[i], - NVM_BLK_ST_BAD); - else - gennvm_blk_set_type(dev, &rqd->ppa_addr, NVM_BLK_ST_BAD); + if (rqd->nr_ppas == 1) { + gennvm_mark_blk(dev, rqd->ppa_addr, NVM_BLK_ST_BAD); + return; + } + + while ((bit = find_next_bit(comp_bits, max_secs, bit + 1)) < max_secs) + gennvm_mark_blk(dev, rqd->ppa_list[bit], NVM_BLK_ST_BAD); } static void gennvm_end_io(struct nvm_rq *rqd) { struct nvm_tgt_instance *ins = rqd->ins; - switch (rqd->error) { - case NVM_RSP_SUCCESS: - case NVM_RSP_ERR_EMPTYPAGE: - break; - case NVM_RSP_ERR_FAILWRITE: + if (rqd->error == NVM_RSP_ERR_FAILWRITE) gennvm_mark_blk_bad(rqd->dev, rqd); - } ins->tt->end_io(rqd); } @@ -539,6 +543,8 @@ static struct nvmm_type gennvm = { .submit_io = gennvm_submit_io, .erase_blk = gennvm_erase_blk, + .mark_blk = gennvm_mark_blk, + .get_lun = gennvm_get_lun, .reserve_lun = gennvm_reserve_lun, .release_lun = gennvm_release_lun, diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index 3ab6495c3fd8..2103e97a974f 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -405,9 +405,8 @@ static void rrpc_block_gc(struct work_struct *work) ws_gc); struct rrpc *rrpc = gcb->rrpc; struct rrpc_block *rblk = gcb->rblk; + struct rrpc_lun *rlun = rblk->rlun; struct nvm_dev *dev = rrpc->dev; - struct nvm_lun *lun = rblk->parent->lun; - struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset]; mempool_free(gcb, rrpc->gcb_pool); pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id); @@ -508,9 +507,9 @@ static void rrpc_gc_queue(struct 
work_struct *work) ws_gc); struct rrpc *rrpc = gcb->rrpc; struct rrpc_block *rblk = gcb->rblk; + struct rrpc_lun *rlun = rblk->rlun; struct nvm_lun *lun = rblk->parent->lun; struct nvm_block *blk = rblk->parent; - struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset]; spin_lock(&rlun->lock); list_add_tail(&rblk->prio, &rlun->prio_list); @@ -696,7 +695,7 @@ static void rrpc_end_io(struct nvm_rq *rqd) { struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance); struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); - uint8_t npages = rqd->nr_pages; + uint8_t npages = rqd->nr_ppas; sector_t laddr = rrpc_get_laddr(rqd->bio) - npages; if (bio_data_dir(rqd->bio) == WRITE) @@ -711,8 +710,6 @@ static void rrpc_end_io(struct nvm_rq *rqd) if (npages > 1) nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list); - if (rqd->metadata) - nvm_dev_dma_free(rrpc->dev, rqd->metadata, rqd->dma_metadata); mempool_free(rqd, rrpc->rq_pool); } @@ -886,7 +883,7 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio, bio_get(bio); rqd->bio = bio; rqd->ins = &rrpc->instance; - rqd->nr_pages = nr_pages; + rqd->nr_ppas = nr_pages; rrq->flags = flags; err = nvm_submit_io(rrpc->dev, rqd); @@ -895,7 +892,7 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio, bio_put(bio); if (!(flags & NVM_IOTYPE_GC)) { rrpc_unlock_rq(rrpc, rqd); - if (rqd->nr_pages > 1) + if (rqd->nr_ppas > 1) nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list); } @@ -1039,11 +1036,8 @@ static int rrpc_map_init(struct rrpc *rrpc) { struct nvm_dev *dev = rrpc->dev; sector_t i; - u64 slba; int ret; - slba = rrpc->soffset >> (ilog2(dev->sec_size) - 9); - rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_sects); if (!rrpc->trans_map) return -ENOMEM; @@ -1065,8 +1059,8 @@ static int rrpc_map_init(struct rrpc *rrpc) return 0; /* Bring up the mapping table from device */ - ret = dev->ops->get_l2p_tbl(dev, slba, rrpc->nr_sects, rrpc_l2p_update, - rrpc); + ret = dev->ops->get_l2p_tbl(dev, rrpc->soffset, rrpc->nr_sects, + rrpc_l2p_update, rrpc); if (ret) { pr_err("nvm: rrpc: could not read L2P table.\n"); return -EINVAL; @@ -1207,10 +1201,6 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end) INIT_WORK(&rlun->ws_gc, rrpc_lun_gc); spin_lock_init(&rlun->lock); - - rrpc->total_blocks += dev->blks_per_lun; - rrpc->nr_sects += dev->sec_per_lun; - } return 0; @@ -1224,18 +1214,24 @@ static int rrpc_area_init(struct rrpc *rrpc, sector_t *begin) struct nvm_dev *dev = rrpc->dev; struct nvmm_type *mt = dev->mt; sector_t size = rrpc->nr_sects * dev->sec_size; + int ret; size >>= 9; - return mt->get_area(dev, begin, size); + ret = mt->get_area(dev, begin, size); + if (!ret) + *begin >>= (ilog2(dev->sec_size) - 9); + + return ret; } static void rrpc_area_free(struct rrpc *rrpc) { struct nvm_dev *dev = rrpc->dev; struct nvmm_type *mt = dev->mt; + sector_t begin = rrpc->soffset << (ilog2(dev->sec_size) - 9); - mt->put_area(dev, rrpc->soffset); + mt->put_area(dev, begin); } static void rrpc_free(struct rrpc *rrpc) @@ -1268,7 +1264,7 @@ static sector_t rrpc_capacity(void *private) sector_t reserved, provisioned; /* cur, gc, and two emergency blocks for each lun */ - reserved = rrpc->nr_luns * dev->max_pages_per_blk * 4; + reserved = rrpc->nr_luns * dev->sec_per_blk * 4; provisioned = rrpc->nr_sects - reserved; if (reserved > rrpc->nr_sects) { @@ -1388,6 +1384,8 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk, INIT_WORK(&rrpc->ws_requeue, rrpc_requeue); rrpc->nr_luns = lun_end - 
lun_begin + 1; + rrpc->total_blocks = (unsigned long)dev->blks_per_lun * rrpc->nr_luns; + rrpc->nr_sects = (unsigned long long)dev->sec_per_lun * rrpc->nr_luns; /* simple round-robin strategy */ atomic_set(&rrpc->next_lun, -1); @@ -1468,12 +1466,12 @@ static struct nvm_tgt_type tt_rrpc = { static int __init rrpc_module_init(void) { - return nvm_register_target(&tt_rrpc); + return nvm_register_tgt_type(&tt_rrpc); } static void rrpc_module_exit(void) { - nvm_unregister_target(&tt_rrpc); + nvm_unregister_tgt_type(&tt_rrpc); } module_init(rrpc_module_init); diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h index 2653484a3b40..87e84b5fc1cc 100644 --- a/drivers/lightnvm/rrpc.h +++ b/drivers/lightnvm/rrpc.h @@ -251,7 +251,7 @@ static inline void rrpc_unlock_laddr(struct rrpc *rrpc, static inline void rrpc_unlock_rq(struct rrpc *rrpc, struct nvm_rq *rqd) { struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd); - uint8_t pages = rqd->nr_pages; + uint8_t pages = rqd->nr_ppas; BUG_ON((r->l_start + pages) > rrpc->nr_sects); diff --git a/drivers/lightnvm/sysblk.c b/drivers/lightnvm/sysblk.c index 321de1f154c5..994697ac786e 100644 --- a/drivers/lightnvm/sysblk.c +++ b/drivers/lightnvm/sysblk.c @@ -93,12 +93,51 @@ void nvm_setup_sysblk_scan(struct nvm_dev *dev, struct sysblk_scan *s, s->nr_rows = nvm_setup_sysblks(dev, sysblk_ppas); } -static int sysblk_get_host_blks(struct ppa_addr ppa, int nr_blks, u8 *blks, - void *private) +static int sysblk_get_free_blks(struct nvm_dev *dev, struct ppa_addr ppa, + u8 *blks, int nr_blks, + struct sysblk_scan *s) +{ + struct ppa_addr *sppa; + int i, blkid = 0; + + nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks); + if (nr_blks < 0) + return nr_blks; + + for (i = 0; i < nr_blks; i++) { + if (blks[i] == NVM_BLK_T_HOST) + return -EEXIST; + + if (blks[i] != NVM_BLK_T_FREE) + continue; + + sppa = &s->ppas[scan_ppa_idx(s->row, blkid)]; + sppa->g.ch = ppa.g.ch; + sppa->g.lun = ppa.g.lun; + sppa->g.blk = i; + s->nr_ppas++; + blkid++; + + pr_debug("nvm: use (%u %u %u) as sysblk\n", + sppa->g.ch, sppa->g.lun, sppa->g.blk); + if (blkid > MAX_BLKS_PR_SYSBLK - 1) + return 0; + } + + pr_err("nvm: sysblk failed get sysblk\n"); + return -EINVAL; +} + +static int sysblk_get_host_blks(struct nvm_dev *dev, struct ppa_addr ppa, + u8 *blks, int nr_blks, + struct sysblk_scan *s) { - struct sysblk_scan *s = private; int i, nr_sysblk = 0; + nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks); + if (nr_blks < 0) + return nr_blks; + for (i = 0; i < nr_blks; i++) { if (blks[i] != NVM_BLK_T_HOST) continue; @@ -119,26 +158,42 @@ static int sysblk_get_host_blks(struct ppa_addr ppa, int nr_blks, u8 *blks, } static int nvm_get_all_sysblks(struct nvm_dev *dev, struct sysblk_scan *s, - struct ppa_addr *ppas, nvm_bb_update_fn *fn) + struct ppa_addr *ppas, int get_free) { - struct ppa_addr dppa; - int i, ret; + int i, nr_blks, ret = 0; + u8 *blks; s->nr_ppas = 0; + nr_blks = dev->blks_per_lun * dev->plane_mode; + + blks = kmalloc(nr_blks, GFP_KERNEL); + if (!blks) + return -ENOMEM; for (i = 0; i < s->nr_rows; i++) { - dppa = generic_to_dev_addr(dev, ppas[i]); s->row = i; - ret = dev->ops->get_bb_tbl(dev, dppa, dev->blks_per_lun, fn, s); + ret = nvm_get_bb_tbl(dev, ppas[i], blks); if (ret) { pr_err("nvm: failed bb tbl for ppa (%u %u)\n", ppas[i].g.ch, ppas[i].g.blk); - return ret; + goto err_get; } + + if (get_free) + ret = sysblk_get_free_blks(dev, ppas[i], blks, nr_blks, + s); + else + ret = sysblk_get_host_blks(dev, ppas[i], blks, nr_blks, + s); + + if (ret) + goto err_get; } +err_get: + 
kfree(blks); return ret; } @@ -154,13 +209,12 @@ static int nvm_scan_block(struct nvm_dev *dev, struct ppa_addr *ppa, struct nvm_system_block *sblk) { struct nvm_system_block *cur; - int pg, cursz, ret, found = 0; + int pg, ret, found = 0; /* the full buffer for a flash page is allocated. Only the first of it * contains the system block information */ - cursz = dev->sec_size * dev->sec_per_pg * dev->nr_planes; - cur = kmalloc(cursz, GFP_KERNEL); + cur = kmalloc(dev->pfpg_size, GFP_KERNEL); if (!cur) return -ENOMEM; @@ -169,7 +223,7 @@ static int nvm_scan_block(struct nvm_dev *dev, struct ppa_addr *ppa, ppa->g.pg = ppa_to_slc(dev, pg); ret = nvm_submit_ppa(dev, ppa, 1, NVM_OP_PREAD, NVM_IO_SLC_MODE, - cur, cursz); + cur, dev->pfpg_size); if (ret) { if (ret == NVM_RSP_ERR_EMPTYPAGE) { pr_debug("nvm: sysblk scan empty ppa (%u %u %u %u)\n", @@ -223,10 +277,10 @@ static int nvm_set_bb_tbl(struct nvm_dev *dev, struct sysblk_scan *s, int type) memset(&rqd, 0, sizeof(struct nvm_rq)); - nvm_set_rqd_ppalist(dev, &rqd, s->ppas, s->nr_ppas); + nvm_set_rqd_ppalist(dev, &rqd, s->ppas, s->nr_ppas, 1); nvm_generic_to_addr_mode(dev, &rqd); - ret = dev->ops->set_bb_tbl(dev, &rqd, type); + ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); nvm_free_rqd_ppalist(dev, &rqd); if (ret) { pr_err("nvm: sysblk failed bb mark\n"); @@ -236,50 +290,17 @@ static int nvm_set_bb_tbl(struct nvm_dev *dev, struct sysblk_scan *s, int type) return 0; } -static int sysblk_get_free_blks(struct ppa_addr ppa, int nr_blks, u8 *blks, - void *private) -{ - struct sysblk_scan *s = private; - struct ppa_addr *sppa; - int i, blkid = 0; - - for (i = 0; i < nr_blks; i++) { - if (blks[i] == NVM_BLK_T_HOST) - return -EEXIST; - - if (blks[i] != NVM_BLK_T_FREE) - continue; - - sppa = &s->ppas[scan_ppa_idx(s->row, blkid)]; - sppa->g.ch = ppa.g.ch; - sppa->g.lun = ppa.g.lun; - sppa->g.blk = i; - s->nr_ppas++; - blkid++; - - pr_debug("nvm: use (%u %u %u) as sysblk\n", - sppa->g.ch, sppa->g.lun, sppa->g.blk); - if (blkid > MAX_BLKS_PR_SYSBLK - 1) - return 0; - } - - pr_err("nvm: sysblk failed get sysblk\n"); - return -EINVAL; -} - static int nvm_write_and_verify(struct nvm_dev *dev, struct nvm_sb_info *info, struct sysblk_scan *s) { struct nvm_system_block nvmsb; void *buf; - int i, sect, ret, bufsz; + int i, sect, ret = 0; struct ppa_addr *ppas; nvm_cpu_to_sysblk(&nvmsb, info); - /* buffer for flash page */ - bufsz = dev->sec_size * dev->sec_per_pg * dev->nr_planes; - buf = kzalloc(bufsz, GFP_KERNEL); + buf = kzalloc(dev->pfpg_size, GFP_KERNEL); if (!buf) return -ENOMEM; memcpy(buf, &nvmsb, sizeof(struct nvm_system_block)); @@ -309,7 +330,7 @@ static int nvm_write_and_verify(struct nvm_dev *dev, struct nvm_sb_info *info, } ret = nvm_submit_ppa(dev, ppas, dev->sec_per_pg, NVM_OP_PWRITE, - NVM_IO_SLC_MODE, buf, bufsz); + NVM_IO_SLC_MODE, buf, dev->pfpg_size); if (ret) { pr_err("nvm: sysblk failed program (%u %u %u)\n", ppas[0].g.ch, @@ -319,7 +340,7 @@ static int nvm_write_and_verify(struct nvm_dev *dev, struct nvm_sb_info *info, } ret = nvm_submit_ppa(dev, ppas, dev->sec_per_pg, NVM_OP_PREAD, - NVM_IO_SLC_MODE, buf, bufsz); + NVM_IO_SLC_MODE, buf, dev->pfpg_size); if (ret) { pr_err("nvm: sysblk failed read (%u %u %u)\n", ppas[0].g.ch, @@ -388,7 +409,7 @@ int nvm_get_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info) nvm_setup_sysblk_scan(dev, &s, sysblk_ppas); mutex_lock(&dev->mlock); - ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, sysblk_get_host_blks); + ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 0); if (ret) goto 
err_sysblk; @@ -448,7 +469,7 @@ int nvm_update_sysblock(struct nvm_dev *dev, struct nvm_sb_info *new) nvm_setup_sysblk_scan(dev, &s, sysblk_ppas); mutex_lock(&dev->mlock); - ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, sysblk_get_host_blks); + ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 0); if (ret) goto err_sysblk; @@ -546,7 +567,7 @@ int nvm_init_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info) nvm_setup_sysblk_scan(dev, &s, sysblk_ppas); mutex_lock(&dev->mlock); - ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, sysblk_get_free_blks); + ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 1); if (ret) goto err_mark; @@ -561,52 +582,49 @@ err_mark: return ret; } -struct factory_blks { - struct nvm_dev *dev; - int flags; - unsigned long *blks; -}; - static int factory_nblks(int nblks) { /* Round up to nearest BITS_PER_LONG */ return (nblks + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1); } -static unsigned int factory_blk_offset(struct nvm_dev *dev, int ch, int lun) +static unsigned int factory_blk_offset(struct nvm_dev *dev, struct ppa_addr ppa) { int nblks = factory_nblks(dev->blks_per_lun); - return ((ch * dev->luns_per_chnl * nblks) + (lun * nblks)) / + return ((ppa.g.ch * dev->luns_per_chnl * nblks) + (ppa.g.lun * nblks)) / BITS_PER_LONG; } -static int nvm_factory_blks(struct ppa_addr ppa, int nr_blks, u8 *blks, - void *private) +static int nvm_factory_blks(struct nvm_dev *dev, struct ppa_addr ppa, + u8 *blks, int nr_blks, + unsigned long *blk_bitmap, int flags) { - struct factory_blks *f = private; - struct nvm_dev *dev = f->dev; int i, lunoff; - lunoff = factory_blk_offset(dev, ppa.g.ch, ppa.g.lun); + nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks); + if (nr_blks < 0) + return nr_blks; + + lunoff = factory_blk_offset(dev, ppa); /* non-set bits correspond to the block must be erased */ for (i = 0; i < nr_blks; i++) { switch (blks[i]) { case NVM_BLK_T_FREE: - if (f->flags & NVM_FACTORY_ERASE_ONLY_USER) - set_bit(i, &f->blks[lunoff]); + if (flags & NVM_FACTORY_ERASE_ONLY_USER) + set_bit(i, &blk_bitmap[lunoff]); break; case NVM_BLK_T_HOST: - if (!(f->flags & NVM_FACTORY_RESET_HOST_BLKS)) - set_bit(i, &f->blks[lunoff]); + if (!(flags & NVM_FACTORY_RESET_HOST_BLKS)) + set_bit(i, &blk_bitmap[lunoff]); break; case NVM_BLK_T_GRWN_BAD: - if (!(f->flags & NVM_FACTORY_RESET_GRWN_BBLKS)) - set_bit(i, &f->blks[lunoff]); + if (!(flags & NVM_FACTORY_RESET_GRWN_BBLKS)) + set_bit(i, &blk_bitmap[lunoff]); break; default: - set_bit(i, &f->blks[lunoff]); + set_bit(i, &blk_bitmap[lunoff]); break; } } @@ -615,7 +633,7 @@ static int nvm_factory_blks(struct ppa_addr ppa, int nr_blks, u8 *blks, } static int nvm_fact_get_blks(struct nvm_dev *dev, struct ppa_addr *erase_list, - int max_ppas, struct factory_blks *f) + int max_ppas, unsigned long *blk_bitmap) { struct ppa_addr ppa; int ch, lun, blkid, idx, done = 0, ppa_cnt = 0; @@ -623,111 +641,95 @@ static int nvm_fact_get_blks(struct nvm_dev *dev, struct ppa_addr *erase_list, while (!done) { done = 1; - for (ch = 0; ch < dev->nr_chnls; ch++) { - for (lun = 0; lun < dev->luns_per_chnl; lun++) { - idx = factory_blk_offset(dev, ch, lun); - offset = &f->blks[idx]; - - blkid = find_first_zero_bit(offset, - dev->blks_per_lun); - if (blkid >= dev->blks_per_lun) - continue; - set_bit(blkid, offset); - - ppa.ppa = 0; - ppa.g.ch = ch; - ppa.g.lun = lun; - ppa.g.blk = blkid; - pr_debug("nvm: erase ppa (%u %u %u)\n", - ppa.g.ch, - ppa.g.lun, - ppa.g.blk); - - erase_list[ppa_cnt] = ppa; - ppa_cnt++; - done = 0; - - if (ppa_cnt == max_ppas) - return ppa_cnt; - } + 
nvm_for_each_lun_ppa(dev, ppa, ch, lun) { + idx = factory_blk_offset(dev, ppa); + offset = &blk_bitmap[idx]; + + blkid = find_first_zero_bit(offset, + dev->blks_per_lun); + if (blkid >= dev->blks_per_lun) + continue; + set_bit(blkid, offset); + + ppa.g.blk = blkid; + pr_debug("nvm: erase ppa (%u %u %u)\n", + ppa.g.ch, + ppa.g.lun, + ppa.g.blk); + + erase_list[ppa_cnt] = ppa; + ppa_cnt++; + done = 0; + + if (ppa_cnt == max_ppas) + return ppa_cnt; } } return ppa_cnt; } -static int nvm_fact_get_bb_tbl(struct nvm_dev *dev, struct ppa_addr ppa, - nvm_bb_update_fn *fn, void *priv) +static int nvm_fact_select_blks(struct nvm_dev *dev, unsigned long *blk_bitmap, + int flags) { - struct ppa_addr dev_ppa; - int ret; + struct ppa_addr ppa; + int ch, lun, nr_blks, ret = 0; + u8 *blks; - dev_ppa = generic_to_dev_addr(dev, ppa); + nr_blks = dev->blks_per_lun * dev->plane_mode; + blks = kmalloc(nr_blks, GFP_KERNEL); + if (!blks) + return -ENOMEM; - ret = dev->ops->get_bb_tbl(dev, dev_ppa, dev->blks_per_lun, fn, priv); - if (ret) - pr_err("nvm: failed bb tbl for ch%u lun%u\n", + nvm_for_each_lun_ppa(dev, ppa, ch, lun) { + ret = nvm_get_bb_tbl(dev, ppa, blks); + if (ret) + pr_err("nvm: failed bb tbl for ch%u lun%u\n", ppa.g.ch, ppa.g.blk); - return ret; -} -static int nvm_fact_select_blks(struct nvm_dev *dev, struct factory_blks *f) -{ - int ch, lun, ret; - struct ppa_addr ppa; - - ppa.ppa = 0; - for (ch = 0; ch < dev->nr_chnls; ch++) { - for (lun = 0; lun < dev->luns_per_chnl; lun++) { - ppa.g.ch = ch; - ppa.g.lun = lun; - - ret = nvm_fact_get_bb_tbl(dev, ppa, nvm_factory_blks, - f); - if (ret) - return ret; - } + ret = nvm_factory_blks(dev, ppa, blks, nr_blks, blk_bitmap, + flags); + if (ret) + break; } - return 0; + kfree(blks); + return ret; } int nvm_dev_factory(struct nvm_dev *dev, int flags) { - struct factory_blks f; struct ppa_addr *ppas; int ppa_cnt, ret = -ENOMEM; int max_ppas = dev->ops->max_phys_sect / dev->nr_planes; struct ppa_addr sysblk_ppas[MAX_SYSBLKS]; struct sysblk_scan s; + unsigned long *blk_bitmap; - f.blks = kzalloc(factory_nblks(dev->blks_per_lun) * dev->nr_luns, + blk_bitmap = kzalloc(factory_nblks(dev->blks_per_lun) * dev->nr_luns, GFP_KERNEL); - if (!f.blks) + if (!blk_bitmap) return ret; ppas = kcalloc(max_ppas, sizeof(struct ppa_addr), GFP_KERNEL); if (!ppas) goto err_blks; - f.dev = dev; - f.flags = flags; - /* create list of blks to be erased */ - ret = nvm_fact_select_blks(dev, &f); + ret = nvm_fact_select_blks(dev, blk_bitmap, flags); if (ret) goto err_ppas; /* continue to erase until list of blks until empty */ - while ((ppa_cnt = nvm_fact_get_blks(dev, ppas, max_ppas, &f)) > 0) + while ((ppa_cnt = + nvm_fact_get_blks(dev, ppas, max_ppas, blk_bitmap)) > 0) nvm_erase_ppa(dev, ppas, ppa_cnt); /* mark host reserved blocks free */ if (flags & NVM_FACTORY_RESET_HOST_BLKS) { nvm_setup_sysblk_scan(dev, &s, sysblk_ppas); mutex_lock(&dev->mlock); - ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, - sysblk_get_host_blks); + ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 0); if (!ret) ret = nvm_set_bb_tbl(dev, &s, NVM_BLK_T_FREE); mutex_unlock(&dev->mlock); @@ -735,7 +737,7 @@ int nvm_dev_factory(struct nvm_dev *dev, int flags) err_ppas: kfree(ppas); err_blks: - kfree(f.blks); + kfree(blk_bitmap); return ret; } EXPORT_SYMBOL(nvm_dev_factory); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index a296425a7270..f5dbb4e884d8 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -816,7 +816,7 @@ static int bcache_device_init(struct bcache_device 
*d, unsigned block_size, clear_bit(QUEUE_FLAG_ADD_RANDOM, &d->disk->queue->queue_flags); set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags); - blk_queue_flush(q, REQ_FLUSH|REQ_FUA); + blk_queue_write_cache(q, true, true); return 0; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index f9e8f0bef332..626a5ec04466 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1348,13 +1348,13 @@ static void dm_table_verify_integrity(struct dm_table *t) static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - unsigned flush = (*(unsigned *)data); + unsigned long flush = (unsigned long) data; struct request_queue *q = bdev_get_queue(dev->bdev); - return q && (q->flush_flags & flush); + return q && (q->queue_flags & flush); } -static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) +static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush) { struct dm_target *ti; unsigned i = 0; @@ -1375,7 +1375,7 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) return true; if (ti->type->iterate_devices && - ti->type->iterate_devices(ti, device_flush_capable, &flush)) + ti->type->iterate_devices(ti, device_flush_capable, (void *) flush)) return true; } @@ -1506,7 +1506,7 @@ static bool dm_table_supports_discards(struct dm_table *t) void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { - unsigned flush = 0; + bool wc = false, fua = false; /* * Copy table's limits to the DM device's request_queue @@ -1518,12 +1518,12 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); - if (dm_table_supports_flush(t, REQ_FLUSH)) { - flush |= REQ_FLUSH; - if (dm_table_supports_flush(t, REQ_FUA)) - flush |= REQ_FUA; + if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) { + wc = true; + if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_FUA))) + fua = true; } - blk_queue_flush(q, flush); + blk_queue_write_cache(q, wc, fua); if (!dm_table_discard_zeroes_data(t)) q->limits.discard_zeroes_data = 0; diff --git a/drivers/md/md.c b/drivers/md/md.c index 14d3b37944df..c9a475c33cc7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5039,7 +5039,7 @@ static int md_alloc(dev_t dev, char *name) disk->fops = &md_fops; disk->private_data = mddev; disk->queue = mddev->queue; - blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA); + blk_queue_write_cache(mddev->queue, true, true); /* Allow extended partitions. This makes the * 'mdp' device redundant, but we can't really * remove it now. 
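With q->flush_flags gone, consumers probe the cache mode through the queue flags, as the xen-blkback, dm-table and raid5-cache hunks do. A minimal sketch of the check against a lower device (the helper name is hypothetical):

#include <linux/blkdev.h>

/* Hypothetical helper: does the lower device advertise a volatile cache? */
static bool mydrv_lower_has_write_cache(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	return q && test_bit(QUEUE_FLAG_WC, &q->queue_flags);
}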
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 9531f5f05b93..26f14970a858 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1188,6 +1188,7 @@ ioerr: int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) { + struct request_queue *q = bdev_get_queue(rdev->bdev); struct r5l_log *log; if (PAGE_SIZE != 4096) @@ -1197,7 +1198,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) return -ENOMEM; log->rdev = rdev; - log->need_cache_flush = (rdev->bdev->bd_disk->queue->flush_flags != 0); + log->need_cache_flush = test_bit(QUEUE_FLAG_WC, &q->queue_flags) != 0; log->uuid_checksum = crc32c_le(~0, rdev->mddev->uuid, sizeof(rdev->mddev->uuid)); diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 5f2a3d69344f..ddc96206288a 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -2293,7 +2293,7 @@ again: ((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) || card->ext_csd.rel_sectors)) { md->flags |= MMC_BLK_REL_WR; - blk_queue_flush(md->queue.queue, REQ_FLUSH | REQ_FUA); + blk_queue_write_cache(md->queue.queue, true, true); } if (mmc_card_mmc(card) && diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index f4701182b558..74ae24364a8d 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -409,7 +409,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) goto error3; if (tr->flush) - blk_queue_flush(new->rq, REQ_FLUSH); + blk_queue_write_cache(new->rq, true, false); new->rq->queuedata = new; blk_queue_logical_block_size(new->rq, tr->blksize); diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index c894841c6456..d296fc3ae06e 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -18,7 +18,7 @@ config BLK_DEV_NVME_SCSI depends on NVME_CORE ---help--- This adds support for the SG_IO ioctl on the NVMe character - and block devices nodes, as well a a translation for a small + and block devices nodes, as well as a translation for a small number of selected SCSI commands to NVMe commands to the NVMe driver. 
If you don't know what this means you probably want to say N here, unless you run a distro that abuses the SCSI diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 643f457131c2..2de248bd462b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -58,6 +58,55 @@ static DEFINE_SPINLOCK(dev_list_lock); static struct class *nvme_class; +bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, + enum nvme_ctrl_state new_state) +{ + enum nvme_ctrl_state old_state = ctrl->state; + bool changed = false; + + spin_lock_irq(&ctrl->lock); + switch (new_state) { + case NVME_CTRL_LIVE: + switch (old_state) { + case NVME_CTRL_RESETTING: + changed = true; + /* FALLTHRU */ + default: + break; + } + break; + case NVME_CTRL_RESETTING: + switch (old_state) { + case NVME_CTRL_NEW: + case NVME_CTRL_LIVE: + changed = true; + /* FALLTHRU */ + default: + break; + } + break; + case NVME_CTRL_DELETING: + switch (old_state) { + case NVME_CTRL_LIVE: + case NVME_CTRL_RESETTING: + changed = true; + /* FALLTHRU */ + default: + break; + } + break; + default: + break; + } + spin_unlock_irq(&ctrl->lock); + + if (changed) + ctrl->state = new_state; + + return changed; +} +EXPORT_SYMBOL_GPL(nvme_change_ctrl_state); + static void nvme_free_ns(struct kref *kref) { struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); @@ -138,6 +187,111 @@ struct request *nvme_alloc_request(struct request_queue *q, } EXPORT_SYMBOL_GPL(nvme_alloc_request); +static inline void nvme_setup_flush(struct nvme_ns *ns, + struct nvme_command *cmnd) +{ + memset(cmnd, 0, sizeof(*cmnd)); + cmnd->common.opcode = nvme_cmd_flush; + cmnd->common.nsid = cpu_to_le32(ns->ns_id); +} + +static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, + struct nvme_command *cmnd) +{ + struct nvme_dsm_range *range; + struct page *page; + int offset; + unsigned int nr_bytes = blk_rq_bytes(req); + + range = kmalloc(sizeof(*range), GFP_ATOMIC); + if (!range) + return BLK_MQ_RQ_QUEUE_BUSY; + + range->cattr = cpu_to_le32(0); + range->nlb = cpu_to_le32(nr_bytes >> ns->lba_shift); + range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); + + memset(cmnd, 0, sizeof(*cmnd)); + cmnd->dsm.opcode = nvme_cmd_dsm; + cmnd->dsm.nsid = cpu_to_le32(ns->ns_id); + cmnd->dsm.nr = 0; + cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); + + req->completion_data = range; + page = virt_to_page(range); + offset = offset_in_page(range); + blk_add_request_payload(req, page, offset, sizeof(*range)); + + /* + * we set __data_len back to the size of the area to be discarded + * on disk. This allows us to report completion on the full amount + * of blocks described by the request. + */ + req->__data_len = nr_bytes; + + return 0; +} + +static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req, + struct nvme_command *cmnd) +{ + u16 control = 0; + u32 dsmgmt = 0; + + if (req->cmd_flags & REQ_FUA) + control |= NVME_RW_FUA; + if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD)) + control |= NVME_RW_LR; + + if (req->cmd_flags & REQ_RAHEAD) + dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; + + memset(cmnd, 0, sizeof(*cmnd)); + cmnd->rw.opcode = (rq_data_dir(req) ? 
nvme_cmd_write : nvme_cmd_read); + cmnd->rw.command_id = req->tag; + cmnd->rw.nsid = cpu_to_le32(ns->ns_id); + cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); + cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); + + if (ns->ms) { + switch (ns->pi_type) { + case NVME_NS_DPS_PI_TYPE3: + control |= NVME_RW_PRINFO_PRCHK_GUARD; + break; + case NVME_NS_DPS_PI_TYPE1: + case NVME_NS_DPS_PI_TYPE2: + control |= NVME_RW_PRINFO_PRCHK_GUARD | + NVME_RW_PRINFO_PRCHK_REF; + cmnd->rw.reftag = cpu_to_le32( + nvme_block_nr(ns, blk_rq_pos(req))); + break; + } + if (!blk_integrity_rq(req)) + control |= NVME_RW_PRINFO_PRACT; + } + + cmnd->rw.control = cpu_to_le16(control); + cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); +} + +int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, + struct nvme_command *cmd) +{ + int ret = 0; + + if (req->cmd_type == REQ_TYPE_DRV_PRIV) + memcpy(cmd, req->cmd, sizeof(*cmd)); + else if (req->cmd_flags & REQ_FLUSH) + nvme_setup_flush(ns, cmd); + else if (req->cmd_flags & REQ_DISCARD) + ret = nvme_setup_discard(ns, req, cmd); + else + nvme_setup_rw(ns, req, cmd); + + return ret; +} +EXPORT_SYMBOL_GPL(nvme_setup_cmd); + /* * Returns 0 on success. If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code @@ -894,6 +1048,8 @@ EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl); static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, struct request_queue *q) { + bool vwc = false; + if (ctrl->max_hw_sectors) { u32 max_segments = (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1; @@ -903,9 +1059,10 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, } if (ctrl->stripe_size) blk_queue_chunk_sectors(q, ctrl->stripe_size >> 9); - if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) - blk_queue_flush(q, REQ_FLUSH | REQ_FUA); blk_queue_virt_boundary(q, ctrl->page_size - 1); + if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) + vwc = true; + blk_queue_write_cache(q, vwc, vwc); } /* @@ -1272,7 +1429,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (nvme_revalidate_disk(ns->disk)) goto out_free_disk; - list_add_tail(&ns->list, &ctrl->namespaces); + list_add_tail_rcu(&ns->list, &ctrl->namespaces); kref_get(&ctrl->kref); if (ns->type == NVME_NS_LIGHTNVM) return; @@ -1295,6 +1452,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) static void nvme_ns_remove(struct nvme_ns *ns) { + lockdep_assert_held(&ns->ctrl->namespaces_mutex); + if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) return; @@ -1307,9 +1466,8 @@ static void nvme_ns_remove(struct nvme_ns *ns) blk_mq_abort_requeue_list(ns->queue); blk_cleanup_queue(ns->queue); } - mutex_lock(&ns->ctrl->namespaces_mutex); list_del_init(&ns->list); - mutex_unlock(&ns->ctrl->namespaces_mutex); + synchronize_rcu(); nvme_put_ns(ns); } @@ -1361,7 +1519,7 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) return ret; } -static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn) +static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn) { struct nvme_ns *ns, *next; unsigned i; @@ -1377,11 +1535,16 @@ static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn) } } -void nvme_scan_namespaces(struct nvme_ctrl *ctrl) +static void nvme_scan_work(struct work_struct *work) { + struct nvme_ctrl *ctrl = + container_of(work, struct nvme_ctrl, scan_work); struct nvme_id_ctrl *id; unsigned nn; + if (ctrl->state != NVME_CTRL_LIVE) + return; + if (nvme_identify_ctrl(ctrl, &id)) return; @@ -1392,23 +1555,86 @@ 
void nvme_scan_namespaces(struct nvme_ctrl *ctrl) if (!nvme_scan_ns_list(ctrl, nn)) goto done; } - __nvme_scan_namespaces(ctrl, le32_to_cpup(&id->nn)); + nvme_scan_ns_sequential(ctrl, nn); done: list_sort(NULL, &ctrl->namespaces, ns_cmp); mutex_unlock(&ctrl->namespaces_mutex); kfree(id); + + if (ctrl->ops->post_scan) + ctrl->ops->post_scan(ctrl); +} + +void nvme_queue_scan(struct nvme_ctrl *ctrl) +{ + /* + * Do not queue new scan work when a controller is reset during + * removal. + */ + if (ctrl->state == NVME_CTRL_LIVE) + schedule_work(&ctrl->scan_work); } -EXPORT_SYMBOL_GPL(nvme_scan_namespaces); +EXPORT_SYMBOL_GPL(nvme_queue_scan); void nvme_remove_namespaces(struct nvme_ctrl *ctrl) { struct nvme_ns *ns, *next; + mutex_lock(&ctrl->namespaces_mutex); list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) nvme_ns_remove(ns); + mutex_unlock(&ctrl->namespaces_mutex); } EXPORT_SYMBOL_GPL(nvme_remove_namespaces); +static void nvme_async_event_work(struct work_struct *work) +{ + struct nvme_ctrl *ctrl = + container_of(work, struct nvme_ctrl, async_event_work); + + spin_lock_irq(&ctrl->lock); + while (ctrl->event_limit > 0) { + int aer_idx = --ctrl->event_limit; + + spin_unlock_irq(&ctrl->lock); + ctrl->ops->submit_async_event(ctrl, aer_idx); + spin_lock_irq(&ctrl->lock); + } + spin_unlock_irq(&ctrl->lock); +} + +void nvme_complete_async_event(struct nvme_ctrl *ctrl, + struct nvme_completion *cqe) +{ + u16 status = le16_to_cpu(cqe->status) >> 1; + u32 result = le32_to_cpu(cqe->result); + + if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) { + ++ctrl->event_limit; + schedule_work(&ctrl->async_event_work); + } + + if (status != NVME_SC_SUCCESS) + return; + + switch (result & 0xff07) { + case NVME_AER_NOTICE_NS_CHANGED: + dev_info(ctrl->device, "rescanning\n"); + nvme_queue_scan(ctrl); + break; + default: + dev_warn(ctrl->device, "async event result %08x\n", result); + } +} +EXPORT_SYMBOL_GPL(nvme_complete_async_event); + +void nvme_queue_async_events(struct nvme_ctrl *ctrl) +{ + ctrl->event_limit = NVME_NR_AERS; + schedule_work(&ctrl->async_event_work); +} +EXPORT_SYMBOL_GPL(nvme_queue_async_events); + static DEFINE_IDA(nvme_instance_ida); static int nvme_set_instance(struct nvme_ctrl *ctrl) @@ -1440,6 +1666,10 @@ static void nvme_release_instance(struct nvme_ctrl *ctrl) void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) { + flush_work(&ctrl->async_event_work); + flush_work(&ctrl->scan_work); + nvme_remove_namespaces(ctrl); + device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance)); spin_lock(&dev_list_lock); @@ -1475,12 +1705,16 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, { int ret; + ctrl->state = NVME_CTRL_NEW; + spin_lock_init(&ctrl->lock); INIT_LIST_HEAD(&ctrl->namespaces); mutex_init(&ctrl->namespaces_mutex); kref_init(&ctrl->kref); ctrl->dev = dev; ctrl->ops = ops; ctrl->quirks = quirks; + INIT_WORK(&ctrl->scan_work, nvme_scan_work); + INIT_WORK(&ctrl->async_event_work, nvme_async_event_work); ret = nvme_set_instance(ctrl); if (ret) @@ -1520,8 +1754,8 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); - list_for_each_entry(ns, &ctrl->namespaces, list) { + rcu_read_lock(); + list_for_each_entry_rcu(ns, &ctrl->namespaces, list) { if (!kref_get_unless_zero(&ns->kref)) continue; @@ -1538,7 +1772,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) nvme_put_ns(ns); } - mutex_unlock(&ctrl->namespaces_mutex); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nvme_kill_queues); @@ -1546,8 +1780,8 @@ void 
nvme_stop_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); - list_for_each_entry(ns, &ctrl->namespaces, list) { + rcu_read_lock(); + list_for_each_entry_rcu(ns, &ctrl->namespaces, list) { spin_lock_irq(ns->queue->queue_lock); queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue); spin_unlock_irq(ns->queue->queue_lock); @@ -1555,7 +1789,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl) blk_mq_cancel_requeue_work(ns->queue); blk_mq_stop_hw_queues(ns->queue); } - mutex_unlock(&ctrl->namespaces_mutex); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nvme_stop_queues); @@ -1563,13 +1797,13 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); - list_for_each_entry(ns, &ctrl->namespaces, list) { + rcu_read_lock(); + list_for_each_entry_rcu(ns, &ctrl->namespaces, list) { queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue); blk_mq_start_stopped_hw_queues(ns->queue, true); blk_mq_kick_requeue_list(ns->queue); } - mutex_unlock(&ctrl->namespaces_mutex); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nvme_start_queues); @@ -1607,9 +1841,9 @@ int __init nvme_core_init(void) void nvme_core_exit(void) { - unregister_blkdev(nvme_major, "nvme"); class_destroy(nvme_class); __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); + unregister_blkdev(nvme_major, "nvme"); } MODULE_LICENSE("GPL"); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 9461dd639acd..a0af0558354c 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -367,8 +367,8 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb, ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c, entries, len); if (ret) { - dev_err(ns->ctrl->dev, "L2P table transfer failed (%d)\n", - ret); + dev_err(ns->ctrl->device, + "L2P table transfer failed (%d)\n", ret); ret = -EIO; goto out; } @@ -387,41 +387,16 @@ out: return ret; } -static void nvme_nvm_bb_tbl_fold(struct nvm_dev *nvmdev, - int nr_dst_blks, u8 *dst_blks, - int nr_src_blks, u8 *src_blks) -{ - int blk, offset, pl, blktype; - - for (blk = 0; blk < nr_dst_blks; blk++) { - offset = blk * nvmdev->plane_mode; - blktype = src_blks[offset]; - - /* Bad blocks on any planes take precedence over other types */ - for (pl = 0; pl < nvmdev->plane_mode; pl++) { - if (src_blks[offset + pl] & - (NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) { - blktype = src_blks[offset + pl]; - break; - } - } - - dst_blks[blk] = blktype; - } -} - static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, - int nr_dst_blks, nvm_bb_update_fn *update_bbtbl, - void *priv) + u8 *blks) { struct request_queue *q = nvmdev->q; struct nvme_ns *ns = q->queuedata; struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_nvm_command c = {}; struct nvme_nvm_bb_tbl *bb_tbl; - u8 *dst_blks = NULL; - int nr_src_blks = nr_dst_blks * nvmdev->plane_mode; - int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_src_blks; + int nr_blks = nvmdev->blks_per_lun * nvmdev->plane_mode; + int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks; int ret = 0; c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl; @@ -432,54 +407,43 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, if (!bb_tbl) return -ENOMEM; - dst_blks = kzalloc(nr_dst_blks, GFP_KERNEL); - if (!dst_blks) { - ret = -ENOMEM; - goto out; - } - ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c, bb_tbl, tblsz); if (ret) { - dev_err(ctrl->dev, "get bad block table failed (%d)\n", ret); + 
dev_err(ctrl->device, "get bad block table failed (%d)\n", ret); ret = -EIO; goto out; } if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' || bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') { - dev_err(ctrl->dev, "bbt format mismatch\n"); + dev_err(ctrl->device, "bbt format mismatch\n"); ret = -EINVAL; goto out; } if (le16_to_cpu(bb_tbl->verid) != 1) { ret = -EINVAL; - dev_err(ctrl->dev, "bbt version not supported\n"); + dev_err(ctrl->device, "bbt version not supported\n"); goto out; } - if (le32_to_cpu(bb_tbl->tblks) != nr_src_blks) { + if (le32_to_cpu(bb_tbl->tblks) != nr_blks) { ret = -EINVAL; - dev_err(ctrl->dev, "bbt unsuspected blocks returned (%u!=%u)", - le32_to_cpu(bb_tbl->tblks), nr_src_blks); + dev_err(ctrl->device, + "bbt unsuspected blocks returned (%u!=%u)", + le32_to_cpu(bb_tbl->tblks), nr_blks); goto out; } - nvme_nvm_bb_tbl_fold(nvmdev, nr_dst_blks, dst_blks, - nr_src_blks, bb_tbl->blk); - - ppa = dev_to_generic_addr(nvmdev, ppa); - ret = update_bbtbl(ppa, nr_dst_blks, dst_blks, priv); - + memcpy(blks, bb_tbl->blk, nvmdev->blks_per_lun * nvmdev->plane_mode); out: - kfree(dst_blks); kfree(bb_tbl); return ret; } -static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct nvm_rq *rqd, - int type) +static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas, + int nr_ppas, int type) { struct nvme_ns *ns = nvmdev->q->queuedata; struct nvme_nvm_command c = {}; @@ -487,14 +451,15 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct nvm_rq *rqd, c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl; c.set_bb.nsid = cpu_to_le32(ns->ns_id); - c.set_bb.spba = cpu_to_le64(rqd->ppa_addr.ppa); - c.set_bb.nlb = cpu_to_le16(rqd->nr_pages - 1); + c.set_bb.spba = cpu_to_le64(ppas->ppa); + c.set_bb.nlb = cpu_to_le16(nr_ppas - 1); c.set_bb.value = type; ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c, NULL, 0); if (ret) - dev_err(ns->ctrl->dev, "set bad block table failed (%d)\n", ret); + dev_err(ns->ctrl->device, "set bad block table failed (%d)\n", + ret); return ret; } @@ -504,8 +469,9 @@ static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd, c->ph_rw.opcode = rqd->opcode; c->ph_rw.nsid = cpu_to_le32(ns->ns_id); c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa); + c->ph_rw.metadata = cpu_to_le64(rqd->dma_meta_list); c->ph_rw.control = cpu_to_le16(rqd->flags); - c->ph_rw.length = cpu_to_le16(rqd->nr_pages - 1); + c->ph_rw.length = cpu_to_le16(rqd->nr_ppas - 1); if (rqd->opcode == NVM_OP_HBWRITE || rqd->opcode == NVM_OP_HBREAD) c->hb_rw.slba = cpu_to_le64(nvme_block_nr(ns, @@ -576,7 +542,7 @@ static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd) c.erase.opcode = NVM_OP_ERASE; c.erase.nsid = cpu_to_le32(ns->ns_id); c.erase.spba = cpu_to_le64(rqd->ppa_addr.ppa); - c.erase.length = cpu_to_le16(rqd->nr_pages - 1); + c.erase.length = cpu_to_le16(rqd->nr_ppas - 1); return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0); } @@ -601,10 +567,10 @@ static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool, return dma_pool_alloc(pool, mem_flags, dma_handler); } -static void nvme_nvm_dev_dma_free(void *pool, void *ppa_list, +static void nvme_nvm_dev_dma_free(void *pool, void *addr, dma_addr_t dma_handler) { - dma_pool_free(pool, ppa_list, dma_handler); + dma_pool_free(pool, addr, dma_handler); } static struct nvm_dev_ops nvme_nvm_dev_ops = { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index f846da4eb338..114b92873894 100644 --- a/drivers/nvme/host/nvme.h +++ 
b/drivers/nvme/host/nvme.h @@ -67,7 +67,16 @@ enum nvme_quirks { NVME_QUIRK_DISCARD_ZEROES = (1 << 2), }; +enum nvme_ctrl_state { + NVME_CTRL_NEW, + NVME_CTRL_LIVE, + NVME_CTRL_RESETTING, + NVME_CTRL_DELETING, +}; + struct nvme_ctrl { + enum nvme_ctrl_state state; + spinlock_t lock; const struct nvme_ctrl_ops *ops; struct request_queue *admin_q; struct device *dev; @@ -84,7 +93,7 @@ struct nvme_ctrl { char serial[20]; char model[40]; char firmware_rev[8]; - int cntlid; + u16 cntlid; u32 ctrl_config; @@ -99,6 +108,8 @@ struct nvme_ctrl { u32 vs; bool subsystem; unsigned long quirks; + struct work_struct scan_work; + struct work_struct async_event_work; }; /* @@ -136,9 +147,10 @@ struct nvme_ctrl_ops { int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); - bool (*io_incapable)(struct nvme_ctrl *ctrl); int (*reset_ctrl)(struct nvme_ctrl *ctrl); void (*free_ctrl)(struct nvme_ctrl *ctrl); + void (*post_scan)(struct nvme_ctrl *ctrl); + void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx); }; static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl) @@ -150,17 +162,6 @@ static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl) return val & NVME_CSTS_RDY; } -static inline bool nvme_io_incapable(struct nvme_ctrl *ctrl) -{ - u32 val = 0; - - if (ctrl->ops->io_incapable(ctrl)) - return true; - if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val)) - return true; - return val & NVME_CSTS_CFS; -} - static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) { if (!ctrl->subsystem) @@ -173,57 +174,20 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) return (sector >> (ns->lba_shift - 9)); } -static inline void nvme_setup_flush(struct nvme_ns *ns, - struct nvme_command *cmnd) +static inline unsigned nvme_map_len(struct request *rq) { - memset(cmnd, 0, sizeof(*cmnd)); - cmnd->common.opcode = nvme_cmd_flush; - cmnd->common.nsid = cpu_to_le32(ns->ns_id); + if (rq->cmd_flags & REQ_DISCARD) + return sizeof(struct nvme_dsm_range); + else + return blk_rq_bytes(rq); } -static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req, - struct nvme_command *cmnd) +static inline void nvme_cleanup_cmd(struct request *req) { - u16 control = 0; - u32 dsmgmt = 0; - - if (req->cmd_flags & REQ_FUA) - control |= NVME_RW_FUA; - if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD)) - control |= NVME_RW_LR; - - if (req->cmd_flags & REQ_RAHEAD) - dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; - - memset(cmnd, 0, sizeof(*cmnd)); - cmnd->rw.opcode = (rq_data_dir(req) ? 
nvme_cmd_write : nvme_cmd_read); - cmnd->rw.command_id = req->tag; - cmnd->rw.nsid = cpu_to_le32(ns->ns_id); - cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); - cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); - - if (ns->ms) { - switch (ns->pi_type) { - case NVME_NS_DPS_PI_TYPE3: - control |= NVME_RW_PRINFO_PRCHK_GUARD; - break; - case NVME_NS_DPS_PI_TYPE1: - case NVME_NS_DPS_PI_TYPE2: - control |= NVME_RW_PRINFO_PRCHK_GUARD | - NVME_RW_PRINFO_PRCHK_REF; - cmnd->rw.reftag = cpu_to_le32( - nvme_block_nr(ns, blk_rq_pos(req))); - break; - } - if (!blk_integrity_rq(req)) - control |= NVME_RW_PRINFO_PRACT; - } - - cmnd->rw.control = cpu_to_le16(control); - cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); + if (req->cmd_flags & REQ_DISCARD) + kfree(req->completion_data); } - static inline int nvme_error_status(u16 status) { switch (status & 0x7ff) { @@ -242,6 +206,8 @@ static inline bool nvme_req_needs_retry(struct request *req, u16 status) (jiffies - req->start_time) < req->timeout; } +bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, + enum nvme_ctrl_state new_state); int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap); int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap); int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl); @@ -251,9 +217,14 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl); void nvme_put_ctrl(struct nvme_ctrl *ctrl); int nvme_init_identify(struct nvme_ctrl *ctrl); -void nvme_scan_namespaces(struct nvme_ctrl *ctrl); +void nvme_queue_scan(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); +#define NVME_NR_AERS 1 +void nvme_complete_async_event(struct nvme_ctrl *ctrl, + struct nvme_completion *cqe); +void nvme_queue_async_events(struct nvme_ctrl *ctrl); + void nvme_stop_queues(struct nvme_ctrl *ctrl); void nvme_start_queues(struct nvme_ctrl *ctrl); void nvme_kill_queues(struct nvme_ctrl *ctrl); @@ -261,6 +232,8 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl); struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags); void nvme_requeue_req(struct request *req); +int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, + struct nvme_command *cmd); int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4fd733ff72b1..0f093f14d348 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -54,8 +54,7 @@ * We handle AEN commands ourselves and don't even let the * block layer know about them. 
*/ -#define NVME_NR_AEN_COMMANDS 1 -#define NVME_AQ_BLKMQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS) +#define NVME_AQ_BLKMQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AERS) static int use_threaded_interrupts; module_param(use_threaded_interrupts, int, 0); @@ -92,9 +91,7 @@ struct nvme_dev { struct msix_entry *entry; void __iomem *bar; struct work_struct reset_work; - struct work_struct scan_work; struct work_struct remove_work; - struct work_struct async_work; struct timer_list watchdog_timer; struct mutex shutdown_lock; bool subsystem; @@ -102,11 +99,6 @@ struct nvme_dev { dma_addr_t cmb_dma_addr; u64 cmb_size; u32 cmbsz; - unsigned long flags; - -#define NVME_CTRL_RESETTING 0 -#define NVME_CTRL_REMOVING 1 - struct nvme_ctrl ctrl; struct completion ioq_wait; }; @@ -271,40 +263,6 @@ static int nvme_init_request(void *data, struct request *req, return 0; } -static void nvme_queue_scan(struct nvme_dev *dev) -{ - /* - * Do not queue new scan work when a controller is reset during - * removal. - */ - if (test_bit(NVME_CTRL_REMOVING, &dev->flags)) - return; - queue_work(nvme_workq, &dev->scan_work); -} - -static void nvme_complete_async_event(struct nvme_dev *dev, - struct nvme_completion *cqe) -{ - u16 status = le16_to_cpu(cqe->status) >> 1; - u32 result = le32_to_cpu(cqe->result); - - if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) { - ++dev->ctrl.event_limit; - queue_work(nvme_workq, &dev->async_work); - } - - if (status != NVME_SC_SUCCESS) - return; - - switch (result & 0xff07) { - case NVME_AER_NOTICE_NS_CHANGED: - dev_info(dev->ctrl.device, "rescanning\n"); - nvme_queue_scan(dev); - default: - dev_warn(dev->ctrl.device, "async event result %08x\n", result); - } -} - /** * __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell * @nvmeq: The queue to use @@ -334,16 +292,11 @@ static __le64 **iod_list(struct request *req) return (__le64 **)(iod->sg + req->nr_phys_segments); } -static int nvme_init_iod(struct request *rq, struct nvme_dev *dev) +static int nvme_init_iod(struct request *rq, unsigned size, + struct nvme_dev *dev) { struct nvme_iod *iod = blk_mq_rq_to_pdu(rq); int nseg = rq->nr_phys_segments; - unsigned size; - - if (rq->cmd_flags & REQ_DISCARD) - size = sizeof(struct nvme_dsm_range); - else - size = blk_rq_bytes(rq); if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC); @@ -368,6 +321,8 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req) __le64 **list = iod_list(req); dma_addr_t prp_dma = iod->first_dma; + nvme_cleanup_cmd(req); + if (iod->npages == 0) dma_pool_free(dev->prp_small_pool, list[0], prp_dma); for (i = 0; i < iod->npages; i++) { @@ -529,7 +484,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req, } static int nvme_map_data(struct nvme_dev *dev, struct request *req, - struct nvme_command *cmnd) + unsigned size, struct nvme_command *cmnd) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct request_queue *q = req->q; @@ -546,7 +501,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req, if (!dma_map_sg(dev->dev, iod->sg, iod->nents, dma_dir)) goto out; - if (!nvme_setup_prps(dev, req, blk_rq_bytes(req))) + if (!nvme_setup_prps(dev, req, size)) goto out_unmap; ret = BLK_MQ_RQ_QUEUE_ERROR; @@ -596,37 +551,6 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) } /* - * We reuse the small pool to allocate the 16-byte range here as it is not - * worth having a special pool for these or additional 
cases to handle freeing - * the iod. - */ -static int nvme_setup_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns, - struct request *req, struct nvme_command *cmnd) -{ - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_dsm_range *range; - - range = dma_pool_alloc(nvmeq->dev->prp_small_pool, GFP_ATOMIC, - &iod->first_dma); - if (!range) - return BLK_MQ_RQ_QUEUE_BUSY; - iod_list(req)[0] = (__le64 *)range; - iod->npages = 0; - - range->cattr = cpu_to_le32(0); - range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift); - range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); - - memset(cmnd, 0, sizeof(*cmnd)); - cmnd->dsm.opcode = nvme_cmd_dsm; - cmnd->dsm.nsid = cpu_to_le32(ns->ns_id); - cmnd->dsm.prp1 = cpu_to_le64(iod->first_dma); - cmnd->dsm.nr = 0; - cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); - return BLK_MQ_RQ_QUEUE_OK; -} - -/* * NOTE: ns is NULL when called on the admin queue. */ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, @@ -637,6 +561,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_dev *dev = nvmeq->dev; struct request *req = bd->rq; struct nvme_command cmnd; + unsigned map_len; int ret = BLK_MQ_RQ_QUEUE_OK; /* @@ -652,23 +577,17 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, } } - ret = nvme_init_iod(req, dev); + map_len = nvme_map_len(req); + ret = nvme_init_iod(req, map_len, dev); if (ret) return ret; - if (req->cmd_flags & REQ_DISCARD) { - ret = nvme_setup_discard(nvmeq, ns, req, &cmnd); - } else { - if (req->cmd_type == REQ_TYPE_DRV_PRIV) - memcpy(&cmnd, req->cmd, sizeof(cmnd)); - else if (req->cmd_flags & REQ_FLUSH) - nvme_setup_flush(ns, &cmnd); - else - nvme_setup_rw(ns, req, &cmnd); + ret = nvme_setup_cmd(ns, req, &cmnd); + if (ret) + goto out; - if (req->nr_phys_segments) - ret = nvme_map_data(dev, req, &cmnd); - } + if (req->nr_phys_segments) + ret = nvme_map_data(dev, req, map_len, &cmnd); if (ret) goto out; @@ -764,7 +683,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) */ if (unlikely(nvmeq->qid == 0 && cqe.command_id >= NVME_AQ_BLKMQ_DEPTH)) { - nvme_complete_async_event(nvmeq->dev, &cqe); + nvme_complete_async_event(&nvmeq->dev->ctrl, &cqe); continue; } @@ -833,21 +752,18 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) return 0; } -static void nvme_async_event_work(struct work_struct *work) +static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx) { - struct nvme_dev *dev = container_of(work, struct nvme_dev, async_work); + struct nvme_dev *dev = to_nvme_dev(ctrl); struct nvme_queue *nvmeq = dev->queues[0]; struct nvme_command c; memset(&c, 0, sizeof(c)); c.common.opcode = nvme_admin_async_event; + c.common.command_id = NVME_AQ_BLKMQ_DEPTH + aer_idx; spin_lock_irq(&nvmeq->q_lock); - while (dev->ctrl.event_limit > 0) { - c.common.command_id = NVME_AQ_BLKMQ_DEPTH + - --dev->ctrl.event_limit; - __nvme_submit_cmd(nvmeq, &c); - } + __nvme_submit_cmd(nvmeq, &c); spin_unlock_irq(&nvmeq->q_lock); } @@ -939,7 +855,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) * cancellation error. All outstanding requests are completed on * shutdown, so we return BLK_EH_HANDLED. 
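To make the nvme_queue_rq() rework above easier to follow: the prep path now asks nvme_map_len() how many bytes will actually be DMA-mapped (a discard maps only the single DSM range descriptor, not the LBAs being deallocated) and leaves command construction to nvme_setup_cmd() in the core. Below is a self-contained userspace model of that dispatch; struct fake_req and the REQ_F_* names are invented for the sketch and do not match the kernel's cmd_flags encoding.

#include <stdio.h>
#include <stddef.h>

/* Simplified stand-ins for the request flags tested by nvme_setup_cmd(). */
enum req_flags {
        REQ_F_PASSTHRU = 1 << 0,        /* REQ_TYPE_DRV_PRIV in the patch */
        REQ_F_FLUSH    = 1 << 1,
        REQ_F_DISCARD  = 1 << 2,
};

struct fake_req {
        unsigned flags;
        size_t data_bytes;              /* blk_rq_bytes() */
};

struct fake_dsm_range { unsigned cattr, nlb; unsigned long long slba; };

/* Mirrors nvme_map_len(): a discard maps one range descriptor, not the data. */
static size_t map_len(const struct fake_req *rq)
{
        if (rq->flags & REQ_F_DISCARD)
                return sizeof(struct fake_dsm_range);
        return rq->data_bytes;
}

/* Mirrors the nvme_setup_cmd() dispatch order shown in the patch. */
static const char *setup_cmd(const struct fake_req *rq)
{
        if (rq->flags & REQ_F_PASSTHRU)
                return "copy caller-provided command";
        if (rq->flags & REQ_F_FLUSH)
                return "nvme_cmd_flush";
        if (rq->flags & REQ_F_DISCARD)
                return "nvme_cmd_dsm (deallocate)";
        return "nvme_cmd_read/write";
}

int main(void)
{
        struct fake_req reqs[] = {
                { .flags = REQ_F_FLUSH },
                { .flags = REQ_F_DISCARD, .data_bytes = 1 << 20 },
                { .flags = 0,             .data_bytes = 4096 },
        };

        for (size_t i = 0; i < sizeof(reqs) / sizeof(reqs[0]); i++)
                printf("req %zu: %-28s map_len=%zu\n",
                       i, setup_cmd(&reqs[i]), map_len(&reqs[i]));
        return 0;
}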
*/ - if (test_bit(NVME_CTRL_RESETTING, &dev->flags)) { + if (dev->ctrl.state == NVME_CTRL_RESETTING) { dev_warn(dev->ctrl.device, "I/O %d QID %d timeout, disable controller\n", req->tag, nvmeq->qid); @@ -1003,16 +919,15 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) return BLK_EH_RESET_TIMER; } -static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved) +static void nvme_cancel_io(struct request *req, void *data, bool reserved) { - struct nvme_queue *nvmeq = data; int status; if (!blk_mq_request_started(req)) return; - dev_dbg_ratelimited(nvmeq->dev->ctrl.device, - "Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid); + dev_dbg_ratelimited(((struct nvme_dev *) data)->ctrl.device, + "Cancelling I/O %d", req->tag); status = NVME_SC_ABORT_REQ; if (blk_queue_dying(req->q)) @@ -1069,14 +984,6 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) return 0; } -static void nvme_clear_queue(struct nvme_queue *nvmeq) -{ - spin_lock_irq(&nvmeq->q_lock); - if (nvmeq->tags && *nvmeq->tags) - blk_mq_all_tag_busy_iter(*nvmeq->tags, nvme_cancel_queue_ios, nvmeq); - spin_unlock_irq(&nvmeq->q_lock); -} - static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) { struct nvme_queue *nvmeq = dev->queues[0]; @@ -1350,22 +1257,44 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) return result; } +static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) +{ + + /* If true, indicates loss of adapter communication, possibly by a + * NVMe Subsystem reset. + */ + bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO); + + /* If there is a reset ongoing, we shouldn't reset again. */ + if (work_busy(&dev->reset_work)) + return false; + + /* We shouldn't reset unless the controller is on fatal error state + * _or_ if we lost the communication with it. + */ + if (!(csts & NVME_CSTS_CFS) && !nssro) + return false; + + /* If PCI error recovery process is happening, we cannot reset or + * the recovery mechanism will surely fail. + */ + if (pci_channel_offline(to_pci_dev(dev->dev))) + return false; + + return true; +} + static void nvme_watchdog_timer(unsigned long data) { struct nvme_dev *dev = (struct nvme_dev *)data; u32 csts = readl(dev->bar + NVME_REG_CSTS); - /* - * Skip controllers currently under reset. - */ - if (!work_pending(&dev->reset_work) && !work_busy(&dev->reset_work) && - ((csts & NVME_CSTS_CFS) || - (dev->subsystem && (csts & NVME_CSTS_NSSRO)))) { - if (queue_work(nvme_workq, &dev->reset_work)) { + /* Skip controllers under certain specific conditions. 
*/ + if (nvme_should_reset(dev, csts)) { + if (queue_work(nvme_workq, &dev->reset_work)) dev_warn(dev->dev, "Failed status: 0x%x, reset controller.\n", csts); - } return; } @@ -1551,8 +1480,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) return result; } -static void nvme_set_irq_hints(struct nvme_dev *dev) +static void nvme_pci_post_scan(struct nvme_ctrl *ctrl) { + struct nvme_dev *dev = to_nvme_dev(ctrl); struct nvme_queue *nvmeq; int i; @@ -1567,16 +1497,6 @@ static void nvme_set_irq_hints(struct nvme_dev *dev) } } -static void nvme_dev_scan(struct work_struct *work) -{ - struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work); - - if (!dev->tagset.tags) - return; - nvme_scan_namespaces(&dev->ctrl); - nvme_set_irq_hints(dev); -} - static void nvme_del_queue_end(struct request *req, int error) { struct nvme_queue *nvmeq = req->end_io_data; @@ -1592,7 +1512,13 @@ static void nvme_del_cq_end(struct request *req, int error) if (!error) { unsigned long flags; - spin_lock_irqsave(&nvmeq->q_lock, flags); + /* + * We might be called with the AQ q_lock held + * and the I/O queue q_lock should always + * nest inside the AQ one. + */ + spin_lock_irqsave_nested(&nvmeq->q_lock, flags, + SINGLE_DEPTH_NESTING); nvme_process_cq(nvmeq); spin_unlock_irqrestore(&nvmeq->q_lock, flags); } @@ -1684,7 +1610,6 @@ static int nvme_dev_add(struct nvme_dev *dev) nvme_free_queues(dev, dev->online_queues); } - nvme_queue_scan(dev); return 0; } @@ -1797,8 +1722,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) } nvme_pci_disable(dev); - for (i = dev->queue_count - 1; i >= 0; i--) - nvme_clear_queue(dev->queues[i]); + blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_io, dev); + blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_io, dev); mutex_unlock(&dev->shutdown_lock); } @@ -1854,7 +1779,7 @@ static void nvme_reset_work(struct work_struct *work) struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); int result = -ENODEV; - if (WARN_ON(test_bit(NVME_CTRL_RESETTING, &dev->flags))) + if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING)) goto out; /* @@ -1864,11 +1789,9 @@ static void nvme_reset_work(struct work_struct *work) if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) nvme_dev_disable(dev, false); - if (test_bit(NVME_CTRL_REMOVING, &dev->flags)) + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) goto out; - set_bit(NVME_CTRL_RESETTING, &dev->flags); - result = nvme_pci_enable(dev); if (result) goto out; @@ -1890,8 +1813,14 @@ static void nvme_reset_work(struct work_struct *work) if (result) goto out; - dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS; - queue_work(nvme_workq, &dev->async_work); + /* + * A controller that can not execute IO typically requires user + * intervention to correct. For such degraded controllers, the driver + * should not submit commands the user did not request, so skip + * registering for asynchronous event notification on this condition. 
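The watchdog rework above concentrates the reset decision in nvme_should_reset(). Here is a compact userspace restatement of that predicate; struct fake_dev and its booleans stand in for work_busy(), pci_channel_offline() and dev->subsystem, while the CSTS bit positions follow the NVMe specification.

#include <stdbool.h>
#include <stdio.h>

/* CSTS bits as defined by the NVMe spec. */
#define CSTS_CFS   (1u << 1)    /* Controller Fatal Status */
#define CSTS_NSSRO (1u << 4)    /* NVM Subsystem Reset Occurred */

struct fake_dev {
        bool subsystem_reset_supported; /* dev->subsystem in the patch */
        bool reset_in_progress;         /* work_busy(&dev->reset_work) */
        bool pci_channel_offline;       /* pci_channel_offline(...) */
};

/* Same decision order as nvme_should_reset() in the hunk above. */
static bool should_reset(const struct fake_dev *dev, unsigned csts)
{
        bool nssro = dev->subsystem_reset_supported && (csts & CSTS_NSSRO);

        if (dev->reset_in_progress)
                return false;           /* a reset is already running */
        if (!(csts & CSTS_CFS) && !nssro)
                return false;           /* controller looks healthy */
        if (dev->pci_channel_offline)
                return false;           /* let PCI error recovery run first */
        return true;
}

int main(void)
{
        struct fake_dev dev = { .subsystem_reset_supported = true };

        printf("healthy:      %d\n", should_reset(&dev, 0));
        printf("fatal status: %d\n", should_reset(&dev, CSTS_CFS));
        printf("subsys reset: %d\n", should_reset(&dev, CSTS_NSSRO));

        dev.reset_in_progress = true;
        printf("already busy: %d\n", should_reset(&dev, CSTS_CFS));
        return 0;
}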
+ */ + if (dev->online_queues > 1) + nvme_queue_async_events(&dev->ctrl); mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ)); @@ -1901,13 +1830,20 @@ static void nvme_reset_work(struct work_struct *work) */ if (dev->online_queues < 2) { dev_warn(dev->ctrl.device, "IO queues not created\n"); + nvme_kill_queues(&dev->ctrl); nvme_remove_namespaces(&dev->ctrl); } else { nvme_start_queues(&dev->ctrl); nvme_dev_add(dev); } - clear_bit(NVME_CTRL_RESETTING, &dev->flags); + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) { + dev_warn(dev->ctrl.device, "failed to mark controller live\n"); + goto out; + } + + if (dev->online_queues > 1) + nvme_queue_scan(&dev->ctrl); return; out: @@ -1955,13 +1891,6 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) return 0; } -static bool nvme_pci_io_incapable(struct nvme_ctrl *ctrl) -{ - struct nvme_dev *dev = to_nvme_dev(ctrl); - - return !dev->bar || dev->online_queues < 2; -} - static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl) { return nvme_reset(to_nvme_dev(ctrl)); @@ -1972,9 +1901,10 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .reg_read32 = nvme_pci_reg_read32, .reg_write32 = nvme_pci_reg_write32, .reg_read64 = nvme_pci_reg_read64, - .io_incapable = nvme_pci_io_incapable, .reset_ctrl = nvme_pci_reset_ctrl, .free_ctrl = nvme_pci_free_ctrl, + .post_scan = nvme_pci_post_scan, + .submit_async_event = nvme_pci_submit_async_event, }; static int nvme_dev_map(struct nvme_dev *dev) @@ -2026,10 +1956,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto free; - INIT_WORK(&dev->scan_work, nvme_dev_scan); INIT_WORK(&dev->reset_work, nvme_reset_work); INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work); - INIT_WORK(&dev->async_work, nvme_async_event_work); setup_timer(&dev->watchdog_timer, nvme_watchdog_timer, (unsigned long)dev); mutex_init(&dev->shutdown_lock); @@ -2086,15 +2014,12 @@ static void nvme_remove(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); - set_bit(NVME_CTRL_REMOVING, &dev->flags); + nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); + pci_set_drvdata(pdev, NULL); - flush_work(&dev->async_work); flush_work(&dev->reset_work); - flush_work(&dev->scan_work); - nvme_remove_namespaces(&dev->ctrl); nvme_uninit_ctrl(&dev->ctrl); nvme_dev_disable(dev, true); - flush_work(&dev->reset_work); nvme_dev_remove_admin(dev); nvme_free_queues(dev, 0); nvme_release_cmb(dev); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 69b0a4a7a15f..428c03ef02b2 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -137,15 +137,15 @@ static const char *sd_cache_types[] = { static void sd_set_flush_flag(struct scsi_disk *sdkp) { - unsigned flush = 0; + bool wc = false, fua = false; if (sdkp->WCE) { - flush |= REQ_FLUSH; + wc = true; if (sdkp->DPOFUA) - flush |= REQ_FUA; + fua = true; } - blk_queue_flush(sdkp->disk->queue, flush); + blk_queue_write_cache(sdkp->disk->queue, wc, fua); } static ssize_t diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 026a758e5778..7c4efb4417b0 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -687,10 +687,10 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, * Force writethrough using WRITE_FUA if a volatile write cache * is not enabled, or if initiator set the Force Unit Access bit. 
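The reset and remove paths above now funnel through the nvme_change_ctrl_state() transition table introduced in core.c earlier in this patch, replacing the ad-hoc NVME_CTRL_RESETTING/REMOVING bits. A standalone sketch of the same transition rules, with the locking and the real struct nvme_ctrl omitted:

#include <stdbool.h>
#include <stdio.h>

enum ctrl_state { CTRL_NEW, CTRL_LIVE, CTRL_RESETTING, CTRL_DELETING };

/* Same transition table as nvme_change_ctrl_state(); no locking here. */
static bool change_state(enum ctrl_state *state, enum ctrl_state new_state)
{
        bool changed = false;

        switch (new_state) {
        case CTRL_LIVE:
                changed = (*state == CTRL_RESETTING);
                break;
        case CTRL_RESETTING:
                changed = (*state == CTRL_NEW || *state == CTRL_LIVE);
                break;
        case CTRL_DELETING:
                changed = (*state == CTRL_LIVE || *state == CTRL_RESETTING);
                break;
        default:
                break;
        }
        if (changed)
                *state = new_state;
        return changed;
}

int main(void)
{
        enum ctrl_state st = CTRL_NEW;

        /* Typical lifecycle: probe -> reset work -> live -> remove. */
        printf("NEW -> RESETTING: %d\n", change_state(&st, CTRL_RESETTING));
        printf("    -> LIVE:      %d\n", change_state(&st, CTRL_LIVE));
        printf("    -> DELETING:  %d\n", change_state(&st, CTRL_DELETING));

        /* Once deleting, further reset attempts are rejected. */
        printf("DEL -> RESETTING: %d\n", change_state(&st, CTRL_RESETTING));
        return 0;
}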
*/ - if (q->flush_flags & REQ_FUA) { + if (test_bit(QUEUE_FLAG_FUA, &q->queue_flags)) { if (cmd->se_cmd_flags & SCF_FUA) rw = WRITE_FUA; - else if (!(q->flush_flags & REQ_FLUSH)) + else if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) rw = WRITE_FUA; else rw = WRITE; @@ -836,7 +836,7 @@ static bool iblock_get_write_cache(struct se_device *dev) struct block_device *bd = ib_dev->ibd_bd; struct request_queue *q = bdev_get_queue(bd); - return q->flush_flags & REQ_FLUSH; + return test_bit(QUEUE_FLAG_WC, &q->queue_flags); } static const struct target_backend_ops iblock_ops = { diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c808fec1ce44..2498fdf3a503 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -238,8 +238,6 @@ void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); -void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, - void *priv); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_freeze_queue(struct request_queue *q); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b79131acf6c0..1fd8fdff2f81 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -433,8 +433,6 @@ struct request_queue { /* * for flush operations */ - unsigned int flush_flags; - unsigned int flush_not_queueable:1; struct blk_flush_queue *fq; struct list_head requeue_list; @@ -493,6 +491,7 @@ struct request_queue { #define QUEUE_FLAG_POLL 22 /* IO polling enabled if set */ #define QUEUE_FLAG_WC 23 /* Write back caching */ #define QUEUE_FLAG_FUA 24 /* device supports FUA writes */ +#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -1009,7 +1008,6 @@ extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); -extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); @@ -1368,7 +1366,7 @@ static inline unsigned int block_size(struct block_device *bdev) static inline bool queue_flush_queueable(struct request_queue *q) { - return !q->flush_not_queueable; + return !test_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags); } typedef struct {struct page *v;} Sector; diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index cdcb2ccbefa8..ef2c7d2e76c4 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -18,7 +18,7 @@ enum { #define NVM_SEC_BITS (8) #define NVM_PL_BITS (8) #define NVM_LUN_BITS (8) -#define NVM_CH_BITS (8) +#define NVM_CH_BITS (7) struct ppa_addr { /* Generic structure for all addresses */ @@ -30,8 +30,14 @@ struct ppa_addr { u64 pl : NVM_PL_BITS; u64 lun : NVM_LUN_BITS; u64 ch : NVM_CH_BITS; + u64 reserved : 1; } g; + struct { + u64 line : 63; + u64 is_cached : 1; + } c; + u64 ppa; }; }; @@ -41,13 +47,11 @@ struct nvm_id; struct nvm_dev; typedef 
int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *); -typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *); typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *); typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32, nvm_l2p_update_fn *, void *); -typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int, - nvm_bb_update_fn *, void *); -typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int); +typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *); +typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *); typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); @@ -202,6 +206,7 @@ struct nvm_id { struct nvm_target { struct list_head list; + struct nvm_dev *dev; struct nvm_tgt_type *type; struct gendisk *disk; }; @@ -232,14 +237,14 @@ struct nvm_rq { struct ppa_addr *ppa_list; - void *metadata; - dma_addr_t dma_metadata; + void *meta_list; + dma_addr_t dma_meta_list; struct completion *wait; nvm_end_io_fn *end_io; uint8_t opcode; - uint16_t nr_pages; + uint16_t nr_ppas; uint16_t flags; u64 ppa_status; /* ppa media status */ @@ -307,7 +312,6 @@ struct nvm_dev { struct nvm_dev_ops *ops; struct list_head devices; - struct list_head online_targets; /* Media manager */ struct nvmm_type *mt; @@ -323,6 +327,8 @@ struct nvm_dev { int sec_per_pg; /* only sectors for a single page */ int pgs_per_blk; int blks_per_lun; + int fpg_size; + int pfpg_size; /* size of buffer if all pages are to be read */ int sec_size; int oob_size; int mccap; @@ -345,10 +351,9 @@ struct nvm_dev { unsigned long total_blocks; unsigned long total_secs; int nr_luns; - unsigned max_pages_per_blk; unsigned long *lun_map; - void *ppalist_pool; + void *dma_pool; struct nvm_id identity; @@ -450,8 +455,8 @@ struct nvm_tgt_type { struct list_head list; }; -extern int nvm_register_target(struct nvm_tgt_type *); -extern void nvm_unregister_target(struct nvm_tgt_type *); +extern int nvm_register_tgt_type(struct nvm_tgt_type *); +extern void nvm_unregister_tgt_type(struct nvm_tgt_type *); extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *); extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t); @@ -467,6 +472,7 @@ typedef void (nvmm_flush_blk_fn)(struct nvm_dev *, struct nvm_block *); typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, unsigned long); +typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int); typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int); typedef int (nvmm_reserve_lun)(struct nvm_dev *, int); typedef void (nvmm_release_lun)(struct nvm_dev *, int); @@ -494,6 +500,9 @@ struct nvmm_type { nvmm_submit_io_fn *submit_io; nvmm_erase_blk_fn *erase_blk; + /* Bad block mgmt */ + nvmm_mark_blk_fn *mark_blk; + /* Configuration management */ nvmm_get_lun_fn *get_lun; nvmm_reserve_lun *reserve_lun; @@ -527,13 +536,17 @@ extern int nvm_submit_io(struct nvm_dev *, struct nvm_rq *); extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *); extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *); extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *, - struct ppa_addr *, int); + struct ppa_addr *, int, int); extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *); extern int nvm_erase_ppa(struct nvm_dev *, 
struct ppa_addr *, int); extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *); extern void nvm_end_io(struct nvm_rq *, int); extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int, void *, int); +extern int nvm_submit_ppa_list(struct nvm_dev *, struct ppa_addr *, int, int, + int, void *, int); +extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); +extern int nvm_get_bb_tbl(struct nvm_dev *, struct ppa_addr, u8 *); /* sysblk.c */ #define NVM_SYSBLK_MAGIC 0x4E564D53 /* "NVMS" */ @@ -554,6 +567,13 @@ extern int nvm_update_sysblock(struct nvm_dev *, struct nvm_sb_info *); extern int nvm_init_sysblock(struct nvm_dev *, struct nvm_sb_info *); extern int nvm_dev_factory(struct nvm_dev *, int flags); + +#define nvm_for_each_lun_ppa(dev, ppa, chid, lunid) \ + for ((chid) = 0, (ppa).ppa = 0; (chid) < (dev)->nr_chnls; \ + (chid)++, (ppa).g.ch = (chid)) \ + for ((lunid) = 0; (lunid) < (dev)->luns_per_chnl; \ + (lunid)++, (ppa).g.lun = (lunid)) + #else /* CONFIG_NVM */ struct nvm_dev_ops; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index a55986f6fe38..7d51b2904cb7 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -21,13 +21,13 @@ enum { NVME_REG_CAP = 0x0000, /* Controller Capabilities */ NVME_REG_VS = 0x0008, /* Version */ NVME_REG_INTMS = 0x000c, /* Interrupt Mask Set */ - NVME_REG_INTMC = 0x0010, /* Interrupt Mask Set */ + NVME_REG_INTMC = 0x0010, /* Interrupt Mask Clear */ NVME_REG_CC = 0x0014, /* Controller Configuration */ NVME_REG_CSTS = 0x001c, /* Controller Status */ NVME_REG_NSSR = 0x0020, /* NVM Subsystem Reset */ NVME_REG_AQA = 0x0024, /* Admin Queue Attributes */ NVME_REG_ASQ = 0x0028, /* Admin SQ Base Address */ - NVME_REG_ACQ = 0x0030, /* Admin SQ Base Address */ + NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */ NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */ NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */ }; |
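On the lightnvm side, get_bb_tbl now returns the raw per-plane block table, and the fold into one status byte per block appears to move into the core as nvm_bb_tbl_fold(), declared above. A userspace sketch of that fold, mirroring the nvme_nvm_bb_tbl_fold() logic removed from the NVMe host driver; the BLK_* values here are illustrative rather than the kernel's NVM_BLK_T_* constants.

#include <stdio.h>

/* Illustrative block status values (the kernel uses NVM_BLK_T_*). */
#define BLK_FREE      0x0
#define BLK_BAD       0x1
#define BLK_GROWN_BAD 0x2

/*
 * Fold a per-plane table (plane_mode entries per block) into one status
 * byte per block; a bad entry on any plane takes precedence, as in the
 * folding code removed from drivers/nvme/host/lightnvm.c above.
 */
static void bb_tbl_fold(unsigned char *dst, int nr_blks,
                        const unsigned char *src, int plane_mode)
{
        for (int blk = 0; blk < nr_blks; blk++) {
                int off = blk * plane_mode;
                unsigned char type = src[off];

                for (int pl = 0; pl < plane_mode; pl++) {
                        if (src[off + pl] & (BLK_BAD | BLK_GROWN_BAD)) {
                                type = src[off + pl];
                                break;
                        }
                }
                dst[blk] = type;
        }
}

int main(void)
{
        /* Two blocks, two planes each: block 1 is grown-bad on plane 1. */
        const unsigned char per_plane[] = {
                BLK_FREE, BLK_FREE,
                BLK_FREE, BLK_GROWN_BAD,
        };
        unsigned char per_block[2];

        bb_tbl_fold(per_block, 2, per_plane, 2);
        for (int i = 0; i < 2; i++)
                printf("block %d: 0x%x\n", i, per_block[i]);
        return 0;
}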