From aef25338226660cdd4df908c2eff1abdcfca65e5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 12 Feb 2016 17:01:11 -0800 Subject: libnvdimm, nfit: centralize command status translation The return value from an 'ndctl_fn' reports the command execution status, i.e. was the command properly formatted and was it successfully submitted to the bus provider. The new 'cmd_rc' parameter allows the bus provider to communicate command specific results, translated into common error codes. Convert the ARS commands to this scheme to: 1/ Consolidate status reporting 2/ Prepare for for expanding ars unit test cases 3/ Make the implementation more generic Cc: Vishal Verma Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 141ffdd59960..f398953270d1 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -48,7 +48,7 @@ struct nvdimm; struct nvdimm_bus_descriptor; typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, - unsigned int buf_len); + unsigned int buf_len, int *cmd_rc); struct nd_namespace_label; struct nvdimm_drvdata; -- cgit v1.2.3 From 719994660c249a086a7493205c7f1562e30c38cb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 18 Feb 2016 10:29:49 -0800 Subject: libnvdimm: async notification support In preparation for asynchronous address range scrub support add an ability for the pmem driver to dynamically consume address range scrub results. Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 26 ++++++++++++++++++++++++++ drivers/nvdimm/nd.h | 2 ++ drivers/nvdimm/pmem.c | 15 +++++++++++++++ drivers/nvdimm/region.c | 12 ++++++++++++ include/linux/nd.h | 7 +++++++ 5 files changed, 62 insertions(+) (limited to 'include') diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index c3ba888e3e3a..2508251439e7 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -133,6 +133,32 @@ static int nvdimm_bus_remove(struct device *dev) return rc; } +void nd_device_notify(struct device *dev, enum nvdimm_event event) +{ + device_lock(dev); + if (dev->driver) { + struct nd_device_driver *nd_drv; + + nd_drv = to_nd_device_driver(dev->driver); + if (nd_drv->notify) + nd_drv->notify(dev, event); + } + device_unlock(dev); +} +EXPORT_SYMBOL(nd_device_notify); + +void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); + + if (!nvdimm_bus) + return; + + /* caller is responsible for holding a reference on the device */ + nd_device_notify(&nd_region->dev, event); +} +EXPORT_SYMBOL_GPL(nvdimm_region_notify); + static struct bus_type nvdimm_bus_type = { .name = "nd", .uevent = nvdimm_bus_uevent, diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index ba1633b9da31..78b82f6dd191 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -18,6 +18,7 @@ #include #include #include +#include #include "label.h" enum { @@ -168,6 +169,7 @@ int nd_integrity_init(struct gendisk *disk, unsigned long meta_size); void wait_nvdimm_bus_probe_idle(struct device *dev); void nd_device_register(struct device *dev); void nd_device_unregister(struct device *dev, enum nd_async_mode mode); +void nd_device_notify(struct device *dev, enum nvdimm_event event); int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, size_t len); ssize_t nd_sector_size_show(unsigned long current_lbasize, diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 8d0b54670184..efc2a5e671c6 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -488,12 +488,27 @@ static int nd_pmem_remove(struct device *dev) return 0; } +static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) +{ + struct pmem_device *pmem = dev_get_drvdata(dev); + struct nd_namespace_common *ndns = pmem->ndns; + + if (event != NVDIMM_REVALIDATE_POISON) + return; + + if (is_nd_btt(dev)) + nvdimm_namespace_add_poison(ndns, &pmem->bb, 0); + else + nvdimm_namespace_add_poison(ndns, &pmem->bb, pmem->data_offset); +} + MODULE_ALIAS("pmem"); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM); static struct nd_device_driver nd_pmem_driver = { .probe = nd_pmem_probe, .remove = nd_pmem_remove, + .notify = nd_pmem_notify, .drv = { .name = "nd_pmem", }, diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 7da63eac78ee..4b7715e29cff 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -93,9 +93,21 @@ static int nd_region_remove(struct device *dev) return 0; } +static int child_notify(struct device *dev, void *data) +{ + nd_device_notify(dev, *(enum nvdimm_event *) data); + return 0; +} + +static void nd_region_notify(struct device *dev, enum nvdimm_event event) +{ + device_for_each_child(dev, &event, child_notify); +} + static struct nd_device_driver nd_region_driver = { .probe = nd_region_probe, .remove = nd_region_remove, + .notify = nd_region_notify, .drv = { .name = "nd_region", }, diff --git a/include/linux/nd.h b/include/linux/nd.h index 507e47c86737..5489ab756d1a 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -16,11 +16,16 @@ #include #include +enum nvdimm_event { + NVDIMM_REVALIDATE_POISON, +}; + struct nd_device_driver { struct device_driver drv; unsigned long type; int (*probe)(struct device *dev); int (*remove)(struct device *dev); + void (*notify)(struct device *dev, enum nvdimm_event event); }; static inline struct nd_device_driver *to_nd_device_driver( @@ -144,6 +149,8 @@ static inline int nvdimm_write_bytes(struct nd_namespace_common *ndns, MODULE_ALIAS("nd:t" __stringify(type) "*") #define ND_DEVICE_MODALIAS_FMT "nd:t%d" +struct nd_region; +void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event); int __must_check __nd_driver_register(struct nd_device_driver *nd_drv, struct module *module, const char *mod_name); #define nd_driver_register(driver) \ -- cgit v1.2.3 From 7ae0fa439faff000744b234d04cb470bfd83593b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 19 Feb 2016 12:16:34 -0800 Subject: nfit, libnvdimm: async region scrub workqueue Introduce a workqueue that will be used to run address range scrub asynchronously with the rest of nvdimm device probing. Userspace still wants notification when probing operations complete, so introduce a new callback to flush this workqueue when userspace is awaiting probe completion. Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/acpi/nfit.h | 3 +++ drivers/nvdimm/core.c | 9 +++++++++ include/linux/libnvdimm.h | 1 + 4 files changed, 62 insertions(+) (limited to 'include') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 76c9444c9c60..4aa2bd54fc1d 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -34,6 +34,8 @@ static bool force_enable_dimms; module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status"); +static struct workqueue_struct *nfit_wq; + struct nfit_table_prev { struct list_head spas; struct list_head memdevs; @@ -1961,6 +1963,39 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) } EXPORT_SYMBOL_GPL(acpi_nfit_init); +struct acpi_nfit_flush_work { + struct work_struct work; + struct completion cmp; +}; + +static void flush_probe(struct work_struct *work) +{ + struct acpi_nfit_flush_work *flush; + + flush = container_of(work, typeof(*flush), work); + complete(&flush->cmp); +} + +static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + struct device *dev = acpi_desc->dev; + struct acpi_nfit_flush_work flush; + + /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */ + device_lock(dev); + device_unlock(dev); + + /* + * Scrub work could take 10s of seconds, userspace may give up so we + * need to be interruptible while waiting. + */ + INIT_WORK_ONSTACK(&flush.work, flush_probe); + COMPLETION_INITIALIZER_ONSTACK(flush.cmp); + queue_work(nfit_wq, &flush.work); + return wait_for_completion_interruptible(&flush.cmp); +} + void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) { struct nvdimm_bus_descriptor *nd_desc; @@ -1971,6 +2006,7 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) nd_desc = &acpi_desc->nd_desc; nd_desc->provider_name = "ACPI.NFIT"; nd_desc->ndctl = acpi_nfit_ctl; + nd_desc->flush_probe = acpi_nfit_flush_probe; nd_desc->attr_groups = acpi_nfit_attribute_groups; INIT_LIST_HEAD(&acpi_desc->spa_maps); @@ -2048,6 +2084,8 @@ static int acpi_nfit_remove(struct acpi_device *adev) { struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); + acpi_desc->cancel = 1; + flush_workqueue(nfit_wq); nvdimm_bus_unregister(acpi_desc->nvdimm_bus); return 0; } @@ -2079,6 +2117,12 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event) acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc); if (!acpi_desc->nvdimm_bus) goto out_unlock; + } else { + /* + * Finish previous registration before considering new + * regions. + */ + flush_workqueue(nfit_wq); } /* Evaluate _FIT */ @@ -2146,12 +2190,17 @@ static __init int nfit_init(void) acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]); acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]); + nfit_wq = create_singlethread_workqueue("nfit"); + if (!nfit_wq) + return -ENOMEM; + return acpi_bus_register_driver(&acpi_nfit_driver); } static __exit void nfit_exit(void) { acpi_bus_unregister_driver(&acpi_nfit_driver); + destroy_workqueue(nfit_wq); } module_init(nfit_init); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 524dec0a3adb..e8388fee4cc6 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -14,6 +14,7 @@ */ #ifndef __NFIT_H__ #define __NFIT_H__ +#include #include #include #include @@ -123,6 +124,8 @@ struct acpi_nfit_desc { struct list_head idts; struct nvdimm_bus *nvdimm_bus; struct device *dev; + struct work_struct work; + unsigned int cancel:1; unsigned long dimm_dsm_force_en; unsigned long bus_dsm_force_en; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index f309e6bf6833..79646d0c3277 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -298,6 +298,15 @@ static int flush_regions_dimms(struct device *dev, void *data) static ssize_t wait_probe_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + int rc; + + if (nd_desc->flush_probe) { + rc = nd_desc->flush_probe(nd_desc); + if (rc) + return rc; + } nd_synchronize(); device_for_each_child(dev, NULL, flush_regions_dimms); return sprintf(buf, "1\n"); diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index f398953270d1..2299f8742f7f 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -71,6 +71,7 @@ struct nvdimm_bus_descriptor { unsigned long dsm_mask; char *provider_name; ndctl_fn ndctl; + int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); }; struct nd_cmd_desc { -- cgit v1.2.3 From 87bf572e19a092cc9cc77d5a00d543a2b628c142 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 22 Feb 2016 21:50:31 -0800 Subject: nfit: disable userspace initiated ars during scrub While the nfit driver is issuing address range scrub commands and reaping the results do not permit an ars_start command issued from userspace. The scrub thread assumes that all ars completions are for scrubs initiated by platform firmware at boot, or by the nfit driver. Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 23 +++++++++++++++++++++++ drivers/nvdimm/bus.c | 18 +++++++++++++----- include/linux/libnvdimm.h | 2 ++ 3 files changed, 38 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 3646501b01d7..0def4ebf5d43 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -2186,6 +2186,28 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) return wait_for_completion_interruptible(&flush.cmp); } +static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + + if (nvdimm) + return 0; + if (cmd != ND_CMD_ARS_START) + return 0; + + /* + * The kernel and userspace may race to initiate a scrub, but + * the scrub thread is prepared to lose that initial race. It + * just needs guarantees that any ars it initiates are not + * interrupted by any intervening start reqeusts from userspace. + */ + if (work_busy(&acpi_desc->work)) + return -EBUSY; + + return 0; +} + void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) { struct nvdimm_bus_descriptor *nd_desc; @@ -2197,6 +2219,7 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) nd_desc->provider_name = "ACPI.NFIT"; nd_desc->ndctl = acpi_nfit_ctl; nd_desc->flush_probe = acpi_nfit_flush_probe; + nd_desc->clear_to_send = acpi_nfit_clear_to_send; nd_desc->attr_groups = acpi_nfit_attribute_groups; INIT_LIST_HEAD(&acpi_desc->spa_maps); diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 2508251439e7..228c0e9f430e 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -490,16 +490,24 @@ void wait_nvdimm_bus_probe_idle(struct device *dev) } /* set_config requires an idle interleave set */ -static int nd_cmd_clear_to_send(struct nvdimm *nvdimm, unsigned int cmd) +static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus, + struct nvdimm *nvdimm, unsigned int cmd) { - struct nvdimm_bus *nvdimm_bus; + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + + /* ask the bus provider if it would like to block this request */ + if (nd_desc->clear_to_send) { + int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd); + + if (rc) + return rc; + } if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA) return 0; - nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev); + /* prevent label manipulation while the kernel owns label updates */ wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev); - if (atomic_read(&nvdimm->busy)) return -EBUSY; return 0; @@ -609,7 +617,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, } nvdimm_bus_lock(&nvdimm_bus->dev); - rc = nd_cmd_clear_to_send(nvdimm, cmd); + rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, cmd); if (rc) goto out_unlock; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 2299f8742f7f..833867b9ddc2 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -72,6 +72,8 @@ struct nvdimm_bus_descriptor { char *provider_name; ndctl_fn ndctl; int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); + int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd); }; struct nd_cmd_desc { -- cgit v1.2.3 From d4f323672aa63713b7ca26da418f66cc30d3a41a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Mar 2016 16:08:54 -0800 Subject: nfit, libnvdimm: clear poison command support Add the boiler-plate for a 'clear error' command based on section 9.20.7.6 "Function Index 4 - Clear Uncorrectable Error" from the ACPI 6.1 specification, and add a reference implementation in nfit_test. Reviewed-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 12 +++++++++++- drivers/nvdimm/bus.c | 19 +++++++++++++++++++ include/uapi/linux/ndctl.h | 13 +++++++++++++ tools/testing/nvdimm/test/nfit.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 0def4ebf5d43..c067d7414007 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -87,6 +87,7 @@ static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc) static int xlat_status(void *buf, unsigned int cmd) { + struct nd_cmd_clear_error *clear_err; struct nd_cmd_ars_status *ars_status; struct nd_cmd_ars_start *ars_start; struct nd_cmd_ars_cap *ars_cap; @@ -149,6 +150,15 @@ static int xlat_status(void *buf, unsigned int cmd) if (ars_status->status >> 16) return -EIO; break; + case ND_CMD_CLEAR_ERROR: + clear_err = buf; + if (clear_err->status & 0xffff) + return -EIO; + if (!clear_err->cleared) + return -EIO; + if (clear_err->length > clear_err->cleared) + return clear_err->cleared; + break; default: break; } @@ -1002,7 +1012,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) if (!adev) return; - for (i = ND_CMD_ARS_CAP; i <= ND_CMD_ARS_STATUS; i++) + for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++) if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i)) set_bit(i, &nd_desc->dsm_mask); } diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 2e9ac22595ec..cb6fd64b13e3 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -421,6 +421,12 @@ static const struct nd_cmd_desc __nd_cmd_bus_descs[] = { .out_num = 3, .out_sizes = { 4, 4, UINT_MAX, }, }, + [ND_CMD_CLEAR_ERROR] = { + .in_num = 2, + .in_sizes = { 8, 8, }, + .out_num = 3, + .out_sizes = { 4, 4, 8, }, + }, }; const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd) @@ -489,6 +495,13 @@ void wait_nvdimm_bus_probe_idle(struct device *dev) } while (true); } +static int pmem_active(struct device *dev, void *data) +{ + if (is_nd_pmem(dev) && dev->driver) + return -EBUSY; + return 0; +} + /* set_config requires an idle interleave set */ static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, unsigned int cmd) @@ -503,6 +516,11 @@ static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus, return rc; } + /* require clear error to go through the pmem driver */ + if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR) + return device_for_each_child(&nvdimm_bus->dev, NULL, + pmem_active); + if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA) return 0; @@ -551,6 +569,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, case ND_CMD_VENDOR: case ND_CMD_SET_CONFIG_DATA: case ND_CMD_ARS_START: + case ND_CMD_CLEAR_ERROR: dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n", nvdimm ? nvdimm_cmd_name(cmd) : nvdimm_bus_cmd_name(cmd)); diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index cc68b92124d4..7cc28ab05b87 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -98,6 +98,14 @@ struct nd_cmd_ars_status { } __packed records[0]; } __packed; +struct nd_cmd_clear_error { + __u64 address; + __u64 length; + __u32 status; + __u8 reserved[4]; + __u64 cleared; +} __packed; + enum { ND_CMD_IMPLEMENTED = 0, @@ -105,6 +113,7 @@ enum { ND_CMD_ARS_CAP = 1, ND_CMD_ARS_START = 2, ND_CMD_ARS_STATUS = 3, + ND_CMD_CLEAR_ERROR = 4, /* per-dimm commands */ ND_CMD_SMART = 1, @@ -129,6 +138,7 @@ static inline const char *nvdimm_bus_cmd_name(unsigned cmd) [ND_CMD_ARS_CAP] = "ars_cap", [ND_CMD_ARS_START] = "ars_start", [ND_CMD_ARS_STATUS] = "ars_status", + [ND_CMD_CLEAR_ERROR] = "clear_error", }; if (cmd < ARRAY_SIZE(names) && names[cmd]) @@ -187,6 +197,9 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_IOCTL_ARS_STATUS _IOWR(ND_IOCTL, ND_CMD_ARS_STATUS,\ struct nd_cmd_ars_status) +#define ND_IOCTL_CLEAR_ERROR _IOWR(ND_IOCTL, ND_CMD_CLEAR_ERROR,\ + struct nd_cmd_clear_error) + #define ND_DEVICE_DIMM 1 /* nd_dimm: container for "config data" */ #define ND_DEVICE_REGION_PMEM 2 /* nd_region: (parent of PMEM namespaces) */ #define ND_DEVICE_REGION_BLK 3 /* nd_region: (parent of BLK namespaces) */ diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 1555c09efba1..3187322eeed7 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -223,6 +223,7 @@ static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd, } #define NFIT_TEST_ARS_RECORDS 4 +#define NFIT_TEST_CLEAR_ERR_UNIT 256 static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd, unsigned int buf_len) @@ -233,6 +234,7 @@ static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd, nd_cmd->max_ars_out = sizeof(struct nd_cmd_ars_status) + NFIT_TEST_ARS_RECORDS * sizeof(struct nd_ars_record); nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16; + nd_cmd->clear_err_unit = NFIT_TEST_CLEAR_ERR_UNIT; return 0; } @@ -306,6 +308,28 @@ static int nfit_test_cmd_ars_status(struct ars_state *ars_state, return 0; } +static int nfit_test_cmd_clear_error(struct nd_cmd_clear_error *clear_err, + unsigned int buf_len, int *cmd_rc) +{ + const u64 mask = NFIT_TEST_CLEAR_ERR_UNIT - 1; + if (buf_len < sizeof(*clear_err)) + return -EINVAL; + + if ((clear_err->address & mask) || (clear_err->length & mask)) + return -EINVAL; + + /* + * Report 'all clear' success for all commands even though a new + * scrub will find errors again. This is enough to have the + * error removed from the 'badblocks' tracking in the pmem + * driver. + */ + clear_err->status = 0; + clear_err->cleared = clear_err->length; + *cmd_rc = 0; + return 0; +} + static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) @@ -365,6 +389,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, rc = nfit_test_cmd_ars_status(ars_state, buf, buf_len, cmd_rc); break; + case ND_CMD_CLEAR_ERROR: + rc = nfit_test_cmd_clear_error(buf, buf_len, cmd_rc); + break; default: return -ENOTTY; } @@ -1230,6 +1257,7 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en); } static void nfit_test1_setup(struct nfit_test *t) @@ -1290,6 +1318,7 @@ static void nfit_test1_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, -- cgit v1.2.3 From ff3cc952d3f009e6c376cc40651b87187ce364a6 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 9 Mar 2016 12:47:04 -0700 Subject: resource: Add remove_resource interface insert_resource() and insert_resource_conflict() are called by resource producers to insert a new resource. When there is any conflict, they move conflicting resources down to the children of the new resource. There is no destructor of these interfaces, however. Add remove_resource(), which removes a resource previously inserted by insert_resource() or insert_resource_conflict(), and moves the children up to where they were before. __release_resource() is changed to have @release_child, so that this function can be used for remove_resource() as well. Also add comments to clarify that these functions are intended for producers of resources to avoid any confusion with request/release_resource() for consumers. Signed-off-by: Toshi Kani Cc: Ingo Molnar Cc: Borislav Petkov Cc: Andrew Morton Cc: Dan Williams Signed-off-by: Dan Williams --- include/linux/ioport.h | 1 + kernel/resource.c | 51 +++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 47 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index afb45597fb5f..8017b8bf45fa 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -174,6 +174,7 @@ extern void reserve_region_with_split(struct resource *root, extern struct resource *insert_resource_conflict(struct resource *parent, struct resource *new); extern int insert_resource(struct resource *parent, struct resource *new); extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new); +extern int remove_resource(struct resource *old); extern void arch_remove_reservations(struct resource *avail); extern int allocate_resource(struct resource *root, struct resource *new, resource_size_t size, resource_size_t min, diff --git a/kernel/resource.c b/kernel/resource.c index 5a56e8f24058..effb6ee2c3e8 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -233,9 +233,9 @@ static struct resource * __request_resource(struct resource *root, struct resour } } -static int __release_resource(struct resource *old) +static int __release_resource(struct resource *old, bool release_child) { - struct resource *tmp, **p; + struct resource *tmp, **p, *chd; p = &old->parent->child; for (;;) { @@ -243,7 +243,17 @@ static int __release_resource(struct resource *old) if (!tmp) break; if (tmp == old) { - *p = tmp->sibling; + if (release_child || !(tmp->child)) { + *p = tmp->sibling; + } else { + for (chd = tmp->child;; chd = chd->sibling) { + chd->parent = tmp->parent; + if (!(chd->sibling)) + break; + } + *p = tmp->child; + chd->sibling = tmp->sibling; + } old->parent = NULL; return 0; } @@ -325,7 +335,7 @@ int release_resource(struct resource *old) int retval; write_lock(&resource_lock); - retval = __release_resource(old); + retval = __release_resource(old, true); write_unlock(&resource_lock); return retval; } @@ -679,7 +689,7 @@ static int reallocate_resource(struct resource *root, struct resource *old, old->start = new.start; old->end = new.end; } else { - __release_resource(old); + __release_resource(old, true); *old = new; conflict = __request_resource(root, old); BUG_ON(conflict); @@ -825,6 +835,9 @@ static struct resource * __insert_resource(struct resource *parent, struct resou * entirely fit within the range of the new resource, then the new * resource is inserted and the conflicting resources become children of * the new resource. + * + * This function is intended for producers of resources, such as FW modules + * and bus drivers. */ struct resource *insert_resource_conflict(struct resource *parent, struct resource *new) { @@ -842,6 +855,9 @@ struct resource *insert_resource_conflict(struct resource *parent, struct resour * @new: new resource to insert * * Returns 0 on success, -EBUSY if the resource can't be inserted. + * + * This function is intended for producers of resources, such as FW modules + * and bus drivers. */ int insert_resource(struct resource *parent, struct resource *new) { @@ -885,6 +901,31 @@ void insert_resource_expand_to_fit(struct resource *root, struct resource *new) write_unlock(&resource_lock); } +/** + * remove_resource - Remove a resource in the resource tree + * @old: resource to remove + * + * Returns 0 on success, -EINVAL if the resource is not valid. + * + * This function removes a resource previously inserted by insert_resource() + * or insert_resource_conflict(), and moves the children (if any) up to + * where they were before. insert_resource() and insert_resource_conflict() + * insert a new resource, and move any conflicting resources down to the + * children of the new resource. + * + * insert_resource(), insert_resource_conflict() and remove_resource() are + * intended for producers of resources, such as FW modules and bus drivers. + */ +int remove_resource(struct resource *old) +{ + int retval; + + write_lock(&resource_lock); + retval = __release_resource(old, false); + write_unlock(&resource_lock); + return retval; +} + static int __adjust_resource(struct resource *res, resource_size_t start, resource_size_t size) { -- cgit v1.2.3 From 59e6473980f321c16299e12db69d1fabc2644a6f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Mar 2016 07:16:07 -0800 Subject: libnvdimm, pmem: clear poison on write If a write is directed at a known bad block perform the following: 1/ write the data 2/ send a clear poison command 3/ invalidate the poison out of the cache hierarchy Cc: Cc: Ross Zwisler Reviewed-by: Vishal Verma Signed-off-by: Dan Williams --- arch/x86/include/asm/pmem.h | 5 +++++ drivers/nvdimm/bus.c | 46 +++++++++++++++++++++++++++++++++++++++++++++ drivers/nvdimm/nd.h | 2 ++ drivers/nvdimm/pmem.c | 29 +++++++++++++++++++++++++++- include/linux/pmem.h | 19 +++++++++++++++++++ 5 files changed, 100 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index c57fd1ea9689..bf8b35d2035a 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -137,6 +137,11 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size) arch_wb_cache_pmem(addr, size); } +static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) +{ + clflush_cache_range((void __force *) addr, size); +} + static inline bool __arch_has_wmb_pmem(void) { /* diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index cb6fd64b13e3..33557481d452 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -159,6 +159,52 @@ void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event) } EXPORT_SYMBOL_GPL(nvdimm_region_notify); +long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, + unsigned int len) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc; + struct nd_cmd_clear_error clear_err; + struct nd_cmd_ars_cap ars_cap; + u32 clear_err_unit, mask; + int cmd_rc, rc; + + if (!nvdimm_bus) + return -ENXIO; + + nd_desc = nvdimm_bus->nd_desc; + if (!nd_desc->ndctl) + return -ENXIO; + + memset(&ars_cap, 0, sizeof(ars_cap)); + ars_cap.address = phys; + ars_cap.length = len; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, &ars_cap, + sizeof(ars_cap), &cmd_rc); + if (rc < 0) + return rc; + if (cmd_rc < 0) + return cmd_rc; + clear_err_unit = ars_cap.clear_err_unit; + if (!clear_err_unit || !is_power_of_2(clear_err_unit)) + return -ENXIO; + + mask = clear_err_unit - 1; + if ((phys | len) & mask) + return -ENXIO; + memset(&clear_err, 0, sizeof(clear_err)); + clear_err.address = phys; + clear_err.length = len; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CLEAR_ERROR, &clear_err, + sizeof(clear_err), &cmd_rc); + if (rc < 0) + return rc; + if (cmd_rc < 0) + return cmd_rc; + return clear_err.cleared; +} +EXPORT_SYMBOL_GPL(nvdimm_clear_poison); + static struct bus_type nvdimm_bus_type = { .name = "nd", .uevent = nvdimm_bus_uevent, diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 78b82f6dd191..1799bd97a9ce 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -186,6 +186,8 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, void *buf, size_t len); +long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, + unsigned int len); struct nd_btt *to_nd_btt(struct device *dev); struct nd_gen_sb { diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index e7b86a7fca0a..adc387236fe7 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -62,17 +62,40 @@ static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len) return false; } +static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, + unsigned int len) +{ + struct device *dev = disk_to_dev(pmem->pmem_disk); + sector_t sector; + long cleared; + + sector = (offset - pmem->data_offset) / 512; + cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len); + + if (cleared > 0 && cleared / 512) { + dev_dbg(dev, "%s: %llx clear %ld sector%s\n", + __func__, (unsigned long long) sector, + cleared / 512, cleared / 512 > 1 ? "s" : ""); + badblocks_clear(&pmem->bb, sector, cleared / 512); + } + invalidate_pmem(pmem->virt_addr + offset, len); +} + static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, unsigned int len, unsigned int off, int rw, sector_t sector) { int rc = 0; + bool bad_pmem = false; void *mem = kmap_atomic(page); phys_addr_t pmem_off = sector * 512 + pmem->data_offset; void __pmem *pmem_addr = pmem->virt_addr + pmem_off; + if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) + bad_pmem = true; + if (rw == READ) { - if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) + if (unlikely(bad_pmem)) rc = -EIO; else { memcpy_from_pmem(mem + off, pmem_addr, len); @@ -81,6 +104,10 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, } else { flush_dcache_page(page); memcpy_to_pmem(pmem_addr, mem + off, len); + if (unlikely(bad_pmem)) { + pmem_clear_poison(pmem, pmem_off, len); + memcpy_to_pmem(pmem_addr, mem + off, len); + } } kunmap_atomic(mem); diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 7c3d11a6b4ad..3ec5309e29f3 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -58,6 +58,11 @@ static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size) { BUG(); } + +static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) +{ + BUG(); +} #endif /* @@ -185,6 +190,20 @@ static inline void clear_pmem(void __pmem *addr, size_t size) default_clear_pmem(addr, size); } +/** + * invalidate_pmem - flush a pmem range from the cache hierarchy + * @addr: virtual start address + * @size: bytes to invalidate (internally aligned to cache line size) + * + * For platforms that support clearing poison this flushes any poisoned + * ranges out of the cache + */ +static inline void invalidate_pmem(void __pmem *addr, size_t size) +{ + if (arch_has_pmem_api()) + arch_invalidate_pmem(addr, size); +} + /** * wb_cache_pmem - write back processor cache for PMEM memory range * @addr: virtual start address -- cgit v1.2.3