diff options
-rw-r--r-- | Documentation/filesystems/proc.txt | 22
-rw-r--r-- | arch/x86/mm/init.c | 4
-rw-r--r-- | arch/x86/mm/init_64.c | 4
-rw-r--r-- | drivers/acpi/nfit.c | 298
-rw-r--r-- | drivers/acpi/nfit.h | 2
-rw-r--r-- | drivers/base/devres.c | 19
-rw-r--r-- | drivers/nvdimm/pmem.c | 28
-rw-r--r-- | fs/binfmt_elf.c | 10
-rw-r--r-- | fs/binfmt_elf_fdpic.c | 15
-rw-r--r-- | include/linux/device.h | 16
-rw-r--r-- | include/linux/pmem.h | 26
-rw-r--r-- | include/linux/sched.h | 4
-rw-r--r-- | kernel/memremap.c | 16
-rw-r--r-- | tools/testing/nvdimm/test/nfit.c | 164
14 files changed, 494 insertions, 134 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 1e4a6cc1b6ea..402ab99e409f 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1605,16 +1605,16 @@ Documentation/accounting. --------------------------------------------------------------- When a process is dumped, all anonymous memory is written to a core file as long as the size of the core file isn't limited. But sometimes we don't want -to dump some memory segments, for example, huge shared memory. Conversely, -sometimes we want to save file-backed memory segments into a core file, not -only the individual files. +to dump some memory segments, for example, huge shared memory or DAX. +Conversely, sometimes we want to save file-backed memory segments into a core +file, not only the individual files. /proc/<pid>/coredump_filter allows you to customize which memory segments will be dumped when the <pid> process is dumped. coredump_filter is a bitmask of memory types. If a bit of the bitmask is set, memory segments of the corresponding memory type are dumped, otherwise they are not dumped. -The following 7 memory types are supported: +The following 9 memory types are supported: - (bit 0) anonymous private memory - (bit 1) anonymous shared memory - (bit 2) file-backed private memory @@ -1623,20 +1623,22 @@ The following 7 memory types are supported: effective only if the bit 2 is cleared) - (bit 5) hugetlb private memory - (bit 6) hugetlb shared memory + - (bit 7) DAX private memory + - (bit 8) DAX shared memory Note that MMIO pages such as frame buffer are never dumped and vDSO pages are always dumped regardless of the bitmask status. - Note bit 0-4 doesn't effect any hugetlb memory. hugetlb memory are only - effected by bit 5-6. + Note that bits 0-4 don't affect hugetlb or DAX memory. hugetlb memory is + only affected by bit 5-6, and DAX is only affected by bits 7-8. 
-Default value of coredump_filter is 0x23; this means all anonymous memory -segments and hugetlb private memory are dumped. +The default value of coredump_filter is 0x33; this means all anonymous memory +segments, ELF header pages and hugetlb private memory are dumped. If you don't want to dump all shared memory segments attached to pid 1234, -write 0x21 to the process's proc file. +write 0x31 to the process's proc file. - $ echo 0x21 > /proc/1234/coredump_filter + $ echo 0x31 > /proc/1234/coredump_filter When a new process is created, the process inherits the bitmask status from its parent. It is useful to set up coredump_filter before the program runs. diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 1f37cb2b56a9..493f54172b4a 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -354,7 +354,7 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, } for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", + pr_debug(" [mem %#010lx-%#010lx] page %s\n", mr[i].start, mr[i].end - 1, page_size_string(&mr[i])); @@ -401,7 +401,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long ret = 0; int nr_range, i; - pr_info("init_memory_mapping: [mem %#010lx-%#010lx]\n", + pr_debug("init_memory_mapping: [mem %#010lx-%#010lx]\n", start, end - 1); memset(mr, 0, sizeof(mr)); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5ed62eff31bd..ec081fe0ce2c 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1270,7 +1270,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, /* check to see if we have contiguous blocks */ if (p_end != p || node_start != node) { if (p_start) - printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n", + pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n", addr_start, addr_end-1, p_start, p_end-1, node_start); addr_start = addr; node_start = node; @@ -1368,7 +1368,7 @@ void register_page_bootmem_memmap(unsigned 
long section_nr, void __meminit vmemmap_populate_print_last(void) { if (p_start) { - printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n", + pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n", addr_start, addr_end-1, p_start, p_end-1, node_start); p_start = NULL; p_end = NULL; diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 6e26761a27da..f7dab53b352a 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -33,6 +33,15 @@ static bool force_enable_dimms; module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status"); +struct nfit_table_prev { + struct list_head spas; + struct list_head memdevs; + struct list_head dcrs; + struct list_head bdws; + struct list_head idts; + struct list_head flushes; +}; + static u8 nfit_uuid[NFIT_UUID_MAX][16]; const u8 *to_nfit_uuid(enum nfit_uuids id) @@ -221,12 +230,20 @@ static int nfit_spa_type(struct acpi_nfit_system_address *spa) } static bool add_spa(struct acpi_nfit_desc *acpi_desc, + struct nfit_table_prev *prev, struct acpi_nfit_system_address *spa) { struct device *dev = acpi_desc->dev; - struct nfit_spa *nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa), - GFP_KERNEL); + struct nfit_spa *nfit_spa; + + list_for_each_entry(nfit_spa, &prev->spas, list) { + if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) { + list_move_tail(&nfit_spa->list, &acpi_desc->spas); + return true; + } + } + nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa), GFP_KERNEL); if (!nfit_spa) return false; INIT_LIST_HEAD(&nfit_spa->list); @@ -239,12 +256,19 @@ static bool add_spa(struct acpi_nfit_desc *acpi_desc, } static bool add_memdev(struct acpi_nfit_desc *acpi_desc, + struct nfit_table_prev *prev, struct acpi_nfit_memory_map *memdev) { struct device *dev = acpi_desc->dev; - struct nfit_memdev *nfit_memdev = devm_kzalloc(dev, - sizeof(*nfit_memdev), GFP_KERNEL); + struct nfit_memdev *nfit_memdev; + list_for_each_entry(nfit_memdev, &prev->memdevs, list) + if 
(memcmp(nfit_memdev->memdev, memdev, sizeof(*memdev)) == 0) { + list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs); + return true; + } + + nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev), GFP_KERNEL); if (!nfit_memdev) return false; INIT_LIST_HEAD(&nfit_memdev->list); @@ -257,12 +281,19 @@ static bool add_memdev(struct acpi_nfit_desc *acpi_desc, } static bool add_dcr(struct acpi_nfit_desc *acpi_desc, + struct nfit_table_prev *prev, struct acpi_nfit_control_region *dcr) { struct device *dev = acpi_desc->dev; - struct nfit_dcr *nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr), - GFP_KERNEL); + struct nfit_dcr *nfit_dcr; + + list_for_each_entry(nfit_dcr, &prev->dcrs, list) + if (memcmp(nfit_dcr->dcr, dcr, sizeof(*dcr)) == 0) { + list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs); + return true; + } + nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr), GFP_KERNEL); if (!nfit_dcr) return false; INIT_LIST_HEAD(&nfit_dcr->list); @@ -274,12 +305,19 @@ static bool add_dcr(struct acpi_nfit_desc *acpi_desc, } static bool add_bdw(struct acpi_nfit_desc *acpi_desc, + struct nfit_table_prev *prev, struct acpi_nfit_data_region *bdw) { struct device *dev = acpi_desc->dev; - struct nfit_bdw *nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw), - GFP_KERNEL); + struct nfit_bdw *nfit_bdw; + + list_for_each_entry(nfit_bdw, &prev->bdws, list) + if (memcmp(nfit_bdw->bdw, bdw, sizeof(*bdw)) == 0) { + list_move_tail(&nfit_bdw->list, &acpi_desc->bdws); + return true; + } + nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw), GFP_KERNEL); if (!nfit_bdw) return false; INIT_LIST_HEAD(&nfit_bdw->list); @@ -291,12 +329,19 @@ static bool add_bdw(struct acpi_nfit_desc *acpi_desc, } static bool add_idt(struct acpi_nfit_desc *acpi_desc, + struct nfit_table_prev *prev, struct acpi_nfit_interleave *idt) { struct device *dev = acpi_desc->dev; - struct nfit_idt *nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt), - GFP_KERNEL); + struct nfit_idt *nfit_idt; + + list_for_each_entry(nfit_idt, &prev->idts, list) + if 
(memcmp(nfit_idt->idt, idt, sizeof(*idt)) == 0) { + list_move_tail(&nfit_idt->list, &acpi_desc->idts); + return true; + } + nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt), GFP_KERNEL); if (!nfit_idt) return false; INIT_LIST_HEAD(&nfit_idt->list); @@ -308,12 +353,19 @@ static bool add_idt(struct acpi_nfit_desc *acpi_desc, } static bool add_flush(struct acpi_nfit_desc *acpi_desc, + struct nfit_table_prev *prev, struct acpi_nfit_flush_address *flush) { struct device *dev = acpi_desc->dev; - struct nfit_flush *nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush), - GFP_KERNEL); + struct nfit_flush *nfit_flush; + list_for_each_entry(nfit_flush, &prev->flushes, list) + if (memcmp(nfit_flush->flush, flush, sizeof(*flush)) == 0) { + list_move_tail(&nfit_flush->list, &acpi_desc->flushes); + return true; + } + + nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush), GFP_KERNEL); if (!nfit_flush) return false; INIT_LIST_HEAD(&nfit_flush->list); @@ -324,8 +376,8 @@ static bool add_flush(struct acpi_nfit_desc *acpi_desc, return true; } -static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table, - const void *end) +static void *add_table(struct acpi_nfit_desc *acpi_desc, + struct nfit_table_prev *prev, void *table, const void *end) { struct device *dev = acpi_desc->dev; struct acpi_nfit_header *hdr; @@ -335,29 +387,35 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table, return NULL; hdr = table; + if (!hdr->length) { + dev_warn(dev, "found a zero length table '%d' parsing nfit\n", + hdr->type); + return NULL; + } + switch (hdr->type) { case ACPI_NFIT_TYPE_SYSTEM_ADDRESS: - if (!add_spa(acpi_desc, table)) + if (!add_spa(acpi_desc, prev, table)) return err; break; case ACPI_NFIT_TYPE_MEMORY_MAP: - if (!add_memdev(acpi_desc, table)) + if (!add_memdev(acpi_desc, prev, table)) return err; break; case ACPI_NFIT_TYPE_CONTROL_REGION: - if (!add_dcr(acpi_desc, table)) + if (!add_dcr(acpi_desc, prev, table)) return err; break; case ACPI_NFIT_TYPE_DATA_REGION: - 
if (!add_bdw(acpi_desc, table)) + if (!add_bdw(acpi_desc, prev, table)) return err; break; case ACPI_NFIT_TYPE_INTERLEAVE: - if (!add_idt(acpi_desc, table)) + if (!add_idt(acpi_desc, prev, table)) return err; break; case ACPI_NFIT_TYPE_FLUSH_ADDRESS: - if (!add_flush(acpi_desc, table)) + if (!add_flush(acpi_desc, prev, table)) return err; break; case ACPI_NFIT_TYPE_SMBIOS: @@ -802,12 +860,7 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) device_handle = __to_nfit_memdev(nfit_mem)->device_handle; nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle); if (nvdimm) { - /* - * If for some reason we find multiple DCRs the - * first one wins - */ - dev_err(acpi_desc->dev, "duplicate DCR detected: %s\n", - nvdimm_name(nvdimm)); + dimm_count++; continue; } @@ -1476,6 +1529,9 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, struct resource res; int count = 0, rc; + if (nfit_spa->is_registered) + return 0; + if (spa->range_index == 0) { dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n", __func__); @@ -1529,6 +1585,8 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, if (!nvdimm_volatile_region_create(nvdimm_bus, ndr_desc)) return -ENOMEM; } + + nfit_spa->is_registered = 1; return 0; } @@ -1545,71 +1603,101 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) return 0; } +static int acpi_nfit_check_deletions(struct acpi_nfit_desc *acpi_desc, + struct nfit_table_prev *prev) +{ + struct device *dev = acpi_desc->dev; + + if (!list_empty(&prev->spas) || + !list_empty(&prev->memdevs) || + !list_empty(&prev->dcrs) || + !list_empty(&prev->bdws) || + !list_empty(&prev->idts) || + !list_empty(&prev->flushes)) { + dev_err(dev, "new nfit deletes entries (unsupported)\n"); + return -ENXIO; + } + return 0; +} + int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) { struct device *dev = acpi_desc->dev; + struct nfit_table_prev prev; const void *end; u8 *data; int rc; - 
INIT_LIST_HEAD(&acpi_desc->spa_maps); - INIT_LIST_HEAD(&acpi_desc->spas); - INIT_LIST_HEAD(&acpi_desc->dcrs); - INIT_LIST_HEAD(&acpi_desc->bdws); - INIT_LIST_HEAD(&acpi_desc->idts); - INIT_LIST_HEAD(&acpi_desc->flushes); - INIT_LIST_HEAD(&acpi_desc->memdevs); - INIT_LIST_HEAD(&acpi_desc->dimms); - mutex_init(&acpi_desc->spa_map_mutex); + mutex_lock(&acpi_desc->init_mutex); + + INIT_LIST_HEAD(&prev.spas); + INIT_LIST_HEAD(&prev.memdevs); + INIT_LIST_HEAD(&prev.dcrs); + INIT_LIST_HEAD(&prev.bdws); + INIT_LIST_HEAD(&prev.idts); + INIT_LIST_HEAD(&prev.flushes); + + list_cut_position(&prev.spas, &acpi_desc->spas, + acpi_desc->spas.prev); + list_cut_position(&prev.memdevs, &acpi_desc->memdevs, + acpi_desc->memdevs.prev); + list_cut_position(&prev.dcrs, &acpi_desc->dcrs, + acpi_desc->dcrs.prev); + list_cut_position(&prev.bdws, &acpi_desc->bdws, + acpi_desc->bdws.prev); + list_cut_position(&prev.idts, &acpi_desc->idts, + acpi_desc->idts.prev); + list_cut_position(&prev.flushes, &acpi_desc->flushes, + acpi_desc->flushes.prev); data = (u8 *) acpi_desc->nfit; end = data + sz; data += sizeof(struct acpi_table_nfit); while (!IS_ERR_OR_NULL(data)) - data = add_table(acpi_desc, data, end); + data = add_table(acpi_desc, &prev, data, end); if (IS_ERR(data)) { dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__, PTR_ERR(data)); - return PTR_ERR(data); + rc = PTR_ERR(data); + goto out_unlock; } - if (nfit_mem_init(acpi_desc) != 0) - return -ENOMEM; + rc = acpi_nfit_check_deletions(acpi_desc, &prev); + if (rc) + goto out_unlock; + + if (nfit_mem_init(acpi_desc) != 0) { + rc = -ENOMEM; + goto out_unlock; + } acpi_nfit_init_dsms(acpi_desc); rc = acpi_nfit_register_dimms(acpi_desc); if (rc) - return rc; + goto out_unlock; + + rc = acpi_nfit_register_regions(acpi_desc); - return acpi_nfit_register_regions(acpi_desc); + out_unlock: + mutex_unlock(&acpi_desc->init_mutex); + return rc; } EXPORT_SYMBOL_GPL(acpi_nfit_init); -static int acpi_nfit_add(struct acpi_device *adev) +static 
struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev) { struct nvdimm_bus_descriptor *nd_desc; struct acpi_nfit_desc *acpi_desc; struct device *dev = &adev->dev; - struct acpi_table_header *tbl; - acpi_status status = AE_OK; - acpi_size sz; - int rc; - - status = acpi_get_table_with_size("NFIT", 0, &tbl, &sz); - if (ACPI_FAILURE(status)) { - dev_err(dev, "failed to find NFIT\n"); - return -ENXIO; - } acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); if (!acpi_desc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); dev_set_drvdata(dev, acpi_desc); acpi_desc->dev = dev; - acpi_desc->nfit = (struct acpi_table_nfit *) tbl; acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io; nd_desc = &acpi_desc->nd_desc; nd_desc->provider_name = "ACPI.NFIT"; @@ -1617,8 +1705,57 @@ static int acpi_nfit_add(struct acpi_device *adev) nd_desc->attr_groups = acpi_nfit_attribute_groups; acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, nd_desc); - if (!acpi_desc->nvdimm_bus) - return -ENXIO; + if (!acpi_desc->nvdimm_bus) { + devm_kfree(dev, acpi_desc); + return ERR_PTR(-ENXIO); + } + + INIT_LIST_HEAD(&acpi_desc->spa_maps); + INIT_LIST_HEAD(&acpi_desc->spas); + INIT_LIST_HEAD(&acpi_desc->dcrs); + INIT_LIST_HEAD(&acpi_desc->bdws); + INIT_LIST_HEAD(&acpi_desc->idts); + INIT_LIST_HEAD(&acpi_desc->flushes); + INIT_LIST_HEAD(&acpi_desc->memdevs); + INIT_LIST_HEAD(&acpi_desc->dimms); + mutex_init(&acpi_desc->spa_map_mutex); + mutex_init(&acpi_desc->init_mutex); + + return acpi_desc; +} + +static int acpi_nfit_add(struct acpi_device *adev) +{ + struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_nfit_desc *acpi_desc; + struct device *dev = &adev->dev; + struct acpi_table_header *tbl; + acpi_status status = AE_OK; + acpi_size sz; + int rc; + + status = acpi_get_table_with_size("NFIT", 0, &tbl, &sz); + if (ACPI_FAILURE(status)) { + /* This is ok, we could have an nvdimm hotplugged later */ + dev_dbg(dev, "failed to find NFIT at startup\n"); + return 0; + } + + 
acpi_desc = acpi_nfit_desc_init(adev); + if (IS_ERR(acpi_desc)) { + dev_err(dev, "%s: error initializing acpi_desc: %ld\n", + __func__, PTR_ERR(acpi_desc)); + return PTR_ERR(acpi_desc); + } + + acpi_desc->nfit = (struct acpi_table_nfit *) tbl; + + /* Evaluate _FIT and override with that if present */ + status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf); + if (ACPI_SUCCESS(status) && buf.length > 0) { + acpi_desc->nfit = (struct acpi_table_nfit *)buf.pointer; + sz = buf.length; + } rc = acpi_nfit_init(acpi_desc, sz); if (rc) { @@ -1636,6 +1773,54 @@ static int acpi_nfit_remove(struct acpi_device *adev) return 0; } +static void acpi_nfit_notify(struct acpi_device *adev, u32 event) +{ + struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); + struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_table_nfit *nfit_saved; + struct device *dev = &adev->dev; + acpi_status status; + int ret; + + dev_dbg(dev, "%s: event: %d\n", __func__, event); + + device_lock(dev); + if (!dev->driver) { + /* dev->driver may be null if we're being removed */ + dev_dbg(dev, "%s: no driver found for dev\n", __func__); + return; + } + + if (!acpi_desc) { + acpi_desc = acpi_nfit_desc_init(adev); + if (IS_ERR(acpi_desc)) { + dev_err(dev, "%s: error initializing acpi_desc: %ld\n", + __func__, PTR_ERR(acpi_desc)); + goto out_unlock; + } + } + + /* Evaluate _FIT */ + status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf); + if (ACPI_FAILURE(status)) { + dev_err(dev, "failed to evaluate _FIT\n"); + goto out_unlock; + } + + nfit_saved = acpi_desc->nfit; + acpi_desc->nfit = (struct acpi_table_nfit *)buf.pointer; + ret = acpi_nfit_init(acpi_desc, buf.length); + if (!ret) { + /* Merge failed, restore old nfit, and exit */ + acpi_desc->nfit = nfit_saved; + dev_err(dev, "failed to merge updated NFIT\n"); + } + kfree(buf.pointer); + + out_unlock: + device_unlock(dev); +} + static const struct acpi_device_id acpi_nfit_ids[] = { { "ACPI0012", 0 }, { "", 0 }, @@ 
-1648,6 +1833,7 @@ static struct acpi_driver acpi_nfit_driver = { .ops = { .add = acpi_nfit_add, .remove = acpi_nfit_remove, + .notify = acpi_nfit_notify, }, }; diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 329a1eba0c16..2ea5c0797c8f 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -48,6 +48,7 @@ enum { struct nfit_spa { struct acpi_nfit_system_address *spa; struct list_head list; + int is_registered; }; struct nfit_dcr { @@ -97,6 +98,7 @@ struct acpi_nfit_desc { struct nvdimm_bus_descriptor nd_desc; struct acpi_table_nfit *nfit; struct mutex spa_map_mutex; + struct mutex init_mutex; struct list_head spa_maps; struct list_head memdevs; struct list_head flushes; diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 875464690117..8fc654f0807b 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -82,12 +82,12 @@ static struct devres_group * node_to_group(struct devres_node *node) } static __always_inline struct devres * alloc_dr(dr_release_t release, - size_t size, gfp_t gfp) + size_t size, gfp_t gfp, int nid) { size_t tot_size = sizeof(struct devres) + size; struct devres *dr; - dr = kmalloc_track_caller(tot_size, gfp); + dr = kmalloc_node_track_caller(tot_size, gfp, nid); if (unlikely(!dr)) return NULL; @@ -106,24 +106,25 @@ static void add_dr(struct device *dev, struct devres_node *node) } #ifdef CONFIG_DEBUG_DEVRES -void * __devres_alloc(dr_release_t release, size_t size, gfp_t gfp, +void * __devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, int nid, const char *name) { struct devres *dr; - dr = alloc_dr(release, size, gfp | __GFP_ZERO); + dr = alloc_dr(release, size, gfp | __GFP_ZERO, nid); if (unlikely(!dr)) return NULL; set_node_dbginfo(&dr->node, name, size); return dr->data; } -EXPORT_SYMBOL_GPL(__devres_alloc); +EXPORT_SYMBOL_GPL(__devres_alloc_node); #else /** * devres_alloc - Allocate device resource data * @release: Release function devres will be associated with * @size: Allocation size * 
@gfp: Allocation flags + * @nid: NUMA node * * Allocate devres of @size bytes. The allocated area is zeroed, then * associated with @release. The returned pointer can be passed to @@ -132,16 +133,16 @@ EXPORT_SYMBOL_GPL(__devres_alloc); * RETURNS: * Pointer to allocated devres on success, NULL on failure. */ -void * devres_alloc(dr_release_t release, size_t size, gfp_t gfp) +void * devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, int nid) { struct devres *dr; - dr = alloc_dr(release, size, gfp | __GFP_ZERO); + dr = alloc_dr(release, size, gfp | __GFP_ZERO, nid); if (unlikely(!dr)) return NULL; return dr->data; } -EXPORT_SYMBOL_GPL(devres_alloc); +EXPORT_SYMBOL_GPL(devres_alloc_node); #endif /** @@ -776,7 +777,7 @@ void * devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) struct devres *dr; /* use raw alloc_dr for kmalloc caller tracing */ - dr = alloc_dr(devm_kmalloc_release, size, gfp); + dr = alloc_dr(devm_kmalloc_release, size, gfp, dev_to_node(dev)); if (unlikely(!dr)) return NULL; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 0ba6a978f227..349f03e7ed06 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -150,18 +150,15 @@ static struct pmem_device *pmem_alloc(struct device *dev, return ERR_PTR(-EBUSY); } - if (pmem_should_map_pages(dev)) { - void *addr = devm_memremap_pages(dev, res); + if (pmem_should_map_pages(dev)) + pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res); + else + pmem->virt_addr = (void __pmem *) devm_memremap(dev, + pmem->phys_addr, pmem->size, + ARCH_MEMREMAP_PMEM); - if (IS_ERR(addr)) - return addr; - pmem->virt_addr = (void __pmem *) addr; - } else { - pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr, - pmem->size); - if (!pmem->virt_addr) - return ERR_PTR(-ENXIO); - } + if (IS_ERR(pmem->virt_addr)) + return (void __force *) pmem->virt_addr; return pmem; } @@ -179,9 +176,10 @@ static void pmem_detach_disk(struct pmem_device *pmem) static int pmem_attach_disk(struct 
device *dev, struct nd_namespace_common *ndns, struct pmem_device *pmem) { + int nid = dev_to_node(dev); struct gendisk *disk; - pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL); + pmem->pmem_queue = blk_alloc_queue_node(GFP_KERNEL, nid); if (!pmem->pmem_queue) return -ENOMEM; @@ -191,7 +189,7 @@ static int pmem_attach_disk(struct device *dev, blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue); - disk = alloc_disk(0); + disk = alloc_disk_node(0, nid); if (!disk) { blk_cleanup_queue(pmem->pmem_queue); return -ENOMEM; @@ -363,8 +361,8 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) /* establish pfn range for lookup, and switch to direct map */ pmem = dev_get_drvdata(dev); - memunmap_pmem(dev, pmem->virt_addr); - pmem->virt_addr = (void __pmem *)devm_memremap_pages(dev, &nsio->res); + devm_memunmap(dev, (void __force *) pmem->virt_addr); + pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res); if (IS_ERR(pmem->virt_addr)) { rc = PTR_ERR(pmem->virt_addr); goto err; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6b659967898e..5f399ea1d20a 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -35,6 +35,7 @@ #include <linux/utsname.h> #include <linux/coredump.h> #include <linux/sched.h> +#include <linux/dax.h> #include <asm/uaccess.h> #include <asm/param.h> #include <asm/page.h> @@ -1236,6 +1237,15 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, if (vma->vm_flags & VM_DONTDUMP) return 0; + /* support for DAX */ + if (vma_is_dax(vma)) { + if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED)) + goto whole; + if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE)) + goto whole; + return 0; + } + /* Hugetlb memory check */ if (vma->vm_flags & VM_HUGETLB) { if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 50d15b7b0ca9..b1adb92e69de 100644 --- a/fs/binfmt_elf_fdpic.c 
+++ b/fs/binfmt_elf_fdpic.c @@ -35,6 +35,7 @@ #include <linux/elf-fdpic.h> #include <linux/elfcore.h> #include <linux/coredump.h> +#include <linux/dax.h> #include <asm/uaccess.h> #include <asm/param.h> @@ -1231,6 +1232,20 @@ static int maydump(struct vm_area_struct *vma, unsigned long mm_flags) return 0; } + /* support for DAX */ + if (vma_is_dax(vma)) { + if (vma->vm_flags & VM_SHARED) { + dump_ok = test_bit(MMF_DUMP_DAX_SHARED, &mm_flags); + kdcore("%08lx: %08lx: %s (DAX shared)", vma->vm_start, + vma->vm_flags, dump_ok ? "yes" : "no"); + } else { + dump_ok = test_bit(MMF_DUMP_DAX_PRIVATE, &mm_flags); + kdcore("%08lx: %08lx: %s (DAX private)", vma->vm_start, + vma->vm_flags, dump_ok ? "yes" : "no"); + } + return dump_ok; + } + /* By default, dump shared memory if mapped from an anonymous file. */ if (vma->vm_flags & VM_SHARED) { if (file_inode(vma->vm_file)->i_nlink == 0) { diff --git a/include/linux/device.h b/include/linux/device.h index 5d7bc6349930..b8f411b57dcb 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -604,13 +604,21 @@ typedef void (*dr_release_t)(struct device *dev, void *res); typedef int (*dr_match_t)(struct device *dev, void *res, void *match_data); #ifdef CONFIG_DEBUG_DEVRES -extern void *__devres_alloc(dr_release_t release, size_t size, gfp_t gfp, - const char *name); +extern void *__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, + int nid, const char *name); #define devres_alloc(release, size, gfp) \ - __devres_alloc(release, size, gfp, #release) + __devres_alloc_node(release, size, gfp, NUMA_NO_NODE, #release) +#define devres_alloc_node(release, size, gfp, nid) \ + __devres_alloc_node(release, size, gfp, nid, #release) #else -extern void *devres_alloc(dr_release_t release, size_t size, gfp_t gfp); +extern void *devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, + int nid); +static inline void *devres_alloc(dr_release_t release, size_t size, gfp_t gfp) +{ + return devres_alloc_node(release, 
size, gfp, NUMA_NO_NODE); +} #endif + extern void devres_for_each_res(struct device *dev, dr_release_t release, dr_match_t match, void *match_data, void (*fn)(struct device *, void *, void *), diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 85f810b33917..acfea8ce4a07 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -65,11 +65,6 @@ static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t si memcpy(dst, (void __force const *) src, size); } -static inline void memunmap_pmem(struct device *dev, void __pmem *addr) -{ - devm_memunmap(dev, (void __force *) addr); -} - static inline bool arch_has_pmem_api(void) { return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); @@ -93,7 +88,7 @@ static inline bool arch_has_wmb_pmem(void) * These defaults seek to offer decent performance and minimize the * window between i/o completion and writes being durable on media. * However, it is undefined / architecture specific whether - * default_memremap_pmem + default_memcpy_to_pmem is sufficient for + * ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for * making data durable relative to i/o completion. */ static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src, @@ -117,25 +112,6 @@ static inline void default_clear_pmem(void __pmem *addr, size_t size) } /** - * memremap_pmem - map physical persistent memory for pmem api - * @offset: physical address of persistent memory - * @size: size of the mapping - * - * Establish a mapping of the architecture specific memory type expected - * by memcpy_to_pmem() and wmb_pmem(). For example, it may be - * the case that an uncacheable or writethrough mapping is sufficient, - * or a writeback mapping provided memcpy_to_pmem() and - * wmb_pmem() arrange for the data to be written through the - * cache to persistent media. 
- */ -static inline void __pmem *memremap_pmem(struct device *dev, - resource_size_t offset, unsigned long size) -{ - return (void __pmem *) devm_memremap(dev, offset, size, - ARCH_MEMREMAP_PMEM); -} - -/** * memcpy_to_pmem - copy data to persistent memory * @dst: destination buffer for the copy * @src: source buffer for the copy diff --git a/include/linux/sched.h b/include/linux/sched.h index 4069febaa34a..edad7a43edea 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -484,9 +484,11 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_DUMP_ELF_HEADERS 6 #define MMF_DUMP_HUGETLB_PRIVATE 7 #define MMF_DUMP_HUGETLB_SHARED 8 +#define MMF_DUMP_DAX_PRIVATE 9 +#define MMF_DUMP_DAX_SHARED 10 #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS -#define MMF_DUMP_FILTER_BITS 7 +#define MMF_DUMP_FILTER_BITS 9 #define MMF_DUMP_FILTER_MASK \ (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) #define MMF_DUMP_FILTER_DEFAULT \ diff --git a/kernel/memremap.c b/kernel/memremap.c index 9d6b55587eaa..7658d32c5c78 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -124,9 +124,10 @@ void *devm_memremap(struct device *dev, resource_size_t offset, { void **ptr, *addr; - ptr = devres_alloc(devm_memremap_release, sizeof(*ptr), GFP_KERNEL); + ptr = devres_alloc_node(devm_memremap_release, sizeof(*ptr), GFP_KERNEL, + dev_to_node(dev)); if (!ptr) - return NULL; + return ERR_PTR(-ENOMEM); addr = memremap(offset, size, flags); if (addr) { @@ -141,9 +142,8 @@ EXPORT_SYMBOL(devm_memremap); void devm_memunmap(struct device *dev, void *addr) { - WARN_ON(devres_destroy(dev, devm_memremap_release, devm_memremap_match, - addr)); - memunmap(addr); + WARN_ON(devres_release(dev, devm_memremap_release, + devm_memremap_match, addr)); } EXPORT_SYMBOL(devm_memunmap); @@ -176,8 +176,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res) if (is_ram == REGION_INTERSECTS) return __va(res->start); - page_map = devres_alloc(devm_memremap_pages_release, 
- sizeof(*page_map), GFP_KERNEL); + page_map = devres_alloc_node(devm_memremap_pages_release, + sizeof(*page_map), GFP_KERNEL, dev_to_node(dev)); if (!page_map) return ERR_PTR(-ENOMEM); @@ -185,7 +185,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res) nid = dev_to_node(dev); if (nid < 0) - nid = 0; + nid = numa_mem_id(); error = arch_add_memory(nid, res->start, resource_size(res), true); if (error) { diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 021e6f97f33e..dce346aa94ea 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -17,8 +17,10 @@ #include <linux/vmalloc.h> #include <linux/device.h> #include <linux/module.h> +#include <linux/mutex.h> #include <linux/ndctl.h> #include <linux/sizes.h> +#include <linux/list.h> #include <linux/slab.h> #include <nfit.h> #include <nd.h> @@ -44,6 +46,15 @@ * +------+ | blk5.0 | pm1.0 | 3 region5 * +-------------------------+----------+-+-------+ * + * +--+---+ + * | cpu1 | + * +--+---+ (Hotplug DIMM) + * | +----------------------------------------------+ + * +--+---+ | blk6.0/pm7.0 | 4 region6/7 + * | imc0 +--+----------------------------------------------+ + * +------+ + * + * * *) In this layout we have four dimms and two memory controllers in one * socket. Each unique interface (BLK or PMEM) to DPA space * is identified by a region device with a dynamically assigned id. @@ -85,8 +96,8 @@ * reference an NVDIMM. 
*/ enum { - NUM_PM = 2, - NUM_DCR = 4, + NUM_PM = 3, + NUM_DCR = 5, NUM_BDW = NUM_DCR, NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW, NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */, @@ -115,6 +126,7 @@ static u32 handle[NUM_DCR] = { [1] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 1), [2] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 0), [3] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1), + [4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0), }; struct nfit_test { @@ -138,6 +150,7 @@ struct nfit_test { dma_addr_t *dcr_dma; int (*alloc)(struct nfit_test *t); void (*setup)(struct nfit_test *t); + int setup_hotplug; }; static struct nfit_test *to_nfit_test(struct device *dev) @@ -428,6 +441,10 @@ static int nfit_test0_alloc(struct nfit_test *t) if (!t->spa_set[1]) return -ENOMEM; + t->spa_set[2] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[2]); + if (!t->spa_set[2]) + return -ENOMEM; + for (i = 0; i < NUM_DCR; i++) { t->dimm[i] = test_alloc(t, DIMM_SIZE, &t->dimm_dma[i]); if (!t->dimm[i]) @@ -950,6 +967,126 @@ static void nfit_test0_setup(struct nfit_test *t) flush->hint_count = 1; flush->hint_address[0] = t->flush_dma[3]; + if (t->setup_hotplug) { + offset = offset + sizeof(struct acpi_nfit_flush_address) * 4; + /* dcr-descriptor4 */ + dcr = nfit_buf + offset; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 4+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[4]; + dcr->windows = 1; + dcr->window_size = DCR_SIZE; + dcr->command_offset = 0; + dcr->command_size = 8; + dcr->status_offset = 8; + dcr->status_size = 4; + + offset = offset + sizeof(struct acpi_nfit_control_region); + /* bdw4 (spa/dcr4, dimm4) */ + bdw = nfit_buf + offset; + bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; + bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->region_index = 4+1; + bdw->windows = 1; + bdw->offset = 0; + bdw->size = BDW_SIZE; + bdw->capacity = DIMM_SIZE; + 
bdw->start_address = 0; + + offset = offset + sizeof(struct acpi_nfit_data_region); + /* spa10 (dcr4) dimm4 */ + spa = nfit_buf + offset; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); + spa->range_index = 10+1; + spa->address = t->dcr_dma[4]; + spa->length = DCR_SIZE; + + /* + * spa11 (single-dimm interleave for hotplug, note storage + * does not actually alias the related block-data-window + * regions) + */ + spa = nfit_buf + offset + sizeof(*spa); + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); + spa->range_index = 11+1; + spa->address = t->spa_set_dma[2]; + spa->length = SPA0_SIZE; + + /* spa12 (bdw for dcr4) dimm4 */ + spa = nfit_buf + offset + sizeof(*spa) * 2; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); + spa->range_index = 12+1; + spa->address = t->dimm_dma[4]; + spa->length = DIMM_SIZE; + + offset = offset + sizeof(*spa) * 3; + /* mem-region14 (spa/dcr4, dimm4) */ + memdev = nfit_buf + offset; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[4]; + memdev->physical_id = 4; + memdev->region_id = 0; + memdev->range_index = 10+1; + memdev->region_index = 4+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region15 (spa11, dimm4) */ + memdev = nfit_buf + offset + + sizeof(struct acpi_nfit_memory_map); + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[4]; + memdev->physical_id = 4; + memdev->region_id = 0; + memdev->range_index = 11+1; + memdev->region_index = 4+1; + memdev->region_size = SPA0_SIZE; + 
memdev->region_offset = t->spa_set_dma[2]; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region16 (spa/bdw4, dimm4) */ + memdev = nfit_buf + offset + + sizeof(struct acpi_nfit_memory_map) * 2; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[4]; + memdev->physical_id = 4; + memdev->region_id = 0; + memdev->range_index = 12+1; + memdev->region_index = 4+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + offset = offset + sizeof(struct acpi_nfit_memory_map) * 3; + /* flush4 (dimm4) */ + flush = nfit_buf + offset; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[4]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[4]; + } + acpi_desc = &t->acpi_desc; set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); @@ -1108,6 +1245,29 @@ static int nfit_test_probe(struct platform_device *pdev) if (!acpi_desc->nvdimm_bus) return -ENXIO; + INIT_LIST_HEAD(&acpi_desc->spa_maps); + INIT_LIST_HEAD(&acpi_desc->spas); + INIT_LIST_HEAD(&acpi_desc->dcrs); + INIT_LIST_HEAD(&acpi_desc->bdws); + INIT_LIST_HEAD(&acpi_desc->idts); + INIT_LIST_HEAD(&acpi_desc->flushes); + INIT_LIST_HEAD(&acpi_desc->memdevs); + INIT_LIST_HEAD(&acpi_desc->dimms); + mutex_init(&acpi_desc->spa_map_mutex); + mutex_init(&acpi_desc->init_mutex); + + rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size); + if (rc) { + nvdimm_bus_unregister(acpi_desc->nvdimm_bus); + return rc; + } + + if (nfit_test->setup != nfit_test0_setup) + return 0; + + nfit_test->setup_hotplug = 1; + nfit_test->setup(nfit_test); + rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size); if (rc) { nvdimm_bus_unregister(acpi_desc->nvdimm_bus); |