aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/device-mapper/unstriped.txt124
-rw-r--r--drivers/md/Kconfig7
-rw-r--r--drivers/md/Makefile1
-rw-r--r--drivers/md/dm-unstripe.c225
4 files changed, 357 insertions, 0 deletions
diff --git a/Documentation/device-mapper/unstriped.txt b/Documentation/device-mapper/unstriped.txt
new file mode 100644
index 000000000000..0b2a306c54ee
--- /dev/null
+++ b/Documentation/device-mapper/unstriped.txt
@@ -0,0 +1,124 @@
+Introduction
+============
+
+The device-mapper "unstriped" target provides a transparent mechanism to
+unstripe a device-mapper "striped" target to access the underlying disks
+without having to touch the true backing block-device. It can also be
+used to unstripe a hardware RAID-0 to access backing disks.
+
+Parameters:
+<number of stripes> <chunk size> <stripe #> <dev_path> <offset>
+
+<number of stripes>
+ The number of stripes in the RAID 0.
+
+<chunk size>
+ The size of each chunk in the stripe, in 512B sectors. Must be a power of 2.
+
+<dev_path>
+ The block device you wish to unstripe.
+
+<stripe #>
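+ The stripe number within the device that corresponds to the physical
+ drive you wish to unstripe. This must be 0 indexed.
+
+<offset>
+ The starting offset, in 512B sectors, on the block device. It is added
+ to every remapped sector.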
+
+
+Why use this module?
+====================
+
+An example of undoing an existing dm-stripe
+-------------------------------------------
+
+This small bash script will set up 4 loop devices and use the existing
+striped target to combine the 4 devices into one. It will then use
+the unstriped target on top of the striped device to access the
+individual backing loop devices. We write data to the newly exposed
+unstriped devices and verify that the data written matches the correct
+underlying device on the striped array.
+
+#!/bin/bash
+
+MEMBER_SIZE=$((128 * 1024 * 1024))
+NUM=4
+SEQ_END=$((${NUM}-1))
+CHUNK=256
+BS=4096
+
+RAID_SIZE=$((${MEMBER_SIZE}*${NUM}/512))
+DM_PARMS="0 ${RAID_SIZE} striped ${NUM} ${CHUNK}"
+COUNT=$((${MEMBER_SIZE} / ${BS}))
+
+for i in $(seq 0 ${SEQ_END}); do
+ dd if=/dev/zero of=member-${i} bs=${MEMBER_SIZE} count=1 oflag=direct
+ losetup /dev/loop${i} member-${i}
+ DM_PARMS+=" /dev/loop${i} 0"
+done
+
+echo $DM_PARMS | dmsetup create raid0
+for i in $(seq 0 ${SEQ_END}); do
+ echo "0 1 unstriped ${NUM} ${CHUNK} ${i} /dev/mapper/raid0 0" | dmsetup create set-${i}
+done;
+
+for i in $(seq 0 ${SEQ_END}); do
+ dd if=/dev/urandom of=/dev/mapper/set-${i} bs=${BS} count=${COUNT} oflag=direct
+ diff /dev/mapper/set-${i} member-${i}
+done;
+
+for i in $(seq 0 ${SEQ_END}); do
+ dmsetup remove set-${i}
+done
+
+dmsetup remove raid0
+
+for i in $(seq 0 ${SEQ_END}); do
+ losetup -d /dev/loop${i}
+ rm -f member-${i}
+done
+
+Another example
+---------------
+
+Intel NVMe drives contain two cores on the physical device.
+Each core of the drive has segregated access to its LBA range.
+The current LBA model has a RAID 0 128k chunk on each core, resulting
+in a 256k stripe across the two cores:
+
+ Core 0: Core 1:
+ __________ __________
+ | LBA 512| | LBA 768|
+ | LBA 0 | | LBA 256|
+ ---------- ----------
+
+The purpose of this unstriping is to provide better QoS in noisy
+neighbor environments. When two partitions are created on the
+aggregate drive without this unstriping, reads on one partition
+can affect writes on another partition. This is because the partitions
+are striped across the two cores. When we unstripe this hardware RAID 0
+and make partitions on each new exposed device the two partitions are now
+physically separated.
+
+With the dm-unstriped target we're able to segregate an fio script that
+has read and write jobs that are independent of each other. Compared to
+when we run the test on a combined drive with partitions, we were able
+to get a 92% reduction in read latency using this device mapper target.
+
+
+Example dmsetup usage
+=====================
+
+unstriped on top of an Intel NVMe device that has 2 cores
+---------------------------------------------------------
+dmsetup create nvmset0 --table '0 512 unstriped 2 256 0 /dev/nvme0n1 0'
+dmsetup create nvmset1 --table '0 512 unstriped 2 256 1 /dev/nvme0n1 0'
+
+There will now be two devices that expose Intel NVMe core 0 and 1
+respectively:
+/dev/mapper/nvmset0
+/dev/mapper/nvmset1
+
+unstriped on top of striped with 4 drives using 128K chunk size
+---------------------------------------------------------------
+dmsetup create raid_disk0 --table '0 512 unstriped 4 256 0 /dev/mapper/striped 0'
+dmsetup create raid_disk1 --table '0 512 unstriped 4 256 1 /dev/mapper/striped 0'
+dmsetup create raid_disk2 --table '0 512 unstriped 4 256 2 /dev/mapper/striped 0'
+dmsetup create raid_disk3 --table '0 512 unstriped 4 256 3 /dev/mapper/striped 0'
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 83b9362be09c..2c8ac3688815 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -269,6 +269,13 @@ config DM_BIO_PRISON
source "drivers/md/persistent-data/Kconfig"
+config DM_UNSTRIPED
+ tristate "Unstriped target"
+ depends on BLK_DEV_DM
+ ---help---
+ Unstripes I/O so it is issued solely on a single drive in a HW
+ RAID0 or dm-striped target.
+
config DM_CRYPT
tristate "Crypt target support"
depends on BLK_DEV_DM
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index f701bb211783..63255f3ebd97 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -43,6 +43,7 @@ obj-$(CONFIG_BCACHE) += bcache/
obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
obj-$(CONFIG_BLK_DEV_DM_BUILTIN) += dm-builtin.o
+obj-$(CONFIG_DM_UNSTRIPED) += dm-unstripe.o
obj-$(CONFIG_DM_BUFIO) += dm-bufio.o
obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o
obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c
new file mode 100644
index 000000000000..061b4f10bf5c
--- /dev/null
+++ b/drivers/md/dm-unstripe.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (C) 2017 Intel Corporation.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <linux/device-mapper.h>
+
+/*
+ * Per-target context for one "unstriped" mapping. It is allocated and
+ * filled in by unstripe_ctr() and freed by cleanup_unstripe().
+ */
+struct unstripe_c {
+ /* Underlying striped device, obtained with dm_get_device() in the constructor */
+ struct dm_dev *dev;
+ /* Offset parsed from the last table argument; added to every remapped sector */
+ sector_t physical_start;
+
+ /* Number of stripes (first table argument), never zero */
+ uint32_t stripes;
+
+ /* Stripe number being exposed (third table argument) */
+ uint32_t unstripe;
+ /* (stripes - 1) * chunk_size; multiplied by the chunk index in map_to_core() */
+ sector_t unstripe_width;
+ /* unstripe * chunk_size; added to every sector in map_to_core() */
+ sector_t unstripe_offset;
+
+ /* Chunk size (second table argument); the constructor requires a power of 2 */
+ uint32_t chunk_size;
+ /* fls(chunk_size) - 1; used as the shift that turns a sector into a chunk index */
+ u8 chunk_shift;
+};
+
+/*
+ * Message prefix for this target.
+ * NOTE(review): presumably consumed by the DM logging macros such as
+ * DMERR() used in dm_unstripe_init() -- confirm in device-mapper.h.
+ */
+#define DM_MSG_PREFIX "unstriped"
+
+/*
+ * Tear down an unstripe context: release the underlying device if one
+ * was acquired, then free the context itself. Used both by the
+ * constructor's error path and by the destructor.
+ */
+static void cleanup_unstripe(struct unstripe_c *uc, struct dm_target *ti)
+{
+	struct dm_dev *held = uc->dev;
+
+	/* The constructor may fail before the device was ever acquired */
+	if (held != NULL)
+		dm_put_device(ti, held);
+
+	kfree(uc);
+}
+
+/*
+ * Construct an unstriped mapping.
+ * <number of stripes> <chunk size> <stripe #> <dev_path> <offset>
+ *
+ * argv[0]: number of stripes, must be non-zero
+ * argv[1]: chunk size, must be a non-zero power of 2
+ * argv[2]: 0-indexed stripe to expose, must be less than argv[0]
+ * argv[3]: path of the striped device
+ * argv[4]: start offset, added to every remapped sector
+ *
+ * On success ti->private points at the new context and 0 is returned.
+ * On failure ti->error describes the problem and the function returns
+ * -ENOMEM if the context could not be allocated, -EINVAL otherwise.
+ */
+static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+	struct unstripe_c *uc;
+	sector_t width, tmp_len;
+	unsigned long long start;
+	char dummy;
+
+	if (argc != 5) {
+		ti->error = "Invalid number of arguments";
+		return -EINVAL;
+	}
+
+	uc = kzalloc(sizeof(*uc), GFP_KERNEL);
+	if (!uc) {
+		ti->error = "Memory allocation for unstriped context failed";
+		return -ENOMEM;
+	}
+
+	if (kstrtouint(argv[0], 10, &uc->stripes) || !uc->stripes) {
+		ti->error = "Invalid stripe count";
+		goto err;
+	}
+
+	if (kstrtouint(argv[1], 10, &uc->chunk_size) || !uc->chunk_size) {
+		ti->error = "Invalid chunk_size";
+		goto err;
+	}
+
+	// FIXME: must support non power of 2 chunk_size, dm-stripe.c does
+	if (!is_power_of_2(uc->chunk_size)) {
+		ti->error = "Non power of 2 chunk_size is not supported yet";
+		goto err;
+	}
+
+	if (kstrtouint(argv[2], 10, &uc->unstripe)) {
+		ti->error = "Invalid stripe number";
+		goto err;
+	}
+
+	/*
+	 * Stripe numbers are 0 indexed, so the valid range is
+	 * [0, stripes - 1]; the stripe count itself is already out of range.
+	 */
+	if (uc->unstripe >= uc->stripes) {
+		ti->error = "Please provide stripe between [0, # of stripes)";
+		goto err;
+	}
+
+	if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &uc->dev)) {
+		ti->error = "Couldn't get striped device";
+		goto err;
+	}
+
+	/* The trailing %c catches garbage after the number */
+	if (sscanf(argv[4], "%llu%c", &start, &dummy) != 1) {
+		ti->error = "Invalid striped device offset";
+		goto err;
+	}
+	uc->physical_start = start;
+
+	/*
+	 * Widen before multiplying: both operands are 32 bit, so the
+	 * product would wrap at 32 bits even when sector_t is wider.
+	 */
+	uc->unstripe_offset = (sector_t)uc->unstripe * uc->chunk_size;
+	uc->unstripe_width = (sector_t)(uc->stripes - 1) * uc->chunk_size;
+	/* chunk_size is a power of 2 here, so this is its log2 */
+	uc->chunk_shift = fls(uc->chunk_size) - 1;
+
+	/* sector_div() divides in place and returns the remainder */
+	width = ti->len;
+	if (sector_div(width, uc->stripes)) {
+		ti->error = "Target length not divisible by number of stripes";
+		goto err;
+	}
+
+	tmp_len = width;
+	if (sector_div(tmp_len, uc->chunk_size)) {
+		ti->error = "Target length not divisible by chunk size";
+		goto err;
+	}
+
+	if (dm_set_target_max_io_len(ti, uc->chunk_size)) {
+		ti->error = "Failed to set max io len";
+		goto err;
+	}
+
+	ti->private = uc;
+	return 0;
+err:
+	/* Drops the device reference (if taken) and frees uc */
+	cleanup_unstripe(uc, ti);
+	return -EINVAL;
+}
+
+/*
+ * Destructor: release everything the constructor stored in ti->private.
+ */
+static void unstripe_dtr(struct dm_target *ti)
+{
+	cleanup_unstripe(ti->private, ti);
+}
+
+/*
+ * Translate the sector of an incoming bio into the matching sector on the
+ * striped device, not yet including the physical_start offset.
+ *
+ * The result is the incoming sector, plus unstripe_width for every whole
+ * chunk that precedes it (moving to the right "row" of the stripe), plus
+ * unstripe_offset (selecting the stripe being exposed).
+ */
+static sector_t map_to_core(struct dm_target *ti, struct bio *bio)
+{
+	struct unstripe_c *uc = ti->private;
+	sector_t in = bio->bi_iter.bi_sector;
+	sector_t row = in >> uc->chunk_shift;
+
+	return in + uc->unstripe_width * row + uc->unstripe_offset;
+}
+
+/*
+ * Map callback: point the bio at the underlying striped device and rewrite
+ * its start sector to the location computed by map_to_core(), shifted by
+ * the configured physical_start. Always returns DM_MAPIO_REMAPPED.
+ */
+static int unstripe_map(struct dm_target *ti, struct bio *bio)
+{
+	struct unstripe_c *uc = ti->private;
+	sector_t remapped;
+
+	bio_set_dev(bio, uc->dev->bdev);
+
+	remapped = map_to_core(ti, bio);
+	remapped += uc->physical_start;
+	bio->bi_iter.bi_sector = remapped;
+
+	return DM_MAPIO_REMAPPED;
+}
+
+/*
+ * Status callback.
+ *
+ * STATUSTYPE_INFO emits nothing. STATUSTYPE_TABLE emits the table line
+ * arguments in constructor order:
+ *   <number of stripes> <chunk size> <stripe #> <dev name> <offset>
+ *
+ * stripes and unstripe are unsigned 32 bit values, so they are printed
+ * with %u; a signed conversion would print large values as negative
+ * numbers that the constructor's kstrtouint() parsing would then reject.
+ */
+static void unstripe_status(struct dm_target *ti, status_type_t type,
+			    unsigned int status_flags, char *result, unsigned int maxlen)
+{
+	struct unstripe_c *uc = ti->private;
+	/*
+	 * Not referenced directly below.
+	 * NOTE(review): presumably used by DMEMIT() together with
+	 * result/maxlen -- confirm in device-mapper.h.
+	 */
+	unsigned int sz = 0;
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		break;
+
+	case STATUSTYPE_TABLE:
+		DMEMIT("%u %llu %u %s %llu",
+		       uc->stripes, (unsigned long long)uc->chunk_size, uc->unstripe,
+		       uc->dev->name, (unsigned long long)uc->physical_start);
+		break;
+	}
+}
+
+/*
+ * Invoke fn once for the single underlying device, passing the configured
+ * start offset and the target length, and return whatever fn returns.
+ */
+static int unstripe_iterate_devices(struct dm_target *ti,
+				    iterate_devices_callout_fn fn, void *data)
+{
+	struct unstripe_c *uc = ti->private;
+	struct dm_dev *backing = uc->dev;
+	sector_t first = uc->physical_start;
+
+	return fn(ti, backing, first, ti->len, data);
+}
+
+/*
+ * Report the configured chunk size as the queue's chunk_sectors limit.
+ */
+static void unstripe_io_hints(struct dm_target *ti,
+			      struct queue_limits *limits)
+{
+	const struct unstripe_c *ctx = ti->private;
+
+	limits->chunk_sectors = ctx->chunk_size;
+}
+
+/*
+ * Target type descriptor for "unstriped": ties the target name and version
+ * to the callbacks defined in this file. Registered at module init and
+ * unregistered at module exit.
+ */
+static struct target_type unstripe_target = {
+	.name            = "unstriped",
+	.version         = {1, 0, 0},
+	.module          = THIS_MODULE,
+	.ctr             = unstripe_ctr,
+	.dtr             = unstripe_dtr,
+	.map             = unstripe_map,
+	.status          = unstripe_status,
+	.iterate_devices = unstripe_iterate_devices,
+	.io_hints        = unstripe_io_hints,
+};
+
+/*
+ * Module init: register the "unstriped" target type. Logs an error when
+ * registration fails and returns dm_register_target()'s result either way.
+ */
+static int __init dm_unstripe_init(void)
+{
+	int ret = dm_register_target(&unstripe_target);
+
+	if (ret >= 0)
+		return ret;
+
+	DMERR("target registration failed");
+	return ret;
+}
+
+/*
+ * Module exit: unregister the "unstriped" target type that
+ * dm_unstripe_init() registered.
+ */
+static void __exit dm_unstripe_exit(void)
+{
+ dm_unregister_target(&unstripe_target);
+}
+
+/* Module entry and exit points */
+module_init(dm_unstripe_init);
+module_exit(dm_unstripe_exit);
+
+/* Module metadata */
+MODULE_DESCRIPTION(DM_NAME " unstriped target");
+MODULE_AUTHOR("Scott Bauer <scott.bauer@intel.com>");
+MODULE_LICENSE("GPL");