diff options
Diffstat (limited to 'drivers')
149 files changed, 29487 insertions, 2488 deletions
diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c index ac6739e085e3..c3de70de00d4 100644 --- a/drivers/char/hw_random/n2-drv.c +++ b/drivers/char/hw_random/n2-drv.c @@ -1,6 +1,6 @@ /* n2-drv.c: Niagara-2 RNG driver. * - * Copyright (C) 2008 David S. Miller <davem@davemloft.net> + * Copyright (C) 2008, 2011 David S. Miller <davem@davemloft.net> */ #include <linux/kernel.h> @@ -22,8 +22,8 @@ #define DRV_MODULE_NAME "n2rng" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "0.1" -#define DRV_MODULE_RELDATE "May 15, 2008" +#define DRV_MODULE_VERSION "0.2" +#define DRV_MODULE_RELDATE "July 27, 2011" static char version[] __devinitdata = DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; @@ -623,14 +623,14 @@ static const struct of_device_id n2rng_match[]; static int __devinit n2rng_probe(struct platform_device *op) { const struct of_device_id *match; - int victoria_falls; + int multi_capable; int err = -ENOMEM; struct n2rng *np; match = of_match_device(n2rng_match, &op->dev); if (!match) return -EINVAL; - victoria_falls = (match->data != NULL); + multi_capable = (match->data != NULL); n2rng_driver_version(); np = kzalloc(sizeof(*np), GFP_KERNEL); @@ -640,8 +640,8 @@ static int __devinit n2rng_probe(struct platform_device *op) INIT_DELAYED_WORK(&np->work, n2rng_work); - if (victoria_falls) - np->flags |= N2RNG_FLAG_VF; + if (multi_capable) + np->flags |= N2RNG_FLAG_MULTI; err = -ENODEV; np->hvapi_major = 2; @@ -658,10 +658,10 @@ static int __devinit n2rng_probe(struct platform_device *op) } } - if (np->flags & N2RNG_FLAG_VF) { + if (np->flags & N2RNG_FLAG_MULTI) { if (np->hvapi_major < 2) { - dev_err(&op->dev, "VF RNG requires HVAPI major " - "version 2 or later, got %lu\n", + dev_err(&op->dev, "multi-unit-capable RNG requires " + "HVAPI major version 2 or later, got %lu\n", np->hvapi_major); goto out_hvapi_unregister; } @@ -688,8 +688,8 @@ static int __devinit n2rng_probe(struct platform_device *op) goto 
out_free_units; dev_info(&op->dev, "Found %s RNG, units: %d\n", - ((np->flags & N2RNG_FLAG_VF) ? - "Victoria Falls" : "Niagara2"), + ((np->flags & N2RNG_FLAG_MULTI) ? + "multi-unit-capable" : "single-unit"), np->num_units); np->hwrng.name = "n2rng"; @@ -751,6 +751,11 @@ static const struct of_device_id n2rng_match[] = { .compatible = "SUNW,vf-rng", .data = (void *) 1, }, + { + .name = "random-number-generator", + .compatible = "SUNW,kt-rng", + .data = (void *) 1, + }, {}, }; MODULE_DEVICE_TABLE(of, n2rng_match); diff --git a/drivers/char/hw_random/n2rng.h b/drivers/char/hw_random/n2rng.h index 4bea07f30978..f244ac89087f 100644 --- a/drivers/char/hw_random/n2rng.h +++ b/drivers/char/hw_random/n2rng.h @@ -68,7 +68,7 @@ struct n2rng { struct platform_device *op; unsigned long flags; -#define N2RNG_FLAG_VF 0x00000001 /* Victoria Falls RNG, else N2 */ +#define N2RNG_FLAG_MULTI 0x00000001 /* Multi-unit capable RNG */ #define N2RNG_FLAG_CONTROL 0x00000002 /* Operating in control domain */ #define N2RNG_FLAG_READY 0x00000008 /* Ready for hw-rng layer */ #define N2RNG_FLAG_SHUTDOWN 0x00000010 /* Driver unregistering */ diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index 7beb0e25f1e1..caf8012ef47c 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -534,6 +534,7 @@ void tpm_get_timeouts(struct tpm_chip *chip) struct duration_t *duration_cap; ssize_t rc; u32 timeout; + unsigned int scale = 1; tpm_cmd.header.in = tpm_getcap_header; tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP; @@ -545,24 +546,30 @@ void tpm_get_timeouts(struct tpm_chip *chip) if (rc) goto duration; - if (be32_to_cpu(tpm_cmd.header.out.length) - != 4 * sizeof(u32)) - goto duration; + if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 || + be32_to_cpu(tpm_cmd.header.out.length) + != sizeof(tpm_cmd.header.out) + sizeof(u32) + 4 * sizeof(u32)) + return; timeout_cap = &tpm_cmd.params.getcap_out.cap.timeout; /* Don't overwrite default if value is 0 */ timeout = 
be32_to_cpu(timeout_cap->a); + if (timeout && timeout < 1000) { + /* timeouts in msec rather usec */ + scale = 1000; + chip->vendor.timeout_adjusted = true; + } if (timeout) - chip->vendor.timeout_a = usecs_to_jiffies(timeout); + chip->vendor.timeout_a = usecs_to_jiffies(timeout * scale); timeout = be32_to_cpu(timeout_cap->b); if (timeout) - chip->vendor.timeout_b = usecs_to_jiffies(timeout); + chip->vendor.timeout_b = usecs_to_jiffies(timeout * scale); timeout = be32_to_cpu(timeout_cap->c); if (timeout) - chip->vendor.timeout_c = usecs_to_jiffies(timeout); + chip->vendor.timeout_c = usecs_to_jiffies(timeout * scale); timeout = be32_to_cpu(timeout_cap->d); if (timeout) - chip->vendor.timeout_d = usecs_to_jiffies(timeout); + chip->vendor.timeout_d = usecs_to_jiffies(timeout * scale); duration: tpm_cmd.header.in = tpm_getcap_header; @@ -575,23 +582,31 @@ duration: if (rc) return; - if (be32_to_cpu(tpm_cmd.header.out.return_code) - != 3 * sizeof(u32)) + if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 || + be32_to_cpu(tpm_cmd.header.out.length) + != sizeof(tpm_cmd.header.out) + sizeof(u32) + 3 * sizeof(u32)) return; + duration_cap = &tpm_cmd.params.getcap_out.cap.duration; chip->vendor.duration[TPM_SHORT] = usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_short)); + chip->vendor.duration[TPM_MEDIUM] = + usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_medium)); + chip->vendor.duration[TPM_LONG] = + usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_long)); + /* The Broadcom BCM0102 chipset in a Dell Latitude D820 gets the above * value wrong and apparently reports msecs rather than usecs. So we * fix up the resulting too-small TPM_SHORT value to make things work. + * We also scale the TPM_MEDIUM and -_LONG values by 1000. 
*/ - if (chip->vendor.duration[TPM_SHORT] < (HZ/100)) + if (chip->vendor.duration[TPM_SHORT] < (HZ / 100)) { chip->vendor.duration[TPM_SHORT] = HZ; - - chip->vendor.duration[TPM_MEDIUM] = - usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_medium)); - chip->vendor.duration[TPM_LONG] = - usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_long)); + chip->vendor.duration[TPM_MEDIUM] *= 1000; + chip->vendor.duration[TPM_LONG] *= 1000; + chip->vendor.duration_adjusted = true; + dev_info(chip->dev, "Adjusting TPM timeout parameters."); + } } EXPORT_SYMBOL_GPL(tpm_get_timeouts); @@ -600,7 +615,7 @@ void tpm_continue_selftest(struct tpm_chip *chip) u8 data[] = { 0, 193, /* TPM_TAG_RQU_COMMAND */ 0, 0, 0, 10, /* length */ - 0, 0, 0, 83, /* TPM_ORD_GetCapability */ + 0, 0, 0, 83, /* TPM_ORD_ContinueSelfTest */ }; tpm_transmit(chip, data, sizeof(data)); @@ -863,18 +878,24 @@ ssize_t tpm_show_pubek(struct device *dev, struct device_attribute *attr, data = tpm_cmd.params.readpubek_out_buffer; str += sprintf(str, - "Algorithm: %02X %02X %02X %02X\nEncscheme: %02X %02X\n" - "Sigscheme: %02X %02X\nParameters: %02X %02X %02X %02X" - " %02X %02X %02X %02X %02X %02X %02X %02X\n" - "Modulus length: %d\nModulus: \n", - data[10], data[11], data[12], data[13], data[14], - data[15], data[16], data[17], data[22], data[23], - data[24], data[25], data[26], data[27], data[28], - data[29], data[30], data[31], data[32], data[33], - be32_to_cpu(*((__be32 *) (data + 34)))); + "Algorithm: %02X %02X %02X %02X\n" + "Encscheme: %02X %02X\n" + "Sigscheme: %02X %02X\n" + "Parameters: %02X %02X %02X %02X " + "%02X %02X %02X %02X " + "%02X %02X %02X %02X\n" + "Modulus length: %d\n" + "Modulus:\n", + data[0], data[1], data[2], data[3], + data[4], data[5], + data[6], data[7], + data[12], data[13], data[14], data[15], + data[16], data[17], data[18], data[19], + data[20], data[21], data[22], data[23], + be32_to_cpu(*((__be32 *) (data + 24)))); for (i = 0; i < 256; i++) { - str += sprintf(str, "%02X ", data[i + 
38]); + str += sprintf(str, "%02X ", data[i + 28]); if ((i + 1) % 16 == 0) str += sprintf(str, "\n"); } @@ -937,6 +958,35 @@ ssize_t tpm_show_caps_1_2(struct device * dev, } EXPORT_SYMBOL_GPL(tpm_show_caps_1_2); +ssize_t tpm_show_durations(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tpm_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%d %d %d [%s]\n", + jiffies_to_usecs(chip->vendor.duration[TPM_SHORT]), + jiffies_to_usecs(chip->vendor.duration[TPM_MEDIUM]), + jiffies_to_usecs(chip->vendor.duration[TPM_LONG]), + chip->vendor.duration_adjusted + ? "adjusted" : "original"); +} +EXPORT_SYMBOL_GPL(tpm_show_durations); + +ssize_t tpm_show_timeouts(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tpm_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%d %d %d %d [%s]\n", + jiffies_to_usecs(chip->vendor.timeout_a), + jiffies_to_usecs(chip->vendor.timeout_b), + jiffies_to_usecs(chip->vendor.timeout_c), + jiffies_to_usecs(chip->vendor.timeout_d), + chip->vendor.timeout_adjusted + ? 
"adjusted" : "original"); +} +EXPORT_SYMBOL_GPL(tpm_show_timeouts); + ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 72ddb031b69a..9c4163cfa3ce 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -56,6 +56,10 @@ extern ssize_t tpm_show_owned(struct device *, struct device_attribute *attr, char *); extern ssize_t tpm_show_temp_deactivated(struct device *, struct device_attribute *attr, char *); +extern ssize_t tpm_show_durations(struct device *, + struct device_attribute *attr, char *); +extern ssize_t tpm_show_timeouts(struct device *, + struct device_attribute *attr, char *); struct tpm_chip; @@ -67,6 +71,7 @@ struct tpm_vendor_specific { unsigned long base; /* TPM base address */ int irq; + int probed_irq; int region_size; int have_region; @@ -81,7 +86,9 @@ struct tpm_vendor_specific { struct list_head list; int locality; unsigned long timeout_a, timeout_b, timeout_c, timeout_d; /* jiffies */ + bool timeout_adjusted; unsigned long duration[3]; /* jiffies */ + bool duration_adjusted; wait_queue_head_t read_queue; wait_queue_head_t int_queue; diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c index a605cb7dd898..82facc9104c7 100644 --- a/drivers/char/tpm/tpm_nsc.c +++ b/drivers/char/tpm/tpm_nsc.c @@ -330,12 +330,12 @@ static int __init init_nsc(void) pdev->dev.driver = &nsc_drv.driver; pdev->dev.release = tpm_nsc_remove; - if ((rc = platform_device_register(pdev)) < 0) - goto err_free_dev; + if ((rc = platform_device_add(pdev)) < 0) + goto err_put_dev; if (request_region(base, 2, "tpm_nsc0") == NULL ) { rc = -EBUSY; - goto err_unreg_dev; + goto err_del_dev; } if (!(chip = tpm_register_hardware(&pdev->dev, &tpm_nsc))) { @@ -382,10 +382,10 @@ static int __init init_nsc(void) err_rel_reg: release_region(base, 2); -err_unreg_dev: - platform_device_unregister(pdev); -err_free_dev: - kfree(pdev); +err_del_dev: 
+ platform_device_del(pdev); +err_put_dev: + platform_device_put(pdev); err_unreg_drv: platform_driver_unregister(&nsc_drv); return rc; diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index dd21df55689d..7fc2f108f490 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -26,6 +26,7 @@ #include <linux/interrupt.h> #include <linux/wait.h> #include <linux/acpi.h> +#include <linux/freezer.h> #include "tpm.h" #define TPM_HEADER_SIZE 10 @@ -79,7 +80,7 @@ enum tis_defaults { static LIST_HEAD(tis_chips); static DEFINE_SPINLOCK(tis_lock); -#ifdef CONFIG_ACPI +#ifdef CONFIG_PNP static int is_itpm(struct pnp_dev *dev) { struct acpi_device *acpi = pnp_acpi_device(dev); @@ -92,11 +93,6 @@ static int is_itpm(struct pnp_dev *dev) return 0; } -#else -static int is_itpm(struct pnp_dev *dev) -{ - return 0; -} #endif static int check_locality(struct tpm_chip *chip, int l) @@ -120,7 +116,7 @@ static void release_locality(struct tpm_chip *chip, int l, int force) static int request_locality(struct tpm_chip *chip, int l) { - unsigned long stop; + unsigned long stop, timeout; long rc; if (check_locality(chip, l) >= 0) @@ -129,17 +125,25 @@ static int request_locality(struct tpm_chip *chip, int l) iowrite8(TPM_ACCESS_REQUEST_USE, chip->vendor.iobase + TPM_ACCESS(l)); + stop = jiffies + chip->vendor.timeout_a; + if (chip->vendor.irq) { +again: + timeout = stop - jiffies; + if ((long)timeout <= 0) + return -1; rc = wait_event_interruptible_timeout(chip->vendor.int_queue, (check_locality (chip, l) >= 0), - chip->vendor.timeout_a); + timeout); if (rc > 0) return l; - + if (rc == -ERESTARTSYS && freezing(current)) { + clear_thread_flag(TIF_SIGPENDING); + goto again; + } } else { /* wait for burstcount */ - stop = jiffies + chip->vendor.timeout_a; do { if (check_locality(chip, l) >= 0) return l; @@ -196,15 +200,24 @@ static int wait_for_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout, if ((status & mask) == mask) return 0; + stop = jiffies + 
timeout; + if (chip->vendor.irq) { +again: + timeout = stop - jiffies; + if ((long)timeout <= 0) + return -ETIME; rc = wait_event_interruptible_timeout(*queue, ((tpm_tis_status (chip) & mask) == mask), timeout); if (rc > 0) return 0; + if (rc == -ERESTARTSYS && freezing(current)) { + clear_thread_flag(TIF_SIGPENDING); + goto again; + } } else { - stop = jiffies + timeout; do { msleep(TPM_TIMEOUT); status = tpm_tis_status(chip); @@ -288,11 +301,10 @@ MODULE_PARM_DESC(itpm, "Force iTPM workarounds (found on some Lenovo laptops)"); * tpm.c can skip polling for the data to be available as the interrupt is * waited for here */ -static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) +static int tpm_tis_send_data(struct tpm_chip *chip, u8 *buf, size_t len) { int rc, status, burstcnt; size_t count = 0; - u32 ordinal; if (request_locality(chip, 0) < 0) return -EBUSY; @@ -327,8 +339,7 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) /* write last byte */ iowrite8(buf[count], - chip->vendor.iobase + - TPM_DATA_FIFO(chip->vendor.locality)); + chip->vendor.iobase + TPM_DATA_FIFO(chip->vendor.locality)); wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c, &chip->vendor.int_queue); status = tpm_tis_status(chip); @@ -337,6 +348,28 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) goto out_err; } + return 0; + +out_err: + tpm_tis_ready(chip); + release_locality(chip, chip->vendor.locality, 0); + return rc; +} + +/* + * If interrupts are used (signaled by an irq set in the vendor structure) + * tpm.c can skip polling for the data to be available as the interrupt is + * waited for here + */ +static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) +{ + int rc; + u32 ordinal; + + rc = tpm_tis_send_data(chip, buf, len); + if (rc < 0) + return rc; + /* go and do it */ iowrite8(TPM_STS_GO, chip->vendor.iobase + TPM_STS(chip->vendor.locality)); @@ -358,6 +391,47 @@ out_err: return rc; } +/* + * Early probing for 
iTPM with STS_DATA_EXPECT flaw. + * Try sending command without itpm flag set and if that + * fails, repeat with itpm flag set. + */ +static int probe_itpm(struct tpm_chip *chip) +{ + int rc = 0; + u8 cmd_getticks[] = { + 0x00, 0xc1, 0x00, 0x00, 0x00, 0x0a, + 0x00, 0x00, 0x00, 0xf1 + }; + size_t len = sizeof(cmd_getticks); + int rem_itpm = itpm; + + itpm = 0; + + rc = tpm_tis_send_data(chip, cmd_getticks, len); + if (rc == 0) + goto out; + + tpm_tis_ready(chip); + release_locality(chip, chip->vendor.locality, 0); + + itpm = 1; + + rc = tpm_tis_send_data(chip, cmd_getticks, len); + if (rc == 0) { + dev_info(chip->dev, "Detected an iTPM.\n"); + rc = 1; + } else + rc = -EFAULT; + +out: + itpm = rem_itpm; + tpm_tis_ready(chip); + release_locality(chip, chip->vendor.locality, 0); + + return rc; +} + static const struct file_operations tis_ops = { .owner = THIS_MODULE, .llseek = no_llseek, @@ -376,6 +450,8 @@ static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL); static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL); static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); +static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL); +static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL); static struct attribute *tis_attrs[] = { &dev_attr_pubek.attr, @@ -385,7 +461,9 @@ static struct attribute *tis_attrs[] = { &dev_attr_owned.attr, &dev_attr_temp_deactivated.attr, &dev_attr_caps.attr, - &dev_attr_cancel.attr, NULL, + &dev_attr_cancel.attr, + &dev_attr_durations.attr, + &dev_attr_timeouts.attr, NULL, }; static struct attribute_group tis_attr_grp = { @@ -416,7 +494,7 @@ static irqreturn_t tis_int_probe(int irq, void *dev_id) if (interrupt == 0) return IRQ_NONE; - chip->vendor.irq = irq; + chip->vendor.probed_irq = irq; /* Clear interrupts handled with TPM_EOI */ iowrite32(interrupt, @@ -464,7 +542,7 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, resource_size_t len, unsigned int irq) { u32 
vendor, intfcaps, intmask; - int rc, i; + int rc, i, irq_s, irq_e; struct tpm_chip *chip; if (!(chip = tpm_register_hardware(dev, &tpm_tis))) @@ -493,6 +571,14 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, "1.2 TPM (device-id 0x%X, rev-id %d)\n", vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0))); + if (!itpm) { + itpm = probe_itpm(chip); + if (itpm < 0) { + rc = -ENODEV; + goto out_err; + } + } + if (itpm) dev_info(dev, "Intel iTPM workaround enabled\n"); @@ -522,6 +608,9 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, if (intfcaps & TPM_INTF_DATA_AVAIL_INT) dev_dbg(dev, "\tData Avail Int Support\n"); + /* get the timeouts before testing for irqs */ + tpm_get_timeouts(chip); + /* INTERRUPT Setup */ init_waitqueue_head(&chip->vendor.read_queue); init_waitqueue_head(&chip->vendor.int_queue); @@ -540,13 +629,19 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, if (interrupts) chip->vendor.irq = irq; if (interrupts && !chip->vendor.irq) { - chip->vendor.irq = + irq_s = ioread8(chip->vendor.iobase + TPM_INT_VECTOR(chip->vendor.locality)); + if (irq_s) { + irq_e = irq_s; + } else { + irq_s = 3; + irq_e = 15; + } - for (i = 3; i < 16 && chip->vendor.irq == 0; i++) { + for (i = irq_s; i <= irq_e && chip->vendor.irq == 0; i++) { iowrite8(i, chip->vendor.iobase + - TPM_INT_VECTOR(chip->vendor.locality)); + TPM_INT_VECTOR(chip->vendor.locality)); if (request_irq (i, tis_int_probe, IRQF_SHARED, chip->vendor.miscdev.name, chip) != 0) { @@ -568,9 +663,22 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, chip->vendor.iobase + TPM_INT_ENABLE(chip->vendor.locality)); + chip->vendor.probed_irq = 0; + /* Generate Interrupts */ tpm_gen_interrupt(chip); + chip->vendor.irq = chip->vendor.probed_irq; + + /* free_irq will call into tis_int_probe; + clear all irqs we haven't seen while doing + tpm_gen_interrupt */ + iowrite32(ioread32 + (chip->vendor.iobase + + 
TPM_INT_STATUS(chip->vendor.locality)), + chip->vendor.iobase + + TPM_INT_STATUS(chip->vendor.locality)); + /* Turn off */ iowrite32(intmask, chip->vendor.iobase + @@ -609,7 +717,6 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, list_add(&chip->vendor.list, &tis_chips); spin_unlock(&tis_lock); - tpm_get_timeouts(chip); tpm_continue_selftest(chip); return 0; @@ -619,6 +726,29 @@ out_err: tpm_remove_hardware(chip->dev); return rc; } + +static void tpm_tis_reenable_interrupts(struct tpm_chip *chip) +{ + u32 intmask; + + /* reenable interrupts that device may have lost or + BIOS/firmware may have disabled */ + iowrite8(chip->vendor.irq, chip->vendor.iobase + + TPM_INT_VECTOR(chip->vendor.locality)); + + intmask = + ioread32(chip->vendor.iobase + + TPM_INT_ENABLE(chip->vendor.locality)); + + intmask |= TPM_INTF_CMD_READY_INT + | TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_DATA_AVAIL_INT + | TPM_INTF_STS_VALID_INT | TPM_GLOBAL_INT_ENABLE; + + iowrite32(intmask, + chip->vendor.iobase + TPM_INT_ENABLE(chip->vendor.locality)); +} + + #ifdef CONFIG_PNP static int __devinit tpm_tis_pnp_init(struct pnp_dev *pnp_dev, const struct pnp_device_id *pnp_id) @@ -650,6 +780,9 @@ static int tpm_tis_pnp_resume(struct pnp_dev *dev) struct tpm_chip *chip = pnp_get_drvdata(dev); int ret; + if (chip->vendor.irq) + tpm_tis_reenable_interrupts(chip); + ret = tpm_pm_resume(&dev->dev); if (!ret) tpm_continue_selftest(chip); @@ -702,6 +835,11 @@ static int tpm_tis_suspend(struct platform_device *dev, pm_message_t msg) static int tpm_tis_resume(struct platform_device *dev) { + struct tpm_chip *chip = dev_get_drvdata(&dev->dev); + + if (chip->vendor.irq) + tpm_tis_reenable_interrupts(chip); + return tpm_pm_resume(&dev->dev); } static struct platform_driver tis_drv = { diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 2e5b2044c96f..d0183ddb3076 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1,6 +1,6 @@ /* n2_core.c: Niagara2 Stream 
Processing Unit (SPU) crypto support. * - * Copyright (C) 2010 David S. Miller <davem@davemloft.net> + * Copyright (C) 2010, 2011 David S. Miller <davem@davemloft.net> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -31,8 +31,8 @@ #include "n2_core.h" #define DRV_MODULE_NAME "n2_crypto" -#define DRV_MODULE_VERSION "0.1" -#define DRV_MODULE_RELDATE "April 29, 2010" +#define DRV_MODULE_VERSION "0.2" +#define DRV_MODULE_RELDATE "July 28, 2011" static char version[] __devinitdata = DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; @@ -1823,22 +1823,17 @@ static int spu_mdesc_scan(struct mdesc_handle *mdesc, struct platform_device *de static int __devinit get_irq_props(struct mdesc_handle *mdesc, u64 node, struct spu_mdesc_info *ip) { - const u64 *intr, *ino; - int intr_len, ino_len; + const u64 *ino; + int ino_len; int i; - intr = mdesc_get_property(mdesc, node, "intr", &intr_len); - if (!intr) - return -ENODEV; - ino = mdesc_get_property(mdesc, node, "ino", &ino_len); - if (!ino) + if (!ino) { + printk("NO 'ino'\n"); return -ENODEV; + } - if (intr_len != ino_len) - return -EINVAL; - - ip->num_intrs = intr_len / sizeof(u64); + ip->num_intrs = ino_len / sizeof(u64); ip->ino_table = kzalloc((sizeof(struct ino_blob) * ip->num_intrs), GFP_KERNEL); @@ -1847,7 +1842,7 @@ static int __devinit get_irq_props(struct mdesc_handle *mdesc, u64 node, for (i = 0; i < ip->num_intrs; i++) { struct ino_blob *b = &ip->ino_table[i]; - b->intr = intr[i]; + b->intr = i + 1; b->ino = ino[i]; } @@ -2204,6 +2199,10 @@ static struct of_device_id n2_crypto_match[] = { .name = "n2cp", .compatible = "SUNW,vf-cwq", }, + { + .name = "n2cp", + .compatible = "SUNW,kt-cwq", + }, {}, }; @@ -2228,6 +2227,10 @@ static struct of_device_id n2_mau_match[] = { .name = "ncp", .compatible = "SUNW,vf-mau", }, + { + .name = "ncp", + .compatible = "SUNW,kt-mau", + }, {}, }; diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 
95a08a8ca8aa..5745b7fe158c 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -271,7 +271,7 @@ int iser_send_command(struct iscsi_conn *conn, unsigned long edtl; int err; struct iser_data_buf *data_buf; - struct iscsi_cmd *hdr = (struct iscsi_cmd *)task->hdr; + struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr; struct scsi_cmnd *sc = task->sc; struct iser_tx_desc *tx_desc = &iser_task->desc; diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 56abf3d0e911..d72887585a14 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -154,10 +154,13 @@ static const struct xpad_device { { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX }, { 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX }, { 0x12ab, 0x8809, "Xbox DDR dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, + { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, + { 0x0e6f, 0x0105, "HSM3 Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1430, 0x4748, "RedOctane Guitar Hero X-plorer", 0, XTYPE_XBOX360 }, { 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 }, { 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 }, + { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x0016, "Hori Real Arcade Pro.EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x000d, "Hori Fighting Stick EX2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, @@ -236,9 +239,10 @@ static struct usb_device_id xpad_table [] = { XPAD_XBOX360_VENDOR(0x046d), /* Logitech X-Box 360 style controllers */ XPAD_XBOX360_VENDOR(0x0738), /* Mad Catz X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x0e6f), /* 0x0e6f X-Box 360 
controllers */ + XPAD_XBOX360_VENDOR(0x12ab), /* X-Box 360 dance pads */ XPAD_XBOX360_VENDOR(0x1430), /* RedOctane X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x146b), /* BigBen Interactive Controllers */ - XPAD_XBOX360_VENDOR(0x1bad), /* Rock Band Drums */ + XPAD_XBOX360_VENDOR(0x1bad), /* Harminix Rock Band Guitar and Drums */ XPAD_XBOX360_VENDOR(0x0f0d), /* Hori Controllers */ { } }; @@ -545,7 +549,7 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad) struct usb_endpoint_descriptor *ep_irq_out; int error; - if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX) + if (xpad->xtype == XTYPE_UNKNOWN) return 0; xpad->odata = usb_alloc_coherent(xpad->udev, XPAD_PKT_LEN, @@ -579,13 +583,13 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad) static void xpad_stop_output(struct usb_xpad *xpad) { - if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX) + if (xpad->xtype != XTYPE_UNKNOWN) usb_kill_urb(xpad->irq_out); } static void xpad_deinit_output(struct usb_xpad *xpad) { - if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX) { + if (xpad->xtype != XTYPE_UNKNOWN) { usb_free_urb(xpad->irq_out); usb_free_coherent(xpad->udev, XPAD_PKT_LEN, xpad->odata, xpad->odata_dma); @@ -632,6 +636,23 @@ static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect return usb_submit_urb(xpad->irq_out, GFP_ATOMIC); + case XTYPE_XBOX360W: + xpad->odata[0] = 0x00; + xpad->odata[1] = 0x01; + xpad->odata[2] = 0x0F; + xpad->odata[3] = 0xC0; + xpad->odata[4] = 0x00; + xpad->odata[5] = strong / 256; + xpad->odata[6] = weak / 256; + xpad->odata[7] = 0x00; + xpad->odata[8] = 0x00; + xpad->odata[9] = 0x00; + xpad->odata[10] = 0x00; + xpad->odata[11] = 0x00; + xpad->irq_out->transfer_buffer_length = 12; + + return usb_submit_urb(xpad->irq_out, GFP_ATOMIC); + default: dbg("%s - rumble command sent to unsupported xpad type: %d", __func__, xpad->xtype); @@ -644,7 +665,7 @@ static int 
xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect static int xpad_init_ff(struct usb_xpad *xpad) { - if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX) + if (xpad->xtype == XTYPE_UNKNOWN) return 0; input_set_capability(xpad->dev, EV_FF, FF_RUMBLE); diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c index af45d275f686..7b404e5443ed 100644 --- a/drivers/input/keyboard/adp5588-keys.c +++ b/drivers/input/keyboard/adp5588-keys.c @@ -9,7 +9,6 @@ */ #include <linux/module.h> -#include <linux/version.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/irq.h> diff --git a/drivers/input/keyboard/adp5589-keys.c b/drivers/input/keyboard/adp5589-keys.c index 631598663aab..c7708263051b 100644 --- a/drivers/input/keyboard/adp5589-keys.c +++ b/drivers/input/keyboard/adp5589-keys.c @@ -8,7 +8,6 @@ */ #include <linux/module.h> -#include <linux/version.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/irq.h> diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 11478eb2c27d..19cfc0cf558c 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -1578,14 +1578,14 @@ static int __init atkbd_setup_forced_release(const struct dmi_system_id *id) atkbd_platform_fixup = atkbd_apply_forced_release_keylist; atkbd_platform_fixup_data = id->driver_data; - return 0; + return 1; } static int __init atkbd_setup_scancode_fixup(const struct dmi_system_id *id) { atkbd_platform_scancode_fixup = id->driver_data; - return 0; + return 1; } static const struct dmi_system_id atkbd_dmi_quirk_table[] __initconst = { diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 6e6145b9a4c1..ce281d152275 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -2,6 +2,7 @@ * Driver for keys on GPIO lines capable of generating interrupts. 
* * Copyright 2005 Phil Blundell + * Copyright 2010, 2011 David Jander <david@protonic.nl> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -25,6 +26,8 @@ #include <linux/gpio_keys.h> #include <linux/workqueue.h> #include <linux/gpio.h> +#include <linux/of_platform.h> +#include <linux/of_gpio.h> struct gpio_button_data { struct gpio_keys_button *button; @@ -415,7 +418,7 @@ static int __devinit gpio_keys_setup_key(struct platform_device *pdev, if (!button->can_disable) irqflags |= IRQF_SHARED; - error = request_any_context_irq(irq, gpio_keys_isr, irqflags, desc, bdata); + error = request_threaded_irq(irq, NULL, gpio_keys_isr, irqflags, desc, bdata); if (error < 0) { dev_err(dev, "Unable to claim irq %d; error %d\n", irq, error); @@ -445,15 +448,120 @@ static void gpio_keys_close(struct input_dev *input) ddata->disable(input->dev.parent); } +/* + * Handlers for alternative sources of platform_data + */ +#ifdef CONFIG_OF +/* + * Translate OpenFirmware node properties into platform_data + */ +static int gpio_keys_get_devtree_pdata(struct device *dev, + struct gpio_keys_platform_data *pdata) +{ + struct device_node *node, *pp; + int i; + struct gpio_keys_button *buttons; + const u32 *reg; + int len; + + node = dev->of_node; + if (node == NULL) + return -ENODEV; + + memset(pdata, 0, sizeof *pdata); + + pdata->rep = !!of_get_property(node, "autorepeat", &len); + + /* First count the subnodes */ + pdata->nbuttons = 0; + pp = NULL; + while ((pp = of_get_next_child(node, pp))) + pdata->nbuttons++; + + if (pdata->nbuttons == 0) + return -ENODEV; + + buttons = kzalloc(pdata->nbuttons * (sizeof *buttons), GFP_KERNEL); + if (!buttons) + return -ENODEV; + + pp = NULL; + i = 0; + while ((pp = of_get_next_child(node, pp))) { + enum of_gpio_flags flags; + + if (!of_find_property(pp, "gpios", NULL)) { + pdata->nbuttons--; + dev_warn(dev, "Found button without gpios\n"); + continue; + } + 
buttons[i].gpio = of_get_gpio_flags(pp, 0, &flags); + buttons[i].active_low = flags & OF_GPIO_ACTIVE_LOW; + + reg = of_get_property(pp, "linux,code", &len); + if (!reg) { + dev_err(dev, "Button without keycode: 0x%x\n", buttons[i].gpio); + goto out_fail; + } + buttons[i].code = be32_to_cpup(reg); + + buttons[i].desc = of_get_property(pp, "label", &len); + + reg = of_get_property(pp, "linux,input-type", &len); + buttons[i].type = reg ? be32_to_cpup(reg) : EV_KEY; + + buttons[i].wakeup = !!of_get_property(pp, "gpio-key,wakeup", NULL); + + reg = of_get_property(pp, "debounce-interval", &len); + buttons[i].debounce_interval = reg ? be32_to_cpup(reg) : 5; + + i++; + } + + pdata->buttons = buttons; + + return 0; + +out_fail: + kfree(buttons); + return -ENODEV; +} + +static struct of_device_id gpio_keys_of_match[] = { + { .compatible = "gpio-keys", }, + { }, +}; +MODULE_DEVICE_TABLE(of, gpio_keys_of_match); + +#else + +static int gpio_keys_get_devtree_pdata(struct device *dev, + struct gpio_keys_platform_data *altp) +{ + return -ENODEV; +} + +#define gpio_keys_of_match NULL + +#endif + static int __devinit gpio_keys_probe(struct platform_device *pdev) { struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; struct gpio_keys_drvdata *ddata; struct device *dev = &pdev->dev; + struct gpio_keys_platform_data alt_pdata; struct input_dev *input; int i, error; int wakeup = 0; + if (!pdata) { + error = gpio_keys_get_devtree_pdata(dev, &alt_pdata); + if (error) + return error; + pdata = &alt_pdata; + } + ddata = kzalloc(sizeof(struct gpio_keys_drvdata) + pdata->nbuttons * sizeof(struct gpio_button_data), GFP_KERNEL); @@ -544,13 +652,15 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) fail1: input_free_device(input); kfree(ddata); + /* If we have no platform_data, we allocated buttons dynamically. 
*/ + if (!pdev->dev.platform_data) + kfree(pdata->buttons); return error; } static int __devexit gpio_keys_remove(struct platform_device *pdev) { - struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev); struct input_dev *input = ddata->input; int i; @@ -559,31 +669,39 @@ static int __devexit gpio_keys_remove(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 0); - for (i = 0; i < pdata->nbuttons; i++) { - int irq = gpio_to_irq(pdata->buttons[i].gpio); + for (i = 0; i < ddata->n_buttons; i++) { + int irq = gpio_to_irq(ddata->data[i].button->gpio); free_irq(irq, &ddata->data[i]); if (ddata->data[i].timer_debounce) del_timer_sync(&ddata->data[i].timer); cancel_work_sync(&ddata->data[i].work); - gpio_free(pdata->buttons[i].gpio); + gpio_free(ddata->data[i].button->gpio); } input_unregister_device(input); + /* + * If we had no platform_data, we allocated buttons dynamically, and + * must free them here. ddata->data[0].button is the pointer to the + * beginning of the allocated array. 
+ */ + if (!pdev->dev.platform_data) + kfree(ddata->data[0].button); + + kfree(ddata); + return 0; } - -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int gpio_keys_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; + struct gpio_keys_drvdata *ddata = dev_get_drvdata(dev); int i; - if (device_may_wakeup(&pdev->dev)) { - for (i = 0; i < pdata->nbuttons; i++) { - struct gpio_keys_button *button = &pdata->buttons[i]; + if (device_may_wakeup(dev)) { + for (i = 0; i < ddata->n_buttons; i++) { + struct gpio_keys_button *button = ddata->data[i].button; if (button->wakeup) { int irq = gpio_to_irq(button->gpio); enable_irq_wake(irq); @@ -596,15 +714,13 @@ static int gpio_keys_suspend(struct device *dev) static int gpio_keys_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev); - struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; + struct gpio_keys_drvdata *ddata = dev_get_drvdata(dev); int i; - for (i = 0; i < pdata->nbuttons; i++) { + for (i = 0; i < ddata->n_buttons; i++) { - struct gpio_keys_button *button = &pdata->buttons[i]; - if (button->wakeup && device_may_wakeup(&pdev->dev)) { + struct gpio_keys_button *button = ddata->data[i].button; + if (button->wakeup && device_may_wakeup(dev)) { int irq = gpio_to_irq(button->gpio); disable_irq_wake(irq); } @@ -615,22 +731,18 @@ static int gpio_keys_resume(struct device *dev) return 0; } - -static const struct dev_pm_ops gpio_keys_pm_ops = { - .suspend = gpio_keys_suspend, - .resume = gpio_keys_resume, -}; #endif +static SIMPLE_DEV_PM_OPS(gpio_keys_pm_ops, gpio_keys_suspend, gpio_keys_resume); + static struct platform_driver gpio_keys_device_driver = { .probe = gpio_keys_probe, .remove = __devexit_p(gpio_keys_remove), .driver = { .name = "gpio-keys", .owner = THIS_MODULE, -#ifdef CONFIG_PM .pm = &gpio_keys_pm_ops, 
-#endif + .of_match_table = gpio_keys_of_match, } }; @@ -644,10 +756,10 @@ static void __exit gpio_keys_exit(void) platform_driver_unregister(&gpio_keys_device_driver); } -module_init(gpio_keys_init); +late_initcall(gpio_keys_init); module_exit(gpio_keys_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Phil Blundell <pb@handhelds.org>"); -MODULE_DESCRIPTION("Keyboard driver for CPU GPIOs"); +MODULE_DESCRIPTION("Keyboard driver for GPIOs"); MODULE_ALIAS("platform:gpio-keys"); diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c index 71f744a8e686..ab0acaf7fe8f 100644 --- a/drivers/input/keyboard/lm8323.c +++ b/drivers/input/keyboard/lm8323.c @@ -146,7 +146,6 @@ struct lm8323_chip { /* device lock */ struct mutex lock; struct i2c_client *client; - struct work_struct work; struct input_dev *idev; bool kp_enabled; bool pm_suspend; @@ -162,7 +161,6 @@ struct lm8323_chip { #define client_to_lm8323(c) container_of(c, struct lm8323_chip, client) #define dev_to_lm8323(d) container_of(d, struct lm8323_chip, client->dev) -#define work_to_lm8323(w) container_of(w, struct lm8323_chip, work) #define cdev_to_pwm(c) container_of(c, struct lm8323_pwm, cdev) #define work_to_pwm(w) container_of(w, struct lm8323_pwm, work) @@ -375,9 +373,9 @@ static void pwm_done(struct lm8323_pwm *pwm) * Bottom half: handle the interrupt by posting key events, or dealing with * errors appropriately. */ -static void lm8323_work(struct work_struct *work) +static irqreturn_t lm8323_irq(int irq, void *_lm) { - struct lm8323_chip *lm = work_to_lm8323(work); + struct lm8323_chip *lm = _lm; u8 ints; int i; @@ -409,16 +407,6 @@ static void lm8323_work(struct work_struct *work) } mutex_unlock(&lm->lock); -} - -/* - * We cannot use I2C in interrupt context, so we just schedule work. 
- */ -static irqreturn_t lm8323_irq(int irq, void *data) -{ - struct lm8323_chip *lm = data; - - schedule_work(&lm->work); return IRQ_HANDLED; } @@ -675,7 +663,6 @@ static int __devinit lm8323_probe(struct i2c_client *client, lm->client = client; lm->idev = idev; mutex_init(&lm->lock); - INIT_WORK(&lm->work, lm8323_work); lm->size_x = pdata->size_x; lm->size_y = pdata->size_y; @@ -746,9 +733,8 @@ static int __devinit lm8323_probe(struct i2c_client *client, goto fail3; } - err = request_irq(client->irq, lm8323_irq, - IRQF_TRIGGER_FALLING | IRQF_DISABLED, - "lm8323", lm); + err = request_threaded_irq(client->irq, NULL, lm8323_irq, + IRQF_TRIGGER_LOW|IRQF_ONESHOT, "lm8323", lm); if (err) { dev_err(&client->dev, "could not get IRQ %d\n", client->irq); goto fail4; @@ -783,7 +769,6 @@ static int __devexit lm8323_remove(struct i2c_client *client) disable_irq_wake(client->irq); free_irq(client->irq, lm); - cancel_work_sync(&lm->work); input_unregister_device(lm->idev); diff --git a/drivers/input/keyboard/mpr121_touchkey.c b/drivers/input/keyboard/mpr121_touchkey.c index 0a9e81194888..1c1615d9a7f9 100644 --- a/drivers/input/keyboard/mpr121_touchkey.c +++ b/drivers/input/keyboard/mpr121_touchkey.c @@ -43,14 +43,15 @@ * enabled capacitance sensing inputs and its run/suspend mode. 
*/ #define ELECTRODE_CONF_ADDR 0x5e +#define ELECTRODE_CONF_QUICK_CHARGE 0x80 #define AUTO_CONFIG_CTRL_ADDR 0x7b #define AUTO_CONFIG_USL_ADDR 0x7d #define AUTO_CONFIG_LSL_ADDR 0x7e #define AUTO_CONFIG_TL_ADDR 0x7f /* Threshold of touch/release trigger */ -#define TOUCH_THRESHOLD 0x0f -#define RELEASE_THRESHOLD 0x0a +#define TOUCH_THRESHOLD 0x08 +#define RELEASE_THRESHOLD 0x05 /* Masks for touch and release triggers */ #define TOUCH_STATUS_MASK 0xfff /* MPR121 has 12 keys */ @@ -127,7 +128,7 @@ static int __devinit mpr121_phys_init(const struct mpr121_platform_data *pdata, struct i2c_client *client) { const struct mpr121_init_register *reg; - unsigned char usl, lsl, tl; + unsigned char usl, lsl, tl, eleconf; int i, t, vdd, ret; /* Set up touch/release threshold for ele0-ele11 */ @@ -163,8 +164,15 @@ static int __devinit mpr121_phys_init(const struct mpr121_platform_data *pdata, ret = i2c_smbus_write_byte_data(client, AUTO_CONFIG_USL_ADDR, usl); ret |= i2c_smbus_write_byte_data(client, AUTO_CONFIG_LSL_ADDR, lsl); ret |= i2c_smbus_write_byte_data(client, AUTO_CONFIG_TL_ADDR, tl); + + /* + * Quick charge bit will let the capacitive charge to ready + * state quickly, or the buttons may not function after system + * boot. 
+ */ + eleconf = mpr121->keycount | ELECTRODE_CONF_QUICK_CHARGE; ret |= i2c_smbus_write_byte_data(client, ELECTRODE_CONF_ADDR, - mpr121->keycount); + eleconf); if (ret != 0) goto err_i2c_write; diff --git a/drivers/input/keyboard/pmic8xxx-keypad.c b/drivers/input/keyboard/pmic8xxx-keypad.c index 6229c3e8e78b..e7cc51d0fb34 100644 --- a/drivers/input/keyboard/pmic8xxx-keypad.c +++ b/drivers/input/keyboard/pmic8xxx-keypad.c @@ -700,9 +700,9 @@ static int __devinit pmic8xxx_kp_probe(struct platform_device *pdev) return 0; err_pmic_reg_read: - free_irq(kp->key_stuck_irq, NULL); + free_irq(kp->key_stuck_irq, kp); err_req_stuck_irq: - free_irq(kp->key_sense_irq, NULL); + free_irq(kp->key_sense_irq, kp); err_gpio_config: err_get_irq: input_free_device(kp->input); @@ -717,8 +717,8 @@ static int __devexit pmic8xxx_kp_remove(struct platform_device *pdev) struct pmic8xxx_kp *kp = platform_get_drvdata(pdev); device_init_wakeup(&pdev->dev, 0); - free_irq(kp->key_stuck_irq, NULL); - free_irq(kp->key_sense_irq, NULL); + free_irq(kp->key_stuck_irq, kp); + free_irq(kp->key_sense_irq, kp); input_unregister_device(kp->input); kfree(kp); diff --git a/drivers/input/keyboard/qt1070.c b/drivers/input/keyboard/qt1070.c index ca7b89196ab7..b21bf5b876bb 100644 --- a/drivers/input/keyboard/qt1070.c +++ b/drivers/input/keyboard/qt1070.c @@ -239,8 +239,6 @@ static int __devexit qt1070_remove(struct i2c_client *client) input_unregister_device(data->input); kfree(data); - i2c_set_clientdata(client, NULL); - return 0; } diff --git a/drivers/input/keyboard/sh_keysc.c b/drivers/input/keyboard/sh_keysc.c index 6876700a4469..934aeb583b30 100644 --- a/drivers/input/keyboard/sh_keysc.c +++ b/drivers/input/keyboard/sh_keysc.c @@ -291,7 +291,7 @@ static int __devexit sh_keysc_remove(struct platform_device *pdev) return 0; } -#if CONFIG_PM_SLEEP +#ifdef CONFIG_PM_SLEEP static int sh_keysc_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); diff --git 
a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c index 2b3b73ec6689..da3828fc2c09 100644 --- a/drivers/input/keyboard/tegra-kbc.c +++ b/drivers/input/keyboard/tegra-kbc.c @@ -657,7 +657,7 @@ static int __devinit tegra_kbc_probe(struct platform_device *pdev) input_set_drvdata(input_dev, kbc); - input_dev->evbit[0] = BIT_MASK(EV_KEY); + input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP); input_set_capability(input_dev, EV_MSC, MSC_SCAN); input_dev->keycode = kbc->keycode; diff --git a/drivers/input/keyboard/tnetv107x-keypad.c b/drivers/input/keyboard/tnetv107x-keypad.c index c8f097a15d89..1c58681de81f 100644 --- a/drivers/input/keyboard/tnetv107x-keypad.c +++ b/drivers/input/keyboard/tnetv107x-keypad.c @@ -337,5 +337,5 @@ module_exit(keypad_exit); MODULE_AUTHOR("Cyril Chemparathy"); MODULE_DESCRIPTION("TNETV107X Keypad Driver"); -MODULE_ALIAS("platform: tnetv107x-keypad"); +MODULE_ALIAS("platform:tnetv107x-keypad"); MODULE_LICENSE("GPL"); diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index d1bf8724b58f..c9104bb4db06 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -100,6 +100,27 @@ config INPUT_MAX8925_ONKEY To compile this driver as a module, choose M here: the module will be called max8925_onkey. +config INPUT_MMA8450 + tristate "MMA8450 - Freescale's 3-Axis, 8/12-bit Digital Accelerometer" + depends on I2C + select INPUT_POLLDEV + help + Say Y here if you want to support Freescale's MMA8450 Accelerometer + through I2C interface. + + To compile this driver as a module, choose M here: the + module will be called mma8450. + +config INPUT_MPU3050 + tristate "MPU3050 Triaxial gyroscope sensor" + depends on I2C + help + Say Y here if you want to support InvenSense MPU3050 + connected via an I2C bus. + + To compile this driver as a module, choose M here: the + module will be called mpu3050. 
+ config INPUT_APANEL tristate "Fujitsu Lifebook Application Panel buttons" depends on X86 && I2C && LEDS_CLASS @@ -209,6 +230,23 @@ config INPUT_KEYSPAN_REMOTE To compile this driver as a module, choose M here: the module will be called keyspan_remote. +config INPUT_KXTJ9 + tristate "Kionix KXTJ9 tri-axis digital accelerometer" + depends on I2C + help + Say Y here to enable support for the Kionix KXTJ9 digital tri-axis + accelerometer. + + To compile this driver as a module, choose M here: the module will + be called kxtj9. + +config INPUT_KXTJ9_POLLED_MODE + bool "Enable polling mode support" + depends on INPUT_KXTJ9 + select INPUT_POLLDEV + help + Say Y here if you need accelerometer to work in polling mode. + config INPUT_POWERMATE tristate "Griffin PowerMate and Contour Jog support" depends on USB_ARCH_HAS_HCD diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile index 4da7c3a60e04..299ad5edba84 100644 --- a/drivers/input/misc/Makefile +++ b/drivers/input/misc/Makefile @@ -25,8 +25,11 @@ obj-$(CONFIG_INPUT_DM355EVM) += dm355evm_keys.o obj-$(CONFIG_HP_SDC_RTC) += hp_sdc_rtc.o obj-$(CONFIG_INPUT_IXP4XX_BEEPER) += ixp4xx-beeper.o obj-$(CONFIG_INPUT_KEYSPAN_REMOTE) += keyspan_remote.o +obj-$(CONFIG_INPUT_KXTJ9) += kxtj9.o obj-$(CONFIG_INPUT_M68K_BEEP) += m68kspkr.o obj-$(CONFIG_INPUT_MAX8925_ONKEY) += max8925_onkey.o +obj-$(CONFIG_INPUT_MMA8450) += mma8450.o +obj-$(CONFIG_INPUT_MPU3050) += mpu3050.o obj-$(CONFIG_INPUT_PCAP) += pcap_keys.o obj-$(CONFIG_INPUT_PCF50633_PMU) += pcf50633-input.o obj-$(CONFIG_INPUT_PCF8574) += pcf8574_keypad.o @@ -46,4 +49,3 @@ obj-$(CONFIG_INPUT_WISTRON_BTNS) += wistron_btns.o obj-$(CONFIG_INPUT_WM831X_ON) += wm831x-on.o obj-$(CONFIG_INPUT_XEN_KBDDEV_FRONTEND) += xen-kbdfront.o obj-$(CONFIG_INPUT_YEALINK) += yealink.o - diff --git a/drivers/input/misc/bfin_rotary.c b/drivers/input/misc/bfin_rotary.c index 4f72bdd69410..d00edc9f39d1 100644 --- a/drivers/input/misc/bfin_rotary.c +++ b/drivers/input/misc/bfin_rotary.c @@ 
-6,7 +6,6 @@ */ #include <linux/module.h> -#include <linux/version.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/irq.h> diff --git a/drivers/input/misc/kxtj9.c b/drivers/input/misc/kxtj9.c new file mode 100644 index 000000000000..c456f63b6bae --- /dev/null +++ b/drivers/input/misc/kxtj9.c @@ -0,0 +1,671 @@ +/* + * Copyright (C) 2011 Kionix, Inc. + * Written by Chris Hudson <chudson@kionix.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307, USA + */ + +#include <linux/delay.h> +#include <linux/i2c.h> +#include <linux/input.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/input/kxtj9.h> +#include <linux/input-polldev.h> + +#define NAME "kxtj9" +#define G_MAX 8000 +/* OUTPUT REGISTERS */ +#define XOUT_L 0x06 +#define WHO_AM_I 0x0F +/* CONTROL REGISTERS */ +#define INT_REL 0x1A +#define CTRL_REG1 0x1B +#define INT_CTRL1 0x1E +#define DATA_CTRL 0x21 +/* CONTROL REGISTER 1 BITS */ +#define PC1_OFF 0x7F +#define PC1_ON (1 << 7) +/* Data ready funtion enable bit: set during probe if using irq mode */ +#define DRDYE (1 << 5) +/* INTERRUPT CONTROL REGISTER 1 BITS */ +/* Set these during probe if using irq mode */ +#define KXTJ9_IEL (1 << 3) +#define KXTJ9_IEA (1 << 4) +#define KXTJ9_IEN (1 << 5) +/* INPUT_ABS CONSTANTS */ +#define FUZZ 3 +#define FLAT 3 +/* RESUME STATE INDICES */ +#define RES_DATA_CTRL 0 +#define 
RES_CTRL_REG1 1 +#define RES_INT_CTRL1 2 +#define RESUME_ENTRIES 3 + +/* + * The following table lists the maximum appropriate poll interval for each + * available output data rate. + */ +static const struct { + unsigned int cutoff; + u8 mask; +} kxtj9_odr_table[] = { + { 3, ODR800F }, + { 5, ODR400F }, + { 10, ODR200F }, + { 20, ODR100F }, + { 40, ODR50F }, + { 80, ODR25F }, + { 0, ODR12_5F}, +}; + +struct kxtj9_data { + struct i2c_client *client; + struct kxtj9_platform_data pdata; + struct input_dev *input_dev; +#ifdef CONFIG_INPUT_KXTJ9_POLLED_MODE + struct input_polled_dev *poll_dev; +#endif + unsigned int last_poll_interval; + u8 shift; + u8 ctrl_reg1; + u8 data_ctrl; + u8 int_ctrl; +}; + +static int kxtj9_i2c_read(struct kxtj9_data *tj9, u8 addr, u8 *data, int len) +{ + struct i2c_msg msgs[] = { + { + .addr = tj9->client->addr, + .flags = tj9->client->flags, + .len = 1, + .buf = &addr, + }, + { + .addr = tj9->client->addr, + .flags = tj9->client->flags | I2C_M_RD, + .len = len, + .buf = data, + }, + }; + + return i2c_transfer(tj9->client->adapter, msgs, 2); +} + +static void kxtj9_report_acceleration_data(struct kxtj9_data *tj9) +{ + s16 acc_data[3]; /* Data bytes from hardware xL, xH, yL, yH, zL, zH */ + s16 x, y, z; + int err; + + err = kxtj9_i2c_read(tj9, XOUT_L, (u8 *)acc_data, 6); + if (err < 0) + dev_err(&tj9->client->dev, "accelerometer data read failed\n"); + + x = le16_to_cpu(acc_data[tj9->pdata.axis_map_x]) >> tj9->shift; + y = le16_to_cpu(acc_data[tj9->pdata.axis_map_y]) >> tj9->shift; + z = le16_to_cpu(acc_data[tj9->pdata.axis_map_z]) >> tj9->shift; + + input_report_abs(tj9->input_dev, ABS_X, tj9->pdata.negate_x ? -x : x); + input_report_abs(tj9->input_dev, ABS_Y, tj9->pdata.negate_y ? -y : y); + input_report_abs(tj9->input_dev, ABS_Z, tj9->pdata.negate_z ? 
-z : z); + input_sync(tj9->input_dev); +} + +static irqreturn_t kxtj9_isr(int irq, void *dev) +{ + struct kxtj9_data *tj9 = dev; + int err; + + /* data ready is the only possible interrupt type */ + kxtj9_report_acceleration_data(tj9); + + err = i2c_smbus_read_byte_data(tj9->client, INT_REL); + if (err < 0) + dev_err(&tj9->client->dev, + "error clearing interrupt status: %d\n", err); + + return IRQ_HANDLED; +} + +static int kxtj9_update_g_range(struct kxtj9_data *tj9, u8 new_g_range) +{ + switch (new_g_range) { + case KXTJ9_G_2G: + tj9->shift = 4; + break; + case KXTJ9_G_4G: + tj9->shift = 3; + break; + case KXTJ9_G_8G: + tj9->shift = 2; + break; + default: + return -EINVAL; + } + + tj9->ctrl_reg1 &= 0xe7; + tj9->ctrl_reg1 |= new_g_range; + + return 0; +} + +static int kxtj9_update_odr(struct kxtj9_data *tj9, unsigned int poll_interval) +{ + int err; + int i; + + /* Use the lowest ODR that can support the requested poll interval */ + for (i = 0; i < ARRAY_SIZE(kxtj9_odr_table); i++) { + tj9->data_ctrl = kxtj9_odr_table[i].mask; + if (poll_interval < kxtj9_odr_table[i].cutoff) + break; + } + + err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, 0); + if (err < 0) + return err; + + err = i2c_smbus_write_byte_data(tj9->client, DATA_CTRL, tj9->data_ctrl); + if (err < 0) + return err; + + err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1); + if (err < 0) + return err; + + return 0; +} + +static int kxtj9_device_power_on(struct kxtj9_data *tj9) +{ + if (tj9->pdata.power_on) + return tj9->pdata.power_on(); + + return 0; +} + +static void kxtj9_device_power_off(struct kxtj9_data *tj9) +{ + int err; + + tj9->ctrl_reg1 &= PC1_OFF; + err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1); + if (err < 0) + dev_err(&tj9->client->dev, "soft power off failed\n"); + + if (tj9->pdata.power_off) + tj9->pdata.power_off(); +} + +static int kxtj9_enable(struct kxtj9_data *tj9) +{ + int err; + + err = kxtj9_device_power_on(tj9); + if (err < 0) + 
return err; + + /* ensure that PC1 is cleared before updating control registers */ + err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, 0); + if (err < 0) + return err; + + /* only write INT_CTRL_REG1 if in irq mode */ + if (tj9->client->irq) { + err = i2c_smbus_write_byte_data(tj9->client, + INT_CTRL1, tj9->int_ctrl); + if (err < 0) + return err; + } + + err = kxtj9_update_g_range(tj9, tj9->pdata.g_range); + if (err < 0) + return err; + + /* turn on outputs */ + tj9->ctrl_reg1 |= PC1_ON; + err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1); + if (err < 0) + return err; + + err = kxtj9_update_odr(tj9, tj9->last_poll_interval); + if (err < 0) + return err; + + /* clear initial interrupt if in irq mode */ + if (tj9->client->irq) { + err = i2c_smbus_read_byte_data(tj9->client, INT_REL); + if (err < 0) { + dev_err(&tj9->client->dev, + "error clearing interrupt: %d\n", err); + goto fail; + } + } + + return 0; + +fail: + kxtj9_device_power_off(tj9); + return err; +} + +static void kxtj9_disable(struct kxtj9_data *tj9) +{ + kxtj9_device_power_off(tj9); +} + +static int kxtj9_input_open(struct input_dev *input) +{ + struct kxtj9_data *tj9 = input_get_drvdata(input); + + return kxtj9_enable(tj9); +} + +static void kxtj9_input_close(struct input_dev *dev) +{ + struct kxtj9_data *tj9 = input_get_drvdata(dev); + + kxtj9_disable(tj9); +} + +static void __devinit kxtj9_init_input_device(struct kxtj9_data *tj9, + struct input_dev *input_dev) +{ + __set_bit(EV_ABS, input_dev->evbit); + input_set_abs_params(input_dev, ABS_X, -G_MAX, G_MAX, FUZZ, FLAT); + input_set_abs_params(input_dev, ABS_Y, -G_MAX, G_MAX, FUZZ, FLAT); + input_set_abs_params(input_dev, ABS_Z, -G_MAX, G_MAX, FUZZ, FLAT); + + input_dev->name = "kxtj9_accel"; + input_dev->id.bustype = BUS_I2C; + input_dev->dev.parent = &tj9->client->dev; +} + +static int __devinit kxtj9_setup_input_device(struct kxtj9_data *tj9) +{ + struct input_dev *input_dev; + int err; + + input_dev = 
input_allocate_device(); + if (!input_dev) { + dev_err(&tj9->client->dev, "input device allocate failed\n"); + return -ENOMEM; + } + + tj9->input_dev = input_dev; + + input_dev->open = kxtj9_input_open; + input_dev->close = kxtj9_input_close; + input_set_drvdata(input_dev, tj9); + + kxtj9_init_input_device(tj9, input_dev); + + err = input_register_device(tj9->input_dev); + if (err) { + dev_err(&tj9->client->dev, + "unable to register input polled device %s: %d\n", + tj9->input_dev->name, err); + input_free_device(tj9->input_dev); + return err; + } + + return 0; +} + +/* + * When IRQ mode is selected, we need to provide an interface to allow the user + * to change the output data rate of the part. For consistency, we are using + * the set_poll method, which accepts a poll interval in milliseconds, and then + * calls update_odr() while passing this value as an argument. In IRQ mode, the + * data outputs will not be read AT the requested poll interval, rather, the + * lowest ODR that can support the requested interval. The client application + * will be responsible for retrieving data from the input node at the desired + * interval. 
+ */ + +/* Returns currently selected poll interval (in ms) */ +static ssize_t kxtj9_get_poll(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct kxtj9_data *tj9 = i2c_get_clientdata(client); + + return sprintf(buf, "%d\n", tj9->last_poll_interval); +} + +/* Allow users to select a new poll interval (in ms) */ +static ssize_t kxtj9_set_poll(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct kxtj9_data *tj9 = i2c_get_clientdata(client); + struct input_dev *input_dev = tj9->input_dev; + unsigned int interval; + int error; + + error = kstrtouint(buf, 10, &interval); + if (error < 0) + return error; + + /* Lock the device to prevent races with open/close (and itself) */ + mutex_lock(&input_dev->mutex); + + disable_irq(client->irq); + + /* + * Set current interval to the greater of the minimum interval or + * the requested interval + */ + tj9->last_poll_interval = max(interval, tj9->pdata.min_interval); + + kxtj9_update_odr(tj9, tj9->last_poll_interval); + + enable_irq(client->irq); + mutex_unlock(&input_dev->mutex); + + return count; +} + +static DEVICE_ATTR(poll, S_IRUGO|S_IWUSR, kxtj9_get_poll, kxtj9_set_poll); + +static struct attribute *kxtj9_attributes[] = { + &dev_attr_poll.attr, + NULL +}; + +static struct attribute_group kxtj9_attribute_group = { + .attrs = kxtj9_attributes +}; + + +#ifdef CONFIG_INPUT_KXTJ9_POLLED_MODE +static void kxtj9_poll(struct input_polled_dev *dev) +{ + struct kxtj9_data *tj9 = dev->private; + unsigned int poll_interval = dev->poll_interval; + + kxtj9_report_acceleration_data(tj9); + + if (poll_interval != tj9->last_poll_interval) { + kxtj9_update_odr(tj9, poll_interval); + tj9->last_poll_interval = poll_interval; + } +} + +static void kxtj9_polled_input_open(struct input_polled_dev *dev) +{ + struct kxtj9_data *tj9 = dev->private; + + kxtj9_enable(tj9); +} + +static 
void kxtj9_polled_input_close(struct input_polled_dev *dev) +{ + struct kxtj9_data *tj9 = dev->private; + + kxtj9_disable(tj9); +} + +static int __devinit kxtj9_setup_polled_device(struct kxtj9_data *tj9) +{ + int err; + struct input_polled_dev *poll_dev; + poll_dev = input_allocate_polled_device(); + + if (!poll_dev) { + dev_err(&tj9->client->dev, + "Failed to allocate polled device\n"); + return -ENOMEM; + } + + tj9->poll_dev = poll_dev; + tj9->input_dev = poll_dev->input; + + poll_dev->private = tj9; + poll_dev->poll = kxtj9_poll; + poll_dev->open = kxtj9_polled_input_open; + poll_dev->close = kxtj9_polled_input_close; + + kxtj9_init_input_device(tj9, poll_dev->input); + + err = input_register_polled_device(poll_dev); + if (err) { + dev_err(&tj9->client->dev, + "Unable to register polled device, err=%d\n", err); + input_free_polled_device(poll_dev); + return err; + } + + return 0; +} + +static void __devexit kxtj9_teardown_polled_device(struct kxtj9_data *tj9) +{ + input_unregister_polled_device(tj9->poll_dev); + input_free_polled_device(tj9->poll_dev); +} + +#else + +static inline int kxtj9_setup_polled_device(struct kxtj9_data *tj9) +{ + return -ENOSYS; +} + +static inline void kxtj9_teardown_polled_device(struct kxtj9_data *tj9) +{ +} + +#endif + +static int __devinit kxtj9_verify(struct kxtj9_data *tj9) +{ + int retval; + + retval = kxtj9_device_power_on(tj9); + if (retval < 0) + return retval; + + retval = i2c_smbus_read_byte_data(tj9->client, WHO_AM_I); + if (retval < 0) { + dev_err(&tj9->client->dev, "read err int source\n"); + goto out; + } + + retval = retval != 0x06 ? 
-EIO : 0; + +out: + kxtj9_device_power_off(tj9); + return retval; +} + +static int __devinit kxtj9_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + const struct kxtj9_platform_data *pdata = client->dev.platform_data; + struct kxtj9_data *tj9; + int err; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_I2C | I2C_FUNC_SMBUS_BYTE_DATA)) { + dev_err(&client->dev, "client is not i2c capable\n"); + return -ENXIO; + } + + if (!pdata) { + dev_err(&client->dev, "platform data is NULL; exiting\n"); + return -EINVAL; + } + + tj9 = kzalloc(sizeof(*tj9), GFP_KERNEL); + if (!tj9) { + dev_err(&client->dev, + "failed to allocate memory for module data\n"); + return -ENOMEM; + } + + tj9->client = client; + tj9->pdata = *pdata; + + if (pdata->init) { + err = pdata->init(); + if (err < 0) + goto err_free_mem; + } + + err = kxtj9_verify(tj9); + if (err < 0) { + dev_err(&client->dev, "device not recognized\n"); + goto err_pdata_exit; + } + + i2c_set_clientdata(client, tj9); + + tj9->ctrl_reg1 = tj9->pdata.res_12bit | tj9->pdata.g_range; + tj9->data_ctrl = tj9->pdata.data_odr_init; + + if (client->irq) { + /* If in irq mode, populate INT_CTRL_REG1 and enable DRDY. 
*/ + tj9->int_ctrl |= KXTJ9_IEN | KXTJ9_IEA | KXTJ9_IEL; + tj9->ctrl_reg1 |= DRDYE; + + err = kxtj9_setup_input_device(tj9); + if (err) + goto err_pdata_exit; + + err = request_threaded_irq(client->irq, NULL, kxtj9_isr, + IRQF_TRIGGER_RISING | IRQF_ONESHOT, + "kxtj9-irq", tj9); + if (err) { + dev_err(&client->dev, "request irq failed: %d\n", err); + goto err_destroy_input; + } + + err = sysfs_create_group(&client->dev.kobj, &kxtj9_attribute_group); + if (err) { + dev_err(&client->dev, "sysfs create failed: %d\n", err); + goto err_free_irq; + } + + } else { + err = kxtj9_setup_polled_device(tj9); + if (err) + goto err_pdata_exit; + } + + return 0; + +err_free_irq: + free_irq(client->irq, tj9); +err_destroy_input: + input_unregister_device(tj9->input_dev); +err_pdata_exit: + if (tj9->pdata.exit) + tj9->pdata.exit(); +err_free_mem: + kfree(tj9); + return err; +} + +static int __devexit kxtj9_remove(struct i2c_client *client) +{ + struct kxtj9_data *tj9 = i2c_get_clientdata(client); + + if (client->irq) { + sysfs_remove_group(&client->dev.kobj, &kxtj9_attribute_group); + free_irq(client->irq, tj9); + input_unregister_device(tj9->input_dev); + } else { + kxtj9_teardown_polled_device(tj9); + } + + if (tj9->pdata.exit) + tj9->pdata.exit(); + + kfree(tj9); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int kxtj9_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct kxtj9_data *tj9 = i2c_get_clientdata(client); + struct input_dev *input_dev = tj9->input_dev; + + mutex_lock(&input_dev->mutex); + + if (input_dev->users) + kxtj9_disable(tj9); + + mutex_unlock(&input_dev->mutex); + return 0; +} + +static int kxtj9_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct kxtj9_data *tj9 = i2c_get_clientdata(client); + struct input_dev *input_dev = tj9->input_dev; + int retval = 0; + + mutex_lock(&input_dev->mutex); + + if (input_dev->users) + kxtj9_enable(tj9); + + mutex_unlock(&input_dev->mutex); + return 
retval; +} +#endif + +static SIMPLE_DEV_PM_OPS(kxtj9_pm_ops, kxtj9_suspend, kxtj9_resume); + +static const struct i2c_device_id kxtj9_id[] = { + { NAME, 0 }, + { }, +}; + +MODULE_DEVICE_TABLE(i2c, kxtj9_id); + +static struct i2c_driver kxtj9_driver = { + .driver = { + .name = NAME, + .owner = THIS_MODULE, + .pm = &kxtj9_pm_ops, + }, + .probe = kxtj9_probe, + .remove = __devexit_p(kxtj9_remove), + .id_table = kxtj9_id, +}; + +static int __init kxtj9_init(void) +{ + return i2c_add_driver(&kxtj9_driver); +} +module_init(kxtj9_init); + +static void __exit kxtj9_exit(void) +{ + i2c_del_driver(&kxtj9_driver); +} +module_exit(kxtj9_exit); + +MODULE_DESCRIPTION("KXTJ9 accelerometer driver"); +MODULE_AUTHOR("Chris Hudson <chudson@kionix.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/input/misc/mma8450.c b/drivers/input/misc/mma8450.c new file mode 100644 index 000000000000..20f8f9284f02 --- /dev/null +++ b/drivers/input/misc/mma8450.c @@ -0,0 +1,256 @@ +/* + * Driver for Freescale's 3-Axis Accelerometer MMA8450 + * + * Copyright (C) 2011 Freescale Semiconductor, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/i2c.h> +#include <linux/input-polldev.h> + +#define MMA8450_DRV_NAME "mma8450" + +#define MODE_CHANGE_DELAY_MS 100 +#define POLL_INTERVAL 100 +#define POLL_INTERVAL_MAX 500 + +/* register definitions */ +#define MMA8450_STATUS 0x00 +#define MMA8450_STATUS_ZXYDR 0x08 + +#define MMA8450_OUT_X8 0x01 +#define MMA8450_OUT_Y8 0x02 +#define MMA8450_OUT_Z8 0x03 + +#define MMA8450_OUT_X_LSB 0x05 +#define MMA8450_OUT_X_MSB 0x06 +#define MMA8450_OUT_Y_LSB 0x07 +#define MMA8450_OUT_Y_MSB 0x08 +#define MMA8450_OUT_Z_LSB 0x09 +#define MMA8450_OUT_Z_MSB 0x0a + +#define MMA8450_XYZ_DATA_CFG 0x16 + +#define MMA8450_CTRL_REG1 0x38 +#define MMA8450_CTRL_REG2 0x39 + +/* mma8450 status */ +struct mma8450 { + struct i2c_client *client; + struct input_polled_dev *idev; +}; + +static int mma8450_read(struct mma8450 *m, unsigned off) +{ + struct i2c_client *c = m->client; + int ret; + + ret = i2c_smbus_read_byte_data(c, off); + if (ret < 0) + dev_err(&c->dev, + "failed to read register 0x%02x, error %d\n", + off, ret); + + return ret; +} + +static int mma8450_write(struct mma8450 *m, unsigned off, u8 v) +{ + struct i2c_client *c = m->client; + int error; + + error = i2c_smbus_write_byte_data(c, off, v); + if (error < 0) { + dev_err(&c->dev, + "failed to write to register 0x%02x, error %d\n", + off, error); + return error; + } + + return 0; +} + +static int mma8450_read_xyz(struct mma8450 *m, int *x, int *y, int *z) +{ + struct i2c_client *c = m->client; + u8 buff[6]; + int err; + + err = i2c_smbus_read_i2c_block_data(c, MMA8450_OUT_X_LSB, 6, buff); + if (err < 0) { + dev_err(&c->dev, + "failed to read block data at 0x%02x, error %d\n", + MMA8450_OUT_X_LSB, err); + return err; + } + + *x = ((buff[1] << 4) & 0xff0) | (buff[0] & 0xf); + *y = ((buff[3] << 4) & 0xff0) | (buff[2] & 0xf); + *z = ((buff[5] << 4) & 0xff0) | (buff[4] & 0xf); + + return 0; +} + +static void 
mma8450_poll(struct input_polled_dev *dev) +{ + struct mma8450 *m = dev->private; + int x, y, z; + int ret; + int err; + + ret = mma8450_read(m, MMA8450_STATUS); + if (ret < 0) + return; + + if (!(ret & MMA8450_STATUS_ZXYDR)) + return; + + err = mma8450_read_xyz(m, &x, &y, &z); + if (err) + return; + + input_report_abs(dev->input, ABS_X, x); + input_report_abs(dev->input, ABS_Y, y); + input_report_abs(dev->input, ABS_Z, z); + input_sync(dev->input); +} + +/* Initialize the MMA8450 chip */ +static void mma8450_open(struct input_polled_dev *dev) +{ + struct mma8450 *m = dev->private; + int err; + + /* enable all events from X/Y/Z, no FIFO */ + err = mma8450_write(m, MMA8450_XYZ_DATA_CFG, 0x07); + if (err) + return; + + /* + * Sleep mode poll rate - 50Hz + * System output data rate - 400Hz + * Full scale selection - Active, +/- 2G + */ + err = mma8450_write(m, MMA8450_CTRL_REG1, 0x01); + if (err < 0) + return; + + msleep(MODE_CHANGE_DELAY_MS); +} + +static void mma8450_close(struct input_polled_dev *dev) +{ + struct mma8450 *m = dev->private; + + mma8450_write(m, MMA8450_CTRL_REG1, 0x00); + mma8450_write(m, MMA8450_CTRL_REG2, 0x01); +} + +/* + * I2C init/probing/exit functions + */ +static int __devinit mma8450_probe(struct i2c_client *c, + const struct i2c_device_id *id) +{ + struct input_polled_dev *idev; + struct mma8450 *m; + int err; + + m = kzalloc(sizeof(struct mma8450), GFP_KERNEL); + idev = input_allocate_polled_device(); + if (!m || !idev) { + err = -ENOMEM; + goto err_free_mem; + } + + m->client = c; + m->idev = idev; + + idev->private = m; + idev->input->name = MMA8450_DRV_NAME; + idev->input->id.bustype = BUS_I2C; + idev->poll = mma8450_poll; + idev->poll_interval = POLL_INTERVAL; + idev->poll_interval_max = POLL_INTERVAL_MAX; + idev->open = mma8450_open; + idev->close = mma8450_close; + + __set_bit(EV_ABS, idev->input->evbit); + input_set_abs_params(idev->input, ABS_X, -2048, 2047, 32, 32); + input_set_abs_params(idev->input, ABS_Y, -2048, 2047, 32, 32); 
+ input_set_abs_params(idev->input, ABS_Z, -2048, 2047, 32, 32); + + err = input_register_polled_device(idev); + if (err) { + dev_err(&c->dev, "failed to register polled input device\n"); + goto err_free_mem; + } + + return 0; + +err_free_mem: + input_free_polled_device(idev); + kfree(m); + return err; +} + +static int __devexit mma8450_remove(struct i2c_client *c) +{ + struct mma8450 *m = i2c_get_clientdata(c); + struct input_polled_dev *idev = m->idev; + + input_unregister_polled_device(idev); + input_free_polled_device(idev); + kfree(m); + + return 0; +} + +static const struct i2c_device_id mma8450_id[] = { + { MMA8450_DRV_NAME, 0 }, + { }, +}; +MODULE_DEVICE_TABLE(i2c, mma8450_id); + +static struct i2c_driver mma8450_driver = { + .driver = { + .name = MMA8450_DRV_NAME, + .owner = THIS_MODULE, + }, + .probe = mma8450_probe, + .remove = __devexit_p(mma8450_remove), + .id_table = mma8450_id, +}; + +static int __init mma8450_init(void) +{ + return i2c_add_driver(&mma8450_driver); +} +module_init(mma8450_init); + +static void __exit mma8450_exit(void) +{ + i2c_del_driver(&mma8450_driver); +} +module_exit(mma8450_exit); + +MODULE_AUTHOR("Freescale Semiconductor, Inc."); +MODULE_DESCRIPTION("MMA8450 3-Axis Accelerometer Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/input/misc/mpu3050.c b/drivers/input/misc/mpu3050.c new file mode 100644 index 000000000000..b95fac15b2ea --- /dev/null +++ b/drivers/input/misc/mpu3050.c @@ -0,0 +1,376 @@ +/* + * MPU3050 Tri-axis gyroscope driver + * + * Copyright (C) 2011 Wistron Co.Ltd + * Joseph Lai <joseph_lai@wistron.com> + * + * Trimmed down by Alan Cox <alan@linux.intel.com> to produce this version + * + * This is a 'lite' version of the driver, while we consider the right way + * to present the other features to user space. In particular it requires the + * device has an IRQ, and it only provides an input interface, so is not much + * use for device orientation. A fuller version is available from the Meego + * tree. 
+ * + * This program is based on bma023.c. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/mutex.h> +#include <linux/err.h> +#include <linux/i2c.h> +#include <linux/input.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/pm_runtime.h> + +#define MPU3050_CHIP_ID_REG 0x00 +#define MPU3050_CHIP_ID 0x69 +#define MPU3050_XOUT_H 0x1D +#define MPU3050_PWR_MGM 0x3E +#define MPU3050_PWR_MGM_POS 6 +#define MPU3050_PWR_MGM_MASK 0x40 + +#define MPU3050_AUTO_DELAY 1000 + +#define MPU3050_MIN_VALUE -32768 +#define MPU3050_MAX_VALUE 32767 + +struct axis_data { + s16 x; + s16 y; + s16 z; +}; + +struct mpu3050_sensor { + struct i2c_client *client; + struct device *dev; + struct input_dev *idev; +}; + +/** + * mpu3050_xyz_read_reg - read the axes values + * @buffer: provide register addr and get register + * @length: length of register + * + * Reads the register values in one transaction or returns a negative + * error code on failure. + */ +static int mpu3050_xyz_read_reg(struct i2c_client *client, + u8 *buffer, int length) +{ + /* + * Annoying we can't make this const because the i2c layer doesn't + * declare input buffers const. 
+ */ + char cmd = MPU3050_XOUT_H; + struct i2c_msg msg[] = { + { + .addr = client->addr, + .flags = 0, + .len = 1, + .buf = &cmd, + }, + { + .addr = client->addr, + .flags = I2C_M_RD, + .len = length, + .buf = buffer, + }, + }; + + return i2c_transfer(client->adapter, msg, 2); +} + +/** + * mpu3050_read_xyz - get co-ordinates from device + * @client: i2c address of sensor + * @coords: co-ordinates to update + * + * Return the converted X Y and Z co-ordinates from the sensor device + */ +static void mpu3050_read_xyz(struct i2c_client *client, + struct axis_data *coords) +{ + u16 buffer[3]; + + mpu3050_xyz_read_reg(client, (u8 *)buffer, 6); + coords->x = be16_to_cpu(buffer[0]); + coords->y = be16_to_cpu(buffer[1]); + coords->z = be16_to_cpu(buffer[2]); + dev_dbg(&client->dev, "%s: x %d, y %d, z %d\n", __func__, + coords->x, coords->y, coords->z); +} + +/** + * mpu3050_set_power_mode - set the power mode + * @client: i2c client for the sensor + * @val: value to switch on/off of power, 1: normal power, 0: low power + * + * Put device to normal-power mode or low-power mode. + */ +static void mpu3050_set_power_mode(struct i2c_client *client, u8 val) +{ + u8 value; + + value = i2c_smbus_read_byte_data(client, MPU3050_PWR_MGM); + value = (value & ~MPU3050_PWR_MGM_MASK) | + (((val << MPU3050_PWR_MGM_POS) & MPU3050_PWR_MGM_MASK) ^ + MPU3050_PWR_MGM_MASK); + i2c_smbus_write_byte_data(client, MPU3050_PWR_MGM, value); +} + +/** + * mpu3050_input_open - called on input event open + * @input: input dev of opened device + * + * The input layer calls this function when input event is opened. The + * function will push the device to resume. Then, the device is ready + * to provide data. 
+ */ +static int mpu3050_input_open(struct input_dev *input) +{ + struct mpu3050_sensor *sensor = input_get_drvdata(input); + + pm_runtime_get(sensor->dev); + + return 0; +} + +/** + * mpu3050_input_close - called on input event close + * @input: input dev of closed device + * + * The input layer calls this function when input event is closed. The + * function will push the device to suspend. + */ +static void mpu3050_input_close(struct input_dev *input) +{ + struct mpu3050_sensor *sensor = input_get_drvdata(input); + + pm_runtime_put(sensor->dev); +} + +/** + * mpu3050_interrupt_thread - handle an IRQ + * @irq: interrupt numner + * @data: the sensor + * + * Called by the kernel single threaded after an interrupt occurs. Read + * the sensor data and generate an input event for it. + */ +static irqreturn_t mpu3050_interrupt_thread(int irq, void *data) +{ + struct mpu3050_sensor *sensor = data; + struct axis_data axis; + + mpu3050_read_xyz(sensor->client, &axis); + + input_report_abs(sensor->idev, ABS_X, axis.x); + input_report_abs(sensor->idev, ABS_Y, axis.y); + input_report_abs(sensor->idev, ABS_Z, axis.z); + input_sync(sensor->idev); + + return IRQ_HANDLED; +} + +/** + * mpu3050_probe - device detection callback + * @client: i2c client of found device + * @id: id match information + * + * The I2C layer calls us when it believes a sensor is present at this + * address. Probe to see if this is correct and to validate the device. + * + * If present install the relevant sysfs interfaces and input device. 
+ */ +static int __devinit mpu3050_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct mpu3050_sensor *sensor; + struct input_dev *idev; + int ret; + int error; + + sensor = kzalloc(sizeof(struct mpu3050_sensor), GFP_KERNEL); + idev = input_allocate_device(); + if (!sensor || !idev) { + dev_err(&client->dev, "failed to allocate driver data\n"); + error = -ENOMEM; + goto err_free_mem; + } + + sensor->client = client; + sensor->dev = &client->dev; + sensor->idev = idev; + + mpu3050_set_power_mode(client, 1); + msleep(10); + + ret = i2c_smbus_read_byte_data(client, MPU3050_CHIP_ID_REG); + if (ret < 0) { + dev_err(&client->dev, "failed to detect device\n"); + error = -ENXIO; + goto err_free_mem; + } + + if (ret != MPU3050_CHIP_ID) { + dev_err(&client->dev, "unsupported chip id\n"); + error = -ENXIO; + goto err_free_mem; + } + + idev->name = "MPU3050"; + idev->id.bustype = BUS_I2C; + idev->dev.parent = &client->dev; + + idev->open = mpu3050_input_open; + idev->close = mpu3050_input_close; + + __set_bit(EV_ABS, idev->evbit); + input_set_abs_params(idev, ABS_X, + MPU3050_MIN_VALUE, MPU3050_MAX_VALUE, 0, 0); + input_set_abs_params(idev, ABS_Y, + MPU3050_MIN_VALUE, MPU3050_MAX_VALUE, 0, 0); + input_set_abs_params(idev, ABS_Z, + MPU3050_MIN_VALUE, MPU3050_MAX_VALUE, 0, 0); + + input_set_drvdata(idev, sensor); + + pm_runtime_set_active(&client->dev); + + error = request_threaded_irq(client->irq, + NULL, mpu3050_interrupt_thread, + IRQF_TRIGGER_RISING, + "mpu_int", sensor); + if (error) { + dev_err(&client->dev, + "can't get IRQ %d, error %d\n", client->irq, error); + goto err_pm_set_suspended; + } + + error = input_register_device(idev); + if (error) { + dev_err(&client->dev, "failed to register input device\n"); + goto err_free_irq; + } + + pm_runtime_enable(&client->dev); + pm_runtime_set_autosuspend_delay(&client->dev, MPU3050_AUTO_DELAY); + + return 0; + +err_free_irq: + free_irq(client->irq, sensor); +err_pm_set_suspended: + 
pm_runtime_set_suspended(&client->dev); +err_free_mem: + input_unregister_device(idev); + kfree(sensor); + return error; +} + +/** + * mpu3050_remove - remove a sensor + * @client: i2c client of sensor being removed + * + * Our sensor is going away, clean up the resources. + */ +static int __devexit mpu3050_remove(struct i2c_client *client) +{ + struct mpu3050_sensor *sensor = i2c_get_clientdata(client); + + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); + + free_irq(client->irq, sensor); + input_unregister_device(sensor->idev); + kfree(sensor); + + return 0; +} + +#ifdef CONFIG_PM +/** + * mpu3050_suspend - called on device suspend + * @dev: device being suspended + * + * Put the device into sleep mode before we suspend the machine. + */ +static int mpu3050_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + mpu3050_set_power_mode(client, 0); + + return 0; +} + +/** + * mpu3050_resume - called on device resume + * @dev: device being resumed + * + * Put the device into powered mode on resume. 
+ */ +static int mpu3050_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + mpu3050_set_power_mode(client, 1); + msleep(100); /* wait for gyro chip resume */ + + return 0; +} +#endif + +static UNIVERSAL_DEV_PM_OPS(mpu3050_pm, mpu3050_suspend, mpu3050_resume, NULL); + +static const struct i2c_device_id mpu3050_ids[] = { + { "mpu3050", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, mpu3050_ids); + +static struct i2c_driver mpu3050_i2c_driver = { + .driver = { + .name = "mpu3050", + .owner = THIS_MODULE, + .pm = &mpu3050_pm, + }, + .probe = mpu3050_probe, + .remove = __devexit_p(mpu3050_remove), + .id_table = mpu3050_ids, +}; + +static int __init mpu3050_init(void) +{ + return i2c_add_driver(&mpu3050_i2c_driver); +} +module_init(mpu3050_init); + +static void __exit mpu3050_exit(void) +{ + i2c_del_driver(&mpu3050_i2c_driver); +} +module_exit(mpu3050_exit); + +MODULE_AUTHOR("Wistron Corp."); +MODULE_DESCRIPTION("MPU3050 Tri-axis gyroscope driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c index 62bae99424e6..ad2e51c04db8 100644 --- a/drivers/input/misc/xen-kbdfront.c +++ b/drivers/input/misc/xen-kbdfront.c @@ -373,7 +373,7 @@ static struct xenbus_driver xenkbd_driver = { static int __init xenkbd_init(void) { - if (!xen_pv_domain()) + if (!xen_domain()) return -ENODEV; /* Nothing to do if running in dom0. 
*/ diff --git a/drivers/input/mouse/gpio_mouse.c b/drivers/input/mouse/gpio_mouse.c index 7b6ce178f1b6..58902fbb9896 100644 --- a/drivers/input/mouse/gpio_mouse.c +++ b/drivers/input/mouse/gpio_mouse.c @@ -191,7 +191,7 @@ static void __exit gpio_mouse_exit(void) } module_exit(gpio_mouse_exit); -MODULE_AUTHOR("Hans-Christian Egtvedt <hcegtvedt@atmel.com>"); +MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>"); MODULE_DESCRIPTION("GPIO mouse driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:gpio_mouse"); /* work with hotplug and coldplug */ diff --git a/drivers/input/mouse/lifebook.c b/drivers/input/mouse/lifebook.c index c31ad11df6bb..83bcaba96b89 100644 --- a/drivers/input/mouse/lifebook.c +++ b/drivers/input/mouse/lifebook.c @@ -33,7 +33,7 @@ static const char *desired_serio_phys; static int lifebook_limit_serio3(const struct dmi_system_id *d) { desired_serio_phys = "isa0060/serio3"; - return 0; + return 1; } static bool lifebook_use_6byte_proto; @@ -41,7 +41,7 @@ static bool lifebook_use_6byte_proto; static int lifebook_set_6byte_proto(const struct dmi_system_id *d) { lifebook_use_6byte_proto = true; - return 0; + return 1; } static const struct dmi_system_id __initconst lifebook_dmi_table[] = { diff --git a/drivers/input/mouse/pxa930_trkball.c b/drivers/input/mouse/pxa930_trkball.c index 943cfec15665..6c5d84fcdea1 100644 --- a/drivers/input/mouse/pxa930_trkball.c +++ b/drivers/input/mouse/pxa930_trkball.c @@ -12,7 +12,6 @@ #include <linux/init.h> #include <linux/input.h> -#include <linux/version.h> #include <linux/interrupt.h> #include <linux/module.h> #include <linux/platform_device.h> diff --git a/drivers/input/mouse/sentelic.c b/drivers/input/mouse/sentelic.c index 1242775fee19..2fc887a51066 100644 --- a/drivers/input/mouse/sentelic.c +++ b/drivers/input/mouse/sentelic.c @@ -20,7 +20,6 @@ */ #include <linux/module.h> -#include <linux/version.h> #include <linux/input.h> #include <linux/ctype.h> #include <linux/libps2.h> diff --git 
a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index e06e045bf907..5538fc657af1 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -207,27 +207,37 @@ static int synaptics_identify(struct psmouse *psmouse) static int synaptics_resolution(struct psmouse *psmouse) { struct synaptics_data *priv = psmouse->private; - unsigned char res[3]; - unsigned char max[3]; + unsigned char resp[3]; if (SYN_ID_MAJOR(priv->identity) < 4) return 0; - if (synaptics_send_cmd(psmouse, SYN_QUE_RESOLUTION, res) == 0) { - if (res[0] != 0 && (res[1] & 0x80) && res[2] != 0) { - priv->x_res = res[0]; /* x resolution in units/mm */ - priv->y_res = res[2]; /* y resolution in units/mm */ + if (synaptics_send_cmd(psmouse, SYN_QUE_RESOLUTION, resp) == 0) { + if (resp[0] != 0 && (resp[1] & 0x80) && resp[2] != 0) { + priv->x_res = resp[0]; /* x resolution in units/mm */ + priv->y_res = resp[2]; /* y resolution in units/mm */ } } if (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 5 && SYN_CAP_MAX_DIMENSIONS(priv->ext_cap_0c)) { - if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_DIMENSIONS, max)) { - printk(KERN_ERR "Synaptics claims to have dimensions query," - " but I'm not able to read it.\n"); + if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_MAX_COORDS, resp)) { + printk(KERN_ERR "Synaptics claims to have max coordinates" + " query, but I'm not able to read it.\n"); + } else { + priv->x_max = (resp[0] << 5) | ((resp[1] & 0x0f) << 1); + priv->y_max = (resp[2] << 5) | ((resp[1] & 0xf0) >> 3); + } + } + + if (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 7 && + SYN_CAP_MIN_DIMENSIONS(priv->ext_cap_0c)) { + if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_MIN_COORDS, resp)) { + printk(KERN_ERR "Synaptics claims to have min coordinates" + " query, but I'm not able to read it.\n"); } else { - priv->x_max = (max[0] << 5) | ((max[1] & 0x0f) << 1); - priv->y_max = (max[2] << 5) | ((max[1] & 0xf0) >> 3); + priv->x_min = (resp[0] << 5) | ((resp[1] & 0x0f) << 1); + 
priv->y_min = (resp[2] << 5) | ((resp[1] & 0xf0) >> 3); } } @@ -406,26 +416,10 @@ static int synaptics_parse_hw_state(const unsigned char buf[], memset(hw, 0, sizeof(struct synaptics_hw_state)); if (SYN_MODEL_NEWABS(priv->model_id)) { - hw->x = (((buf[3] & 0x10) << 8) | - ((buf[1] & 0x0f) << 8) | - buf[4]); - hw->y = (((buf[3] & 0x20) << 7) | - ((buf[1] & 0xf0) << 4) | - buf[5]); - - hw->z = buf[2]; hw->w = (((buf[0] & 0x30) >> 2) | ((buf[0] & 0x04) >> 1) | ((buf[3] & 0x04) >> 2)); - if (SYN_CAP_ADV_GESTURE(priv->ext_cap_0c) && hw->w == 2) { - /* Gesture packet: (x, y, z) at half resolution */ - priv->mt.x = (((buf[4] & 0x0f) << 8) | buf[1]) << 1; - priv->mt.y = (((buf[4] & 0xf0) << 4) | buf[2]) << 1; - priv->mt.z = ((buf[3] & 0x30) | (buf[5] & 0x0f)) << 1; - return 1; - } - hw->left = (buf[0] & 0x01) ? 1 : 0; hw->right = (buf[0] & 0x02) ? 1 : 0; @@ -448,6 +442,22 @@ static int synaptics_parse_hw_state(const unsigned char buf[], hw->down = ((buf[0] ^ buf[3]) & 0x02) ? 1 : 0; } + if (SYN_CAP_ADV_GESTURE(priv->ext_cap_0c) && hw->w == 2) { + /* Gesture packet: (x, y, z) at half resolution */ + priv->mt.x = (((buf[4] & 0x0f) << 8) | buf[1]) << 1; + priv->mt.y = (((buf[4] & 0xf0) << 4) | buf[2]) << 1; + priv->mt.z = ((buf[3] & 0x30) | (buf[5] & 0x0f)) << 1; + return 1; + } + + hw->x = (((buf[3] & 0x10) << 8) | + ((buf[1] & 0x0f) << 8) | + buf[4]); + hw->y = (((buf[3] & 0x20) << 7) | + ((buf[1] & 0xf0) << 4) | + buf[5]); + hw->z = buf[2]; + if (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) && ((buf[0] ^ buf[3]) & 0x02)) { switch (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) & ~0x01) { @@ -485,7 +495,8 @@ static int synaptics_parse_hw_state(const unsigned char buf[], return 0; } -static void set_slot(struct input_dev *dev, int slot, bool active, int x, int y) +static void synaptics_report_semi_mt_slot(struct input_dev *dev, int slot, + bool active, int x, int y) { input_mt_slot(dev, slot); input_mt_report_slot_state(dev, MT_TOOL_FINGER, active); @@ -502,14 +513,16 @@ static void 
synaptics_report_semi_mt_data(struct input_dev *dev, int num_fingers) { if (num_fingers >= 2) { - set_slot(dev, 0, true, min(a->x, b->x), min(a->y, b->y)); - set_slot(dev, 1, true, max(a->x, b->x), max(a->y, b->y)); + synaptics_report_semi_mt_slot(dev, 0, true, min(a->x, b->x), + min(a->y, b->y)); + synaptics_report_semi_mt_slot(dev, 1, true, max(a->x, b->x), + max(a->y, b->y)); } else if (num_fingers == 1) { - set_slot(dev, 0, true, a->x, a->y); - set_slot(dev, 1, false, 0, 0); + synaptics_report_semi_mt_slot(dev, 0, true, a->x, a->y); + synaptics_report_semi_mt_slot(dev, 1, false, 0, 0); } else { - set_slot(dev, 0, false, 0, 0); - set_slot(dev, 1, false, 0, 0); + synaptics_report_semi_mt_slot(dev, 0, false, 0, 0); + synaptics_report_semi_mt_slot(dev, 1, false, 0, 0); } } @@ -684,23 +697,36 @@ static psmouse_ret_t synaptics_process_byte(struct psmouse *psmouse) static void set_input_params(struct input_dev *dev, struct synaptics_data *priv) { int i; + int fuzz = SYN_CAP_REDUCED_FILTERING(priv->ext_cap_0c) ? 
+ SYN_REDUCED_FILTER_FUZZ : 0; __set_bit(INPUT_PROP_POINTER, dev->propbit); __set_bit(EV_ABS, dev->evbit); input_set_abs_params(dev, ABS_X, - XMIN_NOMINAL, priv->x_max ?: XMAX_NOMINAL, 0, 0); + priv->x_min ?: XMIN_NOMINAL, + priv->x_max ?: XMAX_NOMINAL, + fuzz, 0); input_set_abs_params(dev, ABS_Y, - YMIN_NOMINAL, priv->y_max ?: YMAX_NOMINAL, 0, 0); + priv->y_min ?: YMIN_NOMINAL, + priv->y_max ?: YMAX_NOMINAL, + fuzz, 0); input_set_abs_params(dev, ABS_PRESSURE, 0, 255, 0, 0); if (SYN_CAP_ADV_GESTURE(priv->ext_cap_0c)) { __set_bit(INPUT_PROP_SEMI_MT, dev->propbit); input_mt_init_slots(dev, 2); - input_set_abs_params(dev, ABS_MT_POSITION_X, XMIN_NOMINAL, - priv->x_max ?: XMAX_NOMINAL, 0, 0); - input_set_abs_params(dev, ABS_MT_POSITION_Y, YMIN_NOMINAL, - priv->y_max ?: YMAX_NOMINAL, 0, 0); + input_set_abs_params(dev, ABS_MT_POSITION_X, + priv->x_min ?: XMIN_NOMINAL, + priv->x_max ?: XMAX_NOMINAL, + fuzz, 0); + input_set_abs_params(dev, ABS_MT_POSITION_Y, + priv->y_min ?: YMIN_NOMINAL, + priv->y_max ?: YMAX_NOMINAL, + fuzz, 0); + + input_abs_set_res(dev, ABS_MT_POSITION_X, priv->x_res); + input_abs_set_res(dev, ABS_MT_POSITION_Y, priv->y_res); } if (SYN_CAP_PALMDETECT(priv->capabilities)) @@ -971,4 +997,3 @@ bool synaptics_supported(void) } #endif /* CONFIG_MOUSE_PS2_SYNAPTICS */ - diff --git a/drivers/input/mouse/synaptics.h b/drivers/input/mouse/synaptics.h index 7453938bf5ef..ca040aa80fa7 100644 --- a/drivers/input/mouse/synaptics.h +++ b/drivers/input/mouse/synaptics.h @@ -19,7 +19,8 @@ #define SYN_QUE_RESOLUTION 0x08 #define SYN_QUE_EXT_CAPAB 0x09 #define SYN_QUE_EXT_CAPAB_0C 0x0c -#define SYN_QUE_EXT_DIMENSIONS 0x0d +#define SYN_QUE_EXT_MAX_COORDS 0x0d +#define SYN_QUE_EXT_MIN_COORDS 0x0f /* synatics modes */ #define SYN_BIT_ABSOLUTE_MODE (1 << 7) @@ -66,18 +67,21 @@ * 1 0x60 multifinger mode identifies firmware finger counting * (not reporting!) algorithm. 
* Not particularly meaningful - * 1 0x80 covered pad W clipped to 14, 15 == pad mostly covered - * 2 0x01 clickpad bit 1 2-button ClickPad - * 2 0x02 deluxe LED controls touchpad support LED commands + * 1 0x80 covered pad W clipped to 14, 15 == pad mostly covered + * 2 0x01 clickpad bit 1 2-button ClickPad + * 2 0x02 deluxe LED controls touchpad support LED commands * ala multimedia control bar * 2 0x04 reduced filtering firmware does less filtering on * position data, driver should watch * for noise. + * 2 0x20 report min query 0x0f gives min coord reported */ #define SYN_CAP_CLICKPAD(ex0c) ((ex0c) & 0x100000) /* 1-button ClickPad */ #define SYN_CAP_CLICKPAD2BTN(ex0c) ((ex0c) & 0x000100) /* 2-button ClickPad */ #define SYN_CAP_MAX_DIMENSIONS(ex0c) ((ex0c) & 0x020000) +#define SYN_CAP_MIN_DIMENSIONS(ex0c) ((ex0c) & 0x002000) #define SYN_CAP_ADV_GESTURE(ex0c) ((ex0c) & 0x080000) +#define SYN_CAP_REDUCED_FILTERING(ex0c) ((ex0c) & 0x000400) /* synaptics modes query bits */ #define SYN_MODE_ABSOLUTE(m) ((m) & (1 << 7)) @@ -104,6 +108,9 @@ #define SYN_NEWABS_RELAXED 2 #define SYN_OLDABS 3 +/* amount to fuzz position data when touchpad reports reduced filtering */ +#define SYN_REDUCED_FILTER_FUZZ 8 + /* * A structure to describe the state of the touchpad hardware (buttons and pad) */ @@ -130,7 +137,8 @@ struct synaptics_data { unsigned long int ext_cap_0c; /* Ext Caps from 0x0c query */ unsigned long int identity; /* Identification */ unsigned int x_res, y_res; /* X/Y resolution in units/mm */ - unsigned int x_max, y_max; /* Max dimensions (from FW) */ + unsigned int x_max, y_max; /* Max coordinates (from FW) */ + unsigned int x_min, y_min; /* Min coordinates (from FW) */ unsigned char pkt_type; /* packet type - old, new, etc */ unsigned char mode; /* current mode byte */ diff --git a/drivers/input/serio/at32psif.c b/drivers/input/serio/at32psif.c index 6ee8f0ddad51..95280f9207e1 100644 --- a/drivers/input/serio/at32psif.c +++ b/drivers/input/serio/at32psif.c @@ -372,6 
+372,6 @@ static void __exit psif_exit(void) module_init(psif_init); module_exit(psif_exit); -MODULE_AUTHOR("Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>"); +MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>"); MODULE_DESCRIPTION("Atmel AVR32 PSIF PS/2 driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index 42206205e4f5..979c443bf1ef 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -795,7 +795,7 @@ int hp_sdc_release_cooked_irq(hp_sdc_irqhook *callback) /************************* Keepalive timer task *********************/ -void hp_sdc_kicker (unsigned long data) +static void hp_sdc_kicker(unsigned long data) { tasklet_schedule(&hp_sdc.task); /* Re-insert the periodic task. */ diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c index 0a619c558bfb..6d89fd1842c3 100644 --- a/drivers/input/tablet/aiptek.c +++ b/drivers/input/tablet/aiptek.c @@ -225,7 +225,6 @@ /* toolMode codes */ #define AIPTEK_TOOL_BUTTON_PEN_MODE BTN_TOOL_PEN -#define AIPTEK_TOOL_BUTTON_PEN_MODE BTN_TOOL_PEN #define AIPTEK_TOOL_BUTTON_PENCIL_MODE BTN_TOOL_PENCIL #define AIPTEK_TOOL_BUTTON_BRUSH_MODE BTN_TOOL_BRUSH #define AIPTEK_TOOL_BUTTON_AIRBRUSH_MODE BTN_TOOL_AIRBRUSH diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index 08ba5ad9c9be..03ebcc8b24b5 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -15,6 +15,7 @@ #include "wacom_wac.h" #include "wacom.h" #include <linux/input/mt.h> +#include <linux/hid.h> /* resolution for penabled devices */ #define WACOM_PL_RES 20 @@ -264,6 +265,7 @@ static int wacom_graphire_irq(struct wacom_wac *wacom) wacom->id[0] = 0; input_report_abs(input, ABS_MISC, wacom->id[0]); /* report tool id */ input_report_key(input, wacom->tool[0], prox); + input_event(input, EV_MSC, MSC_SERIAL, 1); input_sync(input); /* sync last event */ } @@ -273,11 +275,10 @@ static int 
wacom_graphire_irq(struct wacom_wac *wacom) prox = data[7] & 0xf8; if (prox || wacom->id[1]) { wacom->id[1] = PAD_DEVICE_ID; - input_report_key(input, BTN_0, (data[7] & 0x40)); - input_report_key(input, BTN_4, (data[7] & 0x80)); + input_report_key(input, BTN_BACK, (data[7] & 0x40)); + input_report_key(input, BTN_FORWARD, (data[7] & 0x80)); rw = ((data[7] & 0x18) >> 3) - ((data[7] & 0x20) >> 3); input_report_rel(input, REL_WHEEL, rw); - input_report_key(input, BTN_TOOL_FINGER, 0xf0); if (!prox) wacom->id[1] = 0; input_report_abs(input, ABS_MISC, wacom->id[1]); @@ -290,18 +291,17 @@ static int wacom_graphire_irq(struct wacom_wac *wacom) prox = (data[7] & 0xf8) || data[8]; if (prox || wacom->id[1]) { wacom->id[1] = PAD_DEVICE_ID; - input_report_key(input, BTN_0, (data[7] & 0x08)); - input_report_key(input, BTN_1, (data[7] & 0x20)); - input_report_key(input, BTN_4, (data[7] & 0x10)); - input_report_key(input, BTN_5, (data[7] & 0x40)); + input_report_key(input, BTN_BACK, (data[7] & 0x08)); + input_report_key(input, BTN_LEFT, (data[7] & 0x20)); + input_report_key(input, BTN_FORWARD, (data[7] & 0x10)); + input_report_key(input, BTN_RIGHT, (data[7] & 0x40)); input_report_abs(input, ABS_WHEEL, (data[8] & 0x7f)); - input_report_key(input, BTN_TOOL_FINGER, 0xf0); if (!prox) wacom->id[1] = 0; input_report_abs(input, ABS_MISC, wacom->id[1]); input_event(input, EV_MSC, MSC_SERIAL, 0xf0); + retval = 1; } - retval = 1; break; } exit: @@ -494,10 +494,6 @@ static int wacom_intuos_irq(struct wacom_wac *wacom) /* pad packets. 
Works as a second tool and is always in prox */ if (data[0] == WACOM_REPORT_INTUOSPAD) { - /* initiate the pad as a device */ - if (wacom->tool[1] != BTN_TOOL_FINGER) - wacom->tool[1] = BTN_TOOL_FINGER; - if (features->type >= INTUOS4S && features->type <= INTUOS4L) { input_report_key(input, BTN_0, (data[2] & 0x01)); input_report_key(input, BTN_1, (data[3] & 0x01)); @@ -1080,18 +1076,14 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev, switch (wacom_wac->features.type) { case WACOM_MO: - __set_bit(BTN_1, input_dev->keybit); - __set_bit(BTN_5, input_dev->keybit); - input_set_abs_params(input_dev, ABS_WHEEL, 0, 71, 0, 0); /* fall through */ case WACOM_G4: input_set_capability(input_dev, EV_MSC, MSC_SERIAL); - __set_bit(BTN_TOOL_FINGER, input_dev->keybit); - __set_bit(BTN_0, input_dev->keybit); - __set_bit(BTN_4, input_dev->keybit); + __set_bit(BTN_BACK, input_dev->keybit); + __set_bit(BTN_FORWARD, input_dev->keybit); /* fall through */ case GRAPHIRE: @@ -1127,10 +1119,12 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev, case CINTIQ: for (i = 0; i < 8; i++) __set_bit(BTN_0 + i, input_dev->keybit); - __set_bit(BTN_TOOL_FINGER, input_dev->keybit); - input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0); - input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0); + if (wacom_wac->features.type != WACOM_21UX2) { + input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0); + input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0); + } + input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0); wacom_setup_cintiq(wacom_wac); break; @@ -1151,8 +1145,6 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev, __set_bit(BTN_2, input_dev->keybit); __set_bit(BTN_3, input_dev->keybit); - __set_bit(BTN_TOOL_FINGER, input_dev->keybit); - input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0); input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0); /* fall through */ @@ -1170,7 +1162,6 @@ void wacom_setup_input_capabilities(struct input_dev 
*input_dev, case INTUOS4S: for (i = 0; i < 7; i++) __set_bit(BTN_0 + i, input_dev->keybit); - __set_bit(BTN_TOOL_FINGER, input_dev->keybit); input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0); wacom_setup_intuos(wacom_wac); @@ -1295,6 +1286,12 @@ static const struct wacom_features wacom_features_0x65 = static const struct wacom_features wacom_features_0x69 = { "Wacom Bamboo1", WACOM_PKGLEN_GRAPHIRE, 5104, 3712, 511, 63, GRAPHIRE, WACOM_PENPRTN_RES, WACOM_PENPRTN_RES }; +static const struct wacom_features wacom_features_0x6A = + { "Wacom Bamboo1 4x6", WACOM_PKGLEN_GRAPHIRE, 14760, 9225, 1023, + 63, GRAPHIRE, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; +static const struct wacom_features wacom_features_0x6B = + { "Wacom Bamboo1 5x8", WACOM_PKGLEN_GRAPHIRE, 21648, 13530, 1023, + 63, GRAPHIRE, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0x20 = { "Wacom Intuos 4x5", WACOM_PKGLEN_INTUOS, 12700, 10600, 1023, 31, INTUOS, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; @@ -1427,6 +1424,9 @@ static const struct wacom_features wacom_features_0x90 = static const struct wacom_features wacom_features_0x93 = { "Wacom ISDv4 93", WACOM_PKGLEN_GRAPHIRE, 26202, 16325, 255, 0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; +static const struct wacom_features wacom_features_0x97 = + { "Wacom ISDv4 97", WACOM_PKGLEN_GRAPHIRE, 26202, 16325, 511, + 0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0x9A = { "Wacom ISDv4 9A", WACOM_PKGLEN_GRAPHIRE, 26202, 16325, 255, 0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; @@ -1458,7 +1458,7 @@ static const struct wacom_features wacom_features_0xD3 = { "Wacom Bamboo 2FG 6x8", WACOM_PKGLEN_BBFUN, 21648, 13530, 1023, 63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0xD4 = - { "Wacom Bamboo Pen", WACOM_PKGLEN_BBFUN, 14720, 9200, 255, + { "Wacom Bamboo Pen", WACOM_PKGLEN_BBFUN, 14720, 9200, 1023, 63, BAMBOO_PT, 
WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0xD6 = { "Wacom BambooPT 2FG 4x5", WACOM_PKGLEN_BBFUN, 14720, 9200, 1023, @@ -1483,6 +1483,11 @@ static const struct wacom_features wacom_features_0x6004 = USB_DEVICE(USB_VENDOR_ID_WACOM, prod), \ .driver_info = (kernel_ulong_t)&wacom_features_##prod +#define USB_DEVICE_DETAILED(prod, class, sub, proto) \ + USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_WACOM, prod, class, \ + sub, proto), \ + .driver_info = (kernel_ulong_t)&wacom_features_##prod + #define USB_DEVICE_LENOVO(prod) \ USB_DEVICE(USB_VENDOR_ID_LENOVO, prod), \ .driver_info = (kernel_ulong_t)&wacom_features_##prod @@ -1506,6 +1511,8 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x64) }, { USB_DEVICE_WACOM(0x65) }, { USB_DEVICE_WACOM(0x69) }, + { USB_DEVICE_WACOM(0x6A) }, + { USB_DEVICE_WACOM(0x6B) }, { USB_DEVICE_WACOM(0x20) }, { USB_DEVICE_WACOM(0x21) }, { USB_DEVICE_WACOM(0x22) }, @@ -1545,7 +1552,13 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0xC5) }, { USB_DEVICE_WACOM(0xC6) }, { USB_DEVICE_WACOM(0xC7) }, - { USB_DEVICE_WACOM(0xCE) }, + /* + * DTU-2231 has two interfaces on the same configuration, + * only one is used. 
+ */ + { USB_DEVICE_DETAILED(0xCE, USB_CLASS_HID, + USB_INTERFACE_SUBCLASS_BOOT, + USB_INTERFACE_PROTOCOL_MOUSE) }, { USB_DEVICE_WACOM(0xD0) }, { USB_DEVICE_WACOM(0xD1) }, { USB_DEVICE_WACOM(0xD2) }, @@ -1560,6 +1573,7 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0xCC) }, { USB_DEVICE_WACOM(0x90) }, { USB_DEVICE_WACOM(0x93) }, + { USB_DEVICE_WACOM(0x97) }, { USB_DEVICE_WACOM(0x9A) }, { USB_DEVICE_WACOM(0x9F) }, { USB_DEVICE_WACOM(0xE2) }, diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index 5196861b86ef..d507b9b67806 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -967,17 +967,12 @@ static int __devinit ads7846_setup_pendown(struct spi_device *spi, struct ads784 ts->get_pendown_state = pdata->get_pendown_state; } else if (gpio_is_valid(pdata->gpio_pendown)) { - err = gpio_request(pdata->gpio_pendown, "ads7846_pendown"); + err = gpio_request_one(pdata->gpio_pendown, GPIOF_IN, + "ads7846_pendown"); if (err) { - dev_err(&spi->dev, "failed to request pendown GPIO%d\n", - pdata->gpio_pendown); - return err; - } - err = gpio_direction_input(pdata->gpio_pendown); - if (err) { - dev_err(&spi->dev, "failed to setup pendown GPIO%d\n", - pdata->gpio_pendown); - gpio_free(pdata->gpio_pendown); + dev_err(&spi->dev, + "failed to request/setup pendown GPIO%d: %d\n", + pdata->gpio_pendown, err); return err; } diff --git a/drivers/input/touchscreen/atmel-wm97xx.c b/drivers/input/touchscreen/atmel-wm97xx.c index fa8e56bd9094..8034cbb20f74 100644 --- a/drivers/input/touchscreen/atmel-wm97xx.c +++ b/drivers/input/touchscreen/atmel-wm97xx.c @@ -164,7 +164,7 @@ static irqreturn_t atmel_wm97xx_channel_b_interrupt(int irq, void *dev_id) data = ac97c_readl(atmel_wm97xx, CBRHR); value = data & 0x0fff; - source = data & WM97XX_ADCSRC_MASK; + source = data & WM97XX_ADCSEL_MASK; pen_down = (data & WM97XX_PEN_DOWN) >> 8; if (source == WM97XX_ADCSEL_X) @@ -442,6 +442,6 @@ static void __exit 
atmel_wm97xx_exit(void) } module_exit(atmel_wm97xx_exit); -MODULE_AUTHOR("Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>"); +MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>"); MODULE_DESCRIPTION("wm97xx continuous touch driver for Atmel AT91 and AVR32"); MODULE_LICENSE("GPL"); diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 1e61387c73ca..ae00604a6a81 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -48,41 +48,47 @@ #define MXT_OBJECT_SIZE 6 /* Object types */ -#define MXT_DEBUG_DIAGNOSTIC 37 -#define MXT_GEN_MESSAGE 5 -#define MXT_GEN_COMMAND 6 -#define MXT_GEN_POWER 7 -#define MXT_GEN_ACQUIRE 8 -#define MXT_TOUCH_MULTI 9 -#define MXT_TOUCH_KEYARRAY 15 -#define MXT_TOUCH_PROXIMITY 23 -#define MXT_PROCI_GRIPFACE 20 -#define MXT_PROCG_NOISE 22 -#define MXT_PROCI_ONETOUCH 24 -#define MXT_PROCI_TWOTOUCH 27 -#define MXT_PROCI_GRIP 40 -#define MXT_PROCI_PALM 41 -#define MXT_SPT_COMMSCONFIG 18 -#define MXT_SPT_GPIOPWM 19 -#define MXT_SPT_SELFTEST 25 -#define MXT_SPT_CTECONFIG 28 -#define MXT_SPT_USERDATA 38 -#define MXT_SPT_DIGITIZER 43 -#define MXT_SPT_MESSAGECOUNT 44 - -/* MXT_GEN_COMMAND field */ +#define MXT_DEBUG_DIAGNOSTIC_T37 37 +#define MXT_GEN_MESSAGE_T5 5 +#define MXT_GEN_COMMAND_T6 6 +#define MXT_GEN_POWER_T7 7 +#define MXT_GEN_ACQUIRE_T8 8 +#define MXT_GEN_DATASOURCE_T53 53 +#define MXT_TOUCH_MULTI_T9 9 +#define MXT_TOUCH_KEYARRAY_T15 15 +#define MXT_TOUCH_PROXIMITY_T23 23 +#define MXT_TOUCH_PROXKEY_T52 52 +#define MXT_PROCI_GRIPFACE_T20 20 +#define MXT_PROCG_NOISE_T22 22 +#define MXT_PROCI_ONETOUCH_T24 24 +#define MXT_PROCI_TWOTOUCH_T27 27 +#define MXT_PROCI_GRIP_T40 40 +#define MXT_PROCI_PALM_T41 41 +#define MXT_PROCI_TOUCHSUPPRESSION_T42 42 +#define MXT_PROCI_STYLUS_T47 47 +#define MXT_PROCG_NOISESUPPRESSION_T48 48 +#define MXT_SPT_COMMSCONFIG_T18 18 +#define MXT_SPT_GPIOPWM_T19 19 +#define MXT_SPT_SELFTEST_T25 25 +#define 
MXT_SPT_CTECONFIG_T28 28 +#define MXT_SPT_USERDATA_T38 38 +#define MXT_SPT_DIGITIZER_T43 43 +#define MXT_SPT_MESSAGECOUNT_T44 44 +#define MXT_SPT_CTECONFIG_T46 46 + +/* MXT_GEN_COMMAND_T6 field */ #define MXT_COMMAND_RESET 0 #define MXT_COMMAND_BACKUPNV 1 #define MXT_COMMAND_CALIBRATE 2 #define MXT_COMMAND_REPORTALL 3 #define MXT_COMMAND_DIAGNOSTIC 5 -/* MXT_GEN_POWER field */ +/* MXT_GEN_POWER_T7 field */ #define MXT_POWER_IDLEACQINT 0 #define MXT_POWER_ACTVACQINT 1 #define MXT_POWER_ACTV2IDLETO 2 -/* MXT_GEN_ACQUIRE field */ +/* MXT_GEN_ACQUIRE_T8 field */ #define MXT_ACQUIRE_CHRGTIME 0 #define MXT_ACQUIRE_TCHDRIFT 2 #define MXT_ACQUIRE_DRIFTST 3 @@ -91,7 +97,7 @@ #define MXT_ACQUIRE_ATCHCALST 6 #define MXT_ACQUIRE_ATCHCALSTHR 7 -/* MXT_TOUCH_MULTI field */ +/* MXT_TOUCH_MULTI_T9 field */ #define MXT_TOUCH_CTRL 0 #define MXT_TOUCH_XORIGIN 1 #define MXT_TOUCH_YORIGIN 2 @@ -121,7 +127,7 @@ #define MXT_TOUCH_YEDGEDIST 29 #define MXT_TOUCH_JUMPLIMIT 30 -/* MXT_PROCI_GRIPFACE field */ +/* MXT_PROCI_GRIPFACE_T20 field */ #define MXT_GRIPFACE_CTRL 0 #define MXT_GRIPFACE_XLOGRIP 1 #define MXT_GRIPFACE_XHIGRIP 2 @@ -151,11 +157,11 @@ #define MXT_NOISE_FREQ4 15 #define MXT_NOISE_IDLEGCAFVALID 16 -/* MXT_SPT_COMMSCONFIG */ +/* MXT_SPT_COMMSCONFIG_T18 */ #define MXT_COMMS_CTRL 0 #define MXT_COMMS_CMD 1 -/* MXT_SPT_CTECONFIG field */ +/* MXT_SPT_CTECONFIG_T28 field */ #define MXT_CTE_CTRL 0 #define MXT_CTE_CMD 1 #define MXT_CTE_MODE 2 @@ -166,7 +172,7 @@ #define MXT_VOLTAGE_DEFAULT 2700000 #define MXT_VOLTAGE_STEP 10000 -/* Define for MXT_GEN_COMMAND */ +/* Define for MXT_GEN_COMMAND_T6 */ #define MXT_BOOT_VALUE 0xa5 #define MXT_BACKUP_VALUE 0x55 #define MXT_BACKUP_TIME 25 /* msec */ @@ -256,24 +262,31 @@ struct mxt_data { static bool mxt_object_readable(unsigned int type) { switch (type) { - case MXT_GEN_MESSAGE: - case MXT_GEN_COMMAND: - case MXT_GEN_POWER: - case MXT_GEN_ACQUIRE: - case MXT_TOUCH_MULTI: - case MXT_TOUCH_KEYARRAY: - case MXT_TOUCH_PROXIMITY: - case 
MXT_PROCI_GRIPFACE: - case MXT_PROCG_NOISE: - case MXT_PROCI_ONETOUCH: - case MXT_PROCI_TWOTOUCH: - case MXT_PROCI_GRIP: - case MXT_PROCI_PALM: - case MXT_SPT_COMMSCONFIG: - case MXT_SPT_GPIOPWM: - case MXT_SPT_SELFTEST: - case MXT_SPT_CTECONFIG: - case MXT_SPT_USERDATA: + case MXT_GEN_MESSAGE_T5: + case MXT_GEN_COMMAND_T6: + case MXT_GEN_POWER_T7: + case MXT_GEN_ACQUIRE_T8: + case MXT_GEN_DATASOURCE_T53: + case MXT_TOUCH_MULTI_T9: + case MXT_TOUCH_KEYARRAY_T15: + case MXT_TOUCH_PROXIMITY_T23: + case MXT_TOUCH_PROXKEY_T52: + case MXT_PROCI_GRIPFACE_T20: + case MXT_PROCG_NOISE_T22: + case MXT_PROCI_ONETOUCH_T24: + case MXT_PROCI_TWOTOUCH_T27: + case MXT_PROCI_GRIP_T40: + case MXT_PROCI_PALM_T41: + case MXT_PROCI_TOUCHSUPPRESSION_T42: + case MXT_PROCI_STYLUS_T47: + case MXT_PROCG_NOISESUPPRESSION_T48: + case MXT_SPT_COMMSCONFIG_T18: + case MXT_SPT_GPIOPWM_T19: + case MXT_SPT_SELFTEST_T25: + case MXT_SPT_CTECONFIG_T28: + case MXT_SPT_USERDATA_T38: + case MXT_SPT_DIGITIZER_T43: + case MXT_SPT_CTECONFIG_T46: return true; default: return false; @@ -283,21 +296,28 @@ static bool mxt_object_readable(unsigned int type) static bool mxt_object_writable(unsigned int type) { switch (type) { - case MXT_GEN_COMMAND: - case MXT_GEN_POWER: - case MXT_GEN_ACQUIRE: - case MXT_TOUCH_MULTI: - case MXT_TOUCH_KEYARRAY: - case MXT_TOUCH_PROXIMITY: - case MXT_PROCI_GRIPFACE: - case MXT_PROCG_NOISE: - case MXT_PROCI_ONETOUCH: - case MXT_PROCI_TWOTOUCH: - case MXT_PROCI_GRIP: - case MXT_PROCI_PALM: - case MXT_SPT_GPIOPWM: - case MXT_SPT_SELFTEST: - case MXT_SPT_CTECONFIG: + case MXT_GEN_COMMAND_T6: + case MXT_GEN_POWER_T7: + case MXT_GEN_ACQUIRE_T8: + case MXT_TOUCH_MULTI_T9: + case MXT_TOUCH_KEYARRAY_T15: + case MXT_TOUCH_PROXIMITY_T23: + case MXT_TOUCH_PROXKEY_T52: + case MXT_PROCI_GRIPFACE_T20: + case MXT_PROCG_NOISE_T22: + case MXT_PROCI_ONETOUCH_T24: + case MXT_PROCI_TWOTOUCH_T27: + case MXT_PROCI_GRIP_T40: + case MXT_PROCI_PALM_T41: + case MXT_PROCI_TOUCHSUPPRESSION_T42: + case 
MXT_PROCI_STYLUS_T47: + case MXT_PROCG_NOISESUPPRESSION_T48: + case MXT_SPT_COMMSCONFIG_T18: + case MXT_SPT_GPIOPWM_T19: + case MXT_SPT_SELFTEST_T25: + case MXT_SPT_CTECONFIG_T28: + case MXT_SPT_DIGITIZER_T43: + case MXT_SPT_CTECONFIG_T46: return true; default: return false; @@ -455,7 +475,7 @@ static int mxt_read_message(struct mxt_data *data, struct mxt_object *object; u16 reg; - object = mxt_get_object(data, MXT_GEN_MESSAGE); + object = mxt_get_object(data, MXT_GEN_MESSAGE_T5); if (!object) return -EINVAL; @@ -597,8 +617,8 @@ static irqreturn_t mxt_interrupt(int irq, void *dev_id) reportid = message.reportid; - /* whether reportid is thing of MXT_TOUCH_MULTI */ - object = mxt_get_object(data, MXT_TOUCH_MULTI); + /* whether reportid is thing of MXT_TOUCH_MULTI_T9 */ + object = mxt_get_object(data, MXT_TOUCH_MULTI_T9); if (!object) goto end; @@ -635,7 +655,9 @@ static int mxt_check_reg_init(struct mxt_data *data) if (!mxt_object_writable(object->type)) continue; - for (j = 0; j < object->size + 1; j++) { + for (j = 0; + j < (object->size + 1) * (object->instances + 1); + j++) { config_offset = index + j; if (config_offset > pdata->config_length) { dev_err(dev, "Not enough config data!\n"); @@ -644,7 +666,7 @@ static int mxt_check_reg_init(struct mxt_data *data) mxt_write_object(data, object->type, j, pdata->config[config_offset]); } - index += object->size + 1; + index += (object->size + 1) * (object->instances + 1); } return 0; @@ -678,31 +700,31 @@ static void mxt_handle_pdata(struct mxt_data *data) u8 voltage; /* Set touchscreen lines */ - mxt_write_object(data, MXT_TOUCH_MULTI, MXT_TOUCH_XSIZE, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_XSIZE, pdata->x_line); - mxt_write_object(data, MXT_TOUCH_MULTI, MXT_TOUCH_YSIZE, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_YSIZE, pdata->y_line); /* Set touchscreen orient */ - mxt_write_object(data, MXT_TOUCH_MULTI, MXT_TOUCH_ORIENT, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_ORIENT, 
pdata->orient); /* Set touchscreen burst length */ - mxt_write_object(data, MXT_TOUCH_MULTI, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_BLEN, pdata->blen); /* Set touchscreen threshold */ - mxt_write_object(data, MXT_TOUCH_MULTI, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_TCHTHR, pdata->threshold); /* Set touchscreen resolution */ - mxt_write_object(data, MXT_TOUCH_MULTI, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_XRANGE_LSB, (pdata->x_size - 1) & 0xff); - mxt_write_object(data, MXT_TOUCH_MULTI, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_XRANGE_MSB, (pdata->x_size - 1) >> 8); - mxt_write_object(data, MXT_TOUCH_MULTI, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_YRANGE_LSB, (pdata->y_size - 1) & 0xff); - mxt_write_object(data, MXT_TOUCH_MULTI, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_YRANGE_MSB, (pdata->y_size - 1) >> 8); /* Set touchscreen voltage */ @@ -715,7 +737,7 @@ static void mxt_handle_pdata(struct mxt_data *data) voltage = (pdata->voltage - MXT_VOLTAGE_DEFAULT) / MXT_VOLTAGE_STEP; - mxt_write_object(data, MXT_SPT_CTECONFIG, + mxt_write_object(data, MXT_SPT_CTECONFIG_T28, MXT_CTE_VOLTAGE, voltage); } } @@ -819,13 +841,13 @@ static int mxt_initialize(struct mxt_data *data) mxt_handle_pdata(data); /* Backup to memory */ - mxt_write_object(data, MXT_GEN_COMMAND, + mxt_write_object(data, MXT_GEN_COMMAND_T6, MXT_COMMAND_BACKUPNV, MXT_BACKUP_VALUE); msleep(MXT_BACKUP_TIME); /* Soft reset */ - mxt_write_object(data, MXT_GEN_COMMAND, + mxt_write_object(data, MXT_GEN_COMMAND_T6, MXT_COMMAND_RESET, 1); msleep(MXT_RESET_TIME); @@ -921,7 +943,7 @@ static int mxt_load_fw(struct device *dev, const char *fn) } /* Change to the bootloader mode */ - mxt_write_object(data, MXT_GEN_COMMAND, + mxt_write_object(data, MXT_GEN_COMMAND_T6, MXT_COMMAND_RESET, MXT_BOOT_VALUE); msleep(MXT_RESET_TIME); @@ -1027,14 +1049,14 @@ static void mxt_start(struct mxt_data *data) { /* Touch enable */ mxt_write_object(data, - 
MXT_TOUCH_MULTI, MXT_TOUCH_CTRL, 0x83); + MXT_TOUCH_MULTI_T9, MXT_TOUCH_CTRL, 0x83); } static void mxt_stop(struct mxt_data *data) { /* Touch disable */ mxt_write_object(data, - MXT_TOUCH_MULTI, MXT_TOUCH_CTRL, 0); + MXT_TOUCH_MULTI_T9, MXT_TOUCH_CTRL, 0); } static int mxt_input_open(struct input_dev *dev) @@ -1182,7 +1204,7 @@ static int mxt_resume(struct device *dev) struct input_dev *input_dev = data->input_dev; /* Soft reset */ - mxt_write_object(data, MXT_GEN_COMMAND, + mxt_write_object(data, MXT_GEN_COMMAND_T6, MXT_COMMAND_RESET, 1); msleep(MXT_RESET_TIME); diff --git a/drivers/input/touchscreen/cy8ctmg110_ts.c b/drivers/input/touchscreen/cy8ctmg110_ts.c index a93c5c26ab3f..d8815c5d54ad 100644 --- a/drivers/input/touchscreen/cy8ctmg110_ts.c +++ b/drivers/input/touchscreen/cy8ctmg110_ts.c @@ -84,9 +84,9 @@ static int cy8ctmg110_write_regs(struct cy8ctmg110 *tsc, unsigned char reg, memcpy(i2c_data + 1, value, len); ret = i2c_master_send(client, i2c_data, len + 1); - if (ret != 1) { + if (ret != len + 1) { dev_err(&client->dev, "i2c write data cmd failed\n"); - return ret ? ret : -EIO; + return ret < 0 ? 
ret : -EIO; } return 0; @@ -193,6 +193,8 @@ static int __devinit cy8ctmg110_probe(struct i2c_client *client, ts->client = client; ts->input = input_dev; + ts->reset_pin = pdata->reset_pin; + ts->irq_pin = pdata->irq_pin; snprintf(ts->phys, sizeof(ts->phys), "%s/input0", dev_name(&client->dev)); @@ -328,7 +330,7 @@ static int __devexit cy8ctmg110_remove(struct i2c_client *client) return 0; } -static struct i2c_device_id cy8ctmg110_idtable[] = { +static const struct i2c_device_id cy8ctmg110_idtable[] = { { CY8CTMG110_DRIVER_NAME, 1 }, { } }; diff --git a/drivers/input/touchscreen/intel-mid-touch.c b/drivers/input/touchscreen/intel-mid-touch.c index 66c96bfc5522..327695268e06 100644 --- a/drivers/input/touchscreen/intel-mid-touch.c +++ b/drivers/input/touchscreen/intel-mid-touch.c @@ -448,15 +448,11 @@ static int __devinit mrstouch_read_pmic_id(uint *vendor, uint *rev) */ static int __devinit mrstouch_chan_parse(struct mrstouch_dev *tsdev) { - int err, i, found; + int found = 0; + int err, i; u8 r8; - found = -1; - for (i = 0; i < MRSTOUCH_MAX_CHANNELS; i++) { - if (found >= 0) - break; - err = intel_scu_ipc_ioread8(PMICADDR0 + i, &r8); if (err) return err; @@ -466,16 +462,15 @@ static int __devinit mrstouch_chan_parse(struct mrstouch_dev *tsdev) break; } } - if (found < 0) - return 0; if (tsdev->vendor == PMIC_VENDOR_FS) { - if (found && found > (MRSTOUCH_MAX_CHANNELS - 18)) + if (found > MRSTOUCH_MAX_CHANNELS - 18) return -ENOSPC; } else { - if (found && found > (MRSTOUCH_MAX_CHANNELS - 4)) + if (found > MRSTOUCH_MAX_CHANNELS - 4) return -ENOSPC; } + return found; } diff --git a/drivers/input/touchscreen/mainstone-wm97xx.c b/drivers/input/touchscreen/mainstone-wm97xx.c index 3242e7076258..e966c29ff1bb 100644 --- a/drivers/input/touchscreen/mainstone-wm97xx.c +++ b/drivers/input/touchscreen/mainstone-wm97xx.c @@ -157,9 +157,9 @@ static int wm97xx_acc_pen_down(struct wm97xx *wm) x, y, p); /* are samples valid */ - if ((x & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_X || - 
(y & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_Y || - (p & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_PRES) + if ((x & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_X || + (y & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_Y || + (p & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_PRES) goto up; /* coordinate is good */ diff --git a/drivers/input/touchscreen/tnetv107x-ts.c b/drivers/input/touchscreen/tnetv107x-ts.c index 22a3411e93c5..089b0a0f3d8c 100644 --- a/drivers/input/touchscreen/tnetv107x-ts.c +++ b/drivers/input/touchscreen/tnetv107x-ts.c @@ -393,5 +393,5 @@ module_exit(tsc_exit); MODULE_AUTHOR("Cyril Chemparathy"); MODULE_DESCRIPTION("TNETV107X Touchscreen Driver"); -MODULE_ALIAS("platform: tnetv107x-ts"); +MODULE_ALIAS("platform:tnetv107x-ts"); MODULE_LICENSE("GPL"); diff --git a/drivers/input/touchscreen/wm9705.c b/drivers/input/touchscreen/wm9705.c index 98e61175d3f5..adc13a523ab5 100644 --- a/drivers/input/touchscreen/wm9705.c +++ b/drivers/input/touchscreen/wm9705.c @@ -215,8 +215,9 @@ static inline int is_pden(struct wm97xx *wm) static int wm9705_poll_sample(struct wm97xx *wm, int adcsel, int *sample) { int timeout = 5 * delay; + bool wants_pen = adcsel & WM97XX_PEN_DOWN; - if (!wm->pen_probably_down) { + if (wants_pen && !wm->pen_probably_down) { u16 data = wm97xx_reg_read(wm, AC97_WM97XX_DIGITISER_RD); if (!(data & WM97XX_PEN_DOWN)) return RC_PENUP; @@ -224,13 +225,10 @@ static int wm9705_poll_sample(struct wm97xx *wm, int adcsel, int *sample) } /* set up digitiser */ - if (adcsel & 0x8000) - adcsel = ((adcsel & 0x7fff) + 3) << 12; - if (wm->mach_ops && wm->mach_ops->pre_sample) wm->mach_ops->pre_sample(adcsel); - wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, - adcsel | WM97XX_POLL | WM97XX_DELAY(delay)); + wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, (adcsel & WM97XX_ADCSEL_MASK) + | WM97XX_POLL | WM97XX_DELAY(delay)); /* wait 3 AC97 time slots + delay for conversion */ poll_delay(delay); @@ -256,13 +254,14 @@ static int wm9705_poll_sample(struct wm97xx *wm, int adcsel, int *sample) 
wm->mach_ops->post_sample(adcsel); /* check we have correct sample */ - if ((*sample & WM97XX_ADCSEL_MASK) != adcsel) { - dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel, - *sample & WM97XX_ADCSEL_MASK); + if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) { + dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x", + adcsel & WM97XX_ADCSEL_MASK, + *sample & WM97XX_ADCSEL_MASK); return RC_PENUP; } - if (!(*sample & WM97XX_PEN_DOWN)) { + if (wants_pen && !(*sample & WM97XX_PEN_DOWN)) { wm->pen_probably_down = 0; return RC_PENUP; } @@ -277,14 +276,14 @@ static int wm9705_poll_touch(struct wm97xx *wm, struct wm97xx_data *data) { int rc; - rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_X, &data->x); + rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_X | WM97XX_PEN_DOWN, &data->x); if (rc != RC_VALID) return rc; - rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_Y, &data->y); + rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_Y | WM97XX_PEN_DOWN, &data->y); if (rc != RC_VALID) return rc; if (pil) { - rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_PRES, &data->p); + rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_PRES | WM97XX_PEN_DOWN, &data->p); if (rc != RC_VALID) return rc; } else diff --git a/drivers/input/touchscreen/wm9712.c b/drivers/input/touchscreen/wm9712.c index 2bc2fb801009..6e743e3dfda4 100644 --- a/drivers/input/touchscreen/wm9712.c +++ b/drivers/input/touchscreen/wm9712.c @@ -255,8 +255,9 @@ static inline int is_pden(struct wm97xx *wm) static int wm9712_poll_sample(struct wm97xx *wm, int adcsel, int *sample) { int timeout = 5 * delay; + bool wants_pen = adcsel & WM97XX_PEN_DOWN; - if (!wm->pen_probably_down) { + if (wants_pen && !wm->pen_probably_down) { u16 data = wm97xx_reg_read(wm, AC97_WM97XX_DIGITISER_RD); if (!(data & WM97XX_PEN_DOWN)) return RC_PENUP; @@ -264,13 +265,10 @@ static int wm9712_poll_sample(struct wm97xx *wm, int adcsel, int *sample) } /* set up digitiser */ - if (adcsel & 0x8000) - adcsel = ((adcsel & 0x7fff) + 3) << 12; - if (wm->mach_ops && wm->mach_ops->pre_sample) 
wm->mach_ops->pre_sample(adcsel); - wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, - adcsel | WM97XX_POLL | WM97XX_DELAY(delay)); + wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, (adcsel & WM97XX_ADCSEL_MASK) + | WM97XX_POLL | WM97XX_DELAY(delay)); /* wait 3 AC97 time slots + delay for conversion */ poll_delay(delay); @@ -296,13 +294,14 @@ static int wm9712_poll_sample(struct wm97xx *wm, int adcsel, int *sample) wm->mach_ops->post_sample(adcsel); /* check we have correct sample */ - if ((*sample & WM97XX_ADCSEL_MASK) != adcsel) { - dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel, - *sample & WM97XX_ADCSEL_MASK); + if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) { + dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x", + adcsel & WM97XX_ADCSEL_MASK, + *sample & WM97XX_ADCSEL_MASK); return RC_PENUP; } - if (!(*sample & WM97XX_PEN_DOWN)) { + if (wants_pen && !(*sample & WM97XX_PEN_DOWN)) { wm->pen_probably_down = 0; return RC_PENUP; } @@ -387,16 +386,18 @@ static int wm9712_poll_touch(struct wm97xx *wm, struct wm97xx_data *data) if (rc != RC_VALID) return rc; } else { - rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_X, &data->x); + rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_X | WM97XX_PEN_DOWN, + &data->x); if (rc != RC_VALID) return rc; - rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_Y, &data->y); + rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_Y | WM97XX_PEN_DOWN, + &data->y); if (rc != RC_VALID) return rc; if (pil && !five_wire) { - rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_PRES, + rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_PRES | WM97XX_PEN_DOWN, &data->p); if (rc != RC_VALID) return rc; diff --git a/drivers/input/touchscreen/wm9713.c b/drivers/input/touchscreen/wm9713.c index 73ec99568f12..7405353199d7 100644 --- a/drivers/input/touchscreen/wm9713.c +++ b/drivers/input/touchscreen/wm9713.c @@ -261,8 +261,9 @@ static int wm9713_poll_sample(struct wm97xx *wm, int adcsel, int *sample) { u16 dig1; int timeout = 5 * delay; + bool wants_pen = adcsel & WM97XX_PEN_DOWN; - if 
(!wm->pen_probably_down) { + if (wants_pen && !wm->pen_probably_down) { u16 data = wm97xx_reg_read(wm, AC97_WM97XX_DIGITISER_RD); if (!(data & WM97XX_PEN_DOWN)) return RC_PENUP; @@ -270,15 +271,14 @@ static int wm9713_poll_sample(struct wm97xx *wm, int adcsel, int *sample) } /* set up digitiser */ - if (adcsel & 0x8000) - adcsel = 1 << ((adcsel & 0x7fff) + 3); - dig1 = wm97xx_reg_read(wm, AC97_WM9713_DIG1); dig1 &= ~WM9713_ADCSEL_MASK; + /* WM97XX_ADCSEL_* channels need to be converted to WM9713 format */ + dig1 |= 1 << ((adcsel & WM97XX_ADCSEL_MASK) >> 12); if (wm->mach_ops && wm->mach_ops->pre_sample) wm->mach_ops->pre_sample(adcsel); - wm97xx_reg_write(wm, AC97_WM9713_DIG1, dig1 | adcsel | WM9713_POLL); + wm97xx_reg_write(wm, AC97_WM9713_DIG1, dig1 | WM9713_POLL); /* wait 3 AC97 time slots + delay for conversion */ poll_delay(delay); @@ -304,13 +304,14 @@ static int wm9713_poll_sample(struct wm97xx *wm, int adcsel, int *sample) wm->mach_ops->post_sample(adcsel); /* check we have correct sample */ - if ((*sample & WM97XX_ADCSRC_MASK) != ffs(adcsel >> 1) << 12) { - dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel, - *sample & WM97XX_ADCSRC_MASK); + if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) { + dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x", + adcsel & WM97XX_ADCSEL_MASK, + *sample & WM97XX_ADCSEL_MASK); return RC_PENUP; } - if (!(*sample & WM97XX_PEN_DOWN)) { + if (wants_pen && !(*sample & WM97XX_PEN_DOWN)) { wm->pen_probably_down = 0; return RC_PENUP; } @@ -400,14 +401,14 @@ static int wm9713_poll_touch(struct wm97xx *wm, struct wm97xx_data *data) if (rc != RC_VALID) return rc; } else { - rc = wm9713_poll_sample(wm, WM9713_ADCSEL_X, &data->x); + rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_X | WM97XX_PEN_DOWN, &data->x); if (rc != RC_VALID) return rc; - rc = wm9713_poll_sample(wm, WM9713_ADCSEL_Y, &data->y); + rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_Y | WM97XX_PEN_DOWN, &data->y); if (rc != RC_VALID) return rc; if (pil) { - rc = 
wm9713_poll_sample(wm, WM9713_ADCSEL_PRES, + rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_PRES | WM97XX_PEN_DOWN, &data->p); if (rc != RC_VALID) return rc; diff --git a/drivers/input/touchscreen/zylonite-wm97xx.c b/drivers/input/touchscreen/zylonite-wm97xx.c index 5b0f15ec874a..f6328c0cded6 100644 --- a/drivers/input/touchscreen/zylonite-wm97xx.c +++ b/drivers/input/touchscreen/zylonite-wm97xx.c @@ -122,9 +122,9 @@ static int wm97xx_acc_pen_down(struct wm97xx *wm) x, y, p); /* are samples valid */ - if ((x & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_X || - (y & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_Y || - (p & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_PRES) + if ((x & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_X || + (y & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_Y || + (p & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_PRES) goto up; /* coordinate is good */ diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index 48e9cc0369b1..1f73d7f7e024 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -2532,6 +2532,9 @@ static void _isdn_setup(struct net_device *dev) /* Setup the generic properties */ dev->flags = IFF_NOARP|IFF_POINTOPOINT; + + /* isdn prepends a header in the tx path, can't share skbs */ + dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->header_ops = NULL; dev->netdev_ops = &isdn_netdev_ops; diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 574b09afedd3..0dc6546b77a8 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -29,7 +29,6 @@ #include "md.h" #include "bitmap.h" -#include <linux/dm-dirty-log.h> /* debug macros */ #define DEBUG 0 @@ -775,10 +774,8 @@ static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned lon * 0 or page 1 */ static inline struct page *filemap_get_page(struct bitmap *bitmap, - unsigned long chunk) + unsigned long chunk) { - if (bitmap->filemap == NULL) - return NULL; if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL; return bitmap->filemap[file_page_index(bitmap, 
chunk) @@ -878,28 +875,19 @@ enum bitmap_page_attr { static inline void set_page_attr(struct bitmap *bitmap, struct page *page, enum bitmap_page_attr attr) { - if (page) - __set_bit((page->index<<2) + attr, bitmap->filemap_attr); - else - __set_bit(attr, &bitmap->logattrs); + __set_bit((page->index<<2) + attr, bitmap->filemap_attr); } static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, enum bitmap_page_attr attr) { - if (page) - __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); - else - __clear_bit(attr, &bitmap->logattrs); + __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); } static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page, enum bitmap_page_attr attr) { - if (page) - return test_bit((page->index<<2) + attr, bitmap->filemap_attr); - else - return test_bit(attr, &bitmap->logattrs); + return test_bit((page->index<<2) + attr, bitmap->filemap_attr); } /* @@ -912,30 +900,26 @@ static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *p static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) { unsigned long bit; - struct page *page = NULL; + struct page *page; void *kaddr; unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); - if (!bitmap->filemap) { - struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log; - if (log) - log->type->mark_region(log, chunk); - } else { + if (!bitmap->filemap) + return; - page = filemap_get_page(bitmap, chunk); - if (!page) - return; - bit = file_page_offset(bitmap, chunk); + page = filemap_get_page(bitmap, chunk); + if (!page) + return; + bit = file_page_offset(bitmap, chunk); - /* set the bit */ - kaddr = kmap_atomic(page, KM_USER0); - if (bitmap->flags & BITMAP_HOSTENDIAN) - set_bit(bit, kaddr); - else - __test_and_set_bit_le(bit, kaddr); - kunmap_atomic(kaddr, KM_USER0); - PRINTK("set file bit %lu page %lu\n", bit, page->index); - } + /* set the bit */ + kaddr = kmap_atomic(page, KM_USER0); + if (bitmap->flags & 
BITMAP_HOSTENDIAN) + set_bit(bit, kaddr); + else + __set_bit_le(bit, kaddr); + kunmap_atomic(kaddr, KM_USER0); + PRINTK("set file bit %lu page %lu\n", bit, page->index); /* record page number so it gets flushed to disk when unplug occurs */ set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); } @@ -952,16 +936,6 @@ void bitmap_unplug(struct bitmap *bitmap) if (!bitmap) return; - if (!bitmap->filemap) { - /* Must be using a dirty_log */ - struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log; - dirty = test_and_clear_bit(BITMAP_PAGE_DIRTY, &bitmap->logattrs); - need_write = test_and_clear_bit(BITMAP_PAGE_NEEDWRITE, &bitmap->logattrs); - if (dirty || need_write) - if (log->type->flush(log)) - bitmap->flags |= BITMAP_WRITE_ERROR; - goto out; - } /* look at each page to see if there are any set bits that need to be * flushed out to disk */ @@ -990,7 +964,6 @@ void bitmap_unplug(struct bitmap *bitmap) else md_super_wait(bitmap->mddev); } -out: if (bitmap->flags & BITMAP_WRITE_ERROR) bitmap_file_kick(bitmap); } @@ -1199,7 +1172,6 @@ void bitmap_daemon_work(mddev_t *mddev) struct page *page = NULL, *lastpage = NULL; sector_t blocks; void *paddr; - struct dm_dirty_log *log = mddev->bitmap_info.log; /* Use a mutex to guard daemon_work against * bitmap_destroy. 
@@ -1224,12 +1196,11 @@ void bitmap_daemon_work(mddev_t *mddev) spin_lock_irqsave(&bitmap->lock, flags); for (j = 0; j < bitmap->chunks; j++) { bitmap_counter_t *bmc; - if (!bitmap->filemap) { - if (!log) - /* error or shutdown */ - break; - } else - page = filemap_get_page(bitmap, j); + if (!bitmap->filemap) + /* error or shutdown */ + break; + + page = filemap_get_page(bitmap, j); if (page != lastpage) { /* skip this page unless it's marked as needing cleaning */ @@ -1298,17 +1269,16 @@ void bitmap_daemon_work(mddev_t *mddev) -1); /* clear the bit */ - if (page) { - paddr = kmap_atomic(page, KM_USER0); - if (bitmap->flags & BITMAP_HOSTENDIAN) - clear_bit(file_page_offset(bitmap, j), - paddr); - else - __test_and_clear_bit_le(file_page_offset(bitmap, j), - paddr); - kunmap_atomic(paddr, KM_USER0); - } else - log->type->clear_region(log, j); + paddr = kmap_atomic(page, KM_USER0); + if (bitmap->flags & BITMAP_HOSTENDIAN) + clear_bit(file_page_offset(bitmap, j), + paddr); + else + __clear_bit_le( + file_page_offset(bitmap, + j), + paddr); + kunmap_atomic(paddr, KM_USER0); } } else j |= PAGE_COUNTER_MASK; @@ -1316,16 +1286,12 @@ void bitmap_daemon_work(mddev_t *mddev) spin_unlock_irqrestore(&bitmap->lock, flags); /* now sync the final page */ - if (lastpage != NULL || log != NULL) { + if (lastpage != NULL) { spin_lock_irqsave(&bitmap->lock, flags); if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); spin_unlock_irqrestore(&bitmap->lock, flags); - if (lastpage) - write_page(bitmap, lastpage, 0); - else - if (log->type->flush(log)) - bitmap->flags |= BITMAP_WRITE_ERROR; + write_page(bitmap, lastpage, 0); } else { set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); spin_unlock_irqrestore(&bitmap->lock, flags); @@ -1767,12 +1733,10 @@ int bitmap_create(mddev_t *mddev) BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); if (!file - && !mddev->bitmap_info.offset - && !mddev->bitmap_info.log) /* bitmap 
disabled, nothing to do */ + && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */ return 0; BUG_ON(file && mddev->bitmap_info.offset); - BUG_ON(mddev->bitmap_info.offset && mddev->bitmap_info.log); bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); if (!bitmap) @@ -1863,6 +1827,7 @@ int bitmap_create(mddev_t *mddev) int bitmap_load(mddev_t *mddev) { int err = 0; + sector_t start = 0; sector_t sector = 0; struct bitmap *bitmap = mddev->bitmap; @@ -1881,24 +1846,14 @@ int bitmap_load(mddev_t *mddev) } bitmap_close_sync(bitmap); - if (mddev->bitmap_info.log) { - unsigned long i; - struct dm_dirty_log *log = mddev->bitmap_info.log; - for (i = 0; i < bitmap->chunks; i++) - if (!log->type->in_sync(log, i, 1)) - bitmap_set_memory_bits(bitmap, - (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap), - 1); - } else { - sector_t start = 0; - if (mddev->degraded == 0 - || bitmap->events_cleared == mddev->events) - /* no need to keep dirty bits to optimise a - * re-add of a missing device */ - start = mddev->recovery_cp; - - err = bitmap_init_from_disk(bitmap, start); - } + if (mddev->degraded == 0 + || bitmap->events_cleared == mddev->events) + /* no need to keep dirty bits to optimise a + * re-add of a missing device */ + start = mddev->recovery_cp; + + err = bitmap_init_from_disk(bitmap, start); + if (err) goto out; diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index b2a127e891ac..a28f2e5588c6 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h @@ -212,10 +212,6 @@ struct bitmap { unsigned long file_pages; /* number of pages in the file */ int last_page_size; /* bytes in the last page */ - unsigned long logattrs; /* used when filemap_attr doesn't exist - * because we are working with a dirty_log - */ - unsigned long flags; int allclean; @@ -237,7 +233,6 @@ struct bitmap { wait_queue_head_t behind_wait; struct sysfs_dirent *sysfs_can_clear; - }; /* the bitmap API */ diff --git a/drivers/md/md.c b/drivers/md/md.c index dfc9425db70b..8e221a20f5d9 100644 --- 
a/drivers/md/md.c +++ b/drivers/md/md.c @@ -215,6 +215,55 @@ struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, } EXPORT_SYMBOL_GPL(bio_clone_mddev); +void md_trim_bio(struct bio *bio, int offset, int size) +{ + /* 'bio' is a cloned bio which we need to trim to match + * the given offset and size. + * This requires adjusting bi_sector, bi_size, and bi_io_vec + */ + int i; + struct bio_vec *bvec; + int sofar = 0; + + size <<= 9; + if (offset == 0 && size == bio->bi_size) + return; + + bio->bi_sector += offset; + bio->bi_size = size; + offset <<= 9; + clear_bit(BIO_SEG_VALID, &bio->bi_flags); + + while (bio->bi_idx < bio->bi_vcnt && + bio->bi_io_vec[bio->bi_idx].bv_len <= offset) { + /* remove this whole bio_vec */ + offset -= bio->bi_io_vec[bio->bi_idx].bv_len; + bio->bi_idx++; + } + if (bio->bi_idx < bio->bi_vcnt) { + bio->bi_io_vec[bio->bi_idx].bv_offset += offset; + bio->bi_io_vec[bio->bi_idx].bv_len -= offset; + } + /* avoid any complications with bi_idx being non-zero*/ + if (bio->bi_idx) { + memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx, + (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec)); + bio->bi_vcnt -= bio->bi_idx; + bio->bi_idx = 0; + } + /* Make sure vcnt and last bv are not too big */ + bio_for_each_segment(bvec, bio, i) { + if (sofar + bvec->bv_len > size) + bvec->bv_len = size - sofar; + if (bvec->bv_len == 0) { + bio->bi_vcnt = i; + break; + } + sofar += bvec->bv_len; + } +} +EXPORT_SYMBOL_GPL(md_trim_bio); + /* * We have a system wide 'event count' that is incremented * on any 'interesting' event, and readers of /proc/mdstat @@ -757,6 +806,10 @@ static void free_disk_sb(mdk_rdev_t * rdev) rdev->sb_start = 0; rdev->sectors = 0; } + if (rdev->bb_page) { + put_page(rdev->bb_page); + rdev->bb_page = NULL; + } } @@ -1025,7 +1078,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version ret = -EINVAL; bdevname(rdev->bdev, b); - sb = (mdp_super_t*)page_address(rdev->sb_page); + sb = 
page_address(rdev->sb_page); if (sb->md_magic != MD_SB_MAGIC) { printk(KERN_ERR "md: invalid raid superblock magic on %s\n", @@ -1054,6 +1107,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version rdev->preferred_minor = sb->md_minor; rdev->data_offset = 0; rdev->sb_size = MD_SB_BYTES; + rdev->badblocks.shift = -1; if (sb->level == LEVEL_MULTIPATH) rdev->desc_nr = -1; @@ -1064,7 +1118,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version ret = 1; } else { __u64 ev1, ev2; - mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page); + mdp_super_t *refsb = page_address(refdev->sb_page); if (!uuid_equal(refsb, sb)) { printk(KERN_WARNING "md: %s has different UUID to %s\n", b, bdevname(refdev->bdev,b2)); @@ -1099,7 +1153,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) { mdp_disk_t *desc; - mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); + mdp_super_t *sb = page_address(rdev->sb_page); __u64 ev1 = md_event(sb); rdev->raid_disk = -1; @@ -1230,7 +1284,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) rdev->sb_size = MD_SB_BYTES; - sb = (mdp_super_t*)page_address(rdev->sb_page); + sb = page_address(rdev->sb_page); memset(sb, 0, sizeof(*sb)); @@ -1395,6 +1449,8 @@ static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb) return cpu_to_le32(csum); } +static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors, + int acknowledged); static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) { struct mdp_superblock_1 *sb; @@ -1435,7 +1491,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) if (ret) return ret; - sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); + sb = page_address(rdev->sb_page); if (sb->magic != cpu_to_le32(MD_SB_MAGIC) || sb->major_version != cpu_to_le32(1) || @@ -1473,12 +1529,52 @@ 
static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) else rdev->desc_nr = le32_to_cpu(sb->dev_number); + if (!rdev->bb_page) { + rdev->bb_page = alloc_page(GFP_KERNEL); + if (!rdev->bb_page) + return -ENOMEM; + } + if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) && + rdev->badblocks.count == 0) { + /* need to load the bad block list. + * Currently we limit it to one page. + */ + s32 offset; + sector_t bb_sector; + u64 *bbp; + int i; + int sectors = le16_to_cpu(sb->bblog_size); + if (sectors > (PAGE_SIZE / 512)) + return -EINVAL; + offset = le32_to_cpu(sb->bblog_offset); + if (offset == 0) + return -EINVAL; + bb_sector = (long long)offset; + if (!sync_page_io(rdev, bb_sector, sectors << 9, + rdev->bb_page, READ, true)) + return -EIO; + bbp = (u64 *)page_address(rdev->bb_page); + rdev->badblocks.shift = sb->bblog_shift; + for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) { + u64 bb = le64_to_cpu(*bbp); + int count = bb & (0x3ff); + u64 sector = bb >> 10; + sector <<= sb->bblog_shift; + count <<= sb->bblog_shift; + if (bb + 1 == 0) + break; + if (md_set_badblocks(&rdev->badblocks, + sector, count, 1) == 0) + return -EINVAL; + } + } else if (sb->bblog_offset == 0) + rdev->badblocks.shift = -1; + if (!refdev) { ret = 1; } else { __u64 ev1, ev2; - struct mdp_superblock_1 *refsb = - (struct mdp_superblock_1*)page_address(refdev->sb_page); + struct mdp_superblock_1 *refsb = page_address(refdev->sb_page); if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 || sb->level != refsb->level || @@ -1513,7 +1609,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) { - struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); + struct mdp_superblock_1 *sb = page_address(rdev->sb_page); __u64 ev1 = le64_to_cpu(sb->events); rdev->raid_disk = -1; @@ -1619,13 +1715,12 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) 
int max_dev, i; /* make rdev->sb match mddev and rdev data. */ - sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); + sb = page_address(rdev->sb_page); sb->feature_map = 0; sb->pad0 = 0; sb->recovery_offset = cpu_to_le64(0); memset(sb->pad1, 0, sizeof(sb->pad1)); - memset(sb->pad2, 0, sizeof(sb->pad2)); memset(sb->pad3, 0, sizeof(sb->pad3)); sb->utime = cpu_to_le64((__u64)mddev->utime); @@ -1665,6 +1760,40 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors); } + if (rdev->badblocks.count == 0) + /* Nothing to do for bad blocks*/ ; + else if (sb->bblog_offset == 0) + /* Cannot record bad blocks on this device */ + md_error(mddev, rdev); + else { + struct badblocks *bb = &rdev->badblocks; + u64 *bbp = (u64 *)page_address(rdev->bb_page); + u64 *p = bb->page; + sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS); + if (bb->changed) { + unsigned seq; + +retry: + seq = read_seqbegin(&bb->lock); + + memset(bbp, 0xff, PAGE_SIZE); + + for (i = 0 ; i < bb->count ; i++) { + u64 internal_bb = *p++; + u64 store_bb = ((BB_OFFSET(internal_bb) << 10) + | BB_LEN(internal_bb)); + *bbp++ = cpu_to_le64(store_bb); + } + if (read_seqretry(&bb->lock, seq)) + goto retry; + + bb->sector = (rdev->sb_start + + (int)le32_to_cpu(sb->bblog_offset)); + bb->size = le16_to_cpu(sb->bblog_size); + bb->changed = 0; + } + } + max_dev = 0; list_for_each_entry(rdev2, &mddev->disks, same_set) if (rdev2->desc_nr+1 > max_dev) @@ -1724,7 +1853,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors) num_sectors = max_sectors; rdev->sb_start = sb_start; } - sb = (struct mdp_superblock_1 *) page_address(rdev->sb_page); + sb = page_address(rdev->sb_page); sb->data_size = cpu_to_le64(num_sectors); sb->super_offset = rdev->sb_start; sb->sb_csum = calc_sb_1_csum(sb); @@ -1922,7 +2051,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) bd_link_disk_holder(rdev->bdev, mddev->gendisk); /* May as well allow 
recovery to be retried once */ - mddev->recovery_disabled = 0; + mddev->recovery_disabled++; return 0; @@ -1953,6 +2082,9 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) sysfs_remove_link(&rdev->kobj, "block"); sysfs_put(rdev->sysfs_state); rdev->sysfs_state = NULL; + kfree(rdev->badblocks.page); + rdev->badblocks.count = 0; + rdev->badblocks.page = NULL; /* We need to delay this, otherwise we can deadlock when * writing to 'remove' to "dev/state". We also need * to delay it due to rcu usage. @@ -2127,10 +2259,10 @@ static void print_rdev(mdk_rdev_t *rdev, int major_version) printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version); switch (major_version) { case 0: - print_sb_90((mdp_super_t*)page_address(rdev->sb_page)); + print_sb_90(page_address(rdev->sb_page)); break; case 1: - print_sb_1((struct mdp_superblock_1 *)page_address(rdev->sb_page)); + print_sb_1(page_address(rdev->sb_page)); break; } } else @@ -2194,6 +2326,7 @@ static void md_update_sb(mddev_t * mddev, int force_change) mdk_rdev_t *rdev; int sync_req; int nospares = 0; + int any_badblocks_changed = 0; repeat: /* First make sure individual recovery_offsets are correct */ @@ -2208,8 +2341,18 @@ repeat: if (!mddev->persistent) { clear_bit(MD_CHANGE_CLEAN, &mddev->flags); clear_bit(MD_CHANGE_DEVS, &mddev->flags); - if (!mddev->external) + if (!mddev->external) { clear_bit(MD_CHANGE_PENDING, &mddev->flags); + list_for_each_entry(rdev, &mddev->disks, same_set) { + if (rdev->badblocks.changed) { + md_ack_all_badblocks(&rdev->badblocks); + md_error(mddev, rdev); + } + clear_bit(Blocked, &rdev->flags); + clear_bit(BlockedBadBlocks, &rdev->flags); + wake_up(&rdev->blocked_wait); + } + } wake_up(&mddev->sb_wait); return; } @@ -2265,6 +2408,14 @@ repeat: MD_BUG(); mddev->events --; } + + list_for_each_entry(rdev, &mddev->disks, same_set) { + if (rdev->badblocks.changed) + any_badblocks_changed++; + if (test_bit(Faulty, &rdev->flags)) + set_bit(FaultRecorded, &rdev->flags); + } + 
sync_sbs(mddev, nospares); spin_unlock_irq(&mddev->write_lock); @@ -2290,6 +2441,13 @@ repeat: bdevname(rdev->bdev,b), (unsigned long long)rdev->sb_start); rdev->sb_events = mddev->events; + if (rdev->badblocks.size) { + md_super_write(mddev, rdev, + rdev->badblocks.sector, + rdev->badblocks.size << 9, + rdev->bb_page); + rdev->badblocks.size = 0; + } } else dprintk(")\n"); @@ -2313,6 +2471,15 @@ repeat: if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) sysfs_notify(&mddev->kobj, NULL, "sync_completed"); + list_for_each_entry(rdev, &mddev->disks, same_set) { + if (test_and_clear_bit(FaultRecorded, &rdev->flags)) + clear_bit(Blocked, &rdev->flags); + + if (any_badblocks_changed) + md_ack_all_badblocks(&rdev->badblocks); + clear_bit(BlockedBadBlocks, &rdev->flags); + wake_up(&rdev->blocked_wait); + } } /* words written to sysfs files may, or may not, be \n terminated. @@ -2347,7 +2514,8 @@ state_show(mdk_rdev_t *rdev, char *page) char *sep = ""; size_t len = 0; - if (test_bit(Faulty, &rdev->flags)) { + if (test_bit(Faulty, &rdev->flags) || + rdev->badblocks.unacked_exist) { len+= sprintf(page+len, "%sfaulty",sep); sep = ","; } @@ -2359,7 +2527,8 @@ state_show(mdk_rdev_t *rdev, char *page) len += sprintf(page+len, "%swrite_mostly",sep); sep = ","; } - if (test_bit(Blocked, &rdev->flags)) { + if (test_bit(Blocked, &rdev->flags) || + rdev->badblocks.unacked_exist) { len += sprintf(page+len, "%sblocked", sep); sep = ","; } @@ -2368,6 +2537,10 @@ state_show(mdk_rdev_t *rdev, char *page) len += sprintf(page+len, "%sspare", sep); sep = ","; } + if (test_bit(WriteErrorSeen, &rdev->flags)) { + len += sprintf(page+len, "%swrite_error", sep); + sep = ","; + } return len+sprintf(page+len, "\n"); } @@ -2375,13 +2548,15 @@ static ssize_t state_store(mdk_rdev_t *rdev, const char *buf, size_t len) { /* can write - * faulty - simulates and error + * faulty - simulates an error * remove - disconnects the device * writemostly - sets write_mostly * -writemostly - clears write_mostly 
- * blocked - sets the Blocked flag - * -blocked - clears the Blocked flag + * blocked - sets the Blocked flags + * -blocked - clears the Blocked and possibly simulates an error * insync - sets Insync providing device isn't active + * write_error - sets WriteErrorSeen + * -write_error - clears WriteErrorSeen */ int err = -EINVAL; if (cmd_match(buf, "faulty") && rdev->mddev->pers) { @@ -2408,7 +2583,15 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) set_bit(Blocked, &rdev->flags); err = 0; } else if (cmd_match(buf, "-blocked")) { + if (!test_bit(Faulty, &rdev->flags) && + test_bit(BlockedBadBlocks, &rdev->flags)) { + /* metadata handler doesn't understand badblocks, + * so we need to fail the device + */ + md_error(rdev->mddev, rdev); + } clear_bit(Blocked, &rdev->flags); + clear_bit(BlockedBadBlocks, &rdev->flags); wake_up(&rdev->blocked_wait); set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); md_wakeup_thread(rdev->mddev->thread); @@ -2417,6 +2600,12 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) { set_bit(In_sync, &rdev->flags); err = 0; + } else if (cmd_match(buf, "write_error")) { + set_bit(WriteErrorSeen, &rdev->flags); + err = 0; + } else if (cmd_match(buf, "-write_error")) { + clear_bit(WriteErrorSeen, &rdev->flags); + err = 0; } if (!err) sysfs_notify_dirent_safe(rdev->sysfs_state); @@ -2459,7 +2648,6 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) { char *e; int err; - char nm[20]; int slot = simple_strtoul(buf, &e, 10); if (strncmp(buf, "none", 4)==0) slot = -1; @@ -2482,8 +2670,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) hot_remove_disk(rdev->mddev, rdev->raid_disk); if (err) return err; - sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_remove_link(&rdev->mddev->kobj, nm); + sysfs_unlink_rdev(rdev->mddev, rdev); rdev->raid_disk = -1; set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); md_wakeup_thread(rdev->mddev->thread); @@ 
-2522,8 +2709,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) return err; } else sysfs_notify_dirent_safe(rdev->sysfs_state); - sprintf(nm, "rd%d", rdev->raid_disk); - if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm)) + if (sysfs_link_rdev(rdev->mddev, rdev)) /* failure here is OK */; /* don't wakeup anyone, leave that to userspace. */ } else { @@ -2712,6 +2898,39 @@ static ssize_t recovery_start_store(mdk_rdev_t *rdev, const char *buf, size_t le static struct rdev_sysfs_entry rdev_recovery_start = __ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store); + +static ssize_t +badblocks_show(struct badblocks *bb, char *page, int unack); +static ssize_t +badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack); + +static ssize_t bb_show(mdk_rdev_t *rdev, char *page) +{ + return badblocks_show(&rdev->badblocks, page, 0); +} +static ssize_t bb_store(mdk_rdev_t *rdev, const char *page, size_t len) +{ + int rv = badblocks_store(&rdev->badblocks, page, len, 0); + /* Maybe that ack was all we needed */ + if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags)) + wake_up(&rdev->blocked_wait); + return rv; +} +static struct rdev_sysfs_entry rdev_bad_blocks = +__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store); + + +static ssize_t ubb_show(mdk_rdev_t *rdev, char *page) +{ + return badblocks_show(&rdev->badblocks, page, 1); +} +static ssize_t ubb_store(mdk_rdev_t *rdev, const char *page, size_t len) +{ + return badblocks_store(&rdev->badblocks, page, len, 1); +} +static struct rdev_sysfs_entry rdev_unack_bad_blocks = +__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store); + static struct attribute *rdev_default_attrs[] = { &rdev_state.attr, &rdev_errors.attr, @@ -2719,6 +2938,8 @@ static struct attribute *rdev_default_attrs[] = { &rdev_offset.attr, &rdev_size.attr, &rdev_recovery_start.attr, + &rdev_bad_blocks.attr, + &rdev_unack_bad_blocks.attr, NULL, }; static ssize_t @@ -2782,7 
+3003,7 @@ static struct kobj_type rdev_ktype = { .default_attrs = rdev_default_attrs, }; -void md_rdev_init(mdk_rdev_t *rdev) +int md_rdev_init(mdk_rdev_t *rdev) { rdev->desc_nr = -1; rdev->saved_raid_disk = -1; @@ -2792,12 +3013,27 @@ void md_rdev_init(mdk_rdev_t *rdev) rdev->sb_events = 0; rdev->last_read_error.tv_sec = 0; rdev->last_read_error.tv_nsec = 0; + rdev->sb_loaded = 0; + rdev->bb_page = NULL; atomic_set(&rdev->nr_pending, 0); atomic_set(&rdev->read_errors, 0); atomic_set(&rdev->corrected_errors, 0); INIT_LIST_HEAD(&rdev->same_set); init_waitqueue_head(&rdev->blocked_wait); + + /* Add space to store bad block list. + * This reserves the space even on arrays where it cannot + * be used - I wonder if that matters + */ + rdev->badblocks.count = 0; + rdev->badblocks.shift = 0; + rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL); + seqlock_init(&rdev->badblocks.lock); + if (rdev->badblocks.page == NULL) + return -ENOMEM; + + return 0; } EXPORT_SYMBOL_GPL(md_rdev_init); /* @@ -2823,8 +3059,11 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi return ERR_PTR(-ENOMEM); } - md_rdev_init(rdev); - if ((err = alloc_disk_sb(rdev))) + err = md_rdev_init(rdev); + if (err) + goto abort_free; + err = alloc_disk_sb(rdev); + if (err) goto abort_free; err = lock_rdev(rdev, newdev, super_format == -2); @@ -2860,15 +3099,17 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi goto abort_free; } } + if (super_format == -1) + /* hot-add for 0.90, or non-persistent: so no badblocks */ + rdev->badblocks.shift = -1; return rdev; abort_free: - if (rdev->sb_page) { - if (rdev->bdev) - unlock_rdev(rdev); - free_disk_sb(rdev); - } + if (rdev->bdev) + unlock_rdev(rdev); + free_disk_sb(rdev); + kfree(rdev->badblocks.page); kfree(rdev); return ERR_PTR(err); } @@ -3149,15 +3390,13 @@ level_store(mddev_t *mddev, const char *buf, size_t len) } list_for_each_entry(rdev, &mddev->disks, same_set) { - char nm[20]; if 
(rdev->raid_disk < 0) continue; if (rdev->new_raid_disk >= mddev->raid_disks) rdev->new_raid_disk = -1; if (rdev->new_raid_disk == rdev->raid_disk) continue; - sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); + sysfs_unlink_rdev(mddev, rdev); } list_for_each_entry(rdev, &mddev->disks, same_set) { if (rdev->raid_disk < 0) @@ -3168,11 +3407,10 @@ level_store(mddev_t *mddev, const char *buf, size_t len) if (rdev->raid_disk < 0) clear_bit(In_sync, &rdev->flags); else { - char nm[20]; - sprintf(nm, "rd%d", rdev->raid_disk); - if(sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) - printk("md: cannot register %s for %s after level change\n", - nm, mdname(mddev)); + if (sysfs_link_rdev(mddev, rdev)) + printk(KERN_WARNING "md: cannot register rd%d" + " for %s after level change\n", + rdev->raid_disk, mdname(mddev)); } } @@ -4504,7 +4742,8 @@ int md_run(mddev_t *mddev) } if (mddev->bio_set == NULL) - mddev->bio_set = bioset_create(BIO_POOL_SIZE, sizeof(mddev)); + mddev->bio_set = bioset_create(BIO_POOL_SIZE, + sizeof(mddev_t *)); spin_lock(&pers_lock); pers = find_pers(mddev->level, mddev->clevel); @@ -4621,12 +4860,9 @@ int md_run(mddev_t *mddev) smp_wmb(); mddev->ready = 1; list_for_each_entry(rdev, &mddev->disks, same_set) - if (rdev->raid_disk >= 0) { - char nm[20]; - sprintf(nm, "rd%d", rdev->raid_disk); - if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) + if (rdev->raid_disk >= 0) + if (sysfs_link_rdev(mddev, rdev)) /* failure here is OK */; - } set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); @@ -4854,11 +5090,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) sysfs_notify_dirent_safe(mddev->sysfs_state); list_for_each_entry(rdev, &mddev->disks, same_set) - if (rdev->raid_disk >= 0) { - char nm[20]; - sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); - } + if (rdev->raid_disk >= 0) + sysfs_unlink_rdev(mddev, rdev); set_capacity(disk, 0); mutex_unlock(&mddev->open_mutex); @@ -6198,18 +6431,7 @@ 
void md_error(mddev_t *mddev, mdk_rdev_t *rdev) if (!rdev || test_bit(Faulty, &rdev->flags)) return; - if (mddev->external) - set_bit(Blocked, &rdev->flags); -/* - dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", - mdname(mddev), - MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev), - __builtin_return_address(0),__builtin_return_address(1), - __builtin_return_address(2),__builtin_return_address(3)); -*/ - if (!mddev->pers) - return; - if (!mddev->pers->error_handler) + if (!mddev->pers || !mddev->pers->error_handler) return; mddev->pers->error_handler(mddev,rdev); if (mddev->degraded) @@ -6933,11 +7155,14 @@ void md_do_sync(mddev_t *mddev) atomic_add(sectors, &mddev->recovery_active); } + if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) + break; + j += sectors; if (j>1) mddev->curr_resync = j; mddev->curr_mark_cnt = io_sectors; if (last_check == 0) - /* this is the earliers that rebuilt will be + /* this is the earliest that rebuild will be * visible in /proc/mdstat */ md_new_event(mddev); @@ -6946,10 +7171,6 @@ void md_do_sync(mddev_t *mddev) continue; last_check = io_sectors; - - if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) - break; - repeat: if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) { /* step marks */ @@ -7067,29 +7288,23 @@ static int remove_and_add_spares(mddev_t *mddev) atomic_read(&rdev->nr_pending)==0) { if (mddev->pers->hot_remove_disk( mddev, rdev->raid_disk)==0) { - char nm[20]; - sprintf(nm,"rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); + sysfs_unlink_rdev(mddev, rdev); rdev->raid_disk = -1; } } - if (mddev->degraded && !mddev->recovery_disabled) { + if (mddev->degraded) { list_for_each_entry(rdev, &mddev->disks, same_set) { if (rdev->raid_disk >= 0 && !test_bit(In_sync, &rdev->flags) && - !test_bit(Faulty, &rdev->flags) && - !test_bit(Blocked, &rdev->flags)) + !test_bit(Faulty, &rdev->flags)) spares++; if (rdev->raid_disk < 0 && !test_bit(Faulty, &rdev->flags)) { rdev->recovery_offset = 
0; if (mddev->pers-> hot_add_disk(mddev, rdev) == 0) { - char nm[20]; - sprintf(nm, "rd%d", rdev->raid_disk); - if (sysfs_create_link(&mddev->kobj, - &rdev->kobj, nm)) + if (sysfs_link_rdev(mddev, rdev)) /* failure here is OK */; spares++; md_new_event(mddev); @@ -7138,6 +7353,8 @@ static void reap_sync_thread(mddev_t *mddev) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); sysfs_notify_dirent_safe(mddev->sysfs_action); md_new_event(mddev); + if (mddev->event_work.func) + queue_work(md_misc_wq, &mddev->event_work); } /* @@ -7170,9 +7387,6 @@ void md_check_recovery(mddev_t *mddev) if (mddev->bitmap) bitmap_daemon_work(mddev); - if (mddev->ro) - return; - if (signal_pending(current)) { if (mddev->pers->sync_request && !mddev->external) { printk(KERN_INFO "md: %s in immediate safe mode\n", @@ -7209,9 +7423,7 @@ void md_check_recovery(mddev_t *mddev) atomic_read(&rdev->nr_pending)==0) { if (mddev->pers->hot_remove_disk( mddev, rdev->raid_disk)==0) { - char nm[20]; - sprintf(nm,"rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); + sysfs_unlink_rdev(mddev, rdev); rdev->raid_disk = -1; } } @@ -7331,12 +7543,499 @@ void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev) { sysfs_notify_dirent_safe(rdev->sysfs_state); wait_event_timeout(rdev->blocked_wait, - !test_bit(Blocked, &rdev->flags), + !test_bit(Blocked, &rdev->flags) && + !test_bit(BlockedBadBlocks, &rdev->flags), msecs_to_jiffies(5000)); rdev_dec_pending(rdev, mddev); } EXPORT_SYMBOL(md_wait_for_blocked_rdev); + +/* Bad block management. + * We can record which blocks on each device are 'bad' and so just + * fail those blocks, or that stripe, rather than the whole device. + * Entries in the bad-block table are 64bits wide. This comprises: + * Length of bad-range, in sectors: 0-511 for lengths 1-512 + * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes) + * A 'shift' can be set so that larger blocks are tracked and + * consequently larger devices can be covered. 
+ * 'Acknowledged' flag - 1 bit. - the most significant bit. + * + * Locking of the bad-block table uses a seqlock so md_is_badblock + * might need to retry if it is very unlucky. + * We will sometimes want to check for bad blocks in a bi_end_io function, + * so we use the write_seqlock_irq variant. + * + * When looking for a bad block we specify a range and want to + * know if any block in the range is bad. So we binary-search + * to the last range that starts at-or-before the given endpoint, + * (or "before the sector after the target range") + * then see if it ends after the given start. + * We return + * 0 if there are no known bad blocks in the range + * 1 if there are known bad blocks which are all acknowledged + * -1 if there are bad blocks which have not yet been acknowledged in metadata. + * plus the start/length of the first bad section we overlap. + */ +int md_is_badblock(struct badblocks *bb, sector_t s, int sectors, + sector_t *first_bad, int *bad_sectors) +{ + int hi; + int lo = 0; + u64 *p = bb->page; + int rv = 0; + sector_t target = s + sectors; + unsigned seq; + + if (bb->shift > 0) { + /* round the start down, and the end up */ + s >>= bb->shift; + target += (1<<bb->shift) - 1; + target >>= bb->shift; + sectors = target - s; + } + /* 'target' is now the first block after the bad range */ + +retry: + seq = read_seqbegin(&bb->lock); + + hi = bb->count; + + /* Binary search between lo and hi for 'target' + * i.e. for the last range that starts before 'target' + */ + /* INVARIANT: ranges before 'lo' and at-or-after 'hi' + * are known not to be the last range before target. + * VARIANT: hi-lo is the number of possible + * ranges, and decreases until it reaches 1 + */ + while (hi - lo > 1) { + int mid = (lo + hi) / 2; + sector_t a = BB_OFFSET(p[mid]); + if (a < target) + /* This could still be the one, earlier ranges + * could not. */ + lo = mid; + else + /* This and later ranges are definitely out.
*/ + hi = mid; + } + /* 'lo' might be the last that started before target, but 'hi' isn't */ + if (hi > lo) { + /* need to check all ranges that end after 's' to see if + * any are unacknowledged. + */ + while (lo >= 0 && + BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) { + if (BB_OFFSET(p[lo]) < target) { + /* starts before the end, and finishes after + * the start, so they must overlap + */ + if (rv != -1 && BB_ACK(p[lo])) + rv = 1; + else + rv = -1; + *first_bad = BB_OFFSET(p[lo]); + *bad_sectors = BB_LEN(p[lo]); + } + lo--; + } + } + + if (read_seqretry(&bb->lock, seq)) + goto retry; + + return rv; +} +EXPORT_SYMBOL_GPL(md_is_badblock); + +/* + * Add a range of bad blocks to the table. + * This might extend the table, or might contract it + * if two adjacent ranges can be merged. + * We binary-search to find the 'insertion' point, then + * decide how best to handle it. + */ +static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors, + int acknowledged) +{ + u64 *p; + int lo, hi; + int rv = 1; + + if (bb->shift < 0) + /* badblocks are disabled */ + return 0; + + if (bb->shift) { + /* round the start down, and the end up */ + sector_t next = s + sectors; + s >>= bb->shift; + next += (1<<bb->shift) - 1; + next >>= bb->shift; + sectors = next - s; + } + + write_seqlock_irq(&bb->lock); + + p = bb->page; + lo = 0; + hi = bb->count; + /* Find the last range that starts at-or-before 's' */ + while (hi - lo > 1) { + int mid = (lo + hi) / 2; + sector_t a = BB_OFFSET(p[mid]); + if (a <= s) + lo = mid; + else + hi = mid; + } + if (hi > lo && BB_OFFSET(p[lo]) > s) + hi = lo; + + if (hi > lo) { + /* we found a range that might merge with the start + * of our new range + */ + sector_t a = BB_OFFSET(p[lo]); + sector_t e = a + BB_LEN(p[lo]); + int ack = BB_ACK(p[lo]); + if (e >= s) { + /* Yes, we can merge with a previous range */ + if (s == a && s + sectors >= e) + /* new range covers old */ + ack = acknowledged; + else + ack = ack && acknowledged; + + if (e < s +
sectors) + e = s + sectors; + if (e - a <= BB_MAX_LEN) { + p[lo] = BB_MAKE(a, e-a, ack); + s = e; + } else { + /* does not all fit in one range, + * make p[lo] maximal + */ + if (BB_LEN(p[lo]) != BB_MAX_LEN) + p[lo] = BB_MAKE(a, BB_MAX_LEN, ack); + s = a + BB_MAX_LEN; + } + sectors = e - s; + } + } + if (sectors && hi < bb->count) { + /* 'hi' points to the first range that starts after 's'. + * Maybe we can merge with the start of that range */ + sector_t a = BB_OFFSET(p[hi]); + sector_t e = a + BB_LEN(p[hi]); + int ack = BB_ACK(p[hi]); + if (a <= s + sectors) { + /* merging is possible */ + if (e <= s + sectors) { + /* full overlap */ + e = s + sectors; + ack = acknowledged; + } else + ack = ack && acknowledged; + + a = s; + if (e - a <= BB_MAX_LEN) { + p[hi] = BB_MAKE(a, e-a, ack); + s = e; + } else { + p[hi] = BB_MAKE(a, BB_MAX_LEN, ack); + s = a + BB_MAX_LEN; + } + sectors = e - s; + lo = hi; + hi++; + } + } + if (sectors == 0 && hi < bb->count) { + /* we might be able to combine lo and hi */ + /* Note: 's' is at the end of 'lo' */ + sector_t a = BB_OFFSET(p[hi]); + int lolen = BB_LEN(p[lo]); + int hilen = BB_LEN(p[hi]); + int newlen = lolen + hilen - (s - a); + if (s >= a && newlen < BB_MAX_LEN) { + /* yes, we can combine them */ + int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]); + p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack); + memmove(p + hi, p + hi + 1, + (bb->count - hi - 1) * 8); + bb->count--; + } + } + while (sectors) { + /* didn't merge (it all). 
+ * Need to add a range just before 'hi' */ + if (bb->count >= MD_MAX_BADBLOCKS) { + /* No room for more */ + rv = 0; + break; + } else { + int this_sectors = sectors; + memmove(p + hi + 1, p + hi, + (bb->count - hi) * 8); + bb->count++; + + if (this_sectors > BB_MAX_LEN) + this_sectors = BB_MAX_LEN; + p[hi] = BB_MAKE(s, this_sectors, acknowledged); + sectors -= this_sectors; + s += this_sectors; + } + } + + bb->changed = 1; + if (!acknowledged) + bb->unacked_exist = 1; + write_sequnlock_irq(&bb->lock); + + return rv; +} + +int rdev_set_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors, + int acknowledged) +{ + int rv = md_set_badblocks(&rdev->badblocks, + s + rdev->data_offset, sectors, acknowledged); + if (rv) { + /* Make sure they get written out promptly */ + set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags); + md_wakeup_thread(rdev->mddev->thread); + } + return rv; +} +EXPORT_SYMBOL_GPL(rdev_set_badblocks); + +/* + * Remove a range of bad blocks from the table. + * This may involve extending the table if we split a region, + * but it must not fail. So if the table becomes full, we just + * drop the remove request. + */ +static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors) +{ + u64 *p; + int lo, hi; + sector_t target = s + sectors; + int rv = 0; + + if (bb->shift > 0) { + /* When clearing we round the start up and the end down. + * This should not matter as the shift should align with + * the block size and no rounding should ever be needed. + * However it is better to think a block is bad when it + * isn't than to think a block is not bad when it is. 
+ */ + s += (1<<bb->shift) - 1; + s >>= bb->shift; + target >>= bb->shift; + sectors = target - s; + } + + write_seqlock_irq(&bb->lock); + + p = bb->page; + lo = 0; + hi = bb->count; + /* Find the last range that starts before 'target' */ + while (hi - lo > 1) { + int mid = (lo + hi) / 2; + sector_t a = BB_OFFSET(p[mid]); + if (a < target) + lo = mid; + else + hi = mid; + } + if (hi > lo) { + /* p[lo] is the last range that could overlap the + * current range. Earlier ranges could also overlap, + * but only this one can overlap the end of the range. + */ + if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) { + /* Partial overlap, leave the tail of this range */ + int ack = BB_ACK(p[lo]); + sector_t a = BB_OFFSET(p[lo]); + sector_t end = a + BB_LEN(p[lo]); + + if (a < s) { + /* we need to split this range */ + if (bb->count >= MD_MAX_BADBLOCKS) { + rv = 0; + goto out; + } + memmove(p+lo+1, p+lo, (bb->count - lo) * 8); + bb->count++; + p[lo] = BB_MAKE(a, s-a, ack); + lo++; + } + p[lo] = BB_MAKE(target, end - target, ack); + /* there is no longer an overlap */ + hi = lo; + lo--; + } + while (lo >= 0 && + BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) { + /* This range does overlap */ + if (BB_OFFSET(p[lo]) < s) { + /* Keep the early parts of this range. */ + int ack = BB_ACK(p[lo]); + sector_t start = BB_OFFSET(p[lo]); + p[lo] = BB_MAKE(start, s - start, ack); + /* now low doesn't overlap, so.. */ + break; + } + lo--; + } + /* 'lo' is strictly before, 'hi' is strictly after, + * anything between needs to be discarded + */ + if (hi - lo > 1) { + memmove(p+lo+1, p+hi, (bb->count - hi) * 8); + bb->count -= (hi - lo - 1); + } + } + + bb->changed = 1; +out: + write_sequnlock_irq(&bb->lock); + return rv; +} + +int rdev_clear_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors) +{ + return md_clear_badblocks(&rdev->badblocks, + s + rdev->data_offset, + sectors); +} +EXPORT_SYMBOL_GPL(rdev_clear_badblocks); + +/* + * Acknowledge all bad blocks in a list. 
+ * This only succeeds if ->changed is clear. It is used by + * in-kernel metadata updates + */ +void md_ack_all_badblocks(struct badblocks *bb) +{ + if (bb->page == NULL || bb->changed) + /* no point even trying */ + return; + write_seqlock_irq(&bb->lock); + + if (bb->changed == 0) { + u64 *p = bb->page; + int i; + for (i = 0; i < bb->count ; i++) { + if (!BB_ACK(p[i])) { + sector_t start = BB_OFFSET(p[i]); + int len = BB_LEN(p[i]); + p[i] = BB_MAKE(start, len, 1); + } + } + bb->unacked_exist = 0; + } + write_sequnlock_irq(&bb->lock); +} +EXPORT_SYMBOL_GPL(md_ack_all_badblocks); + +/* sysfs access to bad-blocks list. + * We present two files. + * 'bad-blocks' lists sector numbers and lengths of ranges that + * are recorded as bad. The list is truncated to fit within + * the one-page limit of sysfs. + * Writing "sector length" to this file adds an acknowledged + * bad block list. + * 'unacknowledged-bad-blocks' lists bad blocks that have not yet + * been acknowledged. Writing to this file adds bad blocks + * without acknowledging them. This is largely for testing. 
+ */ + +static ssize_t +badblocks_show(struct badblocks *bb, char *page, int unack) +{ + size_t len; + int i; + u64 *p = bb->page; + unsigned seq; + + if (bb->shift < 0) + return 0; + +retry: + seq = read_seqbegin(&bb->lock); + + len = 0; + i = 0; + + while (len < PAGE_SIZE && i < bb->count) { + sector_t s = BB_OFFSET(p[i]); + unsigned int length = BB_LEN(p[i]); + int ack = BB_ACK(p[i]); + i++; + + if (unack && ack) + continue; + + len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n", + (unsigned long long)s << bb->shift, + length << bb->shift); + } + if (unack && len == 0) + bb->unacked_exist = 0; + + if (read_seqretry(&bb->lock, seq)) + goto retry; + + return len; +} + +#define DO_DEBUG 1 + +static ssize_t +badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack) +{ + unsigned long long sector; + int length; + char newline; +#ifdef DO_DEBUG + /* Allow clearing via sysfs *only* for testing/debugging. + * Normally only a successful write may clear a badblock + */ + int clear = 0; + if (page[0] == '-') { + clear = 1; + page++; + } +#endif /* DO_DEBUG */ + + switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) { + case 3: + if (newline != '\n') + return -EINVAL; + case 2: + if (length <= 0) + return -EINVAL; + break; + default: + return -EINVAL; + } + +#ifdef DO_DEBUG + if (clear) { + md_clear_badblocks(bb, sector, length); + return len; + } +#endif /* DO_DEBUG */ + if (md_set_badblocks(bb, sector, length, !unack)) + return len; + else + return -ENOSPC; +} + static int md_notify_reboot(struct notifier_block *this, unsigned long code, void *x) { diff --git a/drivers/md/md.h b/drivers/md/md.h index 1c26c7a08ae6..1e586bb4452e 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -29,6 +29,13 @@ typedef struct mddev_s mddev_t; typedef struct mdk_rdev_s mdk_rdev_t; +/* Bad block numbers are stored sorted in a single page. + * 64bits is used for each block or extent. 
+ * 54 bits are sector number, 9 bits are extent size, + * 1 bit is an 'acknowledged' flag. + */ +#define MD_MAX_BADBLOCKS (PAGE_SIZE/8) + /* * MD's 'extended' device */ @@ -48,7 +55,7 @@ struct mdk_rdev_s struct block_device *meta_bdev; struct block_device *bdev; /* block device handle */ - struct page *sb_page; + struct page *sb_page, *bb_page; int sb_loaded; __u64 sb_events; sector_t data_offset; /* start of data in array */ @@ -74,9 +81,29 @@ struct mdk_rdev_s #define In_sync 2 /* device is in_sync with rest of array */ #define WriteMostly 4 /* Avoid reading if at all possible */ #define AutoDetected 7 /* added by auto-detect */ -#define Blocked 8 /* An error occurred on an externally - * managed array, don't allow writes +#define Blocked 8 /* An error occurred but has not yet + * been acknowledged by the metadata + * handler, so don't allow writes * until it is cleared */ +#define WriteErrorSeen 9 /* A write error has been seen on this + * device + */ +#define FaultRecorded 10 /* Intermediate state for clearing + * Blocked. The Fault is/will-be + * recorded in the metadata, but that + * metadata hasn't been stored safely + * on disk yet. + */ +#define BlockedBadBlocks 11 /* A writer is blocked because they + * found an unacknowledged bad-block. + * This can safely be cleared at any + * time, and the writer will re-check. + * It may be set at any time, and at + * worst the writer will timeout and + * re-check. So setting it as + * accurately as possible is good, but + * not absolutely critical. + */ wait_queue_head_t blocked_wait; int desc_nr; /* descriptor index in the superblock */ @@ -111,8 +138,54 @@ struct mdk_rdev_s struct sysfs_dirent *sysfs_state; /* handle for 'state' * sysfs entry */ + + struct badblocks { + int count; /* count of bad blocks */ + int unacked_exist; /* there probably are unacknowledged + * bad blocks. 
This is only cleared + * when a read discovers none + */ + int shift; /* shift from sectors to block size + * a -ve shift means badblocks are + * disabled.*/ + u64 *page; /* badblock list */ + int changed; + seqlock_t lock; + + sector_t sector; + sector_t size; /* in sectors */ + } badblocks; }; +#define BB_LEN_MASK (0x00000000000001FFULL) +#define BB_OFFSET_MASK (0x7FFFFFFFFFFFFE00ULL) +#define BB_ACK_MASK (0x8000000000000000ULL) +#define BB_MAX_LEN 512 +#define BB_OFFSET(x) (((x) & BB_OFFSET_MASK) >> 9) +#define BB_LEN(x) (((x) & BB_LEN_MASK) + 1) +#define BB_ACK(x) (!!((x) & BB_ACK_MASK)) +#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63)) + +extern int md_is_badblock(struct badblocks *bb, sector_t s, int sectors, + sector_t *first_bad, int *bad_sectors); +static inline int is_badblock(mdk_rdev_t *rdev, sector_t s, int sectors, + sector_t *first_bad, int *bad_sectors) +{ + if (unlikely(rdev->badblocks.count)) { + int rv = md_is_badblock(&rdev->badblocks, rdev->data_offset + s, + sectors, + first_bad, bad_sectors); + if (rv) + *first_bad -= rdev->data_offset; + return rv; + } + return 0; +} +extern int rdev_set_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors, + int acknowledged); +extern int rdev_clear_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors); +extern void md_ack_all_badblocks(struct badblocks *bb); + struct mddev_s { void *private; @@ -239,9 +312,12 @@ struct mddev_s #define MD_RECOVERY_FROZEN 9 unsigned long recovery; - int recovery_disabled; /* if we detect that recovery - * will always fail, set this - * so we don't loop trying */ + /* If a RAID personality determines that recovery (of a particular + * device) will fail due to a read error on the source device, it + * takes a copy of this number and does not attempt recovery again + * until this number changes. 
+ */ + int recovery_disabled; int in_sync; /* know to not need resync */ /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so @@ -304,11 +380,6 @@ struct mddev_s * hot-adding a bitmap. It should * eventually be settable by sysfs. */ - /* When md is serving under dm, it might use a - * dirty_log to store the bits. - */ - struct dm_dirty_log *log; - struct mutex mutex; unsigned long chunksize; unsigned long daemon_sleep; /* how many jiffies between updates? */ @@ -413,6 +484,20 @@ static inline char * mdname (mddev_t * mddev) return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; } +static inline int sysfs_link_rdev(mddev_t *mddev, mdk_rdev_t *rdev) +{ + char nm[20]; + sprintf(nm, "rd%d", rdev->raid_disk); + return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); +} + +static inline void sysfs_unlink_rdev(mddev_t *mddev, mdk_rdev_t *rdev) +{ + char nm[20]; + sprintf(nm, "rd%d", rdev->raid_disk); + sysfs_remove_link(&mddev->kobj, nm); +} + /* * iterates through some rdev ringlist. It's safe to remove the * current 'rdev'. Dont touch 'tmp' though. 
@@ -505,7 +590,7 @@ extern void mddev_init(mddev_t *mddev); extern int md_run(mddev_t *mddev); extern void md_stop(mddev_t *mddev); extern void md_stop_writes(mddev_t *mddev); -extern void md_rdev_init(mdk_rdev_t *rdev); +extern int md_rdev_init(mdk_rdev_t *rdev); extern void mddev_suspend(mddev_t *mddev); extern void mddev_resume(mddev_t *mddev); @@ -514,4 +599,5 @@ extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, mddev_t *mddev); extern int mddev_check_plugged(mddev_t *mddev); +extern void md_trim_bio(struct bio *bio, int offset, int size); #endif /* _MD_MD_H */ diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f7431b6d8447..32323f0afd89 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -35,16 +35,13 @@ #include <linux/delay.h> #include <linux/blkdev.h> #include <linux/seq_file.h> +#include <linux/ratelimit.h> #include "md.h" #include "raid1.h" #include "bitmap.h" #define DEBUG 0 -#if DEBUG -#define PRINTK(x...) printk(x) -#else -#define PRINTK(x...) -#endif +#define PRINTK(x...) do { if (DEBUG) printk(x); } while (0) /* * Number of guaranteed r1bios in case of extreme VM load: @@ -166,7 +163,7 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio) for (i = 0; i < conf->raid_disks; i++) { struct bio **bio = r1_bio->bios + i; - if (*bio && *bio != IO_BLOCKED) + if (!BIO_SPECIAL(*bio)) bio_put(*bio); *bio = NULL; } @@ -176,12 +173,6 @@ static void free_r1bio(r1bio_t *r1_bio) { conf_t *conf = r1_bio->mddev->private; - /* - * Wake up any possible resync thread that waits for the device - * to go idle. - */ - allow_barrier(conf); - put_all_bios(conf, r1_bio); mempool_free(r1_bio, conf->r1bio_pool); } @@ -222,6 +213,33 @@ static void reschedule_retry(r1bio_t *r1_bio) * operation and are ready to return a success/failure code to the buffer * cache layer. 
*/ +static void call_bio_endio(r1bio_t *r1_bio) +{ + struct bio *bio = r1_bio->master_bio; + int done; + conf_t *conf = r1_bio->mddev->private; + + if (bio->bi_phys_segments) { + unsigned long flags; + spin_lock_irqsave(&conf->device_lock, flags); + bio->bi_phys_segments--; + done = (bio->bi_phys_segments == 0); + spin_unlock_irqrestore(&conf->device_lock, flags); + } else + done = 1; + + if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) + clear_bit(BIO_UPTODATE, &bio->bi_flags); + if (done) { + bio_endio(bio, 0); + /* + * Wake up any possible resync thread that waits for the device + * to go idle. + */ + allow_barrier(conf); + } +} + static void raid_end_bio_io(r1bio_t *r1_bio) { struct bio *bio = r1_bio->master_bio; @@ -234,8 +252,7 @@ static void raid_end_bio_io(r1bio_t *r1_bio) (unsigned long long) bio->bi_sector + (bio->bi_size >> 9) - 1); - bio_endio(bio, - test_bit(R1BIO_Uptodate, &r1_bio->state) ? 0 : -EIO); + call_bio_endio(r1_bio); } free_r1bio(r1_bio); } @@ -287,36 +304,52 @@ static void raid1_end_read_request(struct bio *bio, int error) * oops, read error: */ char b[BDEVNAME_SIZE]; - if (printk_ratelimit()) - printk(KERN_ERR "md/raid1:%s: %s: rescheduling sector %llu\n", - mdname(conf->mddev), - bdevname(conf->mirrors[mirror].rdev->bdev,b), (unsigned long long)r1_bio->sector); + printk_ratelimited( + KERN_ERR "md/raid1:%s: %s: " + "rescheduling sector %llu\n", + mdname(conf->mddev), + bdevname(conf->mirrors[mirror].rdev->bdev, + b), + (unsigned long long)r1_bio->sector); + set_bit(R1BIO_ReadError, &r1_bio->state); reschedule_retry(r1_bio); } rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); } +static void close_write(r1bio_t *r1_bio) +{ + /* it really is the end of this request */ + if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { + /* free extra copy of the data pages */ + int i = r1_bio->behind_page_count; + while (i--) + safe_put_page(r1_bio->behind_bvecs[i].bv_page); + kfree(r1_bio->behind_bvecs); + r1_bio->behind_bvecs = NULL; + } + /* clear 
the bitmap if all writes complete successfully */ + bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, + r1_bio->sectors, + !test_bit(R1BIO_Degraded, &r1_bio->state), + test_bit(R1BIO_BehindIO, &r1_bio->state)); + md_write_end(r1_bio->mddev); +} + static void r1_bio_write_done(r1bio_t *r1_bio) { - if (atomic_dec_and_test(&r1_bio->remaining)) - { - /* it really is the end of this request */ - if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { - /* free extra copy of the data pages */ - int i = r1_bio->behind_page_count; - while (i--) - safe_put_page(r1_bio->behind_pages[i]); - kfree(r1_bio->behind_pages); - r1_bio->behind_pages = NULL; - } - /* clear the bitmap if all writes complete successfully */ - bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, - r1_bio->sectors, - !test_bit(R1BIO_Degraded, &r1_bio->state), - test_bit(R1BIO_BehindIO, &r1_bio->state)); - md_write_end(r1_bio->mddev); - raid_end_bio_io(r1_bio); + if (!atomic_dec_and_test(&r1_bio->remaining)) + return; + + if (test_bit(R1BIO_WriteError, &r1_bio->state)) + reschedule_retry(r1_bio); + else { + close_write(r1_bio); + if (test_bit(R1BIO_MadeGood, &r1_bio->state)) + reschedule_retry(r1_bio); + else + raid_end_bio_io(r1_bio); } } @@ -336,13 +369,11 @@ static void raid1_end_write_request(struct bio *bio, int error) /* * 'one mirror IO has finished' event handler: */ - r1_bio->bios[mirror] = NULL; - to_put = bio; if (!uptodate) { - md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); - /* an I/O failed, we can't clear the bitmap */ - set_bit(R1BIO_Degraded, &r1_bio->state); - } else + set_bit(WriteErrorSeen, + &conf->mirrors[mirror].rdev->flags); + set_bit(R1BIO_WriteError, &r1_bio->state); + } else { /* * Set R1BIO_Uptodate in our master bio, so that we * will return a good error code for to the higher @@ -353,8 +384,22 @@ static void raid1_end_write_request(struct bio *bio, int error) * to user-side. So if something waits for IO, then it * will wait for the 'master' bio. 
*/ + sector_t first_bad; + int bad_sectors; + + r1_bio->bios[mirror] = NULL; + to_put = bio; set_bit(R1BIO_Uptodate, &r1_bio->state); + /* Maybe we can clear some bad blocks. */ + if (is_badblock(conf->mirrors[mirror].rdev, + r1_bio->sector, r1_bio->sectors, + &first_bad, &bad_sectors)) { + r1_bio->bios[mirror] = IO_MADE_GOOD; + set_bit(R1BIO_MadeGood, &r1_bio->state); + } + } + update_head_pos(mirror, r1_bio); if (behind) { @@ -377,11 +422,13 @@ static void raid1_end_write_request(struct bio *bio, int error) (unsigned long long) mbio->bi_sector, (unsigned long long) mbio->bi_sector + (mbio->bi_size >> 9) - 1); - bio_endio(mbio, 0); + call_bio_endio(r1_bio); } } } - rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); + if (r1_bio->bios[mirror] == NULL) + rdev_dec_pending(conf->mirrors[mirror].rdev, + conf->mddev); /* * Let's see if all mirrored write operations have finished @@ -408,10 +455,11 @@ static void raid1_end_write_request(struct bio *bio, int error) * * The rdev for the device selected will have nr_pending incremented. */ -static int read_balance(conf_t *conf, r1bio_t *r1_bio) +static int read_balance(conf_t *conf, r1bio_t *r1_bio, int *max_sectors) { const sector_t this_sector = r1_bio->sector; - const int sectors = r1_bio->sectors; + int sectors; + int best_good_sectors; int start_disk; int best_disk; int i; @@ -426,8 +474,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) * We take the first readable disk when above the resync window. 
*/ retry: + sectors = r1_bio->sectors; best_disk = -1; best_dist = MaxSector; + best_good_sectors = 0; + if (conf->mddev->recovery_cp < MaxSector && (this_sector + sectors >= conf->next_resync)) { choose_first = 1; @@ -439,6 +490,9 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) for (i = 0 ; i < conf->raid_disks ; i++) { sector_t dist; + sector_t first_bad; + int bad_sectors; + int disk = start_disk + i; if (disk >= conf->raid_disks) disk -= conf->raid_disks; @@ -461,6 +515,35 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) /* This is a reasonable device to use. It might * even be best. */ + if (is_badblock(rdev, this_sector, sectors, + &first_bad, &bad_sectors)) { + if (best_dist < MaxSector) + /* already have a better device */ + continue; + if (first_bad <= this_sector) { + /* cannot read here. If this is the 'primary' + * device, then we must not read beyond + * bad_sectors from another device.. + */ + bad_sectors -= (this_sector - first_bad); + if (choose_first && sectors > bad_sectors) + sectors = bad_sectors; + if (best_good_sectors > sectors) + best_good_sectors = sectors; + + } else { + sector_t good_sectors = first_bad - this_sector; + if (good_sectors > best_good_sectors) { + best_good_sectors = good_sectors; + best_disk = disk; + } + if (choose_first) + break; + } + continue; + } else + best_good_sectors = sectors; + dist = abs(this_sector - conf->mirrors[disk].head_position); if (choose_first /* Don't change to another disk for sequential reads */ @@ -489,10 +572,12 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) rdev_dec_pending(rdev, conf->mddev); goto retry; } + sectors = best_good_sectors; conf->next_seq_sect = this_sector + sectors; conf->last_used = best_disk; } rcu_read_unlock(); + *max_sectors = sectors; return best_disk; } @@ -672,30 +757,31 @@ static void alloc_behind_pages(struct bio *bio, r1bio_t *r1_bio) { int i; struct bio_vec *bvec; - struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page*), + 
struct bio_vec *bvecs = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec), GFP_NOIO); - if (unlikely(!pages)) + if (unlikely(!bvecs)) return; bio_for_each_segment(bvec, bio, i) { - pages[i] = alloc_page(GFP_NOIO); - if (unlikely(!pages[i])) + bvecs[i] = *bvec; + bvecs[i].bv_page = alloc_page(GFP_NOIO); + if (unlikely(!bvecs[i].bv_page)) goto do_sync_io; - memcpy(kmap(pages[i]) + bvec->bv_offset, - kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len); - kunmap(pages[i]); + memcpy(kmap(bvecs[i].bv_page) + bvec->bv_offset, + kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len); + kunmap(bvecs[i].bv_page); kunmap(bvec->bv_page); } - r1_bio->behind_pages = pages; + r1_bio->behind_bvecs = bvecs; r1_bio->behind_page_count = bio->bi_vcnt; set_bit(R1BIO_BehindIO, &r1_bio->state); return; do_sync_io: for (i = 0; i < bio->bi_vcnt; i++) - if (pages[i]) - put_page(pages[i]); - kfree(pages); + if (bvecs[i].bv_page) + put_page(bvecs[i].bv_page); + kfree(bvecs); PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size); } @@ -705,7 +791,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) mirror_info_t *mirror; r1bio_t *r1_bio; struct bio *read_bio; - int i, targets = 0, disks; + int i, disks; struct bitmap *bitmap; unsigned long flags; const int rw = bio_data_dir(bio); @@ -713,6 +799,9 @@ static int make_request(mddev_t *mddev, struct bio * bio) const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); mdk_rdev_t *blocked_rdev; int plugged; + int first_clone; + int sectors_handled; + int max_sectors; /* * Register the new request and wait if the reconstruction @@ -759,11 +848,24 @@ static int make_request(mddev_t *mddev, struct bio * bio) r1_bio->mddev = mddev; r1_bio->sector = bio->bi_sector; + /* We might need to issue multiple reads to different + * devices if there are bad blocks around, so we keep + * track of the number of reads in bio->bi_phys_segments. 
+ * If this is 0, there is only one r1_bio and no locking + * will be needed when requests complete. If it is + * non-zero, then it is the number of not-completed requests. + */ + bio->bi_phys_segments = 0; + clear_bit(BIO_SEG_VALID, &bio->bi_flags); + if (rw == READ) { /* * read balancing logic: */ - int rdisk = read_balance(conf, r1_bio); + int rdisk; + +read_again: + rdisk = read_balance(conf, r1_bio, &max_sectors); if (rdisk < 0) { /* couldn't find anywhere to read from */ @@ -784,6 +886,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) r1_bio->read_disk = rdisk; read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev); + md_trim_bio(read_bio, r1_bio->sector - bio->bi_sector, + max_sectors); r1_bio->bios[rdisk] = read_bio; @@ -793,16 +897,52 @@ static int make_request(mddev_t *mddev, struct bio * bio) read_bio->bi_rw = READ | do_sync; read_bio->bi_private = r1_bio; - generic_make_request(read_bio); + if (max_sectors < r1_bio->sectors) { + /* could not read all from this device, so we will + * need another r1_bio. + */ + + sectors_handled = (r1_bio->sector + max_sectors + - bio->bi_sector); + r1_bio->sectors = max_sectors; + spin_lock_irq(&conf->device_lock); + if (bio->bi_phys_segments == 0) + bio->bi_phys_segments = 2; + else + bio->bi_phys_segments++; + spin_unlock_irq(&conf->device_lock); + /* Cannot call generic_make_request directly + * as that will be queued in __make_request + * and subsequent mempool_alloc might block waiting + * for it. So hand bio over to raid1d. 
+ */ + reschedule_retry(r1_bio); + + r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); + + r1_bio->master_bio = bio; + r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled; + r1_bio->state = 0; + r1_bio->mddev = mddev; + r1_bio->sector = bio->bi_sector + sectors_handled; + goto read_again; + } else + generic_make_request(read_bio); return 0; } /* * WRITE: */ - /* first select target devices under spinlock and + /* first select target devices under rcu_lock and * inc refcount on their rdev. Record them by setting * bios[x] to bio + * If there are known/acknowledged bad blocks on any device on + * which we have seen a write error, we want to avoid writing those + * blocks. + * This potentially requires several writes to write around + * the bad blocks. Each set of writes gets its own r1bio + * with a set of bios attached. */ plugged = mddev_check_plugged(mddev); @@ -810,6 +950,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) retry_write: blocked_rdev = NULL; rcu_read_lock(); + max_sectors = r1_bio->sectors; for (i = 0; i < disks; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { @@ -817,17 +958,56 @@ static int make_request(mddev_t *mddev, struct bio * bio) blocked_rdev = rdev; break; } - if (rdev && !test_bit(Faulty, &rdev->flags)) { - atomic_inc(&rdev->nr_pending); - if (test_bit(Faulty, &rdev->flags)) { + r1_bio->bios[i] = NULL; + if (!rdev || test_bit(Faulty, &rdev->flags)) { + set_bit(R1BIO_Degraded, &r1_bio->state); + continue; + } + + atomic_inc(&rdev->nr_pending); + if (test_bit(WriteErrorSeen, &rdev->flags)) { + sector_t first_bad; + int bad_sectors; + int is_bad; + + is_bad = is_badblock(rdev, r1_bio->sector, + max_sectors, + &first_bad, &bad_sectors); + if (is_bad < 0) { + /* mustn't write here until the bad block is + * acknowledged*/ + set_bit(BlockedBadBlocks, &rdev->flags); + blocked_rdev = rdev; + break; + } + if (is_bad && first_bad <= r1_bio->sector) { + /* 
Cannot write here at all */ + bad_sectors -= (r1_bio->sector - first_bad); + if (bad_sectors < max_sectors) + /* mustn't write more than bad_sectors + * to other devices yet + */ + max_sectors = bad_sectors; rdev_dec_pending(rdev, mddev); - r1_bio->bios[i] = NULL; - } else { - r1_bio->bios[i] = bio; - targets++; + /* We don't set R1BIO_Degraded as that + * only applies if the disk is + * missing, so it might be re-added, + * and we want to know to recover this + * chunk. + * In this case the device is here, + * and the fact that this chunk is not + * in-sync is recorded in the bad + * block log + */ + continue; } - } else - r1_bio->bios[i] = NULL; + if (is_bad) { + int good_sectors = first_bad - r1_bio->sector; + if (good_sectors < max_sectors) + max_sectors = good_sectors; + } + } + r1_bio->bios[i] = bio; } rcu_read_unlock(); @@ -838,51 +1018,57 @@ static int make_request(mddev_t *mddev, struct bio * bio) for (j = 0; j < i; j++) if (r1_bio->bios[j]) rdev_dec_pending(conf->mirrors[j].rdev, mddev); - + r1_bio->state = 0; allow_barrier(conf); md_wait_for_blocked_rdev(blocked_rdev, mddev); wait_barrier(conf); goto retry_write; } - BUG_ON(targets == 0); /* we never fail the last device */ - - if (targets < conf->raid_disks) { - /* array is degraded, we will not clear the bitmap - * on I/O completion (see raid1_end_write_request) */ - set_bit(R1BIO_Degraded, &r1_bio->state); + if (max_sectors < r1_bio->sectors) { + /* We are splitting this write into multiple parts, so + * we need to prepare for allocating another r1_bio. + */ + r1_bio->sectors = max_sectors; + spin_lock_irq(&conf->device_lock); + if (bio->bi_phys_segments == 0) + bio->bi_phys_segments = 2; + else + bio->bi_phys_segments++; + spin_unlock_irq(&conf->device_lock); } - - /* do behind I/O ? 
- * Not if there are too many, or cannot allocate memory, - * or a reader on WriteMostly is waiting for behind writes - * to flush */ - if (bitmap && - (atomic_read(&bitmap->behind_writes) - < mddev->bitmap_info.max_write_behind) && - !waitqueue_active(&bitmap->behind_wait)) - alloc_behind_pages(bio, r1_bio); + sectors_handled = r1_bio->sector + max_sectors - bio->bi_sector; atomic_set(&r1_bio->remaining, 1); atomic_set(&r1_bio->behind_remaining, 0); - bitmap_startwrite(bitmap, bio->bi_sector, r1_bio->sectors, - test_bit(R1BIO_BehindIO, &r1_bio->state)); + first_clone = 1; for (i = 0; i < disks; i++) { struct bio *mbio; if (!r1_bio->bios[i]) continue; mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); - r1_bio->bios[i] = mbio; - - mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; - mbio->bi_bdev = conf->mirrors[i].rdev->bdev; - mbio->bi_end_io = raid1_end_write_request; - mbio->bi_rw = WRITE | do_flush_fua | do_sync; - mbio->bi_private = r1_bio; - - if (r1_bio->behind_pages) { + md_trim_bio(mbio, r1_bio->sector - bio->bi_sector, max_sectors); + + if (first_clone) { + /* do behind I/O ? 
+ * Not if there are too many, or cannot + * allocate memory, or a reader on WriteMostly + * is waiting for behind writes to flush */ + if (bitmap && + (atomic_read(&bitmap->behind_writes) + < mddev->bitmap_info.max_write_behind) && + !waitqueue_active(&bitmap->behind_wait)) + alloc_behind_pages(mbio, r1_bio); + + bitmap_startwrite(bitmap, r1_bio->sector, + r1_bio->sectors, + test_bit(R1BIO_BehindIO, + &r1_bio->state)); + first_clone = 0; + } + if (r1_bio->behind_bvecs) { struct bio_vec *bvec; int j; @@ -894,11 +1080,20 @@ static int make_request(mddev_t *mddev, struct bio * bio) * them all */ __bio_for_each_segment(bvec, mbio, j, 0) - bvec->bv_page = r1_bio->behind_pages[j]; + bvec->bv_page = r1_bio->behind_bvecs[j].bv_page; if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) atomic_inc(&r1_bio->behind_remaining); } + r1_bio->bios[i] = mbio; + + mbio->bi_sector = (r1_bio->sector + + conf->mirrors[i].rdev->data_offset); + mbio->bi_bdev = conf->mirrors[i].rdev->bdev; + mbio->bi_end_io = raid1_end_write_request; + mbio->bi_rw = WRITE | do_flush_fua | do_sync; + mbio->bi_private = r1_bio; + atomic_inc(&r1_bio->remaining); spin_lock_irqsave(&conf->device_lock, flags); bio_list_add(&conf->pending_bio_list, mbio); @@ -909,6 +1104,19 @@ static int make_request(mddev_t *mddev, struct bio * bio) /* In case raid1d snuck in to freeze_array */ wake_up(&conf->wait_barrier); + if (sectors_handled < (bio->bi_size >> 9)) { + /* We need another r1_bio. 
It has already been counted + * in bio->bi_phys_segments + */ + r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); + r1_bio->master_bio = bio; + r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled; + r1_bio->state = 0; + r1_bio->mddev = mddev; + r1_bio->sector = bio->bi_sector + sectors_handled; + goto retry_write; + } + if (do_sync || !bitmap || !plugged) md_wakeup_thread(mddev->thread); @@ -952,9 +1160,10 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) * However don't try a recovery from this drive as * it is very likely to fail. */ - mddev->recovery_disabled = 1; + conf->recovery_disabled = mddev->recovery_disabled; return; } + set_bit(Blocked, &rdev->flags); if (test_and_clear_bit(In_sync, &rdev->flags)) { unsigned long flags; spin_lock_irqsave(&conf->device_lock, flags); @@ -1027,7 +1236,7 @@ static int raid1_spare_active(mddev_t *mddev) && !test_bit(Faulty, &rdev->flags) && !test_and_set_bit(In_sync, &rdev->flags)) { count++; - sysfs_notify_dirent(rdev->sysfs_state); + sysfs_notify_dirent_safe(rdev->sysfs_state); } } spin_lock_irqsave(&conf->device_lock, flags); @@ -1048,6 +1257,9 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) int first = 0; int last = mddev->raid_disks - 1; + if (mddev->recovery_disabled == conf->recovery_disabled) + return -EBUSY; + if (rdev->raid_disk >= 0) first = last = rdev->raid_disk; @@ -1103,7 +1315,7 @@ static int raid1_remove_disk(mddev_t *mddev, int number) * is not possible. 
*/ if (!test_bit(Faulty, &rdev->flags) && - !mddev->recovery_disabled && + mddev->recovery_disabled != conf->recovery_disabled && mddev->degraded < conf->raid_disks) { err = -EBUSY; goto abort; @@ -1155,6 +1367,8 @@ static void end_sync_write(struct bio *bio, int error) conf_t *conf = mddev->private; int i; int mirror=0; + sector_t first_bad; + int bad_sectors; for (i = 0; i < conf->raid_disks; i++) if (r1_bio->bios[i] == bio) { @@ -1172,18 +1386,48 @@ static void end_sync_write(struct bio *bio, int error) s += sync_blocks; sectors_to_go -= sync_blocks; } while (sectors_to_go > 0); - md_error(mddev, conf->mirrors[mirror].rdev); - } + set_bit(WriteErrorSeen, + &conf->mirrors[mirror].rdev->flags); + set_bit(R1BIO_WriteError, &r1_bio->state); + } else if (is_badblock(conf->mirrors[mirror].rdev, + r1_bio->sector, + r1_bio->sectors, + &first_bad, &bad_sectors) && + !is_badblock(conf->mirrors[r1_bio->read_disk].rdev, + r1_bio->sector, + r1_bio->sectors, + &first_bad, &bad_sectors) + ) + set_bit(R1BIO_MadeGood, &r1_bio->state); update_head_pos(mirror, r1_bio); if (atomic_dec_and_test(&r1_bio->remaining)) { - sector_t s = r1_bio->sectors; - put_buf(r1_bio); - md_done_sync(mddev, s, uptodate); + int s = r1_bio->sectors; + if (test_bit(R1BIO_MadeGood, &r1_bio->state) || + test_bit(R1BIO_WriteError, &r1_bio->state)) + reschedule_retry(r1_bio); + else { + put_buf(r1_bio); + md_done_sync(mddev, s, uptodate); + } } } +static int r1_sync_page_io(mdk_rdev_t *rdev, sector_t sector, + int sectors, struct page *page, int rw) +{ + if (sync_page_io(rdev, sector, sectors << 9, page, rw, false)) + /* success */ + return 1; + if (rw == WRITE) + set_bit(WriteErrorSeen, &rdev->flags); + /* need to record an error - either for the block or the device */ + if (!rdev_set_badblocks(rdev, sector, sectors, 0)) + md_error(rdev->mddev, rdev); + return 0; +} + static int fix_sync_read_error(r1bio_t *r1_bio) { /* Try some synchronous reads of other devices to get @@ -1193,6 +1437,9 @@ static int 
fix_sync_read_error(r1bio_t *r1_bio) * We don't need to freeze the array, because being in an * active sync request, there is no normal IO, and * no overlapping syncs. + * We don't need to check is_badblock() again as we + * made sure that anything with a bad block in range + * will have bi_end_io clear. */ mddev_t *mddev = r1_bio->mddev; conf_t *conf = mddev->private; @@ -1217,9 +1464,7 @@ static int fix_sync_read_error(r1bio_t *r1_bio) * active, and resync is currently active */ rdev = conf->mirrors[d].rdev; - if (sync_page_io(rdev, - sect, - s<<9, + if (sync_page_io(rdev, sect, s<<9, bio->bi_io_vec[idx].bv_page, READ, false)) { success = 1; @@ -1233,16 +1478,36 @@ static int fix_sync_read_error(r1bio_t *r1_bio) if (!success) { char b[BDEVNAME_SIZE]; - /* Cannot read from anywhere, array is toast */ - md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); + int abort = 0; + /* Cannot read from anywhere, this block is lost. + * Record a bad block on each device. If that doesn't + * work just disable and interrupt the recovery. + * Don't fail devices as that won't really help. 
+ */ printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error" " for block %llu\n", mdname(mddev), bdevname(bio->bi_bdev, b), (unsigned long long)r1_bio->sector); - md_done_sync(mddev, r1_bio->sectors, 0); - put_buf(r1_bio); - return 0; + for (d = 0; d < conf->raid_disks; d++) { + rdev = conf->mirrors[d].rdev; + if (!rdev || test_bit(Faulty, &rdev->flags)) + continue; + if (!rdev_set_badblocks(rdev, sect, s, 0)) + abort = 1; + } + if (abort) { + conf->recovery_disabled = mddev->recovery_disabled; + set_bit(MD_RECOVERY_INTR, &mddev->recovery); + md_done_sync(mddev, r1_bio->sectors, 0); + put_buf(r1_bio); + return 0; + } + /* Try next page */ + sectors -= s; + sect += s; + idx++; + continue; } start = d; @@ -1254,16 +1519,12 @@ static int fix_sync_read_error(r1bio_t *r1_bio) if (r1_bio->bios[d]->bi_end_io != end_sync_read) continue; rdev = conf->mirrors[d].rdev; - if (sync_page_io(rdev, - sect, - s<<9, - bio->bi_io_vec[idx].bv_page, - WRITE, false) == 0) { + if (r1_sync_page_io(rdev, sect, s, + bio->bi_io_vec[idx].bv_page, + WRITE) == 0) { r1_bio->bios[d]->bi_end_io = NULL; rdev_dec_pending(rdev, mddev); - md_error(mddev, rdev); - } else - atomic_add(s, &rdev->corrected_errors); + } } d = start; while (d != r1_bio->read_disk) { @@ -1273,12 +1534,10 @@ static int fix_sync_read_error(r1bio_t *r1_bio) if (r1_bio->bios[d]->bi_end_io != end_sync_read) continue; rdev = conf->mirrors[d].rdev; - if (sync_page_io(rdev, - sect, - s<<9, - bio->bi_io_vec[idx].bv_page, - READ, false) == 0) - md_error(mddev, rdev); + if (r1_sync_page_io(rdev, sect, s, + bio->bi_io_vec[idx].bv_page, + READ) != 0) + atomic_add(s, &rdev->corrected_errors); } sectors -= s; sect += s; @@ -1420,7 +1679,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) * * 1. Retries failed read operations on working mirrors. * 2. Updates the raid superblock when problems encounter. - * 3. Performs writes following reads for array syncronising. + * 3. Performs writes following reads for array synchronising. 
*/ static void fix_read_error(conf_t *conf, int read_disk, @@ -1443,9 +1702,14 @@ static void fix_read_error(conf_t *conf, int read_disk, * which is the thread that might remove * a device. If raid1d ever becomes multi-threaded.... */ + sector_t first_bad; + int bad_sectors; + rdev = conf->mirrors[d].rdev; if (rdev && test_bit(In_sync, &rdev->flags) && + is_badblock(rdev, sect, s, + &first_bad, &bad_sectors) == 0 && sync_page_io(rdev, sect, s<<9, conf->tmppage, READ, false)) success = 1; @@ -1457,8 +1721,10 @@ static void fix_read_error(conf_t *conf, int read_disk, } while (!success && d != read_disk); if (!success) { - /* Cannot read from anywhere -- bye bye array */ - md_error(mddev, conf->mirrors[read_disk].rdev); + /* Cannot read from anywhere - mark it bad */ + mdk_rdev_t *rdev = conf->mirrors[read_disk].rdev; + if (!rdev_set_badblocks(rdev, sect, s, 0)) + md_error(mddev, rdev); break; } /* write it back and re-read */ @@ -1469,13 +1735,9 @@ static void fix_read_error(conf_t *conf, int read_disk, d--; rdev = conf->mirrors[d].rdev; if (rdev && - test_bit(In_sync, &rdev->flags)) { - if (sync_page_io(rdev, sect, s<<9, - conf->tmppage, WRITE, false) - == 0) - /* Well, this device is dead */ - md_error(mddev, rdev); - } + test_bit(In_sync, &rdev->flags)) + r1_sync_page_io(rdev, sect, s, + conf->tmppage, WRITE); } d = start; while (d != read_disk) { @@ -1486,12 +1748,8 @@ static void fix_read_error(conf_t *conf, int read_disk, rdev = conf->mirrors[d].rdev; if (rdev && test_bit(In_sync, &rdev->flags)) { - if (sync_page_io(rdev, sect, s<<9, - conf->tmppage, READ, false) - == 0) - /* Well, this device is dead */ - md_error(mddev, rdev); - else { + if (r1_sync_page_io(rdev, sect, s, + conf->tmppage, READ)) { atomic_add(s, &rdev->corrected_errors); printk(KERN_INFO "md/raid1:%s: read error corrected " @@ -1508,21 +1766,255 @@ static void fix_read_error(conf_t *conf, int read_disk, } } +static void bi_complete(struct bio *bio, int error) +{ + complete((struct completion 
*)bio->bi_private); +} + +static int submit_bio_wait(int rw, struct bio *bio) +{ + struct completion event; + rw |= REQ_SYNC; + + init_completion(&event); + bio->bi_private = &event; + bio->bi_end_io = bi_complete; + submit_bio(rw, bio); + wait_for_completion(&event); + + return test_bit(BIO_UPTODATE, &bio->bi_flags); +} + +static int narrow_write_error(r1bio_t *r1_bio, int i) +{ + mddev_t *mddev = r1_bio->mddev; + conf_t *conf = mddev->private; + mdk_rdev_t *rdev = conf->mirrors[i].rdev; + int vcnt, idx; + struct bio_vec *vec; + + /* bio has the data to be written to device 'i' where + * we just recently had a write error. + * We repeatedly clone the bio and trim down to one block, + * then try the write. Where the write fails we record + * a bad block. + * It is conceivable that the bio doesn't exactly align with + * blocks. We must handle this somehow. + * + * We currently own a reference on the rdev. + */ + + int block_sectors; + sector_t sector; + int sectors; + int sect_to_write = r1_bio->sectors; + int ok = 1; + + if (rdev->badblocks.shift < 0) + return 0; + + block_sectors = 1 << rdev->badblocks.shift; + sector = r1_bio->sector; + sectors = ((sector + block_sectors) + & ~(sector_t)(block_sectors - 1)) + - sector; + + if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { + vcnt = r1_bio->behind_page_count; + vec = r1_bio->behind_bvecs; + idx = 0; + while (vec[idx].bv_page == NULL) + idx++; + } else { + vcnt = r1_bio->master_bio->bi_vcnt; + vec = r1_bio->master_bio->bi_io_vec; + idx = r1_bio->master_bio->bi_idx; + } + while (sect_to_write) { + struct bio *wbio; + if (sectors > sect_to_write) + sectors = sect_to_write; + /* Write at 'sector' for 'sectors'*/ + + wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev); + memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec)); + wbio->bi_sector = r1_bio->sector; + wbio->bi_rw = WRITE; + wbio->bi_vcnt = vcnt; + wbio->bi_size = r1_bio->sectors << 9; + wbio->bi_idx = idx; + + md_trim_bio(wbio, sector - r1_bio->sector, 
sectors); + wbio->bi_sector += rdev->data_offset; + wbio->bi_bdev = rdev->bdev; + if (submit_bio_wait(WRITE, wbio) == 0) + /* failure! */ + ok = rdev_set_badblocks(rdev, sector, + sectors, 0) + && ok; + + bio_put(wbio); + sect_to_write -= sectors; + sector += sectors; + sectors = block_sectors; + } + return ok; +} + +static void handle_sync_write_finished(conf_t *conf, r1bio_t *r1_bio) +{ + int m; + int s = r1_bio->sectors; + for (m = 0; m < conf->raid_disks ; m++) { + mdk_rdev_t *rdev = conf->mirrors[m].rdev; + struct bio *bio = r1_bio->bios[m]; + if (bio->bi_end_io == NULL) + continue; + if (test_bit(BIO_UPTODATE, &bio->bi_flags) && + test_bit(R1BIO_MadeGood, &r1_bio->state)) { + rdev_clear_badblocks(rdev, r1_bio->sector, s); + } + if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && + test_bit(R1BIO_WriteError, &r1_bio->state)) { + if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0)) + md_error(conf->mddev, rdev); + } + } + put_buf(r1_bio); + md_done_sync(conf->mddev, s, 1); +} + +static void handle_write_finished(conf_t *conf, r1bio_t *r1_bio) +{ + int m; + for (m = 0; m < conf->raid_disks ; m++) + if (r1_bio->bios[m] == IO_MADE_GOOD) { + mdk_rdev_t *rdev = conf->mirrors[m].rdev; + rdev_clear_badblocks(rdev, + r1_bio->sector, + r1_bio->sectors); + rdev_dec_pending(rdev, conf->mddev); + } else if (r1_bio->bios[m] != NULL) { + /* This drive got a write error. We need to + * narrow down and record precise write + * errors. 
+ */ + if (!narrow_write_error(r1_bio, m)) { + md_error(conf->mddev, + conf->mirrors[m].rdev); + /* an I/O failed, we can't clear the bitmap */ + set_bit(R1BIO_Degraded, &r1_bio->state); + } + rdev_dec_pending(conf->mirrors[m].rdev, + conf->mddev); + } + if (test_bit(R1BIO_WriteError, &r1_bio->state)) + close_write(r1_bio); + raid_end_bio_io(r1_bio); +} + +static void handle_read_error(conf_t *conf, r1bio_t *r1_bio) +{ + int disk; + int max_sectors; + mddev_t *mddev = conf->mddev; + struct bio *bio; + char b[BDEVNAME_SIZE]; + mdk_rdev_t *rdev; + + clear_bit(R1BIO_ReadError, &r1_bio->state); + /* we got a read error. Maybe the drive is bad. Maybe just + * the block and we can fix it. + * We freeze all other IO, and try reading the block from + * other devices. When we find one, we re-write + * and check it that fixes the read error. + * This is all done synchronously while the array is + * frozen + */ + if (mddev->ro == 0) { + freeze_array(conf); + fix_read_error(conf, r1_bio->read_disk, + r1_bio->sector, r1_bio->sectors); + unfreeze_array(conf); + } else + md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); + + bio = r1_bio->bios[r1_bio->read_disk]; + bdevname(bio->bi_bdev, b); +read_more: + disk = read_balance(conf, r1_bio, &max_sectors); + if (disk == -1) { + printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O" + " read error for block %llu\n", + mdname(mddev), b, (unsigned long long)r1_bio->sector); + raid_end_bio_io(r1_bio); + } else { + const unsigned long do_sync + = r1_bio->master_bio->bi_rw & REQ_SYNC; + if (bio) { + r1_bio->bios[r1_bio->read_disk] = + mddev->ro ? 
IO_BLOCKED : NULL; + bio_put(bio); + } + r1_bio->read_disk = disk; + bio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev); + md_trim_bio(bio, r1_bio->sector - bio->bi_sector, max_sectors); + r1_bio->bios[r1_bio->read_disk] = bio; + rdev = conf->mirrors[disk].rdev; + printk_ratelimited(KERN_ERR + "md/raid1:%s: redirecting sector %llu" + " to other mirror: %s\n", + mdname(mddev), + (unsigned long long)r1_bio->sector, + bdevname(rdev->bdev, b)); + bio->bi_sector = r1_bio->sector + rdev->data_offset; + bio->bi_bdev = rdev->bdev; + bio->bi_end_io = raid1_end_read_request; + bio->bi_rw = READ | do_sync; + bio->bi_private = r1_bio; + if (max_sectors < r1_bio->sectors) { + /* Drat - have to split this up more */ + struct bio *mbio = r1_bio->master_bio; + int sectors_handled = (r1_bio->sector + max_sectors + - mbio->bi_sector); + r1_bio->sectors = max_sectors; + spin_lock_irq(&conf->device_lock); + if (mbio->bi_phys_segments == 0) + mbio->bi_phys_segments = 2; + else + mbio->bi_phys_segments++; + spin_unlock_irq(&conf->device_lock); + generic_make_request(bio); + bio = NULL; + + r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); + + r1_bio->master_bio = mbio; + r1_bio->sectors = (mbio->bi_size >> 9) + - sectors_handled; + r1_bio->state = 0; + set_bit(R1BIO_ReadError, &r1_bio->state); + r1_bio->mddev = mddev; + r1_bio->sector = mbio->bi_sector + sectors_handled; + + goto read_more; + } else + generic_make_request(bio); + } +} + static void raid1d(mddev_t *mddev) { r1bio_t *r1_bio; - struct bio *bio; unsigned long flags; conf_t *conf = mddev->private; struct list_head *head = &conf->retry_list; - mdk_rdev_t *rdev; struct blk_plug plug; md_check_recovery(mddev); blk_start_plug(&plug); for (;;) { - char b[BDEVNAME_SIZE]; if (atomic_read(&mddev->plug_cnt) == 0) flush_pending_writes(conf); @@ -1539,62 +2031,26 @@ static void raid1d(mddev_t *mddev) mddev = r1_bio->mddev; conf = mddev->private; - if (test_bit(R1BIO_IsSync, &r1_bio->state)) - sync_request_write(mddev, 
r1_bio); - else { - int disk; - - /* we got a read error. Maybe the drive is bad. Maybe just - * the block and we can fix it. - * We freeze all other IO, and try reading the block from - * other devices. When we find one, we re-write - * and check it that fixes the read error. - * This is all done synchronously while the array is - * frozen + if (test_bit(R1BIO_IsSync, &r1_bio->state)) { + if (test_bit(R1BIO_MadeGood, &r1_bio->state) || + test_bit(R1BIO_WriteError, &r1_bio->state)) + handle_sync_write_finished(conf, r1_bio); + else + sync_request_write(mddev, r1_bio); + } else if (test_bit(R1BIO_MadeGood, &r1_bio->state) || + test_bit(R1BIO_WriteError, &r1_bio->state)) + handle_write_finished(conf, r1_bio); + else if (test_bit(R1BIO_ReadError, &r1_bio->state)) + handle_read_error(conf, r1_bio); + else + /* just a partial read to be scheduled from separate + * context */ - if (mddev->ro == 0) { - freeze_array(conf); - fix_read_error(conf, r1_bio->read_disk, - r1_bio->sector, - r1_bio->sectors); - unfreeze_array(conf); - } else - md_error(mddev, - conf->mirrors[r1_bio->read_disk].rdev); - - bio = r1_bio->bios[r1_bio->read_disk]; - if ((disk=read_balance(conf, r1_bio)) == -1) { - printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O" - " read error for block %llu\n", - mdname(mddev), - bdevname(bio->bi_bdev,b), - (unsigned long long)r1_bio->sector); - raid_end_bio_io(r1_bio); - } else { - const unsigned long do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC; - r1_bio->bios[r1_bio->read_disk] = - mddev->ro ? 
IO_BLOCKED : NULL; - r1_bio->read_disk = disk; - bio_put(bio); - bio = bio_clone_mddev(r1_bio->master_bio, - GFP_NOIO, mddev); - r1_bio->bios[r1_bio->read_disk] = bio; - rdev = conf->mirrors[disk].rdev; - if (printk_ratelimit()) - printk(KERN_ERR "md/raid1:%s: redirecting sector %llu to" - " other mirror: %s\n", - mdname(mddev), - (unsigned long long)r1_bio->sector, - bdevname(rdev->bdev,b)); - bio->bi_sector = r1_bio->sector + rdev->data_offset; - bio->bi_bdev = rdev->bdev; - bio->bi_end_io = raid1_end_read_request; - bio->bi_rw = READ | do_sync; - bio->bi_private = r1_bio; - generic_make_request(bio); - } - } + generic_make_request(r1_bio->bios[r1_bio->read_disk]); + cond_resched(); + if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) + md_check_recovery(mddev); } blk_finish_plug(&plug); } @@ -1636,6 +2092,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i int write_targets = 0, read_targets = 0; sector_t sync_blocks; int still_degraded = 0; + int good_sectors = RESYNC_SECTORS; + int min_bad = 0; /* number of sectors that are bad in all devices */ if (!conf->r1buf_pool) if (init_resync(conf)) @@ -1723,36 +2181,89 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i rdev = rcu_dereference(conf->mirrors[i].rdev); if (rdev == NULL || - test_bit(Faulty, &rdev->flags)) { + test_bit(Faulty, &rdev->flags)) { still_degraded = 1; - continue; } else if (!test_bit(In_sync, &rdev->flags)) { bio->bi_rw = WRITE; bio->bi_end_io = end_sync_write; write_targets ++; } else { /* may need to read from here */ - bio->bi_rw = READ; - bio->bi_end_io = end_sync_read; - if (test_bit(WriteMostly, &rdev->flags)) { - if (wonly < 0) - wonly = i; - } else { - if (disk < 0) - disk = i; + sector_t first_bad = MaxSector; + int bad_sectors; + + if (is_badblock(rdev, sector_nr, good_sectors, + &first_bad, &bad_sectors)) { + if (first_bad > sector_nr) + good_sectors = first_bad - sector_nr; + else { + bad_sectors -= (sector_nr - first_bad); 
+ if (min_bad == 0 || + min_bad > bad_sectors) + min_bad = bad_sectors; + } + } + if (sector_nr < first_bad) { + if (test_bit(WriteMostly, &rdev->flags)) { + if (wonly < 0) + wonly = i; + } else { + if (disk < 0) + disk = i; + } + bio->bi_rw = READ; + bio->bi_end_io = end_sync_read; + read_targets++; } - read_targets++; } - atomic_inc(&rdev->nr_pending); - bio->bi_sector = sector_nr + rdev->data_offset; - bio->bi_bdev = rdev->bdev; - bio->bi_private = r1_bio; + if (bio->bi_end_io) { + atomic_inc(&rdev->nr_pending); + bio->bi_sector = sector_nr + rdev->data_offset; + bio->bi_bdev = rdev->bdev; + bio->bi_private = r1_bio; + } } rcu_read_unlock(); if (disk < 0) disk = wonly; r1_bio->read_disk = disk; + if (read_targets == 0 && min_bad > 0) { + /* These sectors are bad on all InSync devices, so we + * need to mark them bad on all write targets + */ + int ok = 1; + for (i = 0 ; i < conf->raid_disks ; i++) + if (r1_bio->bios[i]->bi_end_io == end_sync_write) { + mdk_rdev_t *rdev = + rcu_dereference(conf->mirrors[i].rdev); + ok = rdev_set_badblocks(rdev, sector_nr, + min_bad, 0 + ) && ok; + } + set_bit(MD_CHANGE_DEVS, &mddev->flags); + *skipped = 1; + put_buf(r1_bio); + + if (!ok) { + /* Cannot record the badblocks, so need to + * abort the resync. + * If there are multiple read targets, could just + * fail the really bad ones ??? 
+ */ + conf->recovery_disabled = mddev->recovery_disabled; + set_bit(MD_RECOVERY_INTR, &mddev->recovery); + return 0; + } else + return min_bad; + + } + if (min_bad > 0 && min_bad < good_sectors) { + /* only resync enough to reach the next bad->good + * transition */ + good_sectors = min_bad; + } + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0) /* extra read targets are also write targets */ write_targets += read_targets-1; @@ -1769,6 +2280,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i if (max_sector > mddev->resync_max) max_sector = mddev->resync_max; /* Don't do IO beyond here */ + if (max_sector > sector_nr + good_sectors) + max_sector = sector_nr + good_sectors; nr_sectors = 0; sync_blocks = 0; do { @@ -2154,18 +2667,13 @@ static int raid1_reshape(mddev_t *mddev) for (d = d2 = 0; d < conf->raid_disks; d++) { mdk_rdev_t *rdev = conf->mirrors[d].rdev; if (rdev && rdev->raid_disk != d2) { - char nm[20]; - sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); + sysfs_unlink_rdev(mddev, rdev); rdev->raid_disk = d2; - sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); - if (sysfs_create_link(&mddev->kobj, - &rdev->kobj, nm)) + sysfs_unlink_rdev(mddev, rdev); + if (sysfs_link_rdev(mddev, rdev)) printk(KERN_WARNING - "md/raid1:%s: cannot register " - "%s\n", - mdname(mddev), nm); + "md/raid1:%s: cannot register rd%d\n", + mdname(mddev), rdev->raid_disk); } if (rdev) newmirrors[d2++].rdev = rdev; diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index e743a64fac4f..e0d676b48974 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -48,6 +48,12 @@ struct r1_private_data_s { * (fresh device added). * Cleared when a sync completes. 
*/ + int recovery_disabled; /* when the same as + * mddev->recovery_disabled + * we don't allow recovery + * to be attempted as we + * expect a read error + */ wait_queue_head_t wait_barrier; @@ -95,7 +101,7 @@ struct r1bio_s { struct list_head retry_list; /* Next two are only valid when R1BIO_BehindIO is set */ - struct page **behind_pages; + struct bio_vec *behind_bvecs; int behind_page_count; /* * if the IO is in WRITE direction, then multiple bios are used. @@ -110,13 +116,24 @@ struct r1bio_s { * correct the read error. To keep track of bad blocks on a per-bio * level, we store IO_BLOCKED in the appropriate 'bios' pointer */ -#define IO_BLOCKED ((struct bio*)1) +#define IO_BLOCKED ((struct bio *)1) +/* When we successfully write to a known bad-block, we need to remove the + * bad-block marking which must be done from process context. So we record + * the success by setting bios[n] to IO_MADE_GOOD + */ +#define IO_MADE_GOOD ((struct bio *)2) + +#define BIO_SPECIAL(bio) ((unsigned long)(bio) <= 2) /* bits for r1bio.state */ #define R1BIO_Uptodate 0 #define R1BIO_IsSync 1 #define R1BIO_Degraded 2 #define R1BIO_BehindIO 3 +/* Set ReadError on bios that experience a read error so that + * raid1d knows what to do with them. + */ +#define R1BIO_ReadError 4 /* For write-behind requests, we call bi_end_io when * the last non-write-behind device completes, providing * any write was successful. Otherwise we call when @@ -125,6 +142,11 @@ struct r1bio_s { * Record that bi_end_io was called with this flag... 
*/ #define R1BIO_Returned 6 +/* If a write for this request means we can clear some + * known-bad-block records, we set this flag + */ +#define R1BIO_MadeGood 7 +#define R1BIO_WriteError 8 extern int md_raid1_congested(mddev_t *mddev, int bits); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 6e846688962f..8b29cd4f01c8 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -22,6 +22,7 @@ #include <linux/delay.h> #include <linux/blkdev.h> #include <linux/seq_file.h> +#include <linux/ratelimit.h> #include "md.h" #include "raid10.h" #include "raid0.h" @@ -123,7 +124,14 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) for (j = 0 ; j < nalloc; j++) { bio = r10_bio->devs[j].bio; for (i = 0; i < RESYNC_PAGES; i++) { - page = alloc_page(gfp_flags); + if (j == 1 && !test_bit(MD_RECOVERY_SYNC, + &conf->mddev->recovery)) { + /* we can share bv_page's during recovery */ + struct bio *rbio = r10_bio->devs[0].bio; + page = rbio->bi_io_vec[i].bv_page; + get_page(page); + } else + page = alloc_page(gfp_flags); if (unlikely(!page)) goto out_free_pages; @@ -173,7 +181,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio) for (i = 0; i < conf->copies; i++) { struct bio **bio = & r10_bio->devs[i].bio; - if (*bio && *bio != IO_BLOCKED) + if (!BIO_SPECIAL(*bio)) bio_put(*bio); *bio = NULL; } @@ -183,12 +191,6 @@ static void free_r10bio(r10bio_t *r10_bio) { conf_t *conf = r10_bio->mddev->private; - /* - * Wake up any possible resync thread that waits for the device - * to go idle. - */ - allow_barrier(conf); - put_all_bios(conf, r10_bio); mempool_free(r10_bio, conf->r10bio_pool); } @@ -227,9 +229,27 @@ static void reschedule_retry(r10bio_t *r10_bio) static void raid_end_bio_io(r10bio_t *r10_bio) { struct bio *bio = r10_bio->master_bio; + int done; + conf_t *conf = r10_bio->mddev->private; - bio_endio(bio, - test_bit(R10BIO_Uptodate, &r10_bio->state) ? 
0 : -EIO); + if (bio->bi_phys_segments) { + unsigned long flags; + spin_lock_irqsave(&conf->device_lock, flags); + bio->bi_phys_segments--; + done = (bio->bi_phys_segments == 0); + spin_unlock_irqrestore(&conf->device_lock, flags); + } else + done = 1; + if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) + clear_bit(BIO_UPTODATE, &bio->bi_flags); + if (done) { + bio_endio(bio, 0); + /* + * Wake up any possible resync thread that waits for the device + * to go idle. + */ + allow_barrier(conf); + } free_r10bio(r10_bio); } @@ -244,6 +264,26 @@ static inline void update_head_pos(int slot, r10bio_t *r10_bio) r10_bio->devs[slot].addr + (r10_bio->sectors); } +/* + * Find the disk number which triggered given bio + */ +static int find_bio_disk(conf_t *conf, r10bio_t *r10_bio, + struct bio *bio, int *slotp) +{ + int slot; + + for (slot = 0; slot < conf->copies; slot++) + if (r10_bio->devs[slot].bio == bio) + break; + + BUG_ON(slot == conf->copies); + update_head_pos(slot, r10_bio); + + if (slotp) + *slotp = slot; + return r10_bio->devs[slot].devnum; +} + static void raid10_end_read_request(struct bio *bio, int error) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); @@ -277,34 +317,45 @@ static void raid10_end_read_request(struct bio *bio, int error) * oops, read error - keep the refcount on the rdev */ char b[BDEVNAME_SIZE]; - if (printk_ratelimit()) - printk(KERN_ERR "md/raid10:%s: %s: rescheduling sector %llu\n", - mdname(conf->mddev), - bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector); + printk_ratelimited(KERN_ERR + "md/raid10:%s: %s: rescheduling sector %llu\n", + mdname(conf->mddev), + bdevname(conf->mirrors[dev].rdev->bdev, b), + (unsigned long long)r10_bio->sector); + set_bit(R10BIO_ReadError, &r10_bio->state); reschedule_retry(r10_bio); } } +static void close_write(r10bio_t *r10_bio) +{ + /* clear the bitmap if all writes complete successfully */ + bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector, + r10_bio->sectors, + 
!test_bit(R10BIO_Degraded, &r10_bio->state), + 0); + md_write_end(r10_bio->mddev); +} + static void raid10_end_write_request(struct bio *bio, int error) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t *r10_bio = bio->bi_private; - int slot, dev; + int dev; + int dec_rdev = 1; conf_t *conf = r10_bio->mddev->private; + int slot; - for (slot = 0; slot < conf->copies; slot++) - if (r10_bio->devs[slot].bio == bio) - break; - dev = r10_bio->devs[slot].devnum; + dev = find_bio_disk(conf, r10_bio, bio, &slot); /* * this branch is our 'one mirror IO has finished' event handler: */ if (!uptodate) { - md_error(r10_bio->mddev, conf->mirrors[dev].rdev); - /* an I/O failed, we can't clear the bitmap */ - set_bit(R10BIO_Degraded, &r10_bio->state); - } else + set_bit(WriteErrorSeen, &conf->mirrors[dev].rdev->flags); + set_bit(R10BIO_WriteError, &r10_bio->state); + dec_rdev = 0; + } else { /* * Set R10BIO_Uptodate in our master bio, so that * we will return a good error code for to the higher @@ -314,9 +365,22 @@ static void raid10_end_write_request(struct bio *bio, int error) * user-side. So if something waits for IO, then it will * wait for the 'master' bio. */ + sector_t first_bad; + int bad_sectors; + set_bit(R10BIO_Uptodate, &r10_bio->state); - update_head_pos(slot, r10_bio); + /* Maybe we can clear some bad blocks. */ + if (is_badblock(conf->mirrors[dev].rdev, + r10_bio->devs[slot].addr, + r10_bio->sectors, + &first_bad, &bad_sectors)) { + bio_put(bio); + r10_bio->devs[slot].bio = IO_MADE_GOOD; + dec_rdev = 0; + set_bit(R10BIO_MadeGood, &r10_bio->state); + } + } /* * @@ -324,16 +388,18 @@ static void raid10_end_write_request(struct bio *bio, int error) * already. 
*/ if (atomic_dec_and_test(&r10_bio->remaining)) { - /* clear the bitmap if all writes complete successfully */ - bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector, - r10_bio->sectors, - !test_bit(R10BIO_Degraded, &r10_bio->state), - 0); - md_write_end(r10_bio->mddev); - raid_end_bio_io(r10_bio); + if (test_bit(R10BIO_WriteError, &r10_bio->state)) + reschedule_retry(r10_bio); + else { + close_write(r10_bio); + if (test_bit(R10BIO_MadeGood, &r10_bio->state)) + reschedule_retry(r10_bio); + else + raid_end_bio_io(r10_bio); + } } - - rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev); + if (dec_rdev) + rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev); } @@ -484,11 +550,12 @@ static int raid10_mergeable_bvec(struct request_queue *q, * FIXME: possibly should rethink readbalancing and do it differently * depending on near_copies / far_copies geometry. */ -static int read_balance(conf_t *conf, r10bio_t *r10_bio) +static int read_balance(conf_t *conf, r10bio_t *r10_bio, int *max_sectors) { const sector_t this_sector = r10_bio->sector; int disk, slot; - const int sectors = r10_bio->sectors; + int sectors = r10_bio->sectors; + int best_good_sectors; sector_t new_distance, best_dist; mdk_rdev_t *rdev; int do_balance; @@ -497,8 +564,10 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) raid10_find_phys(conf, r10_bio); rcu_read_lock(); retry: + sectors = r10_bio->sectors; best_slot = -1; best_dist = MaxSector; + best_good_sectors = 0; do_balance = 1; /* * Check if we can balance. 
We can balance on the whole @@ -511,6 +580,10 @@ retry: do_balance = 0; for (slot = 0; slot < conf->copies ; slot++) { + sector_t first_bad; + int bad_sectors; + sector_t dev_sector; + if (r10_bio->devs[slot].bio == IO_BLOCKED) continue; disk = r10_bio->devs[slot].devnum; @@ -520,6 +593,37 @@ retry: if (!test_bit(In_sync, &rdev->flags)) continue; + dev_sector = r10_bio->devs[slot].addr; + if (is_badblock(rdev, dev_sector, sectors, + &first_bad, &bad_sectors)) { + if (best_dist < MaxSector) + /* Already have a better slot */ + continue; + if (first_bad <= dev_sector) { + /* Cannot read here. If this is the + * 'primary' device, then we must not read + * beyond 'bad_sectors' from another device. + */ + bad_sectors -= (dev_sector - first_bad); + if (!do_balance && sectors > bad_sectors) + sectors = bad_sectors; + if (best_good_sectors > sectors) + best_good_sectors = sectors; + } else { + sector_t good_sectors = + first_bad - dev_sector; + if (good_sectors > best_good_sectors) { + best_good_sectors = good_sectors; + best_slot = slot; + } + if (!do_balance) + /* Must read from here */ + break; + } + continue; + } else + best_good_sectors = sectors; + if (!do_balance) break; @@ -561,6 +665,7 @@ retry: } else disk = -1; rcu_read_unlock(); + *max_sectors = best_good_sectors; return disk; } @@ -734,6 +839,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) unsigned long flags; mdk_rdev_t *blocked_rdev; int plugged; + int sectors_handled; + int max_sectors; if (unlikely(bio->bi_rw & REQ_FLUSH)) { md_flush_request(mddev, bio); @@ -808,12 +915,26 @@ static int make_request(mddev_t *mddev, struct bio * bio) r10_bio->sector = bio->bi_sector; r10_bio->state = 0; + /* We might need to issue multiple reads to different + * devices if there are bad blocks around, so we keep + * track of the number of reads in bio->bi_phys_segments. + * If this is 0, there is only one r10_bio and no locking + * will be needed when the request completes. 
If it is + * non-zero, then it is the number of not-completed requests. + */ + bio->bi_phys_segments = 0; + clear_bit(BIO_SEG_VALID, &bio->bi_flags); + if (rw == READ) { /* * read balancing logic: */ - int disk = read_balance(conf, r10_bio); - int slot = r10_bio->read_slot; + int disk; + int slot; + +read_again: + disk = read_balance(conf, r10_bio, &max_sectors); + slot = r10_bio->read_slot; if (disk < 0) { raid_end_bio_io(r10_bio); return 0; @@ -821,6 +942,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) mirror = conf->mirrors + disk; read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev); + md_trim_bio(read_bio, r10_bio->sector - bio->bi_sector, + max_sectors); r10_bio->devs[slot].bio = read_bio; @@ -831,7 +954,37 @@ static int make_request(mddev_t *mddev, struct bio * bio) read_bio->bi_rw = READ | do_sync; read_bio->bi_private = r10_bio; - generic_make_request(read_bio); + if (max_sectors < r10_bio->sectors) { + /* Could not read all from this device, so we will + * need another r10_bio. + */ + sectors_handled = (r10_bio->sector + max_sectors + - bio->bi_sector); + r10_bio->sectors = max_sectors; + spin_lock_irq(&conf->device_lock); + if (bio->bi_phys_segments == 0) + bio->bi_phys_segments = 2; + else + bio->bi_phys_segments++; + spin_unlock_irq(&conf->device_lock); + /* Cannot call generic_make_request directly + * as that will be queued in __generic_make_request + * and subsequent mempool_alloc might block + * waiting for it. so hand bio over to raid10d. 
+ */ + reschedule_retry(r10_bio); + + r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); + + r10_bio->master_bio = bio; + r10_bio->sectors = ((bio->bi_size >> 9) + - sectors_handled); + r10_bio->state = 0; + r10_bio->mddev = mddev; + r10_bio->sector = bio->bi_sector + sectors_handled; + goto read_again; + } else + generic_make_request(read_bio); return 0; } @@ -841,13 +994,22 @@ static int make_request(mddev_t *mddev, struct bio * bio) /* first select target devices under rcu_lock and * inc refcount on their rdev. Record them by setting * bios[x] to bio + * If there are known/acknowledged bad blocks on any device + * on which we have seen a write error, we want to avoid + * writing to those blocks. This potentially requires several + * writes to write around the bad blocks. Each set of writes + * gets its own r10_bio with a set of bios attached. The number + * of r10_bios is recorded in bio->bi_phys_segments just as with + * the read case. */ plugged = mddev_check_plugged(mddev); raid10_find_phys(conf, r10_bio); - retry_write: +retry_write: blocked_rdev = NULL; rcu_read_lock(); + max_sectors = r10_bio->sectors; + for (i = 0; i < conf->copies; i++) { int d = r10_bio->devs[i].devnum; mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev); @@ -856,13 +1018,55 @@ static int make_request(mddev_t *mddev, struct bio * bio) blocked_rdev = rdev; break; } - if (rdev && !test_bit(Faulty, &rdev->flags)) { - atomic_inc(&rdev->nr_pending); - r10_bio->devs[i].bio = bio; - } else { - r10_bio->devs[i].bio = NULL; + r10_bio->devs[i].bio = NULL; + if (!rdev || test_bit(Faulty, &rdev->flags)) { set_bit(R10BIO_Degraded, &r10_bio->state); + continue; } + if (test_bit(WriteErrorSeen, &rdev->flags)) { + sector_t first_bad; + sector_t dev_sector = r10_bio->devs[i].addr; + int bad_sectors; + int is_bad; + + is_bad = is_badblock(rdev, dev_sector, + max_sectors, + &first_bad, &bad_sectors); + if (is_bad < 0) { + /* Mustn't write here until the bad block + * is acknowledged + */ + 
atomic_inc(&rdev->nr_pending); + set_bit(BlockedBadBlocks, &rdev->flags); + blocked_rdev = rdev; + break; + } + if (is_bad && first_bad <= dev_sector) { + /* Cannot write here at all */ + bad_sectors -= (dev_sector - first_bad); + if (bad_sectors < max_sectors) + /* Mustn't write more than bad_sectors + * to other devices yet + */ + max_sectors = bad_sectors; + /* We don't set R10BIO_Degraded as that + * only applies if the disk is missing, + * so it might be re-added, and we want to + * know to recover this chunk. + * In this case the device is here, and the + * fact that this chunk is not in-sync is + * recorded in the bad block log. + */ + continue; + } + if (is_bad) { + int good_sectors = first_bad - dev_sector; + if (good_sectors < max_sectors) + max_sectors = good_sectors; + } + } + r10_bio->devs[i].bio = bio; + atomic_inc(&rdev->nr_pending); } rcu_read_unlock(); @@ -882,8 +1086,22 @@ static int make_request(mddev_t *mddev, struct bio * bio) goto retry_write; } + if (max_sectors < r10_bio->sectors) { + /* We are splitting this into multiple parts, so + * we need to prepare for allocating another r10_bio. 
+ */ + r10_bio->sectors = max_sectors; + spin_lock_irq(&conf->device_lock); + if (bio->bi_phys_segments == 0) + bio->bi_phys_segments = 2; + else + bio->bi_phys_segments++; + spin_unlock_irq(&conf->device_lock); + } + sectors_handled = r10_bio->sector + max_sectors - bio->bi_sector; + atomic_set(&r10_bio->remaining, 1); - bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0); + bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0); for (i = 0; i < conf->copies; i++) { struct bio *mbio; @@ -892,10 +1110,12 @@ static int make_request(mddev_t *mddev, struct bio * bio) continue; mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); + md_trim_bio(mbio, r10_bio->sector - bio->bi_sector, + max_sectors); r10_bio->devs[i].bio = mbio; - mbio->bi_sector = r10_bio->devs[i].addr+ - conf->mirrors[d].rdev->data_offset; + mbio->bi_sector = (r10_bio->devs[i].addr+ + conf->mirrors[d].rdev->data_offset); mbio->bi_bdev = conf->mirrors[d].rdev->bdev; mbio->bi_end_io = raid10_end_write_request; mbio->bi_rw = WRITE | do_sync | do_fua; @@ -920,6 +1140,21 @@ static int make_request(mddev_t *mddev, struct bio * bio) /* In case raid10d snuck in to freeze_array */ wake_up(&conf->wait_barrier); + if (sectors_handled < (bio->bi_size >> 9)) { + /* We need another r10_bio. It has already been counted + * in bio->bi_phys_segments. + */ + r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); + + r10_bio->master_bio = bio; + r10_bio->sectors = (bio->bi_size >> 9) - sectors_handled; + + r10_bio->mddev = mddev; + r10_bio->sector = bio->bi_sector + sectors_handled; + r10_bio->state = 0; + goto retry_write; + } + if (do_sync || !mddev->bitmap || !plugged) md_wakeup_thread(mddev->thread); return 0; @@ -949,6 +1184,30 @@ static void status(struct seq_file *seq, mddev_t *mddev) seq_printf(seq, "]"); } +/* check if there are enough drives for + * every block to appear on atleast one. + * Don't consider the device numbered 'ignore' + * as we might be about to remove it. 
+ */ +static int enough(conf_t *conf, int ignore) +{ + int first = 0; + + do { + int n = conf->copies; + int cnt = 0; + while (n--) { + if (conf->mirrors[first].rdev && + first != ignore) + cnt++; + first = (first+1) % conf->raid_disks; + } + if (cnt == 0) + return 0; + } while (first != 0); + return 1; +} + static void error(mddev_t *mddev, mdk_rdev_t *rdev) { char b[BDEVNAME_SIZE]; @@ -961,13 +1220,9 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) * else mark the drive as failed */ if (test_bit(In_sync, &rdev->flags) - && conf->raid_disks-mddev->degraded == 1) + && !enough(conf, rdev->raid_disk)) /* * Don't fail the drive, just return an IO error. - * The test should really be more sophisticated than - * "working_disks == 1", but it isn't critical, and - * can wait until we do more sophisticated "is the drive - * really dead" tests... */ return; if (test_and_clear_bit(In_sync, &rdev->flags)) { @@ -980,6 +1235,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) */ set_bit(MD_RECOVERY_INTR, &mddev->recovery); } + set_bit(Blocked, &rdev->flags); set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); printk(KERN_ALERT @@ -1022,27 +1278,6 @@ static void close_sync(conf_t *conf) conf->r10buf_pool = NULL; } -/* check if there are enough drives for - * every block to appear on atleast one - */ -static int enough(conf_t *conf) -{ - int first = 0; - - do { - int n = conf->copies; - int cnt = 0; - while (n--) { - if (conf->mirrors[first].rdev) - cnt++; - first = (first+1) % conf->raid_disks; - } - if (cnt == 0) - return 0; - } while (first != 0); - return 1; -} - static int raid10_spare_active(mddev_t *mddev) { int i; @@ -1078,7 +1313,6 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) conf_t *conf = mddev->private; int err = -EEXIST; int mirror; - mirror_info_t *p; int first = 0; int last = conf->raid_disks - 1; @@ -1087,44 +1321,47 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) * very different from resync */ 
return -EBUSY; - if (!enough(conf)) + if (!enough(conf, -1)) return -EINVAL; if (rdev->raid_disk >= 0) first = last = rdev->raid_disk; - if (rdev->saved_raid_disk >= 0 && - rdev->saved_raid_disk >= first && + if (rdev->saved_raid_disk >= first && conf->mirrors[rdev->saved_raid_disk].rdev == NULL) mirror = rdev->saved_raid_disk; else mirror = first; - for ( ; mirror <= last ; mirror++) - if ( !(p=conf->mirrors+mirror)->rdev) { - - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - /* as we don't honour merge_bvec_fn, we must - * never risk violating it, so limit - * ->max_segments to one lying with a single - * page, as a one page request is never in - * violation. - */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { - blk_queue_max_segments(mddev->queue, 1); - blk_queue_segment_boundary(mddev->queue, - PAGE_CACHE_SIZE - 1); - } + for ( ; mirror <= last ; mirror++) { + mirror_info_t *p = &conf->mirrors[mirror]; + if (p->recovery_disabled == mddev->recovery_disabled) + continue; + if (!p->rdev) + continue; - p->head_position = 0; - rdev->raid_disk = mirror; - err = 0; - if (rdev->saved_raid_disk != mirror) - conf->fullsync = 1; - rcu_assign_pointer(p->rdev, rdev); - break; + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); + /* as we don't honour merge_bvec_fn, we must + * never risk violating it, so limit + * ->max_segments to one lying with a single + * page, as a one page request is never in + * violation. + */ + if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { + blk_queue_max_segments(mddev->queue, 1); + blk_queue_segment_boundary(mddev->queue, + PAGE_CACHE_SIZE - 1); } + p->head_position = 0; + rdev->raid_disk = mirror; + err = 0; + if (rdev->saved_raid_disk != mirror) + conf->fullsync = 1; + rcu_assign_pointer(p->rdev, rdev); + break; + } + md_integrity_add_rdev(rdev, mddev); print_conf(conf); return err; @@ -1149,7 +1386,8 @@ static int raid10_remove_disk(mddev_t *mddev, int number) * is not possible. 
*/ if (!test_bit(Faulty, &rdev->flags) && - enough(conf)) { + mddev->recovery_disabled != p->recovery_disabled && + enough(conf, -1)) { err = -EBUSY; goto abort; } @@ -1174,24 +1412,18 @@ static void end_sync_read(struct bio *bio, int error) { r10bio_t *r10_bio = bio->bi_private; conf_t *conf = r10_bio->mddev->private; - int i,d; + int d; - for (i=0; i<conf->copies; i++) - if (r10_bio->devs[i].bio == bio) - break; - BUG_ON(i == conf->copies); - update_head_pos(i, r10_bio); - d = r10_bio->devs[i].devnum; + d = find_bio_disk(conf, r10_bio, bio, NULL); if (test_bit(BIO_UPTODATE, &bio->bi_flags)) set_bit(R10BIO_Uptodate, &r10_bio->state); - else { + else + /* The write handler will notice the lack of + * R10BIO_Uptodate and record any errors etc + */ atomic_add(r10_bio->sectors, &conf->mirrors[d].rdev->corrected_errors); - if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery)) - md_error(r10_bio->mddev, - conf->mirrors[d].rdev); - } /* for reconstruct, we always reschedule after a read. * for resync, only after all reads @@ -1206,40 +1438,60 @@ static void end_sync_read(struct bio *bio, int error) } } -static void end_sync_write(struct bio *bio, int error) +static void end_sync_request(r10bio_t *r10_bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - r10bio_t *r10_bio = bio->bi_private; mddev_t *mddev = r10_bio->mddev; - conf_t *conf = mddev->private; - int i,d; - - for (i = 0; i < conf->copies; i++) - if (r10_bio->devs[i].bio == bio) - break; - d = r10_bio->devs[i].devnum; - if (!uptodate) - md_error(mddev, conf->mirrors[d].rdev); - - update_head_pos(i, r10_bio); - - rdev_dec_pending(conf->mirrors[d].rdev, mddev); while (atomic_dec_and_test(&r10_bio->remaining)) { if (r10_bio->master_bio == NULL) { /* the primary of several recovery bios */ sector_t s = r10_bio->sectors; - put_buf(r10_bio); + if (test_bit(R10BIO_MadeGood, &r10_bio->state) || + test_bit(R10BIO_WriteError, &r10_bio->state)) + reschedule_retry(r10_bio); + else + put_buf(r10_bio); 
md_done_sync(mddev, s, 1); break; } else { r10bio_t *r10_bio2 = (r10bio_t *)r10_bio->master_bio; - put_buf(r10_bio); + if (test_bit(R10BIO_MadeGood, &r10_bio->state) || + test_bit(R10BIO_WriteError, &r10_bio->state)) + reschedule_retry(r10_bio); + else + put_buf(r10_bio); r10_bio = r10_bio2; } } } +static void end_sync_write(struct bio *bio, int error) +{ + int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + r10bio_t *r10_bio = bio->bi_private; + mddev_t *mddev = r10_bio->mddev; + conf_t *conf = mddev->private; + int d; + sector_t first_bad; + int bad_sectors; + int slot; + + d = find_bio_disk(conf, r10_bio, bio, &slot); + + if (!uptodate) { + set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags); + set_bit(R10BIO_WriteError, &r10_bio->state); + } else if (is_badblock(conf->mirrors[d].rdev, + r10_bio->devs[slot].addr, + r10_bio->sectors, + &first_bad, &bad_sectors)) + set_bit(R10BIO_MadeGood, &r10_bio->state); + + rdev_dec_pending(conf->mirrors[d].rdev, mddev); + + end_sync_request(r10_bio); +} + /* * Note: sync and recover and handled very differently for raid10 * This code is for resync. @@ -1299,11 +1551,12 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio) if (j == vcnt) continue; mddev->resync_mismatches += r10_bio->sectors; + if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) + /* Don't fix anything. */ + continue; } - if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) - /* Don't fix anything. */ - continue; - /* Ok, we need to write this bio + /* Ok, we need to write this bio, either to correct an + * inconsistency or to correct an unreadable block. * First we need to fixup bv_offset, bv_len and * bi_vecs, as the read request might have corrupted these */ @@ -1355,32 +1608,107 @@ done: * The second for writing. * */ +static void fix_recovery_read_error(r10bio_t *r10_bio) +{ + /* We got a read error during recovery. + * We repeat the read in smaller page-sized sections. 
+ * If a read succeeds, write it to the new device or record + * a bad block if we cannot. + * If a read fails, record a bad block on both old and + * new devices. + */ + mddev_t *mddev = r10_bio->mddev; + conf_t *conf = mddev->private; + struct bio *bio = r10_bio->devs[0].bio; + sector_t sect = 0; + int sectors = r10_bio->sectors; + int idx = 0; + int dr = r10_bio->devs[0].devnum; + int dw = r10_bio->devs[1].devnum; + + while (sectors) { + int s = sectors; + mdk_rdev_t *rdev; + sector_t addr; + int ok; + + if (s > (PAGE_SIZE>>9)) + s = PAGE_SIZE >> 9; + + rdev = conf->mirrors[dr].rdev; + addr = r10_bio->devs[0].addr + sect, + ok = sync_page_io(rdev, + addr, + s << 9, + bio->bi_io_vec[idx].bv_page, + READ, false); + if (ok) { + rdev = conf->mirrors[dw].rdev; + addr = r10_bio->devs[1].addr + sect; + ok = sync_page_io(rdev, + addr, + s << 9, + bio->bi_io_vec[idx].bv_page, + WRITE, false); + if (!ok) + set_bit(WriteErrorSeen, &rdev->flags); + } + if (!ok) { + /* We don't worry if we cannot set a bad block - + * it really is bad so there is no loss in not + * recording it yet + */ + rdev_set_badblocks(rdev, addr, s, 0); + + if (rdev != conf->mirrors[dw].rdev) { + /* need bad block on destination too */ + mdk_rdev_t *rdev2 = conf->mirrors[dw].rdev; + addr = r10_bio->devs[1].addr + sect; + ok = rdev_set_badblocks(rdev2, addr, s, 0); + if (!ok) { + /* just abort the recovery */ + printk(KERN_NOTICE + "md/raid10:%s: recovery aborted" + " due to read error\n", + mdname(mddev)); + + conf->mirrors[dw].recovery_disabled + = mddev->recovery_disabled; + set_bit(MD_RECOVERY_INTR, + &mddev->recovery); + break; + } + } + } + + sectors -= s; + sect += s; + idx++; + } +} static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio) { conf_t *conf = mddev->private; - int i, d; - struct bio *bio, *wbio; + int d; + struct bio *wbio; + if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) { + fix_recovery_read_error(r10_bio); + end_sync_request(r10_bio); + return; + } - /* move the 
pages across to the second bio + /* + * share the pages with the first bio * and submit the write request */ - bio = r10_bio->devs[0].bio; wbio = r10_bio->devs[1].bio; - for (i=0; i < wbio->bi_vcnt; i++) { - struct page *p = bio->bi_io_vec[i].bv_page; - bio->bi_io_vec[i].bv_page = wbio->bi_io_vec[i].bv_page; - wbio->bi_io_vec[i].bv_page = p; - } d = r10_bio->devs[1].devnum; atomic_inc(&conf->mirrors[d].rdev->nr_pending); md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9); - if (test_bit(R10BIO_Uptodate, &r10_bio->state)) - generic_make_request(wbio); - else - bio_endio(wbio, -EIO); + generic_make_request(wbio); } @@ -1421,6 +1749,26 @@ static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev) atomic_set(&rdev->read_errors, read_errors >> hours_since_last); } +static int r10_sync_page_io(mdk_rdev_t *rdev, sector_t sector, + int sectors, struct page *page, int rw) +{ + sector_t first_bad; + int bad_sectors; + + if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors) + && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags))) + return -1; + if (sync_page_io(rdev, sector, sectors << 9, page, rw, false)) + /* success */ + return 1; + if (rw == WRITE) + set_bit(WriteErrorSeen, &rdev->flags); + /* need to record an error - either for the block or the device */ + if (!rdev_set_badblocks(rdev, sector, sectors, 0)) + md_error(rdev->mddev, rdev); + return 0; +} + /* * This is a kernel thread which: * @@ -1476,10 +1824,15 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) rcu_read_lock(); do { + sector_t first_bad; + int bad_sectors; + d = r10_bio->devs[sl].devnum; rdev = rcu_dereference(conf->mirrors[d].rdev); if (rdev && - test_bit(In_sync, &rdev->flags)) { + test_bit(In_sync, &rdev->flags) && + is_badblock(rdev, r10_bio->devs[sl].addr + sect, s, + &first_bad, &bad_sectors) == 0) { atomic_inc(&rdev->nr_pending); rcu_read_unlock(); success = sync_page_io(rdev, @@ -1499,9 +1852,19 @@ static void fix_read_error(conf_t 
*conf, mddev_t *mddev, r10bio_t *r10_bio) rcu_read_unlock(); if (!success) { - /* Cannot read from anywhere -- bye bye array */ + /* Cannot read from anywhere, just mark the block + * as bad on the first device to discourage future + * reads. + */ int dn = r10_bio->devs[r10_bio->read_slot].devnum; - md_error(mddev, conf->mirrors[dn].rdev); + rdev = conf->mirrors[dn].rdev; + + if (!rdev_set_badblocks( + rdev, + r10_bio->devs[r10_bio->read_slot].addr + + sect, + s, 0)) + md_error(mddev, rdev); break; } @@ -1516,80 +1879,82 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) sl--; d = r10_bio->devs[sl].devnum; rdev = rcu_dereference(conf->mirrors[d].rdev); - if (rdev && - test_bit(In_sync, &rdev->flags)) { - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - atomic_add(s, &rdev->corrected_errors); - if (sync_page_io(rdev, - r10_bio->devs[sl].addr + - sect, - s<<9, conf->tmppage, WRITE, false) - == 0) { - /* Well, this device is dead */ - printk(KERN_NOTICE - "md/raid10:%s: read correction " - "write failed" - " (%d sectors at %llu on %s)\n", - mdname(mddev), s, - (unsigned long long)( - sect + rdev->data_offset), - bdevname(rdev->bdev, b)); - printk(KERN_NOTICE "md/raid10:%s: %s: failing " - "drive\n", - mdname(mddev), - bdevname(rdev->bdev, b)); - md_error(mddev, rdev); - } - rdev_dec_pending(rdev, mddev); - rcu_read_lock(); + if (!rdev || + !test_bit(In_sync, &rdev->flags)) + continue; + + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); + if (r10_sync_page_io(rdev, + r10_bio->devs[sl].addr + + sect, + s<<9, conf->tmppage, WRITE) + == 0) { + /* Well, this device is dead */ + printk(KERN_NOTICE + "md/raid10:%s: read correction " + "write failed" + " (%d sectors at %llu on %s)\n", + mdname(mddev), s, + (unsigned long long)( + sect + rdev->data_offset), + bdevname(rdev->bdev, b)); + printk(KERN_NOTICE "md/raid10:%s: %s: failing " + "drive\n", + mdname(mddev), + bdevname(rdev->bdev, b)); } + rdev_dec_pending(rdev, mddev); + 
rcu_read_lock(); } sl = start; while (sl != r10_bio->read_slot) { + char b[BDEVNAME_SIZE]; if (sl==0) sl = conf->copies; sl--; d = r10_bio->devs[sl].devnum; rdev = rcu_dereference(conf->mirrors[d].rdev); - if (rdev && - test_bit(In_sync, &rdev->flags)) { - char b[BDEVNAME_SIZE]; - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - if (sync_page_io(rdev, - r10_bio->devs[sl].addr + - sect, - s<<9, conf->tmppage, - READ, false) == 0) { - /* Well, this device is dead */ - printk(KERN_NOTICE - "md/raid10:%s: unable to read back " - "corrected sectors" - " (%d sectors at %llu on %s)\n", - mdname(mddev), s, - (unsigned long long)( - sect + rdev->data_offset), - bdevname(rdev->bdev, b)); - printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n", - mdname(mddev), - bdevname(rdev->bdev, b)); - - md_error(mddev, rdev); - } else { - printk(KERN_INFO - "md/raid10:%s: read error corrected" - " (%d sectors at %llu on %s)\n", - mdname(mddev), s, - (unsigned long long)( - sect + rdev->data_offset), - bdevname(rdev->bdev, b)); - } + if (!rdev || + !test_bit(In_sync, &rdev->flags)) + continue; - rdev_dec_pending(rdev, mddev); - rcu_read_lock(); + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); + switch (r10_sync_page_io(rdev, + r10_bio->devs[sl].addr + + sect, + s<<9, conf->tmppage, + READ)) { + case 0: + /* Well, this device is dead */ + printk(KERN_NOTICE + "md/raid10:%s: unable to read back " + "corrected sectors" + " (%d sectors at %llu on %s)\n", + mdname(mddev), s, + (unsigned long long)( + sect + rdev->data_offset), + bdevname(rdev->bdev, b)); + printk(KERN_NOTICE "md/raid10:%s: %s: failing " + "drive\n", + mdname(mddev), + bdevname(rdev->bdev, b)); + break; + case 1: + printk(KERN_INFO + "md/raid10:%s: read error corrected" + " (%d sectors at %llu on %s)\n", + mdname(mddev), s, + (unsigned long long)( + sect + rdev->data_offset), + bdevname(rdev->bdev, b)); + atomic_add(s, &rdev->corrected_errors); } + + rdev_dec_pending(rdev, mddev); + rcu_read_lock(); } 
rcu_read_unlock(); @@ -1598,21 +1963,254 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) } } +static void bi_complete(struct bio *bio, int error) +{ + complete((struct completion *)bio->bi_private); +} + +static int submit_bio_wait(int rw, struct bio *bio) +{ + struct completion event; + rw |= REQ_SYNC; + + init_completion(&event); + bio->bi_private = &event; + bio->bi_end_io = bi_complete; + submit_bio(rw, bio); + wait_for_completion(&event); + + return test_bit(BIO_UPTODATE, &bio->bi_flags); +} + +static int narrow_write_error(r10bio_t *r10_bio, int i) +{ + struct bio *bio = r10_bio->master_bio; + mddev_t *mddev = r10_bio->mddev; + conf_t *conf = mddev->private; + mdk_rdev_t *rdev = conf->mirrors[r10_bio->devs[i].devnum].rdev; + /* bio has the data to be written to slot 'i' where + * we just recently had a write error. + * We repeatedly clone the bio and trim down to one block, + * then try the write. Where the write fails we record + * a bad block. + * It is conceivable that the bio doesn't exactly align with + * blocks. We must handle this. + * + * We currently own a reference to the rdev. + */ + + int block_sectors; + sector_t sector; + int sectors; + int sect_to_write = r10_bio->sectors; + int ok = 1; + + if (rdev->badblocks.shift < 0) + return 0; + + block_sectors = 1 << rdev->badblocks.shift; + sector = r10_bio->sector; + sectors = ((r10_bio->sector + block_sectors) + & ~(sector_t)(block_sectors - 1)) + - sector; + + while (sect_to_write) { + struct bio *wbio; + if (sectors > sect_to_write) + sectors = sect_to_write; + /* Write at 'sector' for 'sectors' */ + wbio = bio_clone_mddev(bio, GFP_NOIO, mddev); + md_trim_bio(wbio, sector - bio->bi_sector, sectors); + wbio->bi_sector = (r10_bio->devs[i].addr+ + rdev->data_offset+ + (sector - r10_bio->sector)); + wbio->bi_bdev = rdev->bdev; + if (submit_bio_wait(WRITE, wbio) == 0) + /* Failure! 
*/ + ok = rdev_set_badblocks(rdev, sector, + sectors, 0) + && ok; + + bio_put(wbio); + sect_to_write -= sectors; + sector += sectors; + sectors = block_sectors; + } + return ok; +} + +static void handle_read_error(mddev_t *mddev, r10bio_t *r10_bio) +{ + int slot = r10_bio->read_slot; + int mirror = r10_bio->devs[slot].devnum; + struct bio *bio; + conf_t *conf = mddev->private; + mdk_rdev_t *rdev; + char b[BDEVNAME_SIZE]; + unsigned long do_sync; + int max_sectors; + + /* we got a read error. Maybe the drive is bad. Maybe just + * the block and we can fix it. + * We freeze all other IO, and try reading the block from + * other devices. When we find one, we re-write + * and check it that fixes the read error. + * This is all done synchronously while the array is + * frozen. + */ + if (mddev->ro == 0) { + freeze_array(conf); + fix_read_error(conf, mddev, r10_bio); + unfreeze_array(conf); + } + rdev_dec_pending(conf->mirrors[mirror].rdev, mddev); + + bio = r10_bio->devs[slot].bio; + bdevname(bio->bi_bdev, b); + r10_bio->devs[slot].bio = + mddev->ro ? 
IO_BLOCKED : NULL; +read_more: + mirror = read_balance(conf, r10_bio, &max_sectors); + if (mirror == -1) { + printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O" + " read error for block %llu\n", + mdname(mddev), b, + (unsigned long long)r10_bio->sector); + raid_end_bio_io(r10_bio); + bio_put(bio); + return; + } + + do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); + if (bio) + bio_put(bio); + slot = r10_bio->read_slot; + rdev = conf->mirrors[mirror].rdev; + printk_ratelimited( + KERN_ERR + "md/raid10:%s: %s: redirecting" + "sector %llu to another mirror\n", + mdname(mddev), + bdevname(rdev->bdev, b), + (unsigned long long)r10_bio->sector); + bio = bio_clone_mddev(r10_bio->master_bio, + GFP_NOIO, mddev); + md_trim_bio(bio, + r10_bio->sector - bio->bi_sector, + max_sectors); + r10_bio->devs[slot].bio = bio; + bio->bi_sector = r10_bio->devs[slot].addr + + rdev->data_offset; + bio->bi_bdev = rdev->bdev; + bio->bi_rw = READ | do_sync; + bio->bi_private = r10_bio; + bio->bi_end_io = raid10_end_read_request; + if (max_sectors < r10_bio->sectors) { + /* Drat - have to split this up more */ + struct bio *mbio = r10_bio->master_bio; + int sectors_handled = + r10_bio->sector + max_sectors + - mbio->bi_sector; + r10_bio->sectors = max_sectors; + spin_lock_irq(&conf->device_lock); + if (mbio->bi_phys_segments == 0) + mbio->bi_phys_segments = 2; + else + mbio->bi_phys_segments++; + spin_unlock_irq(&conf->device_lock); + generic_make_request(bio); + bio = NULL; + + r10_bio = mempool_alloc(conf->r10bio_pool, + GFP_NOIO); + r10_bio->master_bio = mbio; + r10_bio->sectors = (mbio->bi_size >> 9) + - sectors_handled; + r10_bio->state = 0; + set_bit(R10BIO_ReadError, + &r10_bio->state); + r10_bio->mddev = mddev; + r10_bio->sector = mbio->bi_sector + + sectors_handled; + + goto read_more; + } else + generic_make_request(bio); +} + +static void handle_write_completed(conf_t *conf, r10bio_t *r10_bio) +{ + /* Some sort of write request has finished and it + * succeeded in writing where 
we thought there was a + * bad block. So forget the bad block. + * Or possibly if failed and we need to record + * a bad block. + */ + int m; + mdk_rdev_t *rdev; + + if (test_bit(R10BIO_IsSync, &r10_bio->state) || + test_bit(R10BIO_IsRecover, &r10_bio->state)) { + for (m = 0; m < conf->copies; m++) { + int dev = r10_bio->devs[m].devnum; + rdev = conf->mirrors[dev].rdev; + if (r10_bio->devs[m].bio == NULL) + continue; + if (test_bit(BIO_UPTODATE, + &r10_bio->devs[m].bio->bi_flags)) { + rdev_clear_badblocks( + rdev, + r10_bio->devs[m].addr, + r10_bio->sectors); + } else { + if (!rdev_set_badblocks( + rdev, + r10_bio->devs[m].addr, + r10_bio->sectors, 0)) + md_error(conf->mddev, rdev); + } + } + put_buf(r10_bio); + } else { + for (m = 0; m < conf->copies; m++) { + int dev = r10_bio->devs[m].devnum; + struct bio *bio = r10_bio->devs[m].bio; + rdev = conf->mirrors[dev].rdev; + if (bio == IO_MADE_GOOD) { + rdev_clear_badblocks( + rdev, + r10_bio->devs[m].addr, + r10_bio->sectors); + rdev_dec_pending(rdev, conf->mddev); + } else if (bio != NULL && + !test_bit(BIO_UPTODATE, &bio->bi_flags)) { + if (!narrow_write_error(r10_bio, m)) { + md_error(conf->mddev, rdev); + set_bit(R10BIO_Degraded, + &r10_bio->state); + } + rdev_dec_pending(rdev, conf->mddev); + } + } + if (test_bit(R10BIO_WriteError, + &r10_bio->state)) + close_write(r10_bio); + raid_end_bio_io(r10_bio); + } +} + static void raid10d(mddev_t *mddev) { r10bio_t *r10_bio; - struct bio *bio; unsigned long flags; conf_t *conf = mddev->private; struct list_head *head = &conf->retry_list; - mdk_rdev_t *rdev; struct blk_plug plug; md_check_recovery(mddev); blk_start_plug(&plug); for (;;) { - char b[BDEVNAME_SIZE]; flush_pending_writes(conf); @@ -1628,64 +2226,26 @@ static void raid10d(mddev_t *mddev) mddev = r10_bio->mddev; conf = mddev->private; - if (test_bit(R10BIO_IsSync, &r10_bio->state)) + if (test_bit(R10BIO_MadeGood, &r10_bio->state) || + test_bit(R10BIO_WriteError, &r10_bio->state)) + handle_write_completed(conf, 
r10_bio); + else if (test_bit(R10BIO_IsSync, &r10_bio->state)) sync_request_write(mddev, r10_bio); else if (test_bit(R10BIO_IsRecover, &r10_bio->state)) recovery_request_write(mddev, r10_bio); + else if (test_bit(R10BIO_ReadError, &r10_bio->state)) + handle_read_error(mddev, r10_bio); else { - int slot = r10_bio->read_slot; - int mirror = r10_bio->devs[slot].devnum; - /* we got a read error. Maybe the drive is bad. Maybe just - * the block and we can fix it. - * We freeze all other IO, and try reading the block from - * other devices. When we find one, we re-write - * and check it that fixes the read error. - * This is all done synchronously while the array is - * frozen. + /* just a partial read to be scheduled from a + * separate context */ - if (mddev->ro == 0) { - freeze_array(conf); - fix_read_error(conf, mddev, r10_bio); - unfreeze_array(conf); - } - rdev_dec_pending(conf->mirrors[mirror].rdev, mddev); - - bio = r10_bio->devs[slot].bio; - r10_bio->devs[slot].bio = - mddev->ro ? IO_BLOCKED : NULL; - mirror = read_balance(conf, r10_bio); - if (mirror == -1) { - printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O" - " read error for block %llu\n", - mdname(mddev), - bdevname(bio->bi_bdev,b), - (unsigned long long)r10_bio->sector); - raid_end_bio_io(r10_bio); - bio_put(bio); - } else { - const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); - bio_put(bio); - slot = r10_bio->read_slot; - rdev = conf->mirrors[mirror].rdev; - if (printk_ratelimit()) - printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to" - " another mirror\n", - mdname(mddev), - bdevname(rdev->bdev,b), - (unsigned long long)r10_bio->sector); - bio = bio_clone_mddev(r10_bio->master_bio, - GFP_NOIO, mddev); - r10_bio->devs[slot].bio = bio; - bio->bi_sector = r10_bio->devs[slot].addr - + rdev->data_offset; - bio->bi_bdev = rdev->bdev; - bio->bi_rw = READ | do_sync; - bio->bi_private = r10_bio; - bio->bi_end_io = raid10_end_read_request; - generic_make_request(bio); - } + 
int slot = r10_bio->read_slot; + generic_make_request(r10_bio->devs[slot].bio); } + cond_resched(); + if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) + md_check_recovery(mddev); } blk_finish_plug(&plug); } @@ -1746,7 +2306,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int i; int max_sync; sector_t sync_blocks; - sector_t sectors_skipped = 0; int chunks_skipped = 0; @@ -1828,7 +2387,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, max_sync = RESYNC_PAGES << (PAGE_SHIFT-9); if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { /* recovery... the complicated one */ - int j, k; + int j; r10_bio = NULL; for (i=0 ; i<conf->raid_disks; i++) { @@ -1836,6 +2395,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, r10bio_t *rb2; sector_t sect; int must_sync; + int any_working; if (conf->mirrors[i].rdev == NULL || test_bit(In_sync, &conf->mirrors[i].rdev->flags)) @@ -1887,19 +2447,42 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, must_sync = bitmap_start_sync(mddev->bitmap, sect, &sync_blocks, still_degraded); + any_working = 0; for (j=0; j<conf->copies;j++) { + int k; int d = r10_bio->devs[j].devnum; + sector_t from_addr, to_addr; + mdk_rdev_t *rdev; + sector_t sector, first_bad; + int bad_sectors; if (!conf->mirrors[d].rdev || !test_bit(In_sync, &conf->mirrors[d].rdev->flags)) continue; /* This is where we read from */ + any_working = 1; + rdev = conf->mirrors[d].rdev; + sector = r10_bio->devs[j].addr; + + if (is_badblock(rdev, sector, max_sync, + &first_bad, &bad_sectors)) { + if (first_bad > sector) + max_sync = first_bad - sector; + else { + bad_sectors -= (sector + - first_bad); + if (max_sync > bad_sectors) + max_sync = bad_sectors; + continue; + } + } bio = r10_bio->devs[0].bio; bio->bi_next = biolist; biolist = bio; bio->bi_private = r10_bio; bio->bi_end_io = end_sync_read; bio->bi_rw = READ; - bio->bi_sector = r10_bio->devs[j].addr + + from_addr = r10_bio->devs[j].addr; + bio->bi_sector = 
from_addr + conf->mirrors[d].rdev->data_offset; bio->bi_bdev = conf->mirrors[d].rdev->bdev; atomic_inc(&conf->mirrors[d].rdev->nr_pending); @@ -1916,26 +2499,48 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, bio->bi_private = r10_bio; bio->bi_end_io = end_sync_write; bio->bi_rw = WRITE; - bio->bi_sector = r10_bio->devs[k].addr + + to_addr = r10_bio->devs[k].addr; + bio->bi_sector = to_addr + conf->mirrors[i].rdev->data_offset; bio->bi_bdev = conf->mirrors[i].rdev->bdev; r10_bio->devs[0].devnum = d; + r10_bio->devs[0].addr = from_addr; r10_bio->devs[1].devnum = i; + r10_bio->devs[1].addr = to_addr; break; } if (j == conf->copies) { - /* Cannot recover, so abort the recovery */ + /* Cannot recover, so abort the recovery or + * record a bad block */ - put_buf(r10_bio); - if (rb2) - atomic_dec(&rb2->remaining); - r10_bio = rb2; - if (!test_and_set_bit(MD_RECOVERY_INTR, - &mddev->recovery)) - printk(KERN_INFO "md/raid10:%s: insufficient " - "working devices for recovery.\n", - mdname(mddev)); + if (any_working) { + /* problem is that there are bad blocks + * on other device(s) + */ + int k; + for (k = 0; k < conf->copies; k++) + if (r10_bio->devs[k].devnum == i) + break; + if (!rdev_set_badblocks( + conf->mirrors[i].rdev, + r10_bio->devs[k].addr, + max_sync, 0)) + any_working = 0; + } + if (!any_working) { + if (!test_and_set_bit(MD_RECOVERY_INTR, + &mddev->recovery)) + printk(KERN_INFO "md/raid10:%s: insufficient " + "working devices for recovery.\n", + mdname(mddev)); + conf->mirrors[i].recovery_disabled + = mddev->recovery_disabled; + } + put_buf(r10_bio); /* done last: the bad-block code above still reads this r10_bio's devs[] */ + if (rb2) + atomic_dec(&rb2->remaining); + r10_bio = rb2; /* may be NULL, so nothing below may dereference it */ break; } } @@ -1979,12 +2584,28 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, for (i=0; i<conf->copies; i++) { int d = r10_bio->devs[i].devnum; + sector_t first_bad, sector; + int bad_sectors; + bio = r10_bio->devs[i].bio; bio->bi_end_io = NULL; clear_bit(BIO_UPTODATE, &bio->bi_flags); if (conf->mirrors[d].rdev == NULL || test_bit(Faulty, &conf->mirrors[d].rdev->flags)) continue; + sector = 
r10_bio->devs[i].addr; + if (is_badblock(conf->mirrors[d].rdev, + sector, max_sync, + &first_bad, &bad_sectors)) { + if (first_bad > sector) + max_sync = first_bad - sector; + else { + bad_sectors -= (sector - first_bad); + if (max_sync > bad_sectors) + max_sync = bad_sectors; /* limit this pass to the rest of the bad range, as the recovery path does */ + continue; + } + } atomic_inc(&conf->mirrors[d].rdev->nr_pending); atomic_inc(&r10_bio->remaining); bio->bi_next = biolist; @@ -1992,7 +2613,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, bio->bi_private = r10_bio; bio->bi_end_io = end_sync_read; bio->bi_rw = READ; - bio->bi_sector = r10_bio->devs[i].addr + + bio->bi_sector = sector + conf->mirrors[d].rdev->data_offset; bio->bi_bdev = conf->mirrors[d].rdev->bdev; count++; @@ -2079,7 +2700,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, return sectors_skipped + nr_sectors; giveup: /* There is nowhere to write, so all non-sync - * drives must be failed, so try the next chunk... + * drives must be failed or in resync, or all drives + * have a bad block, so try the next chunk... 
*/ if (sector_nr + max_sync < max_sector) max_sector = sector_nr + max_sync; @@ -2249,6 +2871,7 @@ static int run(mddev_t *mddev) (conf->raid_disks / conf->near_copies)); list_for_each_entry(rdev, &mddev->disks, same_set) { + disk_idx = rdev->raid_disk; if (disk_idx >= conf->raid_disks || disk_idx < 0) @@ -2271,7 +2894,7 @@ static int run(mddev_t *mddev) disk->head_position = 0; } /* need to check that every block has at least one working mirror */ - if (!enough(conf)) { + if (!enough(conf, -1)) { printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n", mdname(mddev)); goto out_free_conf; diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 944b1104d3b4..79cb52a0d4a2 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -6,6 +6,11 @@ typedef struct mirror_info mirror_info_t; struct mirror_info { mdk_rdev_t *rdev; sector_t head_position; + int recovery_disabled; /* matches + * mddev->recovery_disabled + * when we shouldn't try + * recovering this device. + */ }; typedef struct r10bio_s r10bio_t; @@ -113,10 +118,26 @@ struct r10bio_s { * level, we store IO_BLOCKED in the appropriate 'bios' pointer */ #define IO_BLOCKED ((struct bio*)1) +/* When we successfully write to a known bad-block, we need to remove the + * bad-block marking which must be done from process context. So we record + * the success by setting devs[n].bio to IO_MADE_GOOD + */ +#define IO_MADE_GOOD ((struct bio *)2) + +#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) /* bits for r10bio.state */ #define R10BIO_Uptodate 0 #define R10BIO_IsSync 1 #define R10BIO_IsRecover 2 #define R10BIO_Degraded 3 +/* Set ReadError on bios that experience a read error + * so that raid10d knows what to do with them. + */ +#define R10BIO_ReadError 4 +/* If a write for this request means we can clear some + * known-bad-block records, we set this flag. 
+ */ +#define R10BIO_MadeGood 5 +#define R10BIO_WriteError 6 #endif diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b72edf35ec54..dbae459fb02d 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -51,6 +51,7 @@ #include <linux/seq_file.h> #include <linux/cpu.h> #include <linux/slab.h> +#include <linux/ratelimit.h> #include "md.h" #include "raid5.h" #include "raid0.h" @@ -96,8 +97,6 @@ #define __inline__ #endif -#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args))) - /* * We maintain a biased count of active stripes in the bottom 16 bits of * bi_phys_segments, and a count of processed stripes in the upper 16 bits @@ -341,7 +340,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous) (unsigned long long)sh->sector, i, dev->toread, dev->read, dev->towrite, dev->written, test_bit(R5_LOCKED, &dev->flags)); - BUG(); + WARN_ON(1); } dev->flags = 0; raid5_build_block(sh, i, previous); @@ -527,6 +526,36 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) atomic_inc(&rdev->nr_pending); rcu_read_unlock(); + /* We have already checked bad blocks for reads. Now + * need to check for writes. 
+ */ + while ((rw & WRITE) && rdev && + test_bit(WriteErrorSeen, &rdev->flags)) { + sector_t first_bad; + int bad_sectors; + int bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS, + &first_bad, &bad_sectors); + if (!bad) + break; + + if (bad < 0) { + set_bit(BlockedBadBlocks, &rdev->flags); + if (!conf->mddev->external && + conf->mddev->flags) { + /* It is very unlikely, but we might + * still need to write out the + * bad block log - better give it + * a chance*/ + md_check_recovery(conf->mddev); + } + md_wait_for_blocked_rdev(rdev, conf->mddev); + } else { + /* Acknowledged bad block - skip the write */ + rdev_dec_pending(rdev, conf->mddev); + rdev = NULL; + } + } + if (rdev) { if (s->syncing || s->expanding || s->expanded) md_sync_acct(rdev->bdev, STRIPE_SECTORS); @@ -548,10 +577,6 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) bi->bi_io_vec[0].bv_offset = 0; bi->bi_size = STRIPE_SIZE; bi->bi_next = NULL; - if ((rw & WRITE) && - test_bit(R5_ReWrite, &sh->dev[i].flags)) - atomic_add(STRIPE_SECTORS, - &rdev->corrected_errors); generic_make_request(bi); } else { if (rw & WRITE) @@ -1020,12 +1045,12 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) { struct bio *wbi; - spin_lock(&sh->lock); + spin_lock_irq(&sh->raid_conf->device_lock); chosen = dev->towrite; dev->towrite = NULL; BUG_ON(dev->written); wbi = dev->written = chosen; - spin_unlock(&sh->lock); + spin_unlock_irq(&sh->raid_conf->device_lock); while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { @@ -1315,12 +1340,11 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) static int grow_one_stripe(raid5_conf_t *conf) { struct stripe_head *sh; - sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL); + sh = kmem_cache_zalloc(conf->slab_cache, GFP_KERNEL); if (!sh) return 0; - memset(sh, 0, sizeof(*sh) + (conf->pool_size-1)*sizeof(struct r5dev)); + sh->raid_conf = 
conf; - spin_lock_init(&sh->lock); #ifdef CONFIG_MULTICORE_RAID456 init_waitqueue_head(&sh->ops.wait_for_ops); #endif @@ -1435,14 +1459,11 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) return -ENOMEM; for (i = conf->max_nr_stripes; i; i--) { - nsh = kmem_cache_alloc(sc, GFP_KERNEL); + nsh = kmem_cache_zalloc(sc, GFP_KERNEL); if (!nsh) break; - memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev)); - nsh->raid_conf = conf; - spin_lock_init(&nsh->lock); #ifdef CONFIG_MULTICORE_RAID456 init_waitqueue_head(&nsh->ops.wait_for_ops); #endif @@ -1587,12 +1608,15 @@ static void raid5_end_read_request(struct bio * bi, int error) set_bit(R5_UPTODATE, &sh->dev[i].flags); if (test_bit(R5_ReadError, &sh->dev[i].flags)) { rdev = conf->disks[i].rdev; - printk_rl(KERN_INFO "md/raid:%s: read error corrected" - " (%lu sectors at %llu on %s)\n", - mdname(conf->mddev), STRIPE_SECTORS, - (unsigned long long)(sh->sector - + rdev->data_offset), - bdevname(rdev->bdev, b)); + printk_ratelimited( + KERN_INFO + "md/raid:%s: read error corrected" + " (%lu sectors at %llu on %s)\n", + mdname(conf->mddev), STRIPE_SECTORS, + (unsigned long long)(sh->sector + + rdev->data_offset), + bdevname(rdev->bdev, b)); + atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); clear_bit(R5_ReadError, &sh->dev[i].flags); clear_bit(R5_ReWrite, &sh->dev[i].flags); } @@ -1606,22 +1630,24 @@ static void raid5_end_read_request(struct bio * bi, int error) clear_bit(R5_UPTODATE, &sh->dev[i].flags); atomic_inc(&rdev->read_errors); if (conf->mddev->degraded >= conf->max_degraded) - printk_rl(KERN_WARNING - "md/raid:%s: read error not correctable " - "(sector %llu on %s).\n", - mdname(conf->mddev), - (unsigned long long)(sh->sector - + rdev->data_offset), - bdn); + printk_ratelimited( + KERN_WARNING + "md/raid:%s: read error not correctable " + "(sector %llu on %s).\n", + mdname(conf->mddev), + (unsigned long long)(sh->sector + + rdev->data_offset), + bdn); else if (test_bit(R5_ReWrite, 
&sh->dev[i].flags)) /* Oh, no!!! */ - printk_rl(KERN_WARNING - "md/raid:%s: read error NOT corrected!! " - "(sector %llu on %s).\n", - mdname(conf->mddev), - (unsigned long long)(sh->sector - + rdev->data_offset), - bdn); + printk_ratelimited( + KERN_WARNING + "md/raid:%s: read error NOT corrected!! " + "(sector %llu on %s).\n", + mdname(conf->mddev), + (unsigned long long)(sh->sector + + rdev->data_offset), + bdn); else if (atomic_read(&rdev->read_errors) > conf->max_nr_stripes) printk(KERN_WARNING @@ -1649,6 +1675,8 @@ static void raid5_end_write_request(struct bio *bi, int error) raid5_conf_t *conf = sh->raid_conf; int disks = sh->disks, i; int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); + sector_t first_bad; + int bad_sectors; for (i=0 ; i<disks; i++) if (bi == &sh->dev[i].req) @@ -1662,8 +1690,12 @@ static void raid5_end_write_request(struct bio *bi, int error) return; } - if (!uptodate) - md_error(conf->mddev, conf->disks[i].rdev); + if (!uptodate) { + set_bit(WriteErrorSeen, &conf->disks[i].rdev->flags); + set_bit(R5_WriteError, &sh->dev[i].flags); + } else if (is_badblock(conf->disks[i].rdev, sh->sector, STRIPE_SECTORS, + &first_bad, &bad_sectors)) + set_bit(R5_MadeGood, &sh->dev[i].flags); rdev_dec_pending(conf->disks[i].rdev, conf->mddev); @@ -1710,6 +1742,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) */ set_bit(MD_RECOVERY_INTR, &mddev->recovery); } + set_bit(Blocked, &rdev->flags); set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); printk(KERN_ALERT @@ -1760,7 +1793,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, /* * Select the parity disk based on the user selected algorithm. 
*/ - pd_idx = qd_idx = ~0; + pd_idx = qd_idx = -1; switch(conf->level) { case 4: pd_idx = data_disks; @@ -2143,12 +2176,11 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in raid5_conf_t *conf = sh->raid_conf; int firstwrite=0; - pr_debug("adding bh b#%llu to stripe s#%llu\n", + pr_debug("adding bi b#%llu to stripe s#%llu\n", (unsigned long long)bi->bi_sector, (unsigned long long)sh->sector); - spin_lock(&sh->lock); spin_lock_irq(&conf->device_lock); if (forwrite) { bip = &sh->dev[dd_idx].towrite; @@ -2169,19 +2201,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in bi->bi_next = *bip; *bip = bi; bi->bi_phys_segments++; - spin_unlock_irq(&conf->device_lock); - spin_unlock(&sh->lock); - - pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", - (unsigned long long)bi->bi_sector, - (unsigned long long)sh->sector, dd_idx); - - if (conf->mddev->bitmap && firstwrite) { - bitmap_startwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, 0); - sh->bm_seq = conf->seq_flush+1; - set_bit(STRIPE_BIT_DELAY, &sh->state); - } if (forwrite) { /* check if page is covered */ @@ -2196,12 +2215,23 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); } + spin_unlock_irq(&conf->device_lock); + + pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", + (unsigned long long)(*bip)->bi_sector, + (unsigned long long)sh->sector, dd_idx); + + if (conf->mddev->bitmap && firstwrite) { + bitmap_startwrite(conf->mddev->bitmap, sh->sector, + STRIPE_SECTORS, 0); + sh->bm_seq = conf->seq_flush+1; + set_bit(STRIPE_BIT_DELAY, &sh->state); + } return 1; overlap: set_bit(R5_Overlap, &sh->dev[dd_idx].flags); spin_unlock_irq(&conf->device_lock); - spin_unlock(&sh->lock); return 0; } @@ -2238,9 +2268,18 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, rcu_read_lock(); rdev = 
rcu_dereference(conf->disks[i].rdev); if (rdev && test_bit(In_sync, &rdev->flags)) - /* multiple read failures in one stripe */ - md_error(conf->mddev, rdev); + atomic_inc(&rdev->nr_pending); + else + rdev = NULL; rcu_read_unlock(); + if (rdev) { + if (!rdev_set_badblocks( + rdev, + sh->sector, + STRIPE_SECTORS, 0)) + md_error(conf->mddev, rdev); + rdev_dec_pending(rdev, conf->mddev); + } } spin_lock_irq(&conf->device_lock); /* fail all writes first */ @@ -2308,6 +2347,10 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, if (bitmap_end) bitmap_endwrite(conf->mddev->bitmap, sh->sector, STRIPE_SECTORS, 0, 0); + /* If we were in the middle of a write the parity block might + * still be locked - so just clear all R5_LOCKED flags + */ + clear_bit(R5_LOCKED, &sh->dev[i].flags); } if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) @@ -2315,109 +2358,73 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, md_wakeup_thread(conf->mddev->thread); } -/* fetch_block5 - checks the given member device to see if its data needs - * to be read or computed to satisfy a request. - * - * Returns 1 when no more member devices need to be checked, otherwise returns - * 0 to tell the loop in handle_stripe_fill5 to continue - */ -static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s, - int disk_idx, int disks) -{ - struct r5dev *dev = &sh->dev[disk_idx]; - struct r5dev *failed_dev = &sh->dev[s->failed_num]; - - /* is the data in this block needed, and can we get it? 
*/ - if (!test_bit(R5_LOCKED, &dev->flags) && - !test_bit(R5_UPTODATE, &dev->flags) && - (dev->toread || - (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || - s->syncing || s->expanding || - (s->failed && - (failed_dev->toread || - (failed_dev->towrite && - !test_bit(R5_OVERWRITE, &failed_dev->flags)))))) { - /* We would like to get this block, possibly by computing it, - * otherwise read it if the backing disk is insync - */ - if ((s->uptodate == disks - 1) && - (s->failed && disk_idx == s->failed_num)) { - set_bit(STRIPE_COMPUTE_RUN, &sh->state); - set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); - set_bit(R5_Wantcompute, &dev->flags); - sh->ops.target = disk_idx; - sh->ops.target2 = -1; - s->req_compute = 1; - /* Careful: from this point on 'uptodate' is in the eye - * of raid_run_ops which services 'compute' operations - * before writes. R5_Wantcompute flags a block that will - * be R5_UPTODATE by the time it is needed for a - * subsequent operation. - */ - s->uptodate++; - return 1; /* uptodate + compute == disks */ - } else if (test_bit(R5_Insync, &dev->flags)) { - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantread, &dev->flags); - s->locked++; - pr_debug("Reading block %d (sync=%d)\n", disk_idx, - s->syncing); - } - } - - return 0; -} - -/** - * handle_stripe_fill5 - read or compute data to satisfy pending requests. - */ -static void handle_stripe_fill5(struct stripe_head *sh, - struct stripe_head_state *s, int disks) +static void +handle_failed_sync(raid5_conf_t *conf, struct stripe_head *sh, + struct stripe_head_state *s) { + int abort = 0; int i; - /* look for blocks to read/compute, skip this if a compute - * is already in flight, or if the stripe contents are in the - * midst of changing due to a write + md_done_sync(conf->mddev, STRIPE_SECTORS, 0); + clear_bit(STRIPE_SYNCING, &sh->state); + s->syncing = 0; + /* There is nothing more to do for sync/check/repair. 
+ * For recover we need to record a bad block on all + * non-sync devices, or abort the recovery */ - if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && - !sh->reconstruct_state) - for (i = disks; i--; ) - if (fetch_block5(sh, s, i, disks)) - break; - set_bit(STRIPE_HANDLE, &sh->state); + if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) + return; + /* During recovery devices cannot be removed, so locking and + * refcounting of rdevs is not needed + */ + for (i = 0; i < conf->raid_disks; i++) { + mdk_rdev_t *rdev = conf->disks[i].rdev; + if (!rdev + || test_bit(Faulty, &rdev->flags) + || test_bit(In_sync, &rdev->flags)) + continue; + if (!rdev_set_badblocks(rdev, sh->sector, + STRIPE_SECTORS, 0)) + abort = 1; + } + if (abort) { + conf->recovery_disabled = conf->mddev->recovery_disabled; + set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery); + } } -/* fetch_block6 - checks the given member device to see if its data needs +/* fetch_block - checks the given member device to see if its data needs * to be read or computed to satisfy a request. * * Returns 1 when no more member devices need to be checked, otherwise returns - * 0 to tell the loop in handle_stripe_fill6 to continue + * 0 to tell the loop in handle_stripe_fill to continue */ -static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s, - struct r6_state *r6s, int disk_idx, int disks) +static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, + int disk_idx, int disks) { struct r5dev *dev = &sh->dev[disk_idx]; - struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]], - &sh->dev[r6s->failed_num[1]] }; + struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]], + &sh->dev[s->failed_num[1]] }; + /* is the data in this block needed, and can we get it? 
*/ if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && (dev->toread || (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || s->syncing || s->expanding || - (s->failed >= 1 && - (fdev[0]->toread || s->to_write)) || - (s->failed >= 2 && - (fdev[1]->toread || s->to_write)))) { + (s->failed >= 1 && fdev[0]->toread) || + (s->failed >= 2 && fdev[1]->toread) || + (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite && + !test_bit(R5_OVERWRITE, &fdev[0]->flags)) || + (sh->raid_conf->level == 6 && s->failed && s->to_write))) { /* we would like to get this block, possibly by computing it, * otherwise read it if the backing disk is insync */ BUG_ON(test_bit(R5_Wantcompute, &dev->flags)); BUG_ON(test_bit(R5_Wantread, &dev->flags)); if ((s->uptodate == disks - 1) && - (s->failed && (disk_idx == r6s->failed_num[0] || - disk_idx == r6s->failed_num[1]))) { + (s->failed && (disk_idx == s->failed_num[0] || + disk_idx == s->failed_num[1]))) { /* have disk failed, and we're requested to fetch it; * do compute it */ @@ -2429,6 +2436,12 @@ static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s, sh->ops.target = disk_idx; sh->ops.target2 = -1; /* no 2nd target */ s->req_compute = 1; + /* Careful: from this point on 'uptodate' is in the eye + * of raid_run_ops which services 'compute' operations + * before writes. R5_Wantcompute flags a block that will + * be R5_UPTODATE by the time it is needed for a + * subsequent operation. + */ s->uptodate++; return 1; } else if (s->uptodate == disks-2 && s->failed >= 2) { @@ -2469,11 +2482,11 @@ static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s, } /** - * handle_stripe_fill6 - read or compute data to satisfy pending requests. + * handle_stripe_fill - read or compute data to satisfy pending requests. 
*/ -static void handle_stripe_fill6(struct stripe_head *sh, - struct stripe_head_state *s, struct r6_state *r6s, - int disks) +static void handle_stripe_fill(struct stripe_head *sh, + struct stripe_head_state *s, + int disks) { int i; @@ -2484,7 +2497,7 @@ static void handle_stripe_fill6(struct stripe_head *sh, if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && !sh->reconstruct_state) for (i = disks; i--; ) - if (fetch_block6(sh, s, r6s, i, disks)) + if (fetch_block(sh, s, i, disks)) break; set_bit(STRIPE_HANDLE, &sh->state); } @@ -2540,11 +2553,19 @@ static void handle_stripe_clean_event(raid5_conf_t *conf, md_wakeup_thread(conf->mddev->thread); } -static void handle_stripe_dirtying5(raid5_conf_t *conf, - struct stripe_head *sh, struct stripe_head_state *s, int disks) +static void handle_stripe_dirtying(raid5_conf_t *conf, + struct stripe_head *sh, + struct stripe_head_state *s, + int disks) { int rmw = 0, rcw = 0, i; - for (i = disks; i--; ) { + if (conf->max_degraded == 2) { + /* RAID6 requires 'rcw' in current implementation + * Calculate the real rcw later - for now fake it + * look like rcw is cheaper + */ + rcw = 1; rmw = 2; + } else for (i = disks; i--; ) { /* would I have to read this buffer for read_modify_write */ struct r5dev *dev = &sh->dev[i]; if ((dev->towrite || i == sh->pd_idx) && @@ -2591,16 +2612,19 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf, } } } - if (rcw <= rmw && rcw > 0) + if (rcw <= rmw && rcw > 0) { /* want reconstruct write, but need to get some data */ + rcw = 0; for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; if (!test_bit(R5_OVERWRITE, &dev->flags) && - i != sh->pd_idx && + i != sh->pd_idx && i != sh->qd_idx && !test_bit(R5_LOCKED, &dev->flags) && !(test_bit(R5_UPTODATE, &dev->flags) || - test_bit(R5_Wantcompute, &dev->flags)) && - test_bit(R5_Insync, &dev->flags)) { + test_bit(R5_Wantcompute, &dev->flags))) { + rcw++; + if (!test_bit(R5_Insync, &dev->flags)) + continue; /* it's a failed 
drive */ if ( test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { pr_debug("Read_old block " @@ -2614,6 +2638,7 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf, } } } + } /* now if nothing is locked, and if we have enough data, * we can start a write request */ @@ -2630,53 +2655,6 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf, schedule_reconstruction(sh, s, rcw == 0, 0); } -static void handle_stripe_dirtying6(raid5_conf_t *conf, - struct stripe_head *sh, struct stripe_head_state *s, - struct r6_state *r6s, int disks) -{ - int rcw = 0, pd_idx = sh->pd_idx, i; - int qd_idx = sh->qd_idx; - - set_bit(STRIPE_HANDLE, &sh->state); - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - /* check if we haven't enough data */ - if (!test_bit(R5_OVERWRITE, &dev->flags) && - i != pd_idx && i != qd_idx && - !test_bit(R5_LOCKED, &dev->flags) && - !(test_bit(R5_UPTODATE, &dev->flags) || - test_bit(R5_Wantcompute, &dev->flags))) { - rcw++; - if (!test_bit(R5_Insync, &dev->flags)) - continue; /* it's a failed drive */ - - if ( - test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { - pr_debug("Read_old stripe %llu " - "block %d for Reconstruct\n", - (unsigned long long)sh->sector, i); - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantread, &dev->flags); - s->locked++; - } else { - pr_debug("Request delayed stripe %llu " - "block %d for Reconstruct\n", - (unsigned long long)sh->sector, i); - set_bit(STRIPE_DELAYED, &sh->state); - set_bit(STRIPE_HANDLE, &sh->state); - } - } - } - /* now if nothing is locked, and if we have enough data, we can start a - * write request - */ - if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && - s->locked == 0 && rcw == 0 && - !test_bit(STRIPE_BIT_DELAY, &sh->state)) { - schedule_reconstruction(sh, s, 1, 0); - } -} - static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, struct stripe_head_state *s, int disks) { @@ -2695,7 +2673,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, 
struct stripe_head *sh, s->uptodate--; break; } - dev = &sh->dev[s->failed_num]; + dev = &sh->dev[s->failed_num[0]]; /* fall through */ case check_state_compute_result: sh->check_state = check_state_idle; @@ -2767,7 +2745,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, struct stripe_head_state *s, - struct r6_state *r6s, int disks) + int disks) { int pd_idx = sh->pd_idx; int qd_idx = sh->qd_idx; @@ -2786,14 +2764,14 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, switch (sh->check_state) { case check_state_idle: /* start a new check operation if there are < 2 failures */ - if (s->failed == r6s->q_failed) { + if (s->failed == s->q_failed) { /* The only possible failed device holds Q, so it * makes sense to check P (If anything else were failed, * we would have used P to recreate it). */ sh->check_state = check_state_run; } - if (!r6s->q_failed && s->failed < 2) { + if (!s->q_failed && s->failed < 2) { /* Q is not failed, and we didn't use it to generate * anything, so it makes sense to check it */ @@ -2835,13 +2813,13 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, */ BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */ if (s->failed == 2) { - dev = &sh->dev[r6s->failed_num[1]]; + dev = &sh->dev[s->failed_num[1]]; s->locked++; set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); } if (s->failed >= 1) { - dev = &sh->dev[r6s->failed_num[0]]; + dev = &sh->dev[s->failed_num[0]]; s->locked++; set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); @@ -2928,8 +2906,7 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, } } -static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, - struct r6_state *r6s) +static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh) { int i; @@ -2971,7 
+2948,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); for (j = 0; j < conf->raid_disks; j++) if (j != sh2->pd_idx && - (!r6s || j != sh2->qd_idx) && + j != sh2->qd_idx && !test_bit(R5_Expanded, &sh2->dev[j].flags)) break; if (j == conf->raid_disks) { @@ -3006,43 +2983,35 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, * */ -static void handle_stripe5(struct stripe_head *sh) +static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) { raid5_conf_t *conf = sh->raid_conf; - int disks = sh->disks, i; - struct bio *return_bi = NULL; - struct stripe_head_state s; + int disks = sh->disks; struct r5dev *dev; - mdk_rdev_t *blocked_rdev = NULL; - int prexor; - int dec_preread_active = 0; + int i; - memset(&s, 0, sizeof(s)); - pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d " - "reconstruct:%d\n", (unsigned long long)sh->sector, sh->state, - atomic_read(&sh->count), sh->pd_idx, sh->check_state, - sh->reconstruct_state); + memset(s, 0, sizeof(*s)); - spin_lock(&sh->lock); - clear_bit(STRIPE_HANDLE, &sh->state); - clear_bit(STRIPE_DELAYED, &sh->state); - - s.syncing = test_bit(STRIPE_SYNCING, &sh->state); - s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); - s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); + s->syncing = test_bit(STRIPE_SYNCING, &sh->state); + s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); + s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); + s->failed_num[0] = -1; + s->failed_num[1] = -1; /* Now to look around and see what can be done */ rcu_read_lock(); + spin_lock_irq(&conf->device_lock); for (i=disks; i--; ) { mdk_rdev_t *rdev; + sector_t first_bad; + int bad_sectors; + int is_bad = 0; dev = &sh->dev[i]; - pr_debug("check %d: state 0x%lx toread %p read %p write %p " - "written %p\n", i, dev->flags, dev->toread, dev->read, - dev->towrite, dev->written); - - /* 
maybe we can request a biofill operation + pr_debug("check %d: state 0x%lx read %p write %p written %p\n", + i, dev->flags, dev->toread, dev->towrite, dev->written); + /* maybe we can reply to a read * * new wantfill requests are only permitted while * ops_complete_biofill is guaranteed to be inactive @@ -3052,37 +3021,74 @@ static void handle_stripe5(struct stripe_head *sh) set_bit(R5_Wantfill, &dev->flags); /* now count some things */ - if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; - if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; - if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++; + if (test_bit(R5_LOCKED, &dev->flags)) + s->locked++; + if (test_bit(R5_UPTODATE, &dev->flags)) + s->uptodate++; + if (test_bit(R5_Wantcompute, &dev->flags)) { + s->compute++; + BUG_ON(s->compute > 2); + } if (test_bit(R5_Wantfill, &dev->flags)) - s.to_fill++; + s->to_fill++; else if (dev->toread) - s.to_read++; + s->to_read++; if (dev->towrite) { - s.to_write++; + s->to_write++; if (!test_bit(R5_OVERWRITE, &dev->flags)) - s.non_overwrite++; + s->non_overwrite++; } if (dev->written) - s.written++; + s->written++; rdev = rcu_dereference(conf->disks[i].rdev); - if (blocked_rdev == NULL && - rdev && unlikely(test_bit(Blocked, &rdev->flags))) { - blocked_rdev = rdev; - atomic_inc(&rdev->nr_pending); + if (rdev) { + is_bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS, + &first_bad, &bad_sectors); + if (s->blocked_rdev == NULL + && (test_bit(Blocked, &rdev->flags) + || is_bad < 0)) { + if (is_bad < 0) + set_bit(BlockedBadBlocks, + &rdev->flags); + s->blocked_rdev = rdev; + atomic_inc(&rdev->nr_pending); + } } clear_bit(R5_Insync, &dev->flags); if (!rdev) /* Not in-sync */; - else if (test_bit(In_sync, &rdev->flags)) + else if (is_bad) { + /* also not in-sync */ + if (!test_bit(WriteErrorSeen, &rdev->flags)) { + /* treat as in-sync, but with a read error + * which we can now try to correct + */ + set_bit(R5_Insync, &dev->flags); + set_bit(R5_ReadError, &dev->flags); + } + } 
else if (test_bit(In_sync, &rdev->flags)) set_bit(R5_Insync, &dev->flags); else { - /* could be in-sync depending on recovery/reshape status */ + /* in sync if before recovery_offset */ if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset) set_bit(R5_Insync, &dev->flags); } + if (test_bit(R5_WriteError, &dev->flags)) { + clear_bit(R5_Insync, &dev->flags); + if (!test_bit(Faulty, &rdev->flags)) { + s->handle_bad_blocks = 1; + atomic_inc(&rdev->nr_pending); + } else + clear_bit(R5_WriteError, &dev->flags); + } + if (test_bit(R5_MadeGood, &dev->flags)) { + if (!test_bit(Faulty, &rdev->flags)) { + s->handle_bad_blocks = 1; + atomic_inc(&rdev->nr_pending); + } else + clear_bit(R5_MadeGood, &dev->flags); + } if (!test_bit(R5_Insync, &dev->flags)) { /* The ReadError flag will just be confusing now */ clear_bit(R5_ReadError, &dev->flags); @@ -3091,313 +3097,60 @@ static void handle_stripe5(struct stripe_head *sh) if (test_bit(R5_ReadError, &dev->flags)) clear_bit(R5_Insync, &dev->flags); if (!test_bit(R5_Insync, &dev->flags)) { - s.failed++; - s.failed_num = i; + if (s->failed < 2) + s->failed_num[s->failed] = i; + s->failed++; } } + spin_unlock_irq(&conf->device_lock); rcu_read_unlock(); - - if (unlikely(blocked_rdev)) { - if (s.syncing || s.expanding || s.expanded || - s.to_write || s.written) { - set_bit(STRIPE_HANDLE, &sh->state); - goto unlock; - } - /* There is nothing for the blocked_rdev to block */ - rdev_dec_pending(blocked_rdev, conf->mddev); - blocked_rdev = NULL; - } - - if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { - set_bit(STRIPE_OP_BIOFILL, &s.ops_request); - set_bit(STRIPE_BIOFILL_RUN, &sh->state); - } - - pr_debug("locked=%d uptodate=%d to_read=%d" - " to_write=%d failed=%d failed_num=%d\n", - s.locked, s.uptodate, s.to_read, s.to_write, - s.failed, s.failed_num); - /* check if the array has lost two devices and, if so, some requests might - * need to be failed - */ - if (s.failed > 1 && s.to_read+s.to_write+s.written) - 
handle_failed_stripe(conf, sh, &s, disks, &return_bi); - if (s.failed > 1 && s.syncing) { - md_done_sync(conf->mddev, STRIPE_SECTORS,0); - clear_bit(STRIPE_SYNCING, &sh->state); - s.syncing = 0; - } - - /* might be able to return some write requests if the parity block - * is safe, or on a failed drive - */ - dev = &sh->dev[sh->pd_idx]; - if ( s.written && - ((test_bit(R5_Insync, &dev->flags) && - !test_bit(R5_LOCKED, &dev->flags) && - test_bit(R5_UPTODATE, &dev->flags)) || - (s.failed == 1 && s.failed_num == sh->pd_idx))) - handle_stripe_clean_event(conf, sh, disks, &return_bi); - - /* Now we might consider reading some blocks, either to check/generate - * parity, or to satisfy requests - * or to load a block that is being partially written. - */ - if (s.to_read || s.non_overwrite || - (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding) - handle_stripe_fill5(sh, &s, disks); - - /* Now we check to see if any write operations have recently - * completed - */ - prexor = 0; - if (sh->reconstruct_state == reconstruct_state_prexor_drain_result) - prexor = 1; - if (sh->reconstruct_state == reconstruct_state_drain_result || - sh->reconstruct_state == reconstruct_state_prexor_drain_result) { - sh->reconstruct_state = reconstruct_state_idle; - - /* All the 'written' buffers and the parity block are ready to - * be written back to disk - */ - BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags)); - for (i = disks; i--; ) { - dev = &sh->dev[i]; - if (test_bit(R5_LOCKED, &dev->flags) && - (i == sh->pd_idx || dev->written)) { - pr_debug("Writing block %d\n", i); - set_bit(R5_Wantwrite, &dev->flags); - if (prexor) - continue; - if (!test_bit(R5_Insync, &dev->flags) || - (i == sh->pd_idx && s.failed == 0)) - set_bit(STRIPE_INSYNC, &sh->state); - } - } - if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) - dec_preread_active = 1; - } - - /* Now to consider new write requests and what else, if anything - * should be read. 
We do not handle new writes when: - * 1/ A 'write' operation (copy+xor) is already in flight. - * 2/ A 'check' operation is in flight, as it may clobber the parity - * block. - */ - if (s.to_write && !sh->reconstruct_state && !sh->check_state) - handle_stripe_dirtying5(conf, sh, &s, disks); - - /* maybe we need to check and possibly fix the parity for this stripe - * Any reads will already have been scheduled, so we just see if enough - * data is available. The parity check is held off while parity - * dependent operations are in flight. - */ - if (sh->check_state || - (s.syncing && s.locked == 0 && - !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && - !test_bit(STRIPE_INSYNC, &sh->state))) - handle_parity_checks5(conf, sh, &s, disks); - - if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { - md_done_sync(conf->mddev, STRIPE_SECTORS,1); - clear_bit(STRIPE_SYNCING, &sh->state); - } - - /* If the failed drive is just a ReadError, then we might need to progress - * the repair/check process - */ - if (s.failed == 1 && !conf->mddev->ro && - test_bit(R5_ReadError, &sh->dev[s.failed_num].flags) - && !test_bit(R5_LOCKED, &sh->dev[s.failed_num].flags) - && test_bit(R5_UPTODATE, &sh->dev[s.failed_num].flags) - ) { - dev = &sh->dev[s.failed_num]; - if (!test_bit(R5_ReWrite, &dev->flags)) { - set_bit(R5_Wantwrite, &dev->flags); - set_bit(R5_ReWrite, &dev->flags); - set_bit(R5_LOCKED, &dev->flags); - s.locked++; - } else { - /* let's read it back */ - set_bit(R5_Wantread, &dev->flags); - set_bit(R5_LOCKED, &dev->flags); - s.locked++; - } - } - - /* Finish reconstruct operations initiated by the expansion process */ - if (sh->reconstruct_state == reconstruct_state_result) { - struct stripe_head *sh2 - = get_active_stripe(conf, sh->sector, 1, 1, 1); - if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { - /* sh cannot be written until sh2 has been read. 
- * so arrange for sh to be delayed a little - */ - set_bit(STRIPE_DELAYED, &sh->state); - set_bit(STRIPE_HANDLE, &sh->state); - if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, - &sh2->state)) - atomic_inc(&conf->preread_active_stripes); - release_stripe(sh2); - goto unlock; - } - if (sh2) - release_stripe(sh2); - - sh->reconstruct_state = reconstruct_state_idle; - clear_bit(STRIPE_EXPANDING, &sh->state); - for (i = conf->raid_disks; i--; ) { - set_bit(R5_Wantwrite, &sh->dev[i].flags); - set_bit(R5_LOCKED, &sh->dev[i].flags); - s.locked++; - } - } - - if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && - !sh->reconstruct_state) { - /* Need to write out all blocks after computing parity */ - sh->disks = conf->raid_disks; - stripe_set_idx(sh->sector, conf, 0, sh); - schedule_reconstruction(sh, &s, 1, 1); - } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { - clear_bit(STRIPE_EXPAND_READY, &sh->state); - atomic_dec(&conf->reshape_stripes); - wake_up(&conf->wait_for_overlap); - md_done_sync(conf->mddev, STRIPE_SECTORS, 1); - } - - if (s.expanding && s.locked == 0 && - !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) - handle_stripe_expansion(conf, sh, NULL); - - unlock: - spin_unlock(&sh->lock); - - /* wait for this device to become unblocked */ - if (unlikely(blocked_rdev)) - md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); - - if (s.ops_request) - raid_run_ops(sh, s.ops_request); - - ops_run_io(sh, &s); - - if (dec_preread_active) { - /* We delay this until after ops_run_io so that if make_request - * is waiting on a flush, it won't continue until the writes - * have actually been submitted. 
- */ - atomic_dec(&conf->preread_active_stripes); - if (atomic_read(&conf->preread_active_stripes) < - IO_THRESHOLD) - md_wakeup_thread(conf->mddev->thread); - } - return_io(return_bi); } -static void handle_stripe6(struct stripe_head *sh) +static void handle_stripe(struct stripe_head *sh) { + struct stripe_head_state s; raid5_conf_t *conf = sh->raid_conf; + int i; + int prexor; int disks = sh->disks; - struct bio *return_bi = NULL; - int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx; - struct stripe_head_state s; - struct r6_state r6s; - struct r5dev *dev, *pdev, *qdev; - mdk_rdev_t *blocked_rdev = NULL; - int dec_preread_active = 0; + struct r5dev *pdev, *qdev; + + clear_bit(STRIPE_HANDLE, &sh->state); + if (test_and_set_bit(STRIPE_ACTIVE, &sh->state)) { + /* already being handled, ensure it gets handled + * again when current action finishes */ + set_bit(STRIPE_HANDLE, &sh->state); + return; + } + + if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { + set_bit(STRIPE_SYNCING, &sh->state); + clear_bit(STRIPE_INSYNC, &sh->state); + } + clear_bit(STRIPE_DELAYED, &sh->state); pr_debug("handling stripe %llu, state=%#lx cnt=%d, " "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n", (unsigned long long)sh->sector, sh->state, - atomic_read(&sh->count), pd_idx, qd_idx, + atomic_read(&sh->count), sh->pd_idx, sh->qd_idx, sh->check_state, sh->reconstruct_state); - memset(&s, 0, sizeof(s)); - - spin_lock(&sh->lock); - clear_bit(STRIPE_HANDLE, &sh->state); - clear_bit(STRIPE_DELAYED, &sh->state); - - s.syncing = test_bit(STRIPE_SYNCING, &sh->state); - s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); - s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); - /* Now to look around and see what can be done */ - - rcu_read_lock(); - for (i=disks; i--; ) { - mdk_rdev_t *rdev; - dev = &sh->dev[i]; - pr_debug("check %d: state 0x%lx read %p write %p written %p\n", - i, dev->flags, dev->toread, dev->towrite, dev->written); - /* maybe we can reply to a read - * - * 
new wantfill requests are only permitted while - * ops_complete_biofill is guaranteed to be inactive - */ - if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && - !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) - set_bit(R5_Wantfill, &dev->flags); + analyse_stripe(sh, &s); - /* now count some things */ - if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; - if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; - if (test_bit(R5_Wantcompute, &dev->flags)) { - s.compute++; - BUG_ON(s.compute > 2); - } - - if (test_bit(R5_Wantfill, &dev->flags)) { - s.to_fill++; - } else if (dev->toread) - s.to_read++; - if (dev->towrite) { - s.to_write++; - if (!test_bit(R5_OVERWRITE, &dev->flags)) - s.non_overwrite++; - } - if (dev->written) - s.written++; - rdev = rcu_dereference(conf->disks[i].rdev); - if (blocked_rdev == NULL && - rdev && unlikely(test_bit(Blocked, &rdev->flags))) { - blocked_rdev = rdev; - atomic_inc(&rdev->nr_pending); - } - clear_bit(R5_Insync, &dev->flags); - if (!rdev) - /* Not in-sync */; - else if (test_bit(In_sync, &rdev->flags)) - set_bit(R5_Insync, &dev->flags); - else { - /* in sync if before recovery_offset */ - if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset) - set_bit(R5_Insync, &dev->flags); - } - if (!test_bit(R5_Insync, &dev->flags)) { - /* The ReadError flag will just be confusing now */ - clear_bit(R5_ReadError, &dev->flags); - clear_bit(R5_ReWrite, &dev->flags); - } - if (test_bit(R5_ReadError, &dev->flags)) - clear_bit(R5_Insync, &dev->flags); - if (!test_bit(R5_Insync, &dev->flags)) { - if (s.failed < 2) - r6s.failed_num[s.failed] = i; - s.failed++; - } + if (s.handle_bad_blocks) { + set_bit(STRIPE_HANDLE, &sh->state); + goto finish; } - rcu_read_unlock(); - if (unlikely(blocked_rdev)) { + if (unlikely(s.blocked_rdev)) { if (s.syncing || s.expanding || s.expanded || s.to_write || s.written) { set_bit(STRIPE_HANDLE, &sh->state); - goto unlock; + goto finish; } /* There is nothing for the blocked_rdev to block */ - 
rdev_dec_pending(blocked_rdev, conf->mddev); - blocked_rdev = NULL; + rdev_dec_pending(s.blocked_rdev, conf->mddev); + s.blocked_rdev = NULL; } if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { @@ -3408,83 +3161,88 @@ static void handle_stripe6(struct stripe_head *sh) pr_debug("locked=%d uptodate=%d to_read=%d" " to_write=%d failed=%d failed_num=%d,%d\n", s.locked, s.uptodate, s.to_read, s.to_write, s.failed, - r6s.failed_num[0], r6s.failed_num[1]); - /* check if the array has lost >2 devices and, if so, some requests - * might need to be failed + s.failed_num[0], s.failed_num[1]); + /* check if the array has lost more than max_degraded devices and, + * if so, some requests might need to be failed. */ - if (s.failed > 2 && s.to_read+s.to_write+s.written) - handle_failed_stripe(conf, sh, &s, disks, &return_bi); - if (s.failed > 2 && s.syncing) { - md_done_sync(conf->mddev, STRIPE_SECTORS,0); - clear_bit(STRIPE_SYNCING, &sh->state); - s.syncing = 0; - } + if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written) + handle_failed_stripe(conf, sh, &s, disks, &s.return_bi); + if (s.failed > conf->max_degraded && s.syncing) + handle_failed_sync(conf, sh, &s); /* * might be able to return some write requests if the parity blocks * are safe, or on a failed drive */ - pdev = &sh->dev[pd_idx]; - r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx) - || (s.failed >= 2 && r6s.failed_num[1] == pd_idx); - qdev = &sh->dev[qd_idx]; - r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == qd_idx) - || (s.failed >= 2 && r6s.failed_num[1] == qd_idx); - - if ( s.written && - ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags) + pdev = &sh->dev[sh->pd_idx]; + s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx) + || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx); + qdev = &sh->dev[sh->qd_idx]; + s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx) + || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx) + || conf->level < 6; + + if 
(s.written && + (s.p_failed || ((test_bit(R5_Insync, &pdev->flags) && !test_bit(R5_LOCKED, &pdev->flags) && test_bit(R5_UPTODATE, &pdev->flags)))) && - ( r6s.q_failed || ((test_bit(R5_Insync, &qdev->flags) + (s.q_failed || ((test_bit(R5_Insync, &qdev->flags) && !test_bit(R5_LOCKED, &qdev->flags) && test_bit(R5_UPTODATE, &qdev->flags))))) - handle_stripe_clean_event(conf, sh, disks, &return_bi); + handle_stripe_clean_event(conf, sh, disks, &s.return_bi); /* Now we might consider reading some blocks, either to check/generate * parity, or to satisfy requests * or to load a block that is being partially written. */ - if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || - (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding) - handle_stripe_fill6(sh, &s, &r6s, disks); + if (s.to_read || s.non_overwrite + || (conf->level == 6 && s.to_write && s.failed) + || (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding) + handle_stripe_fill(sh, &s, disks); /* Now we check to see if any write operations have recently * completed */ - if (sh->reconstruct_state == reconstruct_state_drain_result) { - + prexor = 0; + if (sh->reconstruct_state == reconstruct_state_prexor_drain_result) + prexor = 1; + if (sh->reconstruct_state == reconstruct_state_drain_result || + sh->reconstruct_state == reconstruct_state_prexor_drain_result) { sh->reconstruct_state = reconstruct_state_idle; - /* All the 'written' buffers and the parity blocks are ready to + + /* All the 'written' buffers and the parity block are ready to * be written back to disk */ BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags)); - BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags)); + BUG_ON(sh->qd_idx >= 0 && + !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags)); for (i = disks; i--; ) { - dev = &sh->dev[i]; + struct r5dev *dev = &sh->dev[i]; if (test_bit(R5_LOCKED, &dev->flags) && - (i == sh->pd_idx || i == qd_idx || - dev->written)) { + (i == sh->pd_idx || i == sh->qd_idx || + 
dev->written)) { pr_debug("Writing block %d\n", i); - BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); set_bit(R5_Wantwrite, &dev->flags); + if (prexor) + continue; if (!test_bit(R5_Insync, &dev->flags) || - ((i == sh->pd_idx || i == qd_idx) && - s.failed == 0)) + ((i == sh->pd_idx || i == sh->qd_idx) && + s.failed == 0)) set_bit(STRIPE_INSYNC, &sh->state); } } if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) - dec_preread_active = 1; + s.dec_preread_active = 1; } /* Now to consider new write requests and what else, if anything * should be read. We do not handle new writes when: - * 1/ A 'write' operation (copy+gen_syndrome) is already in flight. + * 1/ A 'write' operation (copy+xor) is already in flight. * 2/ A 'check' operation is in flight, as it may clobber the parity * block. */ if (s.to_write && !sh->reconstruct_state && !sh->check_state) - handle_stripe_dirtying6(conf, sh, &s, &r6s, disks); + handle_stripe_dirtying(conf, sh, &s, disks); /* maybe we need to check and possibly fix the parity for this stripe * Any reads will already have been scheduled, so we just see if enough @@ -3494,20 +3252,24 @@ static void handle_stripe6(struct stripe_head *sh) if (sh->check_state || (s.syncing && s.locked == 0 && !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && - !test_bit(STRIPE_INSYNC, &sh->state))) - handle_parity_checks6(conf, sh, &s, &r6s, disks); + !test_bit(STRIPE_INSYNC, &sh->state))) { + if (conf->level == 6) + handle_parity_checks6(conf, sh, &s, disks); + else + handle_parity_checks5(conf, sh, &s, disks); + } if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { - md_done_sync(conf->mddev, STRIPE_SECTORS,1); + md_done_sync(conf->mddev, STRIPE_SECTORS, 1); clear_bit(STRIPE_SYNCING, &sh->state); } /* If the failed drives are just a ReadError, then we might need * to progress the repair/check process */ - if (s.failed <= 2 && !conf->mddev->ro) + if (s.failed <= conf->max_degraded && !conf->mddev->ro) for (i = 0; i < s.failed; i++) { - dev = 
&sh->dev[r6s.failed_num[i]]; + struct r5dev *dev = &sh->dev[s.failed_num[i]]; if (test_bit(R5_ReadError, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) && test_bit(R5_UPTODATE, &dev->flags) @@ -3526,8 +3288,26 @@ static void handle_stripe6(struct stripe_head *sh) } } + /* Finish reconstruct operations initiated by the expansion process */ if (sh->reconstruct_state == reconstruct_state_result) { + struct stripe_head *sh_src + = get_active_stripe(conf, sh->sector, 1, 1, 1); + if (sh_src && test_bit(STRIPE_EXPAND_SOURCE, &sh_src->state)) { + /* sh cannot be written until sh_src has been read. + * so arrange for sh to be delayed a little + */ + set_bit(STRIPE_DELAYED, &sh->state); + set_bit(STRIPE_HANDLE, &sh->state); + if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, + &sh_src->state)) + atomic_inc(&conf->preread_active_stripes); + release_stripe(sh_src); + goto finish; + } + if (sh_src) + release_stripe(sh_src); + sh->reconstruct_state = reconstruct_state_idle; clear_bit(STRIPE_EXPANDING, &sh->state); for (i = conf->raid_disks; i--; ) { @@ -3539,24 +3319,7 @@ static void handle_stripe6(struct stripe_head *sh) if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && !sh->reconstruct_state) { - struct stripe_head *sh2 - = get_active_stripe(conf, sh->sector, 1, 1, 1); - if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { - /* sh cannot be written until sh2 has been read. 
- * so arrange for sh to be delayed a little - */ - set_bit(STRIPE_DELAYED, &sh->state); - set_bit(STRIPE_HANDLE, &sh->state); - if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, - &sh2->state)) - atomic_inc(&conf->preread_active_stripes); - release_stripe(sh2); - goto unlock; - } - if (sh2) - release_stripe(sh2); - - /* Need to write out all blocks after computing P&Q */ + /* Need to write out all blocks after computing parity */ sh->disks = conf->raid_disks; stripe_set_idx(sh->sector, conf, 0, sh); schedule_reconstruction(sh, &s, 1, 1); @@ -3569,22 +3332,39 @@ static void handle_stripe6(struct stripe_head *sh) if (s.expanding && s.locked == 0 && !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) - handle_stripe_expansion(conf, sh, &r6s); - - unlock: - spin_unlock(&sh->lock); + handle_stripe_expansion(conf, sh); +finish: /* wait for this device to become unblocked */ - if (unlikely(blocked_rdev)) - md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); + if (unlikely(s.blocked_rdev)) + md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev); + + if (s.handle_bad_blocks) + for (i = disks; i--; ) { + mdk_rdev_t *rdev; + struct r5dev *dev = &sh->dev[i]; + if (test_and_clear_bit(R5_WriteError, &dev->flags)) { + /* We own a safe reference to the rdev */ + rdev = conf->disks[i].rdev; + if (!rdev_set_badblocks(rdev, sh->sector, + STRIPE_SECTORS, 0)) + md_error(conf->mddev, rdev); + rdev_dec_pending(rdev, conf->mddev); + } + if (test_and_clear_bit(R5_MadeGood, &dev->flags)) { + rdev = conf->disks[i].rdev; + rdev_clear_badblocks(rdev, sh->sector, + STRIPE_SECTORS); + rdev_dec_pending(rdev, conf->mddev); + } + } if (s.ops_request) raid_run_ops(sh, s.ops_request); ops_run_io(sh, &s); - - if (dec_preread_active) { + if (s.dec_preread_active) { /* We delay this until after ops_run_io so that if make_request * is waiting on a flush, it won't continue until the writes * have actually been submitted. 
@@ -3595,15 +3375,9 @@ static void handle_stripe6(struct stripe_head *sh) md_wakeup_thread(conf->mddev->thread); } - return_io(return_bi); -} + return_io(s.return_bi); -static void handle_stripe(struct stripe_head *sh) -{ - if (sh->raid_conf->level == 6) - handle_stripe6(sh); - else - handle_stripe5(sh); + clear_bit(STRIPE_ACTIVE, &sh->state); } static void raid5_activate_delayed(raid5_conf_t *conf) @@ -3833,6 +3607,9 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio) rcu_read_lock(); rdev = rcu_dereference(conf->disks[dd_idx].rdev); if (rdev && test_bit(In_sync, &rdev->flags)) { + sector_t first_bad; + int bad_sectors; + atomic_inc(&rdev->nr_pending); rcu_read_unlock(); raid_bio->bi_next = (void*)rdev; @@ -3840,8 +3617,10 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio) align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); align_bi->bi_sector += rdev->data_offset; - if (!bio_fits_rdev(align_bi)) { - /* too big in some way */ + if (!bio_fits_rdev(align_bi) || + is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9, + &first_bad, &bad_sectors)) { + /* too big in some way, or has a known bad block */ bio_put(align_bi); rdev_dec_pending(rdev, mddev); return 0; @@ -4016,7 +3795,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) } } - if (bio_data_dir(bi) == WRITE && + if (rw == WRITE && logical_sector >= mddev->suspend_lo && logical_sector < mddev->suspend_hi) { release_stripe(sh); @@ -4034,7 +3813,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) } if (test_bit(STRIPE_EXPANDING, &sh->state) || - !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) { + !add_stripe_bio(sh, bi, dd_idx, rw)) { /* Stripe is busy expanding or * add failed due to overlap. 
Flush everything * and wait a while @@ -4375,10 +4154,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded); - spin_lock(&sh->lock); - set_bit(STRIPE_SYNCING, &sh->state); - clear_bit(STRIPE_INSYNC, &sh->state); - spin_unlock(&sh->lock); + set_bit(STRIPE_SYNC_REQUESTED, &sh->state); handle_stripe(sh); release_stripe(sh); @@ -4509,6 +4285,9 @@ static void raid5d(mddev_t *mddev) release_stripe(sh); cond_resched(); + if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) + md_check_recovery(mddev); + spin_lock_irq(&conf->device_lock); } pr_debug("%d stripes handled\n", handled); @@ -5313,6 +5092,7 @@ static int raid5_remove_disk(mddev_t *mddev, int number) * isn't possible. */ if (!test_bit(Faulty, &rdev->flags) && + mddev->recovery_disabled != conf->recovery_disabled && !has_failed(conf) && number < conf->raid_disks) { err = -EBUSY; @@ -5341,6 +5121,9 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) int first = 0; int last = conf->raid_disks - 1; + if (mddev->recovery_disabled == conf->recovery_disabled) + return -EBUSY; + if (has_failed(conf)) /* no point adding a device */ return -EINVAL; @@ -5519,16 +5302,14 @@ static int raid5_start_reshape(mddev_t *mddev) if (rdev->raid_disk < 0 && !test_bit(Faulty, &rdev->flags)) { if (raid5_add_disk(mddev, rdev) == 0) { - char nm[20]; if (rdev->raid_disk >= conf->previous_raid_disks) { set_bit(In_sync, &rdev->flags); added_devices++; } else rdev->recovery_offset = 0; - sprintf(nm, "rd%d", rdev->raid_disk); - if (sysfs_create_link(&mddev->kobj, - &rdev->kobj, nm)) + + if (sysfs_link_rdev(mddev, rdev)) /* Failure here is OK */; } } else if (rdev->raid_disk >= conf->previous_raid_disks @@ -5624,9 +5405,7 @@ static void raid5_finish_reshape(mddev_t *mddev) d++) { mdk_rdev_t *rdev = conf->disks[d].rdev; if (rdev && raid5_remove_disk(mddev, d) == 0) { - char nm[20]; - sprintf(nm, "rd%d", rdev->raid_disk); - 
sysfs_remove_link(&mddev->kobj, nm); + sysfs_unlink_rdev(mddev, rdev); rdev->raid_disk = -1; } } diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 3ca77a2613ba..11b9566184b2 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -6,11 +6,11 @@ /* * - * Each stripe contains one buffer per disc. Each buffer can be in + * Each stripe contains one buffer per device. Each buffer can be in * one of a number of states stored in "flags". Changes between - * these states happen *almost* exclusively under a per-stripe - * spinlock. Some very specific changes can happen in bi_end_io, and - * these are not protected by the spin lock. + * these states happen *almost* exclusively under the protection of the + * STRIPE_ACTIVE flag. Some very specific changes can happen in bi_end_io, and + * these are not protected by STRIPE_ACTIVE. * * The flag bits that are used to represent these states are: * R5_UPTODATE and R5_LOCKED @@ -76,12 +76,10 @@ * block and the cached buffer are successfully written, any buffer on * a written list can be returned with b_end_io. * - * The write list and read list both act as fifos. The read list is - * protected by the device_lock. The write and written lists are - * protected by the stripe lock. The device_lock, which can be - * claimed while the stipe lock is held, is only for list - * manipulations and will only be held for a very short time. It can - * be claimed from interrupts. + * The write list and read list both act as fifos. The read list, + * write list and written list are protected by the device_lock. + * The device_lock is only for list manipulations and will only be + * held for a very short time. It can be claimed from interrupts. * * * Stripes in the stripe cache can be on one of two lists (or on @@ -96,7 +94,6 @@ * * The inactive_list, handle_list and hash bucket lists are all protected by the * device_lock. - * - stripes on the inactive_list never have their stripe_lock held. * - stripes have a reference counter. 
If count==0, they are on a list. * - If a stripe might need handling, STRIPE_HANDLE is set. * - When refcount reaches zero, then if STRIPE_HANDLE it is put on @@ -116,10 +113,10 @@ * attach a request to an active stripe (add_stripe_bh()) * lockdev attach-buffer unlockdev * handle a stripe (handle_stripe()) - * lockstripe clrSTRIPE_HANDLE ... + * setSTRIPE_ACTIVE, clrSTRIPE_HANDLE ... * (lockdev check-buffers unlockdev) .. * change-state .. - * record io/ops needed unlockstripe schedule io/ops + * record io/ops needed clearSTRIPE_ACTIVE schedule io/ops * release an active stripe (release_stripe()) * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev * @@ -128,8 +125,7 @@ * on a cached buffer, and plus one if the stripe is undergoing stripe * operations. * - * Stripe operations are performed outside the stripe lock, - * the stripe operations are: + * The stripe operations are: * -copying data between the stripe cache and user application buffers * -computing blocks to save a disk access, or to recover a missing block * -updating the parity on a write operation (reconstruct write and @@ -159,7 +155,8 @@ */ /* - * Operations state - intermediate states that are visible outside of sh->lock + * Operations state - intermediate states that are visible outside of + * STRIPE_ACTIVE. * In general _idle indicates nothing is running, _run indicates a data * processing operation is active, and _result means the data processing result * is stable and can be acted upon. 
For simple operations like biofill and @@ -209,7 +206,6 @@ struct stripe_head { short ddf_layout;/* use DDF ordering to calculate Q */ unsigned long state; /* state flags */ atomic_t count; /* nr of active thread/requests */ - spinlock_t lock; int bm_seq; /* sequence number for bitmap flushes */ int disks; /* disks in stripe */ enum check_states check_state; @@ -240,19 +236,20 @@ struct stripe_head { }; /* stripe_head_state - collects and tracks the dynamic state of a stripe_head - * for handle_stripe. It is only valid under spin_lock(sh->lock); + * for handle_stripe. */ struct stripe_head_state { int syncing, expanding, expanded; int locked, uptodate, to_read, to_write, failed, written; int to_fill, compute, req_compute, non_overwrite; - int failed_num; + int failed_num[2]; + int p_failed, q_failed; + int dec_preread_active; unsigned long ops_request; -}; -/* r6_state - extra state data only relevant to r6 */ -struct r6_state { - int p_failed, q_failed, failed_num[2]; + struct bio *return_bi; + mdk_rdev_t *blocked_rdev; + int handle_bad_blocks; }; /* Flags */ @@ -268,14 +265,16 @@ struct r6_state { #define R5_ReWrite 9 /* have tried to over-write the readerror */ #define R5_Expanded 10 /* This block now has post-expand data */ -#define R5_Wantcompute 11 /* compute_block in progress treat as - * uptodate - */ -#define R5_Wantfill 12 /* dev->toread contains a bio that needs - * filling - */ -#define R5_Wantdrain 13 /* dev->towrite needs to be drained */ -#define R5_WantFUA 14 /* Write should be FUA */ +#define R5_Wantcompute 11 /* compute_block in progress treat as + * uptodate + */ +#define R5_Wantfill 12 /* dev->toread contains a bio that needs + * filling + */ +#define R5_Wantdrain 13 /* dev->towrite needs to be drained */ +#define R5_WantFUA 14 /* Write should be FUA */ +#define R5_WriteError 15 /* got a write error - need to record it */ +#define R5_MadeGood 16 /* A bad block has been fixed by writing to it*/ /* * Write method */ @@ -289,21 +288,25 @@ struct 
r6_state { /* * Stripe state */ -#define STRIPE_HANDLE 2 -#define STRIPE_SYNCING 3 -#define STRIPE_INSYNC 4 -#define STRIPE_PREREAD_ACTIVE 5 -#define STRIPE_DELAYED 6 -#define STRIPE_DEGRADED 7 -#define STRIPE_BIT_DELAY 8 -#define STRIPE_EXPANDING 9 -#define STRIPE_EXPAND_SOURCE 10 -#define STRIPE_EXPAND_READY 11 -#define STRIPE_IO_STARTED 12 /* do not count towards 'bypass_count' */ -#define STRIPE_FULL_WRITE 13 /* all blocks are set to be overwritten */ -#define STRIPE_BIOFILL_RUN 14 -#define STRIPE_COMPUTE_RUN 15 -#define STRIPE_OPS_REQ_PENDING 16 +enum { + STRIPE_ACTIVE, + STRIPE_HANDLE, + STRIPE_SYNC_REQUESTED, + STRIPE_SYNCING, + STRIPE_INSYNC, + STRIPE_PREREAD_ACTIVE, + STRIPE_DELAYED, + STRIPE_DEGRADED, + STRIPE_BIT_DELAY, + STRIPE_EXPANDING, + STRIPE_EXPAND_SOURCE, + STRIPE_EXPAND_READY, + STRIPE_IO_STARTED, /* do not count towards 'bypass_count' */ + STRIPE_FULL_WRITE, /* all blocks are set to be overwritten */ + STRIPE_BIOFILL_RUN, + STRIPE_COMPUTE_RUN, + STRIPE_OPS_REQ_PENDING, +}; /* * Operation request flags @@ -336,7 +339,7 @@ struct r6_state { * PREREAD_ACTIVE. * In stripe_handle, if we find pre-reading is necessary, we do it if * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue. - * HANDLE gets cleared if stripe_handle leave nothing locked. + * HANDLE gets cleared if stripe_handle leaves nothing locked. */ @@ -399,7 +402,7 @@ struct raid5_private_data { * (fresh device added). * Cleared when a sync completes. 
*/ - + int recovery_disabled; /* per cpu variables */ struct raid5_percpu { struct page *spare_page; /* Used when checking P/Q in raid6 */ diff --git a/drivers/net/Makefile b/drivers/net/Makefile index b7622c3745fa..e1eca2ab505e 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -282,6 +282,7 @@ obj-$(CONFIG_USB_HSO) += usb/ obj-$(CONFIG_USB_USBNET) += usb/ obj-$(CONFIG_USB_ZD1201) += usb/ obj-$(CONFIG_USB_IPHETH) += usb/ +obj-$(CONFIG_USB_CDC_PHONET) += usb/ obj-$(CONFIG_WLAN) += wireless/ obj-$(CONFIG_NET_TULIP) += tulip/ diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c index 536038b22710..31798f5f5d06 100644 --- a/drivers/net/acenic.c +++ b/drivers/net/acenic.c @@ -1502,13 +1502,13 @@ static int __devinit ace_init(struct net_device *dev) * firmware to wipe the ring without re-initializing it. */ if (!test_and_set_bit(0, &ap->std_refill_busy)) - ace_load_std_rx_ring(ap, RX_RING_SIZE); + ace_load_std_rx_ring(dev, RX_RING_SIZE); else printk(KERN_ERR "%s: Someone is busy refilling the RX ring\n", ap->name); if (ap->version >= 2) { if (!test_and_set_bit(0, &ap->mini_refill_busy)) - ace_load_mini_rx_ring(ap, RX_MINI_SIZE); + ace_load_mini_rx_ring(dev, RX_MINI_SIZE); else printk(KERN_ERR "%s: Someone is busy refilling " "the RX mini ring\n", ap->name); @@ -1584,9 +1584,10 @@ static void ace_watchdog(struct net_device *data) } -static void ace_tasklet(unsigned long dev) +static void ace_tasklet(unsigned long arg) { - struct ace_private *ap = netdev_priv((struct net_device *)dev); + struct net_device *dev = (struct net_device *) arg; + struct ace_private *ap = netdev_priv(dev); int cur_size; cur_size = atomic_read(&ap->cur_rx_bufs); @@ -1595,7 +1596,7 @@ static void ace_tasklet(unsigned long dev) #ifdef DEBUG printk("refilling buffers (current %i)\n", cur_size); #endif - ace_load_std_rx_ring(ap, RX_RING_SIZE - cur_size); + ace_load_std_rx_ring(dev, RX_RING_SIZE - cur_size); } if (ap->version >= 2) { @@ -1606,7 +1607,7 @@ static void 
ace_tasklet(unsigned long dev) printk("refilling mini buffers (current %i)\n", cur_size); #endif - ace_load_mini_rx_ring(ap, RX_MINI_SIZE - cur_size); + ace_load_mini_rx_ring(dev, RX_MINI_SIZE - cur_size); } } @@ -1616,7 +1617,7 @@ static void ace_tasklet(unsigned long dev) #ifdef DEBUG printk("refilling jumbo buffers (current %i)\n", cur_size); #endif - ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE - cur_size); + ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE - cur_size); } ap->tasklet_pending = 0; } @@ -1642,8 +1643,9 @@ static void ace_dump_trace(struct ace_private *ap) * done only before the device is enabled, thus no interrupts are * generated and by the interrupt handler/tasklet handler. */ -static void ace_load_std_rx_ring(struct ace_private *ap, int nr_bufs) +static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs) { + struct ace_private *ap = netdev_priv(dev); struct ace_regs __iomem *regs = ap->regs; short i, idx; @@ -1657,11 +1659,10 @@ static void ace_load_std_rx_ring(struct ace_private *ap, int nr_bufs) struct rx_desc *rd; dma_addr_t mapping; - skb = dev_alloc_skb(ACE_STD_BUFSIZE + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(dev, ACE_STD_BUFSIZE); if (!skb) break; - skb_reserve(skb, NET_IP_ALIGN); mapping = pci_map_page(ap->pdev, virt_to_page(skb->data), offset_in_page(skb->data), ACE_STD_BUFSIZE, @@ -1705,8 +1706,9 @@ static void ace_load_std_rx_ring(struct ace_private *ap, int nr_bufs) } -static void ace_load_mini_rx_ring(struct ace_private *ap, int nr_bufs) +static void ace_load_mini_rx_ring(struct net_device *dev, int nr_bufs) { + struct ace_private *ap = netdev_priv(dev); struct ace_regs __iomem *regs = ap->regs; short i, idx; @@ -1718,11 +1720,10 @@ static void ace_load_mini_rx_ring(struct ace_private *ap, int nr_bufs) struct rx_desc *rd; dma_addr_t mapping; - skb = dev_alloc_skb(ACE_MINI_BUFSIZE + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(dev, ACE_MINI_BUFSIZE); if (!skb) break; - skb_reserve(skb, NET_IP_ALIGN); mapping = 
pci_map_page(ap->pdev, virt_to_page(skb->data), offset_in_page(skb->data), ACE_MINI_BUFSIZE, @@ -1762,8 +1763,9 @@ static void ace_load_mini_rx_ring(struct ace_private *ap, int nr_bufs) * Load the jumbo rx ring, this may happen at any time if the MTU * is changed to a value > 1500. */ -static void ace_load_jumbo_rx_ring(struct ace_private *ap, int nr_bufs) +static void ace_load_jumbo_rx_ring(struct net_device *dev, int nr_bufs) { + struct ace_private *ap = netdev_priv(dev); struct ace_regs __iomem *regs = ap->regs; short i, idx; @@ -1774,11 +1776,10 @@ static void ace_load_jumbo_rx_ring(struct ace_private *ap, int nr_bufs) struct rx_desc *rd; dma_addr_t mapping; - skb = dev_alloc_skb(ACE_JUMBO_BUFSIZE + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(dev, ACE_JUMBO_BUFSIZE); if (!skb) break; - skb_reserve(skb, NET_IP_ALIGN); mapping = pci_map_page(ap->pdev, virt_to_page(skb->data), offset_in_page(skb->data), ACE_JUMBO_BUFSIZE, @@ -2196,7 +2197,7 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id) #ifdef DEBUG printk("low on std buffers %i\n", cur_size); #endif - ace_load_std_rx_ring(ap, + ace_load_std_rx_ring(dev, RX_RING_SIZE - cur_size); } else run_tasklet = 1; @@ -2212,7 +2213,8 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id) printk("low on mini buffers %i\n", cur_size); #endif - ace_load_mini_rx_ring(ap, RX_MINI_SIZE - cur_size); + ace_load_mini_rx_ring(dev, + RX_MINI_SIZE - cur_size); } else run_tasklet = 1; } @@ -2228,7 +2230,8 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id) printk("low on jumbo buffers %i\n", cur_size); #endif - ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE - cur_size); + ace_load_jumbo_rx_ring(dev, + RX_JUMBO_SIZE - cur_size); } else run_tasklet = 1; } @@ -2267,7 +2270,7 @@ static int ace_open(struct net_device *dev) if (ap->jumbo && !test_and_set_bit(0, &ap->jumbo_refill_busy)) - ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE); + ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE); if (dev->flags & IFF_PROMISC) { cmd.evt = 
C_SET_PROMISC_MODE; @@ -2575,7 +2578,7 @@ static int ace_change_mtu(struct net_device *dev, int new_mtu) "support\n", dev->name); ap->jumbo = 1; if (!test_and_set_bit(0, &ap->jumbo_refill_busy)) - ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE); + ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE); ace_set_rxtx_parms(dev, 1); } } else { diff --git a/drivers/net/acenic.h b/drivers/net/acenic.h index f67dc9b0eb80..51c486cfbb8c 100644 --- a/drivers/net/acenic.h +++ b/drivers/net/acenic.h @@ -766,9 +766,9 @@ static inline void ace_unmask_irq(struct net_device *dev) * Prototypes */ static int ace_init(struct net_device *dev); -static void ace_load_std_rx_ring(struct ace_private *ap, int nr_bufs); -static void ace_load_mini_rx_ring(struct ace_private *ap, int nr_bufs); -static void ace_load_jumbo_rx_ring(struct ace_private *ap, int nr_bufs); +static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs); +static void ace_load_mini_rx_ring(struct net_device *dev, int nr_bufs); +static void ace_load_jumbo_rx_ring(struct net_device *dev, int nr_bufs); static irqreturn_t ace_interrupt(int irq, void *dev_id); static int ace_load_firmware(struct net_device *dev); static int ace_open(struct net_device *dev); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 02842d05c11f..38a83acd502e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1557,8 +1557,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) if (slave_dev->type != ARPHRD_ETHER) bond_setup_by_slave(bond_dev, slave_dev); - else + else { ether_setup(bond_dev); + bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING; + } netdev_bonding_change(bond_dev, NETDEV_POST_TYPE_CHANGE); @@ -4330,7 +4332,7 @@ static void bond_setup(struct net_device *bond_dev) bond_dev->tx_queue_len = 0; bond_dev->flags |= IFF_MASTER|IFF_MULTICAST; bond_dev->priv_flags |= IFF_BONDING; - bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + bond_dev->priv_flags &= 
~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); /* At first, we block adding VLANs. That's the only way to * prevent problems that occur when adding VLANs over an @@ -4691,7 +4693,7 @@ static int bond_check_params(struct bond_params *params) /* miimon and arp_interval not set, we need one so things * work as expected, see bonding.txt for details */ - pr_warning("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n"); + pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n"); } if (primary && !USES_PRIMARY(bond_mode)) { diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index b60835f58650..2dfb4bf90087 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -1025,6 +1025,7 @@ static ssize_t bonding_store_primary(struct device *d, int i; struct slave *slave; struct bonding *bond = to_bond(d); + char ifname[IFNAMSIZ]; if (!rtnl_trylock()) return restart_syscall(); @@ -1035,32 +1036,33 @@ static ssize_t bonding_store_primary(struct device *d, if (!USES_PRIMARY(bond->params.mode)) { pr_info("%s: Unable to set primary slave; %s is in mode %d\n", bond->dev->name, bond->dev->name, bond->params.mode); - } else { - bond_for_each_slave(bond, slave, i) { - if (strnicmp - (slave->dev->name, buf, - strlen(slave->dev->name)) == 0) { - pr_info("%s: Setting %s as primary slave.\n", - bond->dev->name, slave->dev->name); - bond->primary_slave = slave; - strcpy(bond->params.primary, slave->dev->name); - bond_select_active_slave(bond); - goto out; - } - } + goto out; + } - /* if we got here, then we didn't match the name of any slave */ + sscanf(buf, "%16s", ifname); /* IFNAMSIZ */ - if (strlen(buf) == 0 || buf[0] == '\n') { - pr_info("%s: Setting primary slave to None.\n", - 
bond->dev->name); - bond->primary_slave = NULL; - bond_select_active_slave(bond); - } else { - pr_info("%s: Unable to set %.*s as primary slave as it is not a slave.\n", - bond->dev->name, (int)strlen(buf) - 1, buf); + /* check to see if we are clearing primary */ + if (!strlen(ifname) || buf[0] == '\n') { + pr_info("%s: Setting primary slave to None.\n", + bond->dev->name); + bond->primary_slave = NULL; + bond_select_active_slave(bond); + goto out; + } + + bond_for_each_slave(bond, slave, i) { + if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) { + pr_info("%s: Setting %s as primary slave.\n", + bond->dev->name, slave->dev->name); + bond->primary_slave = slave; + strcpy(bond->params.primary, slave->dev->name); + bond_select_active_slave(bond); + goto out; } } + + pr_info("%s: Unable to set %.*s as primary slave.\n", + bond->dev->name, (int)strlen(buf) - 1, buf); out: write_unlock_bh(&bond->curr_slave_lock); read_unlock(&bond->lock); @@ -1195,6 +1197,7 @@ static ssize_t bonding_store_active_slave(struct device *d, struct slave *old_active = NULL; struct slave *new_active = NULL; struct bonding *bond = to_bond(d); + char ifname[IFNAMSIZ]; if (!rtnl_trylock()) return restart_syscall(); @@ -1203,56 +1206,62 @@ static ssize_t bonding_store_active_slave(struct device *d, read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); - if (!USES_PRIMARY(bond->params.mode)) + if (!USES_PRIMARY(bond->params.mode)) { pr_info("%s: Unable to change active slave; %s is in mode %d\n", bond->dev->name, bond->dev->name, bond->params.mode); - else { - bond_for_each_slave(bond, slave, i) { - if (strnicmp - (slave->dev->name, buf, - strlen(slave->dev->name)) == 0) { - old_active = bond->curr_active_slave; - new_active = slave; - if (new_active == old_active) { - /* do nothing */ - pr_info("%s: %s is already the current active slave.\n", + goto out; + } + + sscanf(buf, "%16s", ifname); /* IFNAMSIZ */ + + /* check to see if we are clearing active */ + if (!strlen(ifname) || buf[0] 
== '\n') { + pr_info("%s: Clearing current active slave.\n", + bond->dev->name); + bond->curr_active_slave = NULL; + bond_select_active_slave(bond); + goto out; + } + + bond_for_each_slave(bond, slave, i) { + if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) { + old_active = bond->curr_active_slave; + new_active = slave; + if (new_active == old_active) { + /* do nothing */ + pr_info("%s: %s is already the current" + " active slave.\n", + bond->dev->name, + slave->dev->name); + goto out; + } + else { + if ((new_active) && + (old_active) && + (new_active->link == BOND_LINK_UP) && + IS_UP(new_active->dev)) { + pr_info("%s: Setting %s as active" + " slave.\n", bond->dev->name, slave->dev->name); - goto out; + bond_change_active_slave(bond, + new_active); } else { - if ((new_active) && - (old_active) && - (new_active->link == BOND_LINK_UP) && - IS_UP(new_active->dev)) { - pr_info("%s: Setting %s as active slave.\n", - bond->dev->name, - slave->dev->name); - bond_change_active_slave(bond, new_active); - } - else { - pr_info("%s: Could not set %s as active slave; either %s is down or the link is down.\n", - bond->dev->name, - slave->dev->name, - slave->dev->name); - } - goto out; + pr_info("%s: Could not set %s as" + " active slave; either %s is" + " down or the link is down.\n", + bond->dev->name, + slave->dev->name, + slave->dev->name); } + goto out; } } - - /* if we got here, then we didn't match the name of any slave */ - - if (strlen(buf) == 0 || buf[0] == '\n') { - pr_info("%s: Setting active slave to None.\n", - bond->dev->name); - bond->primary_slave = NULL; - bond_select_active_slave(bond); - } else { - pr_info("%s: Unable to set %.*s as active slave as it is not a slave.\n", - bond->dev->name, (int)strlen(buf) - 1, buf); - } } + + pr_info("%s: Unable to set %.*s as active slave.\n", + bond->dev->name, (int)strlen(buf) - 1, buf); out: write_unlock_bh(&bond->curr_slave_lock); read_unlock(&bond->lock); diff --git a/drivers/net/forcedeth.c 
b/drivers/net/forcedeth.c index e64cd9ceac3f..e55df308a3af 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -2764,7 +2764,14 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit) prefetch(skb->data); vlanflags = le32_to_cpu(np->get_rx.ex->buflow); - if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) { + + /* + * There's need to check for NETIF_F_HW_VLAN_RX here. + * Even if vlan rx accel is disabled, + * NV_RX3_VLAN_TAG_PRESENT is pseudo randomly set. + */ + if (dev->features & NETIF_F_HW_VLAN_RX && + vlanflags & NV_RX3_VLAN_TAG_PRESENT) { u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK; __vlan_hwaccel_put_tag(skb, vid); @@ -5331,15 +5338,16 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i np->txrxctl_bits |= NVREG_TXRXCTL_RXCHECK; dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO | NETIF_F_RXCSUM; - dev->features |= dev->hw_features; } np->vlanctl_bits = 0; if (id->driver_data & DEV_HAS_VLAN) { np->vlanctl_bits = NVREG_VLANCONTROL_ENABLE; - dev->features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX; + dev->hw_features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX; } + dev->features |= dev->hw_features; + np->pause_flags = NV_PAUSEFRAME_RX_CAPABLE | NV_PAUSEFRAME_RX_REQ | NV_PAUSEFRAME_AUTONEG; if ((id->driver_data & DEV_HAS_PAUSEFRAME_TX_V1) || (id->driver_data & DEV_HAS_PAUSEFRAME_TX_V2) || @@ -5607,6 +5615,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i goto out_error; } + nv_vlan_mode(dev, dev->features); + netif_carrier_off(dev); dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n", diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 835cd2588148..2659daad783d 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -388,12 +388,8 @@ static void gfar_init_mac(struct net_device *ndev) if (priv->hwts_rx_en) rctrl |= RCTRL_PRSDEP_INIT | RCTRL_TS_ENABLE; - /* keep vlan related bits if it's enabled */ - if (ndev->features & 
NETIF_F_HW_VLAN_TX) - rctrl |= RCTRL_VLEX | RCTRL_PRSDEP_INIT; - if (ndev->features & NETIF_F_HW_VLAN_RX) - tctrl |= TCTRL_VLINS; + rctrl |= RCTRL_VLEX | RCTRL_PRSDEP_INIT; /* Init rctrl based on our settings */ gfar_write(&regs->rctrl, rctrl); diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 6e82dd32e806..46b5f5fd686b 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -183,7 +183,7 @@ static void ifb_setup(struct net_device *dev) dev->flags |= IFF_NOARP; dev->flags &= ~IFF_MULTICAST; - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); random_ether_addr(dev->dev_addr); } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index ba631fcece34..05172c39a0ce 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -572,7 +572,7 @@ void macvlan_common_setup(struct net_device *dev) { ether_setup(dev); - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); dev->netdev_ops = &macvlan_netdev_ops; dev->destructor = free_netdev; dev->header_ops = &macvlan_hard_header_ops, diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 803576568154..dc3fbf61910b 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -190,6 +190,7 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits) /* minimum number of free TX descriptors required to wake up TX process */ #define TG3_TX_WAKEUP_THRESH(tnapi) ((tnapi)->tx_pending / 4) +#define TG3_TX_BD_DMA_MAX 4096 #define TG3_RAW_IP_ALIGN 2 @@ -4824,7 +4825,7 @@ static void tg3_tx(struct tg3_napi *tnapi) txq = netdev_get_tx_queue(tp->dev, index); while (sw_idx != hw_idx) { - struct ring_info *ri = &tnapi->tx_buffers[sw_idx]; + struct tg3_tx_ring_info *ri = &tnapi->tx_buffers[sw_idx]; struct sk_buff *skb = ri->skb; int i, tx_bug = 0; @@ -4840,6 +4841,12 @@ static void tg3_tx(struct tg3_napi *tnapi) ri->skb = NULL; + while (ri->fragmented) { + ri->fragmented = false; + sw_idx = 
NEXT_TX(sw_idx); + ri = &tnapi->tx_buffers[sw_idx]; + } + sw_idx = NEXT_TX(sw_idx); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { @@ -4851,6 +4858,13 @@ static void tg3_tx(struct tg3_napi *tnapi) dma_unmap_addr(ri, mapping), skb_shinfo(skb)->frags[i].size, PCI_DMA_TODEVICE); + + while (ri->fragmented) { + ri->fragmented = false; + sw_idx = NEXT_TX(sw_idx); + ri = &tnapi->tx_buffers[sw_idx]; + } + sw_idx = NEXT_TX(sw_idx); } @@ -5901,40 +5915,100 @@ static inline int tg3_40bit_overflow_test(struct tg3 *tp, dma_addr_t mapping, #endif } -static void tg3_set_txd(struct tg3_napi *tnapi, int entry, - dma_addr_t mapping, int len, u32 flags, - u32 mss_and_is_end) +static inline void tg3_tx_set_bd(struct tg3_tx_buffer_desc *txbd, + dma_addr_t mapping, u32 len, u32 flags, + u32 mss, u32 vlan) +{ + txbd->addr_hi = ((u64) mapping >> 32); + txbd->addr_lo = ((u64) mapping & 0xffffffff); + txbd->len_flags = (len << TXD_LEN_SHIFT) | (flags & 0x0000ffff); + txbd->vlan_tag = (mss << TXD_MSS_SHIFT) | (vlan << TXD_VLAN_TAG_SHIFT); +} + +static bool tg3_tx_frag_set(struct tg3_napi *tnapi, u32 *entry, u32 *budget, + dma_addr_t map, u32 len, u32 flags, + u32 mss, u32 vlan) { - struct tg3_tx_buffer_desc *txd = &tnapi->tx_ring[entry]; - int is_end = (mss_and_is_end & 0x1); - u32 mss = (mss_and_is_end >> 1); - u32 vlan_tag = 0; + struct tg3 *tp = tnapi->tp; + bool hwbug = false; + + if (tg3_flag(tp, SHORT_DMA_BUG) && len <= 8) + hwbug = 1; + + if (tg3_4g_overflow_test(map, len)) + hwbug = 1; + + if (tg3_40bit_overflow_test(tp, map, len)) + hwbug = 1; + + if (tg3_flag(tp, 4K_FIFO_LIMIT)) { + u32 tmp_flag = flags & ~TXD_FLAG_END; + while (len > TG3_TX_BD_DMA_MAX) { + u32 frag_len = TG3_TX_BD_DMA_MAX; + len -= TG3_TX_BD_DMA_MAX; + + if (len) { + tnapi->tx_buffers[*entry].fragmented = true; + /* Avoid the 8byte DMA problem */ + if (len <= 8) { + len += TG3_TX_BD_DMA_MAX / 2; + frag_len = TG3_TX_BD_DMA_MAX / 2; + } + } else + tmp_flag = flags; + + if (*budget) { + 
tg3_tx_set_bd(&tnapi->tx_ring[*entry], map, + frag_len, tmp_flag, mss, vlan); + (*budget)--; + *entry = NEXT_TX(*entry); + } else { + hwbug = 1; + break; + } + + map += frag_len; + } - if (is_end) - flags |= TXD_FLAG_END; - if (flags & TXD_FLAG_VLAN) { - vlan_tag = flags >> 16; - flags &= 0xffff; + if (len) { + if (*budget) { + tg3_tx_set_bd(&tnapi->tx_ring[*entry], map, + len, flags, mss, vlan); + (*budget)--; + *entry = NEXT_TX(*entry); + } else { + hwbug = 1; + } + } + } else { + tg3_tx_set_bd(&tnapi->tx_ring[*entry], map, + len, flags, mss, vlan); + *entry = NEXT_TX(*entry); } - vlan_tag |= (mss << TXD_MSS_SHIFT); - txd->addr_hi = ((u64) mapping >> 32); - txd->addr_lo = ((u64) mapping & 0xffffffff); - txd->len_flags = (len << TXD_LEN_SHIFT) | flags; - txd->vlan_tag = vlan_tag << TXD_VLAN_TAG_SHIFT; + return hwbug; } -static void tg3_skb_error_unmap(struct tg3_napi *tnapi, - struct sk_buff *skb, int last) +static void tg3_tx_skb_unmap(struct tg3_napi *tnapi, u32 entry, int last) { int i; - u32 entry = tnapi->tx_prod; - struct ring_info *txb = &tnapi->tx_buffers[entry]; + struct sk_buff *skb; + struct tg3_tx_ring_info *txb = &tnapi->tx_buffers[entry]; + + skb = txb->skb; + txb->skb = NULL; pci_unmap_single(tnapi->tp->pdev, dma_unmap_addr(txb, mapping), skb_headlen(skb), PCI_DMA_TODEVICE); + + while (txb->fragmented) { + txb->fragmented = false; + entry = NEXT_TX(entry); + txb = &tnapi->tx_buffers[entry]; + } + for (i = 0; i < last; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -5944,18 +6018,24 @@ static void tg3_skb_error_unmap(struct tg3_napi *tnapi, pci_unmap_page(tnapi->tp->pdev, dma_unmap_addr(txb, mapping), frag->size, PCI_DMA_TODEVICE); + + while (txb->fragmented) { + txb->fragmented = false; + entry = NEXT_TX(entry); + txb = &tnapi->tx_buffers[entry]; + } } } /* Workaround 4GB and 40-bit hardware DMA bugs. 
*/ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi, struct sk_buff *skb, - u32 base_flags, u32 mss) + u32 *entry, u32 *budget, + u32 base_flags, u32 mss, u32 vlan) { struct tg3 *tp = tnapi->tp; struct sk_buff *new_skb; dma_addr_t new_addr = 0; - u32 entry = tnapi->tx_prod; int ret = 0; if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701) @@ -5976,24 +6056,22 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi, PCI_DMA_TODEVICE); /* Make sure the mapping succeeded */ if (pci_dma_mapping_error(tp->pdev, new_addr)) { - ret = -1; dev_kfree_skb(new_skb); - - /* Make sure new skb does not cross any 4G boundaries. - * Drop the packet if it does. - */ - } else if (tg3_4g_overflow_test(new_addr, new_skb->len)) { - pci_unmap_single(tp->pdev, new_addr, new_skb->len, - PCI_DMA_TODEVICE); ret = -1; - dev_kfree_skb(new_skb); } else { - tnapi->tx_buffers[entry].skb = new_skb; - dma_unmap_addr_set(&tnapi->tx_buffers[entry], + base_flags |= TXD_FLAG_END; + + tnapi->tx_buffers[*entry].skb = new_skb; + dma_unmap_addr_set(&tnapi->tx_buffers[*entry], mapping, new_addr); - tg3_set_txd(tnapi, entry, new_addr, new_skb->len, - base_flags, 1 | (mss << 1)); + if (tg3_tx_frag_set(tnapi, entry, budget, new_addr, + new_skb->len, base_flags, + mss, vlan)) { + tg3_tx_skb_unmap(tnapi, *entry, 0); + dev_kfree_skb(new_skb); + ret = -1; + } } } @@ -6051,7 +6129,8 @@ tg3_tso_bug_end: static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct tg3 *tp = netdev_priv(dev); - u32 len, entry, base_flags, mss; + u32 len, entry, base_flags, mss, vlan = 0; + u32 budget; int i = -1, would_hit_hwbug; dma_addr_t mapping; struct tg3_napi *tnapi; @@ -6063,12 +6142,14 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) if (tg3_flag(tp, ENABLE_TSS)) tnapi++; + budget = tg3_tx_avail(tnapi); + /* We are running in BH disabled context with netif_tx_lock * and TX reclaim runs via tp->napi.poll inside of a software * interrupt. 
Furthermore, IRQ processing runs lockless so we have * no IRQ context deadlocks to worry about either. Rejoice! */ - if (unlikely(tg3_tx_avail(tnapi) <= (skb_shinfo(skb)->nr_frags + 1))) { + if (unlikely(budget <= (skb_shinfo(skb)->nr_frags + 1))) { if (!netif_tx_queue_stopped(txq)) { netif_tx_stop_queue(txq); @@ -6153,9 +6234,12 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) } } - if (vlan_tx_tag_present(skb)) - base_flags |= (TXD_FLAG_VLAN | - (vlan_tx_tag_get(skb) << 16)); +#ifdef BCM_KERNEL_SUPPORTS_8021Q + if (vlan_tx_tag_present(skb)) { + base_flags |= TXD_FLAG_VLAN; + vlan = vlan_tx_tag_get(skb); + } +#endif if (tg3_flag(tp, USE_JUMBO_BDFLAG) && !mss && skb->len > VLAN_ETH_FRAME_LEN) @@ -6174,25 +6258,23 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) would_hit_hwbug = 0; - if (tg3_flag(tp, SHORT_DMA_BUG) && len <= 8) - would_hit_hwbug = 1; - - if (tg3_4g_overflow_test(mapping, len)) - would_hit_hwbug = 1; - - if (tg3_40bit_overflow_test(tp, mapping, len)) - would_hit_hwbug = 1; - if (tg3_flag(tp, 5701_DMA_BUG)) would_hit_hwbug = 1; - tg3_set_txd(tnapi, entry, mapping, len, base_flags, - (skb_shinfo(skb)->nr_frags == 0) | (mss << 1)); - - entry = NEXT_TX(entry); + if (tg3_tx_frag_set(tnapi, &entry, &budget, mapping, len, base_flags | + ((skb_shinfo(skb)->nr_frags == 0) ? TXD_FLAG_END : 0), + mss, vlan)) + would_hit_hwbug = 1; /* Now loop through additional data fragments, and queue them. 
*/ if (skb_shinfo(skb)->nr_frags > 0) { + u32 tmp_mss = mss; + + if (!tg3_flag(tp, HW_TSO_1) && + !tg3_flag(tp, HW_TSO_2) && + !tg3_flag(tp, HW_TSO_3)) + tmp_mss = 0; + last = skb_shinfo(skb)->nr_frags - 1; for (i = 0; i <= last; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -6209,39 +6291,25 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) if (pci_dma_mapping_error(tp->pdev, mapping)) goto dma_error; - if (tg3_flag(tp, SHORT_DMA_BUG) && - len <= 8) + if (tg3_tx_frag_set(tnapi, &entry, &budget, mapping, + len, base_flags | + ((i == last) ? TXD_FLAG_END : 0), + tmp_mss, vlan)) would_hit_hwbug = 1; - - if (tg3_4g_overflow_test(mapping, len)) - would_hit_hwbug = 1; - - if (tg3_40bit_overflow_test(tp, mapping, len)) - would_hit_hwbug = 1; - - if (tg3_flag(tp, HW_TSO_1) || - tg3_flag(tp, HW_TSO_2) || - tg3_flag(tp, HW_TSO_3)) - tg3_set_txd(tnapi, entry, mapping, len, - base_flags, (i == last)|(mss << 1)); - else - tg3_set_txd(tnapi, entry, mapping, len, - base_flags, (i == last)); - - entry = NEXT_TX(entry); } } if (would_hit_hwbug) { - tg3_skb_error_unmap(tnapi, skb, i); + tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i); /* If the workaround fails due to memory/mapping * failure, silently drop this packet. 
*/ - if (tigon3_dma_hwbug_workaround(tnapi, skb, base_flags, mss)) + entry = tnapi->tx_prod; + budget = tg3_tx_avail(tnapi); + if (tigon3_dma_hwbug_workaround(tnapi, skb, &entry, &budget, + base_flags, mss, vlan)) goto out_unlock; - - entry = NEXT_TX(tnapi->tx_prod); } skb_tx_timestamp(skb); @@ -6269,7 +6337,7 @@ out_unlock: return NETDEV_TX_OK; dma_error: - tg3_skb_error_unmap(tnapi, skb, i); + tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i); dev_kfree_skb(skb); tnapi->tx_buffers[tnapi->tx_prod].skb = NULL; return NETDEV_TX_OK; @@ -6602,35 +6670,13 @@ static void tg3_free_rings(struct tg3 *tp) if (!tnapi->tx_buffers) continue; - for (i = 0; i < TG3_TX_RING_SIZE; ) { - struct ring_info *txp; - struct sk_buff *skb; - unsigned int k; - - txp = &tnapi->tx_buffers[i]; - skb = txp->skb; + for (i = 0; i < TG3_TX_RING_SIZE; i++) { + struct sk_buff *skb = tnapi->tx_buffers[i].skb; - if (skb == NULL) { - i++; + if (!skb) continue; - } - - pci_unmap_single(tp->pdev, - dma_unmap_addr(txp, mapping), - skb_headlen(skb), - PCI_DMA_TODEVICE); - txp->skb = NULL; - i++; - - for (k = 0; k < skb_shinfo(skb)->nr_frags; k++) { - txp = &tnapi->tx_buffers[i & (TG3_TX_RING_SIZE - 1)]; - pci_unmap_page(tp->pdev, - dma_unmap_addr(txp, mapping), - skb_shinfo(skb)->frags[k].size, - PCI_DMA_TODEVICE); - i++; - } + tg3_tx_skb_unmap(tnapi, i, skb_shinfo(skb)->nr_frags); dev_kfree_skb_any(skb); } @@ -6762,9 +6808,9 @@ static int tg3_alloc_consistent(struct tg3 *tp) */ if ((!i && !tg3_flag(tp, ENABLE_TSS)) || (i && tg3_flag(tp, ENABLE_TSS))) { - tnapi->tx_buffers = kzalloc(sizeof(struct ring_info) * - TG3_TX_RING_SIZE, - GFP_KERNEL); + tnapi->tx_buffers = kzalloc( + sizeof(struct tg3_tx_ring_info) * + TG3_TX_RING_SIZE, GFP_KERNEL); if (!tnapi->tx_buffers) goto err_out; @@ -8360,7 +8406,7 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) /* Program the jumbo buffer descriptor ring control * blocks on those devices that have them. 
*/ - if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719 || + if (tp->pci_chip_rev_id == CHIPREV_ID_5719_A0 || (tg3_flag(tp, JUMBO_CAPABLE) && !tg3_flag(tp, 5780_CLASS))) { if (tg3_flag(tp, JUMBO_RING_ENABLE)) { @@ -11204,6 +11250,7 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode) { u32 mac_mode, rx_start_idx, rx_idx, tx_idx, opaque_key; u32 base_flags = 0, mss = 0, desc_idx, coal_now, data_off, val; + u32 budget; struct sk_buff *skb, *rx_skb; u8 *tx_data; dma_addr_t map; @@ -11363,6 +11410,10 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode) return -EIO; } + val = tnapi->tx_prod; + tnapi->tx_buffers[val].skb = skb; + dma_unmap_addr_set(&tnapi->tx_buffers[val], mapping, map); + tw32_f(HOSTCC_MODE, tp->coalesce_mode | HOSTCC_MODE_ENABLE | rnapi->coal_now); @@ -11370,8 +11421,13 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode) rx_start_idx = rnapi->hw_status->idx[0].rx_producer; - tg3_set_txd(tnapi, tnapi->tx_prod, map, tx_len, - base_flags, (mss << 1) | 1); + budget = tg3_tx_avail(tnapi); + if (tg3_tx_frag_set(tnapi, &val, &budget, map, tx_len, + base_flags | TXD_FLAG_END, mss, 0)) { + tnapi->tx_buffers[val].skb = NULL; + dev_kfree_skb(skb); + return -EIO; + } tnapi->tx_prod++; @@ -11394,7 +11450,7 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode) break; } - pci_unmap_single(tp->pdev, map, tx_len, PCI_DMA_TODEVICE); + tg3_tx_skb_unmap(tnapi, tnapi->tx_prod - 1, 0); dev_kfree_skb(skb); if (tx_idx != tnapi->tx_prod) @@ -13817,7 +13873,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tg3_flag_set(tp, 5705_PLUS); /* Determine TSO capabilities */ - if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719) + if (tp->pci_chip_rev_id == CHIPREV_ID_5719_A0) ; /* Do nothing. HW bug. 
*/ else if (tg3_flag(tp, 57765_PLUS)) tg3_flag_set(tp, HW_TSO_3); @@ -13880,11 +13936,14 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) if (tg3_flag(tp, 5755_PLUS)) tg3_flag_set(tp, SHORT_DMA_BUG); + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719) + tg3_flag_set(tp, 4K_FIFO_LIMIT); + if (tg3_flag(tp, 5717_PLUS)) tg3_flag_set(tp, LRG_PROD_RING_CAP); if (tg3_flag(tp, 57765_PLUS) && - GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5719) + tp->pci_chip_rev_id != CHIPREV_ID_5719_A0) tg3_flag_set(tp, USE_JUMBO_BDFLAG); if (!tg3_flag(tp, 5705_PLUS) || diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 691539ba17b3..2ea456dd5880 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -2652,6 +2652,12 @@ struct ring_info { DEFINE_DMA_UNMAP_ADDR(mapping); }; +struct tg3_tx_ring_info { + struct sk_buff *skb; + DEFINE_DMA_UNMAP_ADDR(mapping); + bool fragmented; +}; + struct tg3_link_config { /* Describes what we're trying to get. */ u32 advertising; @@ -2816,7 +2822,7 @@ struct tg3_napi { u32 last_tx_cons; u32 prodmbox; struct tg3_tx_buffer_desc *tx_ring; - struct ring_info *tx_buffers; + struct tg3_tx_ring_info *tx_buffers; dma_addr_t status_mapping; dma_addr_t rx_rcb_mapping; @@ -2899,6 +2905,7 @@ enum TG3_FLAGS { TG3_FLAG_57765_PLUS, TG3_FLAG_APE_HAS_NCSI, TG3_FLAG_5717_PLUS, + TG3_FLAG_4K_FIFO_LIMIT, /* Add new flags before this comment and TG3_FLAG_NUMBER_OF_FLAGS */ TG3_FLAG_NUMBER_OF_FLAGS, /* Last entry in enum TG3_FLAGS */ diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9a6b3824da14..71f3d1a35b74 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -528,6 +528,7 @@ static void tun_net_init(struct net_device *dev) dev->netdev_ops = &tap_netdev_ops; /* Ethernet TAP Device */ ether_setup(dev); + dev->priv_flags &= ~IFF_TX_SKB_SHARING; random_ether_addr(dev->dev_addr); diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c index 52502883523e..c5c4b4def7fb 100644 --- a/drivers/net/usb/asix.c +++ b/drivers/net/usb/asix.c @@ 
-314,12 +314,11 @@ static int asix_rx_fixup(struct usbnet *dev, struct sk_buff *skb) skb_pull(skb, 4); while (skb->len > 0) { - if ((short)(header & 0x0000ffff) != - ~((short)((header & 0xffff0000) >> 16))) { + if ((header & 0x07ff) != ((~header >> 16) & 0x07ff)) netdev_err(dev->net, "asix_rx_fixup() Bad Header Length\n"); - } + /* get the packet length */ - size = (u16) (header & 0x0000ffff); + size = (u16) (header & 0x000007ff); if ((skb->len) - ((size + 1) & 0xfffe) == 0) { u8 alignment = (unsigned long)skb->data & 0x3; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 7f78db7bd68d..5b23767ea817 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -263,6 +263,8 @@ static void veth_setup(struct net_device *dev) { ether_setup(dev); + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->netdev_ops = &veth_netdev_ops; dev->ethtool_ops = &veth_ethtool_ops; dev->features |= NETIF_F_LLTX; diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index b25c9229a6a9..eb2028187fbe 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1074,9 +1074,10 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) used = pvc_is_used(pvc); - if (type == ARPHRD_ETHER) + if (type == ARPHRD_ETHER) { dev = alloc_netdev(0, "pvceth%d", ether_setup); - else + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + } else dev = alloc_netdev(0, "pvc%d", pvc_setup); if (!dev) { diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index 55cf71fbffe3..e1b3e3c134fd 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -2823,6 +2823,7 @@ static struct net_device *_init_airo_card( unsigned short irq, int port, dev->wireless_data = &ai->wireless_data; dev->irq = irq; dev->base_addr = port; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; SET_NETDEV_DEV(dev, dmdev); diff --git a/drivers/net/wireless/b43/Kconfig b/drivers/net/wireless/b43/Kconfig index d2293dcc117f..3cab843afb05 100644 --- 
a/drivers/net/wireless/b43/Kconfig +++ b/drivers/net/wireless/b43/Kconfig @@ -28,7 +28,7 @@ config B43 config B43_BCMA bool "Support for BCMA bus" - depends on B43 && BCMA && BROKEN + depends on B43 && BCMA default y config B43_SSB diff --git a/drivers/net/wireless/b43/bus.c b/drivers/net/wireless/b43/bus.c index 64c3f65ff8c0..05f6c7bff6ab 100644 --- a/drivers/net/wireless/b43/bus.c +++ b/drivers/net/wireless/b43/bus.c @@ -244,10 +244,12 @@ void b43_bus_set_wldev(struct b43_bus_dev *dev, void *wldev) #ifdef CONFIG_B43_BCMA case B43_BUS_BCMA: bcma_set_drvdata(dev->bdev, wldev); + break; #endif #ifdef CONFIG_B43_SSB case B43_BUS_SSB: ssb_set_drvdata(dev->sdev, wldev); + break; #endif } } diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 032d46674f6b..26f1ab840cc7 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -5350,6 +5350,7 @@ static void b43_ssb_remove(struct ssb_device *sdev) { struct b43_wl *wl = ssb_get_devtypedata(sdev); struct b43_wldev *wldev = ssb_get_drvdata(sdev); + struct b43_bus_dev *dev = wldev->dev; /* We must cancel any work here before unregistering from ieee80211, * as the ieee80211 unreg will destroy the workqueue. */ @@ -5365,14 +5366,14 @@ static void b43_ssb_remove(struct ssb_device *sdev) ieee80211_unregister_hw(wl->hw); } - b43_one_core_detach(wldev->dev); + b43_one_core_detach(dev); if (list_empty(&wl->devlist)) { b43_leds_unregister(wl); /* Last core on the chip unregistered. * We can destroy common struct b43_wl. 
*/ - b43_wireless_exit(wldev->dev, wl); + b43_wireless_exit(dev, wl); } } diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c index d5084829c9e5..89a116fba1de 100644 --- a/drivers/net/wireless/hostap/hostap_main.c +++ b/drivers/net/wireless/hostap/hostap_main.c @@ -855,6 +855,7 @@ void hostap_setup_dev(struct net_device *dev, local_info_t *local, iface = netdev_priv(dev); ether_setup(dev); + dev->priv_flags &= ~IFF_TX_SKB_SHARING; /* kernel callbacks */ if (iface) { diff --git a/drivers/nfc/pn533.c b/drivers/nfc/pn533.c index 037231540719..c77e0543e502 100644 --- a/drivers/nfc/pn533.c +++ b/drivers/nfc/pn533.c @@ -1596,7 +1596,7 @@ static void pn533_disconnect(struct usb_interface *interface) usb_free_urb(dev->out_urb); kfree(dev); - nfc_dev_info(&dev->interface->dev, "NXP PN533 NFC device disconnected"); + nfc_dev_info(&interface->dev, "NXP PN533 NFC device disconnected"); } static struct usb_driver pn533_driver = { diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c index 77cb2a14c896..81525ae5d869 100644 --- a/drivers/pci/pci-label.c +++ b/drivers/pci/pci-label.c @@ -55,7 +55,7 @@ enum smbios_attr_enum { SMBIOS_ATTR_INSTANCE_SHOW, }; -static mode_t +static size_t find_smbios_instance_string(struct pci_dev *pdev, char *buf, enum smbios_attr_enum attribute) { diff --git a/drivers/scsi/be2iscsi/be_main.h b/drivers/scsi/be2iscsi/be_main.h index 081c171a1ed6..5ce5170254ca 100644 --- a/drivers/scsi/be2iscsi/be_main.h +++ b/drivers/scsi/be2iscsi/be_main.h @@ -397,7 +397,7 @@ struct amap_pdu_data_out { }; struct be_cmd_bhs { - struct iscsi_cmd iscsi_hdr; + struct iscsi_scsi_req iscsi_hdr; unsigned char pad1[16]; struct pdu_data_out iscsi_data_pdu; unsigned char pad2[BE_SENSE_INFO_SIZE - @@ -428,7 +428,7 @@ struct be_nonio_bhs { }; struct be_status_bhs { - struct iscsi_cmd iscsi_hdr; + struct iscsi_scsi_req iscsi_hdr; unsigned char pad1[16]; /** * The plus 2 below is to hold the sense info length that gets diff 
--git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index 030a96c646c3..9ae80cd5953b 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -332,11 +332,11 @@ int bnx2i_send_iscsi_login(struct bnx2i_conn *bnx2i_conn, { struct bnx2i_cmd *bnx2i_cmd; struct bnx2i_login_request *login_wqe; - struct iscsi_login *login_hdr; + struct iscsi_login_req *login_hdr; u32 dword; bnx2i_cmd = (struct bnx2i_cmd *)task->dd_data; - login_hdr = (struct iscsi_login *)task->hdr; + login_hdr = (struct iscsi_login_req *)task->hdr; login_wqe = (struct bnx2i_login_request *) bnx2i_conn->ep->qp.sq_prod_qe; @@ -1349,7 +1349,7 @@ int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session, struct bnx2i_cmd_response *resp_cqe; struct bnx2i_cmd *bnx2i_cmd; struct iscsi_task *task; - struct iscsi_cmd_rsp *hdr; + struct iscsi_scsi_rsp *hdr; u32 datalen = 0; resp_cqe = (struct bnx2i_cmd_response *)cqe; @@ -1376,7 +1376,7 @@ int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session, } bnx2i_iscsi_unmap_sg_list(bnx2i_cmd); - hdr = (struct iscsi_cmd_rsp *)task->hdr; + hdr = (struct iscsi_scsi_rsp *)task->hdr; resp_cqe = (struct bnx2i_cmd_response *)cqe; hdr->opcode = resp_cqe->op_code; hdr->max_cmdsn = cpu_to_be32(resp_cqe->max_cmd_sn); diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c index 5c55a75ae597..cffd4d75df56 100644 --- a/drivers/scsi/bnx2i/bnx2i_iscsi.c +++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c @@ -1213,7 +1213,7 @@ static int bnx2i_task_xmit(struct iscsi_task *task) struct bnx2i_conn *bnx2i_conn = conn->dd_data; struct scsi_cmnd *sc = task->sc; struct bnx2i_cmd *cmd = task->dd_data; - struct iscsi_cmd *hdr = (struct iscsi_cmd *) task->hdr; + struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr; if (atomic_read(&bnx2i_conn->ep->num_active_cmds) + 1 > hba->max_sqes) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index d7a4120034a2..256a999d010b 100644 --- a/drivers/scsi/libiscsi.c +++ 
b/drivers/scsi/libiscsi.c @@ -84,22 +84,6 @@ MODULE_PARM_DESC(debug_libiscsi_eh, __func__, ##arg); \ } while (0); -/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */ -#define SNA32_CHECK 2147483648UL - -static int iscsi_sna_lt(u32 n1, u32 n2) -{ - return n1 != n2 && ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) || - (n1 > n2 && (n2 - n1 < SNA32_CHECK))); -} - -/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */ -static int iscsi_sna_lte(u32 n1, u32 n2) -{ - return n1 == n2 || ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) || - (n1 > n2 && (n2 - n1 < SNA32_CHECK))); -} - inline void iscsi_conn_queue_work(struct iscsi_conn *conn) { struct Scsi_Host *shost = conn->session->host; @@ -360,7 +344,7 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) struct iscsi_conn *conn = task->conn; struct iscsi_session *session = conn->session; struct scsi_cmnd *sc = task->sc; - struct iscsi_cmd *hdr; + struct iscsi_scsi_req *hdr; unsigned hdrlength, cmd_len; itt_t itt; int rc; @@ -374,7 +358,7 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) if (rc) return rc; } - hdr = (struct iscsi_cmd *) task->hdr; + hdr = (struct iscsi_scsi_req *)task->hdr; itt = hdr->itt; memset(hdr, 0, sizeof(*hdr)); @@ -830,7 +814,7 @@ static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr, struct iscsi_task *task, char *data, int datalen) { - struct iscsi_cmd_rsp *rhdr = (struct iscsi_cmd_rsp *)hdr; + struct iscsi_scsi_rsp *rhdr = (struct iscsi_scsi_rsp *)hdr; struct iscsi_session *session = conn->session; struct scsi_cmnd *sc = task->sc; diff --git a/drivers/staging/ath6kl/os/linux/ar6000_drv.c b/drivers/staging/ath6kl/os/linux/ar6000_drv.c index 499b7a90e941..32ee39ad00df 100644 --- a/drivers/staging/ath6kl/os/linux/ar6000_drv.c +++ b/drivers/staging/ath6kl/os/linux/ar6000_drv.c @@ -6205,6 +6205,7 @@ int ar6000_create_ap_interface(struct ar6_softc *ar, char *ap_ifname) ether_setup(dev); init_netdev(dev, ap_ifname); + dev->priv_flags &= ~IFF_TX_SKB_SHARING; 
if (register_netdev(dev)) { AR_DEBUG_PRINTF(ATH_DEBUG_ERR,("ar6000_create_ap_interface: register_netdev failed\n")); diff --git a/drivers/staging/brcm80211/brcmsmac/mac80211_if.h b/drivers/staging/brcm80211/brcmsmac/mac80211_if.h index 5711e7c16b50..40e3d375ea99 100644 --- a/drivers/staging/brcm80211/brcmsmac/mac80211_if.h +++ b/drivers/staging/brcm80211/brcmsmac/mac80211_if.h @@ -24,8 +24,6 @@ #define BRCMS_SET_SHORTSLOT_OVERRIDE 146 -#include <linux/interrupt.h> - /* BMAC Note: High-only driver is no longer working in softirq context as it needs to block and * sleep so perimeter lock has to be a semaphore instead of spinlock. This requires timers to be * submitted to workqueue instead of being on kernel timer diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig index 5cb0f0ef6af0..b28794b72125 100644 --- a/drivers/target/Kconfig +++ b/drivers/target/Kconfig @@ -31,5 +31,6 @@ config TCM_PSCSI source "drivers/target/loopback/Kconfig" source "drivers/target/tcm_fc/Kconfig" +source "drivers/target/iscsi/Kconfig" endif diff --git a/drivers/target/Makefile b/drivers/target/Makefile index 21df808a992c..1060c7b7f803 100644 --- a/drivers/target/Makefile +++ b/drivers/target/Makefile @@ -24,5 +24,5 @@ obj-$(CONFIG_TCM_PSCSI) += target_core_pscsi.o # Fabric modules obj-$(CONFIG_LOOPBACK_TARGET) += loopback/ - obj-$(CONFIG_TCM_FC) += tcm_fc/ +obj-$(CONFIG_ISCSI_TARGET) += iscsi/ diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig new file mode 100644 index 000000000000..564ff4e0dbc4 --- /dev/null +++ b/drivers/target/iscsi/Kconfig @@ -0,0 +1,8 @@ +config ISCSI_TARGET + tristate "Linux-iSCSI.org iSCSI Target Mode Stack" + select CRYPTO + select CRYPTO_CRC32C + select CRYPTO_CRC32C_INTEL if X86 + help + Say M here to enable the ConfigFS enabled Linux-iSCSI.org iSCSI + Target Mode Stack. 
diff --git a/drivers/target/iscsi/Makefile b/drivers/target/iscsi/Makefile new file mode 100644 index 000000000000..5b9a2cf7f0a9 --- /dev/null +++ b/drivers/target/iscsi/Makefile @@ -0,0 +1,20 @@ +iscsi_target_mod-y += iscsi_target_parameters.o \ + iscsi_target_seq_pdu_list.o \ + iscsi_target_tq.o \ + iscsi_target_auth.o \ + iscsi_target_datain_values.o \ + iscsi_target_device.o \ + iscsi_target_erl0.o \ + iscsi_target_erl1.o \ + iscsi_target_erl2.o \ + iscsi_target_login.o \ + iscsi_target_nego.o \ + iscsi_target_nodeattrib.o \ + iscsi_target_tmr.o \ + iscsi_target_tpg.o \ + iscsi_target_util.o \ + iscsi_target.o \ + iscsi_target_configfs.o \ + iscsi_target_stat.o + +obj-$(CONFIG_ISCSI_TARGET) += iscsi_target_mod.o diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c new file mode 100644 index 000000000000..14c81c4265bd --- /dev/null +++ b/drivers/target/iscsi/iscsi_target.c @@ -0,0 +1,4559 @@ +/******************************************************************************* + * This file contains main functions related to the iSCSI Target Core Driver. + * + * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ ******************************************************************************/ + +#include <linux/string.h> +#include <linux/kthread.h> +#include <linux/crypto.h> +#include <linux/completion.h> +#include <asm/unaligned.h> +#include <scsi/scsi_device.h> +#include <scsi/iscsi_proto.h> +#include <target/target_core_base.h> +#include <target/target_core_tmr.h> +#include <target/target_core_transport.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_parameters.h" +#include "iscsi_target_seq_pdu_list.h" +#include "iscsi_target_tq.h" +#include "iscsi_target_configfs.h" +#include "iscsi_target_datain_values.h" +#include "iscsi_target_erl0.h" +#include "iscsi_target_erl1.h" +#include "iscsi_target_erl2.h" +#include "iscsi_target_login.h" +#include "iscsi_target_tmr.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_util.h" +#include "iscsi_target.h" +#include "iscsi_target_device.h" +#include "iscsi_target_stat.h" + +static LIST_HEAD(g_tiqn_list); +static LIST_HEAD(g_np_list); +static DEFINE_SPINLOCK(tiqn_lock); +static DEFINE_SPINLOCK(np_lock); + +static struct idr tiqn_idr; +struct idr sess_idr; +struct mutex auth_id_lock; +spinlock_t sess_idr_lock; + +struct iscsit_global *iscsit_global; + +struct kmem_cache *lio_cmd_cache; +struct kmem_cache *lio_qr_cache; +struct kmem_cache *lio_dr_cache; +struct kmem_cache *lio_ooo_cache; +struct kmem_cache *lio_r2t_cache; + +static int iscsit_handle_immediate_data(struct iscsi_cmd *, + unsigned char *buf, u32); +static int iscsit_logout_post_handler(struct iscsi_cmd *, struct iscsi_conn *); + +struct iscsi_tiqn *iscsit_get_tiqn_for_login(unsigned char *buf) +{ + struct iscsi_tiqn *tiqn = NULL; + + spin_lock(&tiqn_lock); + list_for_each_entry(tiqn, &g_tiqn_list, tiqn_list) { + if (!strcmp(tiqn->tiqn, buf)) { + + spin_lock(&tiqn->tiqn_state_lock); + if (tiqn->tiqn_state == TIQN_STATE_ACTIVE) { + tiqn->tiqn_access_count++; + spin_unlock(&tiqn->tiqn_state_lock); + spin_unlock(&tiqn_lock); + return tiqn; + } + 
spin_unlock(&tiqn->tiqn_state_lock); + } + } + spin_unlock(&tiqn_lock); + + return NULL; +} + +static int iscsit_set_tiqn_shutdown(struct iscsi_tiqn *tiqn) +{ + spin_lock(&tiqn->tiqn_state_lock); + if (tiqn->tiqn_state == TIQN_STATE_ACTIVE) { + tiqn->tiqn_state = TIQN_STATE_SHUTDOWN; + spin_unlock(&tiqn->tiqn_state_lock); + return 0; + } + spin_unlock(&tiqn->tiqn_state_lock); + + return -1; +} + +void iscsit_put_tiqn_for_login(struct iscsi_tiqn *tiqn) +{ + spin_lock(&tiqn->tiqn_state_lock); + tiqn->tiqn_access_count--; + spin_unlock(&tiqn->tiqn_state_lock); +} + +/* + * Note that IQN formatting is expected to be done in userspace, and + * no explicit IQN format checks are done here. + */ +struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf) +{ + struct iscsi_tiqn *tiqn = NULL; + int ret; + + if (strlen(buf) > ISCSI_IQN_LEN) { + pr_err("Target IQN exceeds %d bytes\n", + ISCSI_IQN_LEN); + return ERR_PTR(-EINVAL); + } + + tiqn = kzalloc(sizeof(struct iscsi_tiqn), GFP_KERNEL); + if (!tiqn) { + pr_err("Unable to allocate struct iscsi_tiqn\n"); + return ERR_PTR(-ENOMEM); + } + + sprintf(tiqn->tiqn, "%s", buf); + INIT_LIST_HEAD(&tiqn->tiqn_list); + INIT_LIST_HEAD(&tiqn->tiqn_tpg_list); + spin_lock_init(&tiqn->tiqn_state_lock); + spin_lock_init(&tiqn->tiqn_tpg_lock); + spin_lock_init(&tiqn->sess_err_stats.lock); + spin_lock_init(&tiqn->login_stats.lock); + spin_lock_init(&tiqn->logout_stats.lock); + + if (!idr_pre_get(&tiqn_idr, GFP_KERNEL)) { + pr_err("idr_pre_get() for tiqn_idr failed\n"); + kfree(tiqn); + return ERR_PTR(-ENOMEM); + } + tiqn->tiqn_state = TIQN_STATE_ACTIVE; + + spin_lock(&tiqn_lock); + ret = idr_get_new(&tiqn_idr, NULL, &tiqn->tiqn_index); + if (ret < 0) { + pr_err("idr_get_new() failed for tiqn->tiqn_index\n"); + spin_unlock(&tiqn_lock); + kfree(tiqn); + return ERR_PTR(ret); + } + list_add_tail(&tiqn->tiqn_list, &g_tiqn_list); + spin_unlock(&tiqn_lock); + + pr_debug("CORE[0] - Added iSCSI Target IQN: %s\n", tiqn->tiqn); + + return tiqn; + +} + +static
void iscsit_wait_for_tiqn(struct iscsi_tiqn *tiqn) +{ + /* + * Wait for accesses to said struct iscsi_tiqn to end. + */ + spin_lock(&tiqn->tiqn_state_lock); + while (tiqn->tiqn_access_count != 0) { + spin_unlock(&tiqn->tiqn_state_lock); + msleep(10); + spin_lock(&tiqn->tiqn_state_lock); + } + spin_unlock(&tiqn->tiqn_state_lock); +} + +void iscsit_del_tiqn(struct iscsi_tiqn *tiqn) +{ + /* + * iscsit_set_tiqn_shutdown sets tiqn->tiqn_state = TIQN_STATE_SHUTDOWN + * while holding tiqn->tiqn_state_lock. This means that all subsequent + * attempts to access this struct iscsi_tiqn will fail from both transport + * fabric and control code paths. + */ + if (iscsit_set_tiqn_shutdown(tiqn) < 0) { + pr_err("iscsit_set_tiqn_shutdown() failed\n"); + return; + } + + iscsit_wait_for_tiqn(tiqn); + + spin_lock(&tiqn_lock); + list_del(&tiqn->tiqn_list); + idr_remove(&tiqn_idr, tiqn->tiqn_index); + spin_unlock(&tiqn_lock); + + pr_debug("CORE[0] - Deleted iSCSI Target IQN: %s\n", + tiqn->tiqn); + kfree(tiqn); +} + +int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg) +{ + int ret; + /* + * Determine if the network portal is accepting storage traffic. + */ + spin_lock_bh(&np->np_thread_lock); + if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) { + spin_unlock_bh(&np->np_thread_lock); + return -1; + } + if (np->np_login_tpg) { + pr_err("np->np_login_tpg() is not NULL!\n"); + spin_unlock_bh(&np->np_thread_lock); + return -1; + } + spin_unlock_bh(&np->np_thread_lock); + /* + * Determine if the portal group is accepting storage traffic. + */ + spin_lock_bh(&tpg->tpg_state_lock); + if (tpg->tpg_state != TPG_STATE_ACTIVE) { + spin_unlock_bh(&tpg->tpg_state_lock); + return -1; + } + spin_unlock_bh(&tpg->tpg_state_lock); + + /* + * Here we serialize access across the TIQN+TPG Tuple. 
+ */ + ret = mutex_lock_interruptible(&tpg->np_login_lock); + if ((ret != 0) || signal_pending(current)) + return -1; + + spin_lock_bh(&np->np_thread_lock); + np->np_login_tpg = tpg; + spin_unlock_bh(&np->np_thread_lock); + + return 0; +} + +int iscsit_deaccess_np(struct iscsi_np *np, struct iscsi_portal_group *tpg) +{ + struct iscsi_tiqn *tiqn = tpg->tpg_tiqn; + + spin_lock_bh(&np->np_thread_lock); + np->np_login_tpg = NULL; + spin_unlock_bh(&np->np_thread_lock); + + mutex_unlock(&tpg->np_login_lock); + + if (tiqn) + iscsit_put_tiqn_for_login(tiqn); + + return 0; +} + +static struct iscsi_np *iscsit_get_np( + struct __kernel_sockaddr_storage *sockaddr, + int network_transport) +{ + struct sockaddr_in *sock_in, *sock_in_e; + struct sockaddr_in6 *sock_in6, *sock_in6_e; + struct iscsi_np *np; + int ip_match = 0; + u16 port; + + spin_lock_bh(&np_lock); + list_for_each_entry(np, &g_np_list, np_list) { + spin_lock(&np->np_thread_lock); + if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) { + spin_unlock(&np->np_thread_lock); + continue; + } + + if (sockaddr->ss_family == AF_INET6) { + sock_in6 = (struct sockaddr_in6 *)sockaddr; + sock_in6_e = (struct sockaddr_in6 *)&np->np_sockaddr; + + if (!memcmp((void *)&sock_in6->sin6_addr.in6_u, + (void *)&sock_in6_e->sin6_addr.in6_u, + sizeof(struct in6_addr))) + ip_match = 1; + + port = ntohs(sock_in6->sin6_port); + } else { + sock_in = (struct sockaddr_in *)sockaddr; + sock_in_e = (struct sockaddr_in *)&np->np_sockaddr; + + if (sock_in->sin_addr.s_addr == + sock_in_e->sin_addr.s_addr) + ip_match = 1; + + port = ntohs(sock_in->sin_port); + } + + if ((ip_match == 1) && (np->np_port == port) && + (np->np_network_transport == network_transport)) { + /* + * Increment the np_exports reference count now to + * prevent iscsit_del_np() below from being called + * while iscsi_tpg_add_network_portal() is called. 
+ */ + np->np_exports++; + spin_unlock(&np->np_thread_lock); + spin_unlock_bh(&np_lock); + return np; + } + spin_unlock(&np->np_thread_lock); + } + spin_unlock_bh(&np_lock); + + return NULL; +} + +struct iscsi_np *iscsit_add_np( + struct __kernel_sockaddr_storage *sockaddr, + char *ip_str, + int network_transport) +{ + struct sockaddr_in *sock_in; + struct sockaddr_in6 *sock_in6; + struct iscsi_np *np; + int ret; + /* + * Locate the existing struct iscsi_np if already active.. + */ + np = iscsit_get_np(sockaddr, network_transport); + if (np) + return np; + + np = kzalloc(sizeof(struct iscsi_np), GFP_KERNEL); + if (!np) { + pr_err("Unable to allocate memory for struct iscsi_np\n"); + return ERR_PTR(-ENOMEM); + } + + np->np_flags |= NPF_IP_NETWORK; + if (sockaddr->ss_family == AF_INET6) { + sock_in6 = (struct sockaddr_in6 *)sockaddr; + snprintf(np->np_ip, IPV6_ADDRESS_SPACE, "%s", ip_str); + np->np_port = ntohs(sock_in6->sin6_port); + } else { + sock_in = (struct sockaddr_in *)sockaddr; + sprintf(np->np_ip, "%s", ip_str); + np->np_port = ntohs(sock_in->sin_port); + } + + np->np_network_transport = network_transport; + spin_lock_init(&np->np_thread_lock); + init_completion(&np->np_restart_comp); + INIT_LIST_HEAD(&np->np_list); + + ret = iscsi_target_setup_login_socket(np, sockaddr); + if (ret != 0) { + kfree(np); + return ERR_PTR(ret); + } + + np->np_thread = kthread_run(iscsi_target_login_thread, np, "iscsi_np"); + if (IS_ERR(np->np_thread)) { + pr_err("Unable to create kthread: iscsi_np\n"); + ret = PTR_ERR(np->np_thread); + kfree(np); + return ERR_PTR(ret); + } + /* + * Increment the np_exports reference count now to prevent + * iscsit_del_np() below from being run while a new call to + * iscsi_tpg_add_network_portal() for a matching iscsi_np is + * active. We don't need to hold np->np_thread_lock at this + * point because iscsi_np has not been added to g_np_list yet. 
+ */ + np->np_exports = 1; + + spin_lock_bh(&np_lock); + list_add_tail(&np->np_list, &g_np_list); + spin_unlock_bh(&np_lock); + + pr_debug("CORE[0] - Added Network Portal: %s:%hu on %s\n", + np->np_ip, np->np_port, (np->np_network_transport == ISCSI_TCP) ? + "TCP" : "SCTP"); + + return np; +} + +int iscsit_reset_np_thread( + struct iscsi_np *np, + struct iscsi_tpg_np *tpg_np, + struct iscsi_portal_group *tpg) +{ + spin_lock_bh(&np->np_thread_lock); + if (tpg && tpg_np) { + /* + * The reset operation need only be performed when the + * passed struct iscsi_portal_group has a login in progress + * to one of the network portals. + */ + if (tpg_np->tpg_np->np_login_tpg != tpg) { + spin_unlock_bh(&np->np_thread_lock); + return 0; + } + } + if (np->np_thread_state == ISCSI_NP_THREAD_INACTIVE) { + spin_unlock_bh(&np->np_thread_lock); + return 0; + } + np->np_thread_state = ISCSI_NP_THREAD_RESET; + + if (np->np_thread) { + spin_unlock_bh(&np->np_thread_lock); + send_sig(SIGINT, np->np_thread, 1); + wait_for_completion(&np->np_restart_comp); + spin_lock_bh(&np->np_thread_lock); + } + spin_unlock_bh(&np->np_thread_lock); + + return 0; +} + +int iscsit_del_np_comm(struct iscsi_np *np) +{ + if (!np->np_socket) + return 0; + + /* + * Some network transports allocate their own struct sock->file, + * see if we need to free any additional allocated resources. + */ + if (np->np_flags & NPF_SCTP_STRUCT_FILE) { + kfree(np->np_socket->file); + np->np_socket->file = NULL; + } + + sock_release(np->np_socket); + return 0; +} + +int iscsit_del_np(struct iscsi_np *np) +{ + spin_lock_bh(&np->np_thread_lock); + np->np_exports--; + if (np->np_exports) { + spin_unlock_bh(&np->np_thread_lock); + return 0; + } + np->np_thread_state = ISCSI_NP_THREAD_SHUTDOWN; + spin_unlock_bh(&np->np_thread_lock); + + if (np->np_thread) { + /* + * We need to send the signal to wakeup Linux/Net + * which may be sleeping in sock_accept().. 
+ */ + send_sig(SIGINT, np->np_thread, 1); + kthread_stop(np->np_thread); + } + iscsit_del_np_comm(np); + + spin_lock_bh(&np_lock); + list_del(&np->np_list); + spin_unlock_bh(&np_lock); + + pr_debug("CORE[0] - Removed Network Portal: %s:%hu on %s\n", + np->np_ip, np->np_port, (np->np_network_transport == ISCSI_TCP) ? + "TCP" : "SCTP"); + + kfree(np); + return 0; +} + +static int __init iscsi_target_init_module(void) +{ + int ret = 0; + + pr_debug("iSCSI-Target "ISCSIT_VERSION"\n"); + + iscsit_global = kzalloc(sizeof(struct iscsit_global), GFP_KERNEL); + if (!iscsit_global) { + pr_err("Unable to allocate memory for iscsit_global\n"); + return -1; + } + mutex_init(&auth_id_lock); + spin_lock_init(&sess_idr_lock); + idr_init(&tiqn_idr); + idr_init(&sess_idr); + + ret = iscsi_target_register_configfs(); + if (ret < 0) + goto out; + + ret = iscsi_thread_set_init(); + if (ret < 0) + goto configfs_out; + + if (iscsi_allocate_thread_sets(TARGET_THREAD_SET_COUNT) != + TARGET_THREAD_SET_COUNT) { + pr_err("iscsi_allocate_thread_sets() returned" + " unexpected value!\n"); + goto ts_out1; + } + + lio_cmd_cache = kmem_cache_create("lio_cmd_cache", + sizeof(struct iscsi_cmd), __alignof__(struct iscsi_cmd), + 0, NULL); + if (!lio_cmd_cache) { + pr_err("Unable to kmem_cache_create() for" + " lio_cmd_cache\n"); + goto ts_out2; + } + + lio_qr_cache = kmem_cache_create("lio_qr_cache", + sizeof(struct iscsi_queue_req), + __alignof__(struct iscsi_queue_req), 0, NULL); + if (!lio_qr_cache) { + pr_err("nable to kmem_cache_create() for" + " lio_qr_cache\n"); + goto cmd_out; + } + + lio_dr_cache = kmem_cache_create("lio_dr_cache", + sizeof(struct iscsi_datain_req), + __alignof__(struct iscsi_datain_req), 0, NULL); + if (!lio_dr_cache) { + pr_err("Unable to kmem_cache_create() for" + " lio_dr_cache\n"); + goto qr_out; + } + + lio_ooo_cache = kmem_cache_create("lio_ooo_cache", + sizeof(struct iscsi_ooo_cmdsn), + __alignof__(struct iscsi_ooo_cmdsn), 0, NULL); + if (!lio_ooo_cache) { + 
pr_err("Unable to kmem_cache_create() for" + " lio_ooo_cache\n"); + goto dr_out; + } + + lio_r2t_cache = kmem_cache_create("lio_r2t_cache", + sizeof(struct iscsi_r2t), __alignof__(struct iscsi_r2t), + 0, NULL); + if (!lio_r2t_cache) { + pr_err("Unable to kmem_cache_create() for" + " lio_r2t_cache\n"); + goto ooo_out; + } + + if (iscsit_load_discovery_tpg() < 0) + goto r2t_out; + + return ret; +r2t_out: + kmem_cache_destroy(lio_r2t_cache); +ooo_out: + kmem_cache_destroy(lio_ooo_cache); +dr_out: + kmem_cache_destroy(lio_dr_cache); +qr_out: + kmem_cache_destroy(lio_qr_cache); +cmd_out: + kmem_cache_destroy(lio_cmd_cache); +ts_out2: + iscsi_deallocate_thread_sets(); +ts_out1: + iscsi_thread_set_free(); +configfs_out: + iscsi_target_deregister_configfs(); +out: + kfree(iscsit_global); + return -ENOMEM; +} + +static void __exit iscsi_target_cleanup_module(void) +{ + iscsi_deallocate_thread_sets(); + iscsi_thread_set_free(); + iscsit_release_discovery_tpg(); + kmem_cache_destroy(lio_cmd_cache); + kmem_cache_destroy(lio_qr_cache); + kmem_cache_destroy(lio_dr_cache); + kmem_cache_destroy(lio_ooo_cache); + kmem_cache_destroy(lio_r2t_cache); + + iscsi_target_deregister_configfs(); + + kfree(iscsit_global); +} + +int iscsit_add_reject( + u8 reason, + int fail_conn, + unsigned char *buf, + struct iscsi_conn *conn) +{ + struct iscsi_cmd *cmd; + struct iscsi_reject *hdr; + int ret; + + cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + if (!cmd) + return -1; + + cmd->iscsi_opcode = ISCSI_OP_REJECT; + if (fail_conn) + cmd->cmd_flags |= ICF_REJECT_FAIL_CONN; + + hdr = (struct iscsi_reject *) cmd->pdu; + hdr->reason = reason; + + cmd->buf_ptr = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL); + if (!cmd->buf_ptr) { + pr_err("Unable to allocate memory for cmd->buf_ptr\n"); + iscsit_release_cmd(cmd); + return -1; + } + memcpy(cmd->buf_ptr, buf, ISCSI_HDR_LEN); + + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + + cmd->i_state = 
ISTATE_SEND_REJECT; + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + + ret = wait_for_completion_interruptible(&cmd->reject_comp); + if (ret != 0) + return -1; + + return (!fail_conn) ? 0 : -1; +} + +int iscsit_add_reject_from_cmd( + u8 reason, + int fail_conn, + int add_to_conn, + unsigned char *buf, + struct iscsi_cmd *cmd) +{ + struct iscsi_conn *conn; + struct iscsi_reject *hdr; + int ret; + + if (!cmd->conn) { + pr_err("cmd->conn is NULL for ITT: 0x%08x\n", + cmd->init_task_tag); + return -1; + } + conn = cmd->conn; + + cmd->iscsi_opcode = ISCSI_OP_REJECT; + if (fail_conn) + cmd->cmd_flags |= ICF_REJECT_FAIL_CONN; + + hdr = (struct iscsi_reject *) cmd->pdu; + hdr->reason = reason; + + cmd->buf_ptr = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL); + if (!cmd->buf_ptr) { + pr_err("Unable to allocate memory for cmd->buf_ptr\n"); + iscsit_release_cmd(cmd); + return -1; + } + memcpy(cmd->buf_ptr, buf, ISCSI_HDR_LEN); + + if (add_to_conn) { + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + } + + cmd->i_state = ISTATE_SEND_REJECT; + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + + ret = wait_for_completion_interruptible(&cmd->reject_comp); + if (ret != 0) + return -1; + + return (!fail_conn) ? 0 : -1; +} + +/* + * Map some portion of the allocated scatterlist to an iovec, suitable for + * kernel sockets to copy data in/out. This handles both pages and slab-allocated + * buffers, since we have been tricky and mapped t_mem_sg to the buffer in + * either case (see iscsit_alloc_buffs) + */ +static int iscsit_map_iovec( + struct iscsi_cmd *cmd, + struct kvec *iov, + u32 data_offset, + u32 data_length) +{ + u32 i = 0; + struct scatterlist *sg; + unsigned int page_off; + + /* + * We have a private mapping of the allocated pages in t_mem_sg. + * At this point, we also know each contains a page. 
+ */ + sg = &cmd->t_mem_sg[data_offset / PAGE_SIZE]; + page_off = (data_offset % PAGE_SIZE); + + cmd->first_data_sg = sg; + cmd->first_data_sg_off = page_off; + + while (data_length) { + u32 cur_len = min_t(u32, data_length, sg->length - page_off); + + iov[i].iov_base = kmap(sg_page(sg)) + sg->offset + page_off; + iov[i].iov_len = cur_len; + + data_length -= cur_len; + page_off = 0; + sg = sg_next(sg); + i++; + } + + cmd->kmapped_nents = i; + + return i; +} + +static void iscsit_unmap_iovec(struct iscsi_cmd *cmd) +{ + u32 i; + struct scatterlist *sg; + + sg = cmd->first_data_sg; + + for (i = 0; i < cmd->kmapped_nents; i++) + kunmap(sg_page(&sg[i])); +} + +static void iscsit_ack_from_expstatsn(struct iscsi_conn *conn, u32 exp_statsn) +{ + struct iscsi_cmd *cmd; + + conn->exp_statsn = exp_statsn; + + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) { + spin_lock(&cmd->istate_lock); + if ((cmd->i_state == ISTATE_SENT_STATUS) && + (cmd->stat_sn < exp_statsn)) { + cmd->i_state = ISTATE_REMOVE; + spin_unlock(&cmd->istate_lock); + iscsit_add_cmd_to_immediate_queue(cmd, conn, + cmd->i_state); + continue; + } + spin_unlock(&cmd->istate_lock); + } + spin_unlock_bh(&conn->cmd_lock); +} + +static int iscsit_allocate_iovecs(struct iscsi_cmd *cmd) +{ + u32 iov_count = (cmd->se_cmd.t_data_nents == 0) ? 
1 : + cmd->se_cmd.t_data_nents; + + iov_count += TRANSPORT_IOV_DATA_BUFFER; + + cmd->iov_data = kzalloc(iov_count * sizeof(struct kvec), GFP_KERNEL); + if (!cmd->iov_data) { + pr_err("Unable to allocate cmd->iov_data\n"); + return -ENOMEM; + } + + cmd->orig_iov_data_count = iov_count; + return 0; +} + +static int iscsit_alloc_buffs(struct iscsi_cmd *cmd) +{ + struct scatterlist *sgl; + u32 length = cmd->se_cmd.data_length; + int nents = DIV_ROUND_UP(length, PAGE_SIZE); + int i = 0, ret; + /* + * If no SCSI payload is present, allocate the default iovecs used for + * iSCSI PDU Header + */ + if (!length) + return iscsit_allocate_iovecs(cmd); + + sgl = kzalloc(sizeof(*sgl) * nents, GFP_KERNEL); + if (!sgl) + return -ENOMEM; + + sg_init_table(sgl, nents); + + while (length) { + int buf_size = min_t(int, length, PAGE_SIZE); + struct page *page; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + goto page_alloc_failed; + + sg_set_page(&sgl[i], page, buf_size, 0); + + length -= buf_size; + i++; + } + + cmd->t_mem_sg = sgl; + cmd->t_mem_sg_nents = nents; + + /* BIDI ops not supported */ + + /* Tell the core about our preallocated memory */ + transport_generic_map_mem_to_cmd(&cmd->se_cmd, sgl, nents, NULL, 0); + /* + * Allocate iovecs for SCSI payload after transport_generic_map_mem_to_cmd + * so that cmd->se_cmd.t_tasks_se_num has been set. 
+ */ + ret = iscsit_allocate_iovecs(cmd); + if (ret < 0) + goto page_alloc_failed; + + return 0; + +page_alloc_failed: + while (i >= 0) { + __free_page(sg_page(&sgl[i])); + i--; + } + kfree(cmd->t_mem_sg); + cmd->t_mem_sg = NULL; + return -ENOMEM; +} + +static int iscsit_handle_scsi_cmd( + struct iscsi_conn *conn, + unsigned char *buf) +{ + int data_direction, cmdsn_ret = 0, immed_ret, ret, transport_ret; + int dump_immediate_data = 0, send_check_condition = 0, payload_length; + struct iscsi_cmd *cmd = NULL; + struct iscsi_scsi_req *hdr; + + spin_lock_bh(&conn->sess->session_stats_lock); + conn->sess->cmd_pdus++; + if (conn->sess->se_sess->se_node_acl) { + spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock); + conn->sess->se_sess->se_node_acl->num_cmds++; + spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock); + } + spin_unlock_bh(&conn->sess->session_stats_lock); + + hdr = (struct iscsi_scsi_req *) buf; + payload_length = ntoh24(hdr->dlength); + hdr->itt = be32_to_cpu(hdr->itt); + hdr->data_length = be32_to_cpu(hdr->data_length); + hdr->cmdsn = be32_to_cpu(hdr->cmdsn); + hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); + + /* FIXME; Add checks for AdditionalHeaderSegment */ + + if (!(hdr->flags & ISCSI_FLAG_CMD_WRITE) && + !(hdr->flags & ISCSI_FLAG_CMD_FINAL)) { + pr_err("ISCSI_FLAG_CMD_WRITE & ISCSI_FLAG_CMD_FINAL" + " not set. Bad iSCSI Initiator.\n"); + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, + buf, conn); + } + + if (((hdr->flags & ISCSI_FLAG_CMD_READ) || + (hdr->flags & ISCSI_FLAG_CMD_WRITE)) && !hdr->data_length) { + /* + * Vmware ESX v3.0 uses a modified Cisco Initiator (v3.4.2) + * that adds support for RESERVE/RELEASE. There is a bug + * add with this new functionality that sets R/W bits when + * neither CDB carries any READ or WRITE datapayloads. 
+ */ + if ((hdr->cdb[0] == 0x16) || (hdr->cdb[0] == 0x17)) { + hdr->flags &= ~ISCSI_FLAG_CMD_READ; + hdr->flags &= ~ISCSI_FLAG_CMD_WRITE; + goto done; + } + + pr_err("ISCSI_FLAG_CMD_READ or ISCSI_FLAG_CMD_WRITE" + " set when Expected Data Transfer Length is 0 for" + " CDB: 0x%02x. Bad iSCSI Initiator.\n", hdr->cdb[0]); + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, + buf, conn); + } +done: + + if (!(hdr->flags & ISCSI_FLAG_CMD_READ) && + !(hdr->flags & ISCSI_FLAG_CMD_WRITE) && (hdr->data_length != 0)) { + pr_err("ISCSI_FLAG_CMD_READ and/or ISCSI_FLAG_CMD_WRITE" + " MUST be set if Expected Data Transfer Length is not 0." + " Bad iSCSI Initiator\n"); + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, + buf, conn); + } + + if ((hdr->flags & ISCSI_FLAG_CMD_READ) && + (hdr->flags & ISCSI_FLAG_CMD_WRITE)) { + pr_err("Bidirectional operations not supported!\n"); + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, + buf, conn); + } + + if (hdr->opcode & ISCSI_OP_IMMEDIATE) { + pr_err("Illegally set Immediate Bit in iSCSI Initiator" + " Scsi Command PDU.\n"); + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, + buf, conn); + } + + if (payload_length && !conn->sess->sess_ops->ImmediateData) { + pr_err("ImmediateData=No but DataSegmentLength=%u," + " protocol error.\n", payload_length); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + if ((hdr->data_length == payload_length) && + (!(hdr->flags & ISCSI_FLAG_CMD_FINAL))) { + pr_err("Expected Data Transfer Length and Length of" + " Immediate Data are the same, but ISCSI_FLAG_CMD_FINAL" + " bit is not set protocol error\n"); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + if (payload_length > hdr->data_length) { + pr_err("DataSegmentLength: %u is greater than" + " EDTL: %u, protocol error.\n", payload_length, + hdr->data_length); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + if 
(payload_length > conn->conn_ops->MaxRecvDataSegmentLength) { + pr_err("DataSegmentLength: %u is greater than" + " MaxRecvDataSegmentLength: %u, protocol error.\n", + payload_length, conn->conn_ops->MaxRecvDataSegmentLength); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + if (payload_length > conn->sess->sess_ops->FirstBurstLength) { + pr_err("DataSegmentLength: %u is greater than" + " FirstBurstLength: %u, protocol error.\n", + payload_length, conn->sess->sess_ops->FirstBurstLength); + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, + buf, conn); + } + + data_direction = (hdr->flags & ISCSI_FLAG_CMD_WRITE) ? DMA_TO_DEVICE : + (hdr->flags & ISCSI_FLAG_CMD_READ) ? DMA_FROM_DEVICE : + DMA_NONE; + + cmd = iscsit_allocate_se_cmd(conn, hdr->data_length, data_direction, + (hdr->flags & ISCSI_FLAG_CMD_ATTR_MASK)); + if (!cmd) + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1, + buf, conn); + + pr_debug("Got SCSI Command, ITT: 0x%08x, CmdSN: 0x%08x," + " ExpXferLen: %u, Length: %u, CID: %hu\n", hdr->itt, + hdr->cmdsn, hdr->data_length, payload_length, conn->cid); + + cmd->iscsi_opcode = ISCSI_OP_SCSI_CMD; + cmd->i_state = ISTATE_NEW_CMD; + cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0); + cmd->immediate_data = (payload_length) ? 1 : 0; + cmd->unsolicited_data = ((!(hdr->flags & ISCSI_FLAG_CMD_FINAL) && + (hdr->flags & ISCSI_FLAG_CMD_WRITE)) ? 
1 : 0); + if (cmd->unsolicited_data) + cmd->cmd_flags |= ICF_NON_IMMEDIATE_UNSOLICITED_DATA; + + conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt; + if (hdr->flags & ISCSI_FLAG_CMD_READ) { + spin_lock_bh(&conn->sess->ttt_lock); + cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++; + if (cmd->targ_xfer_tag == 0xFFFFFFFF) + cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++; + spin_unlock_bh(&conn->sess->ttt_lock); + } else if (hdr->flags & ISCSI_FLAG_CMD_WRITE) + cmd->targ_xfer_tag = 0xFFFFFFFF; + cmd->cmd_sn = hdr->cmdsn; + cmd->exp_stat_sn = hdr->exp_statsn; + cmd->first_burst_len = payload_length; + + if (cmd->data_direction == DMA_FROM_DEVICE) { + struct iscsi_datain_req *dr; + + dr = iscsit_allocate_datain_req(); + if (!dr) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, 1, buf, cmd); + + iscsit_attach_datain_req(cmd, dr); + } + + /* + * The CDB is going to an se_device_t. + */ + ret = iscsit_get_lun_for_cmd(cmd, hdr->cdb, + get_unaligned_le64(&hdr->lun)); + if (ret < 0) { + if (cmd->se_cmd.scsi_sense_reason == TCM_NON_EXISTENT_LUN) { + pr_debug("Responding to non-acl'ed," + " non-existent or non-exported iSCSI LUN:" + " 0x%016Lx\n", get_unaligned_le64(&hdr->lun)); + } + if (ret == PYX_TRANSPORT_OUT_OF_MEMORY_RESOURCES) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, 1, buf, cmd); + + send_check_condition = 1; + goto attach_cmd; + } + /* + * The Initiator Node has access to the LUN (the addressing method + * is handled inside of iscsit_get_lun_for_cmd()). Now it's time to + * allocate 1->N transport tasks (depending on sector count and + * maximum request size the physical HBA(s) can handle. + */ + transport_ret = transport_generic_allocate_tasks(&cmd->se_cmd, hdr->cdb); + if (transport_ret == -ENOMEM) { + return iscsit_add_reject_from_cmd( + ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, 1, buf, cmd); + } else if (transport_ret == -EINVAL) { + /* + * Unsupported SAM Opcode. 
CHECK_CONDITION will be sent + * in iscsit_execute_cmd() during the CmdSN OOO Execution + * Mechinism. + */ + send_check_condition = 1; + } else { + if (iscsit_decide_list_to_build(cmd, payload_length) < 0) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, 1, buf, cmd); + } + +attach_cmd: + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + /* + * Check if we need to delay processing because of ALUA + * Active/NonOptimized primary access state.. + */ + core_alua_check_nonop_delay(&cmd->se_cmd); + /* + * Allocate and setup SGL used with transport_generic_map_mem_to_cmd(). + * also call iscsit_allocate_iovecs() + */ + ret = iscsit_alloc_buffs(cmd); + if (ret < 0) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, 1, buf, cmd); + /* + * Check the CmdSN against ExpCmdSN/MaxCmdSN here if + * the Immediate Bit is not set, and no Immediate + * Data is attached. + * + * A PDU/CmdSN carrying Immediate Data can only + * be processed after the DataCRC has passed. + * If the DataCRC fails, the CmdSN MUST NOT + * be acknowledged. (See below) + */ + if (!cmd->immediate_data) { + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + } + + iscsit_ack_from_expstatsn(conn, hdr->exp_statsn); + + /* + * If no Immediate Data is attached, it's OK to return now. + */ + if (!cmd->immediate_data) { + if (send_check_condition) + return 0; + + if (cmd->unsolicited_data) { + iscsit_set_dataout_sequence_values(cmd); + + spin_lock_bh(&cmd->dataout_timeout_lock); + iscsit_start_dataout_timer(cmd, cmd->conn); + spin_unlock_bh(&cmd->dataout_timeout_lock); + } + + return 0; + } + + /* + * Early CHECK_CONDITIONs never make it to the transport processing + * thread. 
They are processed in CmdSN order by + * iscsit_check_received_cmdsn() below. + */ + if (send_check_condition) { + immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION; + dump_immediate_data = 1; + goto after_immediate_data; + } + /* + * Call directly into transport_generic_new_cmd() to perform + * the backend memory allocation. + */ + ret = transport_generic_new_cmd(&cmd->se_cmd); + if ((ret < 0) || (cmd->se_cmd.se_cmd_flags & SCF_SE_CMD_FAILED)) { + immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION; + dump_immediate_data = 1; + goto after_immediate_data; + } + + immed_ret = iscsit_handle_immediate_data(cmd, buf, payload_length); +after_immediate_data: + if (immed_ret == IMMEDIATE_DATA_NORMAL_OPERATION) { + /* + * A PDU/CmdSN carrying Immediate Data passed + * DataCRC, check against ExpCmdSN/MaxCmdSN if + * Immediate Bit is not set. + */ + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + /* + * Special case for Unsupported SAM WRITE Opcodes + * and ImmediateData=Yes. + */ + if (dump_immediate_data) { + if (iscsit_dump_data_payload(conn, payload_length, 1) < 0) + return -1; + } else if (cmd->unsolicited_data) { + iscsit_set_dataout_sequence_values(cmd); + + spin_lock_bh(&cmd->dataout_timeout_lock); + iscsit_start_dataout_timer(cmd, cmd->conn); + spin_unlock_bh(&cmd->dataout_timeout_lock); + } + + if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + + } else if (immed_ret == IMMEDIATE_DATA_ERL1_CRC_FAILURE) { + /* + * Immediate Data failed DataCRC and ERL>=1, + * silently drop this PDU and let the initiator + * plug the CmdSN gap. + * + * FIXME: Send Unsolicited NOPIN with reserved + * TTT here to help the initiator figure out + * the missing CmdSN, although they should be + * intelligent enough to determine the missing + * CmdSN and issue a retry to plug the sequence. 
+ */ + cmd->i_state = ISTATE_REMOVE; + iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state); + } else /* immed_ret == IMMEDIATE_DATA_CANNOT_RECOVER */ + return -1; + + return 0; +} + +static u32 iscsit_do_crypto_hash_sg( + struct hash_desc *hash, + struct iscsi_cmd *cmd, + u32 data_offset, + u32 data_length, + u32 padding, + u8 *pad_bytes) +{ + u32 data_crc; + u32 i; + struct scatterlist *sg; + unsigned int page_off; + + crypto_hash_init(hash); + + sg = cmd->first_data_sg; + page_off = cmd->first_data_sg_off; + + i = 0; + while (data_length) { + u32 cur_len = min_t(u32, data_length, (sg[i].length - page_off)); + + crypto_hash_update(hash, &sg[i], cur_len); + + data_length -= cur_len; + page_off = 0; + i++; + } + + if (padding) { + struct scatterlist pad_sg; + + sg_init_one(&pad_sg, pad_bytes, padding); + crypto_hash_update(hash, &pad_sg, padding); + } + crypto_hash_final(hash, (u8 *) &data_crc); + + return data_crc; +} + +static void iscsit_do_crypto_hash_buf( + struct hash_desc *hash, + unsigned char *buf, + u32 payload_length, + u32 padding, + u8 *pad_bytes, + u8 *data_crc) +{ + struct scatterlist sg; + + crypto_hash_init(hash); + + sg_init_one(&sg, (u8 *)buf, payload_length); + crypto_hash_update(hash, &sg, payload_length); + + if (padding) { + sg_init_one(&sg, pad_bytes, padding); + crypto_hash_update(hash, &sg, padding); + } + crypto_hash_final(hash, data_crc); +} + +static int iscsit_handle_data_out(struct iscsi_conn *conn, unsigned char *buf) +{ + int iov_ret, ooo_cmdsn = 0, ret; + u8 data_crc_failed = 0; + u32 checksum, iov_count = 0, padding = 0, rx_got = 0; + u32 rx_size = 0, payload_length; + struct iscsi_cmd *cmd = NULL; + struct se_cmd *se_cmd; + struct iscsi_data *hdr; + struct kvec *iov; + unsigned long flags; + + hdr = (struct iscsi_data *) buf; + payload_length = ntoh24(hdr->dlength); + hdr->itt = be32_to_cpu(hdr->itt); + hdr->ttt = be32_to_cpu(hdr->ttt); + hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); + hdr->datasn = 
be32_to_cpu(hdr->datasn); + hdr->offset = be32_to_cpu(hdr->offset); + + if (!payload_length) { + pr_err("DataOUT payload is ZERO, protocol error.\n"); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + /* iSCSI write */ + spin_lock_bh(&conn->sess->session_stats_lock); + conn->sess->rx_data_octets += payload_length; + if (conn->sess->se_sess->se_node_acl) { + spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock); + conn->sess->se_sess->se_node_acl->write_bytes += payload_length; + spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock); + } + spin_unlock_bh(&conn->sess->session_stats_lock); + + if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) { + pr_err("DataSegmentLength: %u is greater than" + " MaxRecvDataSegmentLength: %u\n", payload_length, + conn->conn_ops->MaxRecvDataSegmentLength); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + cmd = iscsit_find_cmd_from_itt_or_dump(conn, hdr->itt, + payload_length); + if (!cmd) + return 0; + + pr_debug("Got DataOut ITT: 0x%08x, TTT: 0x%08x," + " DataSN: 0x%08x, Offset: %u, Length: %u, CID: %hu\n", + hdr->itt, hdr->ttt, hdr->datasn, hdr->offset, + payload_length, conn->cid); + + if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) { + pr_err("Command ITT: 0x%08x received DataOUT after" + " last DataOUT received, dumping payload\n", + cmd->init_task_tag); + return iscsit_dump_data_payload(conn, payload_length, 1); + } + + if (cmd->data_direction != DMA_TO_DEVICE) { + pr_err("Command ITT: 0x%08x received DataOUT for a" + " NON-WRITE command.\n", cmd->init_task_tag); + return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + } + se_cmd = &cmd->se_cmd; + iscsit_mod_dataout_timer(cmd); + + if ((hdr->offset + payload_length) > cmd->data_length) { + pr_err("DataOut Offset: %u, Length %u greater than" + " iSCSI Command EDTL %u, protocol error.\n", + hdr->offset, payload_length, cmd->data_length); + return 
iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, + 1, 0, buf, cmd); + } + + if (cmd->unsolicited_data) { + int dump_unsolicited_data = 0; + + if (conn->sess->sess_ops->InitialR2T) { + pr_err("Received unexpected unsolicited data" + " while InitialR2T=Yes, protocol error.\n"); + transport_send_check_condition_and_sense(&cmd->se_cmd, + TCM_UNEXPECTED_UNSOLICITED_DATA, 0); + return -1; + } + /* + * Special case for dealing with Unsolicited DataOUT + * and Unsupported SAM WRITE Opcodes and SE resource allocation + * failures; + */ + + /* Something's amiss if we're not in WRITE_PENDING state... */ + spin_lock_irqsave(&se_cmd->t_state_lock, flags); + WARN_ON(se_cmd->t_state != TRANSPORT_WRITE_PENDING); + spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); + + spin_lock_irqsave(&se_cmd->t_state_lock, flags); + if (!(se_cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE) || + (se_cmd->se_cmd_flags & SCF_SE_CMD_FAILED)) + dump_unsolicited_data = 1; + spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); + + if (dump_unsolicited_data) { + /* + * Check if a delayed TASK_ABORTED status needs to + * be sent now if the ISCSI_FLAG_CMD_FINAL has been + * received with the unsolicitied data out. + */ + if (hdr->flags & ISCSI_FLAG_CMD_FINAL) + iscsit_stop_dataout_timer(cmd); + + transport_check_aborted_status(se_cmd, + (hdr->flags & ISCSI_FLAG_CMD_FINAL)); + return iscsit_dump_data_payload(conn, payload_length, 1); + } + } else { + /* + * For the normal solicited data path: + * + * Check for a delayed TASK_ABORTED status and dump any + * incoming data out payload if one exists. Also, when the + * ISCSI_FLAG_CMD_FINAL is set to denote the end of the current + * data out sequence, we decrement outstanding_r2ts. Once + * outstanding_r2ts reaches zero, go ahead and send the delayed + * TASK_ABORTED status. 
+ */ + if (atomic_read(&se_cmd->t_transport_aborted) != 0) { + if (hdr->flags & ISCSI_FLAG_CMD_FINAL) + if (--cmd->outstanding_r2ts < 1) { + iscsit_stop_dataout_timer(cmd); + transport_check_aborted_status( + se_cmd, 1); + } + + return iscsit_dump_data_payload(conn, payload_length, 1); + } + } + /* + * Preform DataSN, DataSequenceInOrder, DataPDUInOrder, and + * within-command recovery checks before receiving the payload. + */ + ret = iscsit_check_pre_dataout(cmd, buf); + if (ret == DATAOUT_WITHIN_COMMAND_RECOVERY) + return 0; + else if (ret == DATAOUT_CANNOT_RECOVER) + return -1; + + rx_size += payload_length; + iov = &cmd->iov_data[0]; + + iov_ret = iscsit_map_iovec(cmd, iov, hdr->offset, payload_length); + if (iov_ret < 0) + return -1; + + iov_count += iov_ret; + + padding = ((-payload_length) & 3); + if (padding != 0) { + iov[iov_count].iov_base = cmd->pad_bytes; + iov[iov_count++].iov_len = padding; + rx_size += padding; + pr_debug("Receiving %u padding bytes.\n", padding); + } + + if (conn->conn_ops->DataDigest) { + iov[iov_count].iov_base = &checksum; + iov[iov_count++].iov_len = ISCSI_CRC_LEN; + rx_size += ISCSI_CRC_LEN; + } + + rx_got = rx_data(conn, &cmd->iov_data[0], iov_count, rx_size); + + iscsit_unmap_iovec(cmd); + + if (rx_got != rx_size) + return -1; + + if (conn->conn_ops->DataDigest) { + u32 data_crc; + + data_crc = iscsit_do_crypto_hash_sg(&conn->conn_rx_hash, cmd, + hdr->offset, payload_length, padding, + cmd->pad_bytes); + + if (checksum != data_crc) { + pr_err("ITT: 0x%08x, Offset: %u, Length: %u," + " DataSN: 0x%08x, CRC32C DataDigest 0x%08x" + " does not match computed 0x%08x\n", + hdr->itt, hdr->offset, payload_length, + hdr->datasn, checksum, data_crc); + data_crc_failed = 1; + } else { + pr_debug("Got CRC32C DataDigest 0x%08x for" + " %u bytes of Data Out\n", checksum, + payload_length); + } + } + /* + * Increment post receive data and CRC values or perform + * within-command recovery. 
+ */ + ret = iscsit_check_post_dataout(cmd, buf, data_crc_failed); + if ((ret == DATAOUT_NORMAL) || (ret == DATAOUT_WITHIN_COMMAND_RECOVERY)) + return 0; + else if (ret == DATAOUT_SEND_R2T) { + iscsit_set_dataout_sequence_values(cmd); + iscsit_build_r2ts_for_cmd(cmd, conn, 0); + } else if (ret == DATAOUT_SEND_TO_TRANSPORT) { + /* + * Handle extra special case for out of order + * Unsolicited Data Out. + */ + spin_lock_bh(&cmd->istate_lock); + ooo_cmdsn = (cmd->cmd_flags & ICF_OOO_CMDSN); + cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT; + cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT; + spin_unlock_bh(&cmd->istate_lock); + + iscsit_stop_dataout_timer(cmd); + return (!ooo_cmdsn) ? transport_generic_handle_data( + &cmd->se_cmd) : 0; + } else /* DATAOUT_CANNOT_RECOVER */ + return -1; + + return 0; +} + +static int iscsit_handle_nop_out( + struct iscsi_conn *conn, + unsigned char *buf) +{ + unsigned char *ping_data = NULL; + int cmdsn_ret, niov = 0, ret = 0, rx_got, rx_size; + u32 checksum, data_crc, padding = 0, payload_length; + u64 lun; + struct iscsi_cmd *cmd = NULL; + struct kvec *iov = NULL; + struct iscsi_nopout *hdr; + + hdr = (struct iscsi_nopout *) buf; + payload_length = ntoh24(hdr->dlength); + lun = get_unaligned_le64(&hdr->lun); + hdr->itt = be32_to_cpu(hdr->itt); + hdr->ttt = be32_to_cpu(hdr->ttt); + hdr->cmdsn = be32_to_cpu(hdr->cmdsn); + hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); + + if ((hdr->itt == 0xFFFFFFFF) && !(hdr->opcode & ISCSI_OP_IMMEDIATE)) { + pr_err("NOPOUT ITT is reserved, but Immediate Bit is" + " not set, protocol error.\n"); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) { + pr_err("NOPOUT Ping Data DataSegmentLength: %u is" + " greater than MaxRecvDataSegmentLength: %u, protocol" + " error.\n", payload_length, + conn->conn_ops->MaxRecvDataSegmentLength); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + 
pr_debug("Got NOPOUT Ping %s ITT: 0x%08x, TTT: 0x%09x," + " CmdSN: 0x%08x, ExpStatSN: 0x%08x, Length: %u\n", + (hdr->itt == 0xFFFFFFFF) ? "Response" : "Request", + hdr->itt, hdr->ttt, hdr->cmdsn, hdr->exp_statsn, + payload_length); + /* + * This is not a response to a Unsolicited NopIN, which means + * it can either be a NOPOUT ping request (with a valid ITT), + * or a NOPOUT not requesting a NOPIN (with a reserved ITT). + * Either way, make sure we allocate an struct iscsi_cmd, as both + * can contain ping data. + */ + if (hdr->ttt == 0xFFFFFFFF) { + cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + if (!cmd) + return iscsit_add_reject( + ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, buf, conn); + + cmd->iscsi_opcode = ISCSI_OP_NOOP_OUT; + cmd->i_state = ISTATE_SEND_NOPIN; + cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? + 1 : 0); + conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt; + cmd->targ_xfer_tag = 0xFFFFFFFF; + cmd->cmd_sn = hdr->cmdsn; + cmd->exp_stat_sn = hdr->exp_statsn; + cmd->data_direction = DMA_NONE; + } + + if (payload_length && (hdr->ttt == 0xFFFFFFFF)) { + rx_size = payload_length; + ping_data = kzalloc(payload_length + 1, GFP_KERNEL); + if (!ping_data) { + pr_err("Unable to allocate memory for" + " NOPOUT ping data.\n"); + ret = -1; + goto out; + } + + iov = &cmd->iov_misc[0]; + iov[niov].iov_base = ping_data; + iov[niov++].iov_len = payload_length; + + padding = ((-payload_length) & 3); + if (padding != 0) { + pr_debug("Receiving %u additional bytes" + " for padding.\n", padding); + iov[niov].iov_base = &cmd->pad_bytes; + iov[niov++].iov_len = padding; + rx_size += padding; + } + if (conn->conn_ops->DataDigest) { + iov[niov].iov_base = &checksum; + iov[niov++].iov_len = ISCSI_CRC_LEN; + rx_size += ISCSI_CRC_LEN; + } + + rx_got = rx_data(conn, &cmd->iov_misc[0], niov, rx_size); + if (rx_got != rx_size) { + ret = -1; + goto out; + } + + if (conn->conn_ops->DataDigest) { + iscsit_do_crypto_hash_buf(&conn->conn_rx_hash, + ping_data, 
payload_length, + padding, cmd->pad_bytes, + (u8 *)&data_crc); + + if (checksum != data_crc) { + pr_err("Ping data CRC32C DataDigest" + " 0x%08x does not match computed 0x%08x\n", + checksum, data_crc); + if (!conn->sess->sess_ops->ErrorRecoveryLevel) { + pr_err("Unable to recover from" + " NOPOUT Ping DataCRC failure while in" + " ERL=0.\n"); + ret = -1; + goto out; + } else { + /* + * Silently drop this PDU and let the + * initiator plug the CmdSN gap. + */ + pr_debug("Dropping NOPOUT" + " Command CmdSN: 0x%08x due to" + " DataCRC error.\n", hdr->cmdsn); + ret = 0; + goto out; + } + } else { + pr_debug("Got CRC32C DataDigest" + " 0x%08x for %u bytes of ping data.\n", + checksum, payload_length); + } + } + + ping_data[payload_length] = '\0'; + /* + * Attach ping data to struct iscsi_cmd->buf_ptr. + */ + cmd->buf_ptr = (void *)ping_data; + cmd->buf_ptr_size = payload_length; + + pr_debug("Got %u bytes of NOPOUT ping" + " data.\n", payload_length); + pr_debug("Ping Data: \"%s\"\n", ping_data); + } + + if (hdr->itt != 0xFFFFFFFF) { + if (!cmd) { + pr_err("Checking CmdSN for NOPOUT," + " but cmd is NULL!\n"); + return -1; + } + /* + * Initiator is expecting a NopIN ping reply, + */ + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + + iscsit_ack_from_expstatsn(conn, hdr->exp_statsn); + + if (hdr->opcode & ISCSI_OP_IMMEDIATE) { + iscsit_add_cmd_to_response_queue(cmd, conn, + cmd->i_state); + return 0; + } + + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { + ret = 0; + goto ping_out; + } + if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + + return 0; + } + + if (hdr->ttt != 0xFFFFFFFF) { + /* + * This was a response to a unsolicited NOPIN ping. 
+ */ + cmd = iscsit_find_cmd_from_ttt(conn, hdr->ttt); + if (!cmd) + return -1; + + iscsit_stop_nopin_response_timer(conn); + + cmd->i_state = ISTATE_REMOVE; + iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state); + iscsit_start_nopin_timer(conn); + } else { + /* + * Initiator is not expecting a NOPIN is response. + * Just ignore for now. + * + * iSCSI v19-91 10.18 + * "A NOP-OUT may also be used to confirm a changed + * ExpStatSN if another PDU will not be available + * for a long time." + */ + ret = 0; + goto out; + } + + return 0; +out: + if (cmd) + iscsit_release_cmd(cmd); +ping_out: + kfree(ping_data); + return ret; +} + +static int iscsit_handle_task_mgt_cmd( + struct iscsi_conn *conn, + unsigned char *buf) +{ + struct iscsi_cmd *cmd; + struct se_tmr_req *se_tmr; + struct iscsi_tmr_req *tmr_req; + struct iscsi_tm *hdr; + u32 payload_length; + int out_of_order_cmdsn = 0; + int ret; + u8 function; + + hdr = (struct iscsi_tm *) buf; + payload_length = ntoh24(hdr->dlength); + hdr->itt = be32_to_cpu(hdr->itt); + hdr->rtt = be32_to_cpu(hdr->rtt); + hdr->cmdsn = be32_to_cpu(hdr->cmdsn); + hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); + hdr->refcmdsn = be32_to_cpu(hdr->refcmdsn); + hdr->exp_datasn = be32_to_cpu(hdr->exp_datasn); + hdr->flags &= ~ISCSI_FLAG_CMD_FINAL; + function = hdr->flags; + + pr_debug("Got Task Management Request ITT: 0x%08x, CmdSN:" + " 0x%08x, Function: 0x%02x, RefTaskTag: 0x%08x, RefCmdSN:" + " 0x%08x, CID: %hu\n", hdr->itt, hdr->cmdsn, function, + hdr->rtt, hdr->refcmdsn, conn->cid); + + if ((function != ISCSI_TM_FUNC_ABORT_TASK) && + ((function != ISCSI_TM_FUNC_TASK_REASSIGN) && + (hdr->rtt != ISCSI_RESERVED_TAG))) { + pr_err("RefTaskTag should be set to 0xFFFFFFFF.\n"); + hdr->rtt = ISCSI_RESERVED_TAG; + } + + if ((function == ISCSI_TM_FUNC_TASK_REASSIGN) && + !(hdr->opcode & ISCSI_OP_IMMEDIATE)) { + pr_err("Task Management Request TASK_REASSIGN not" + " issued as immediate command, bad iSCSI Initiator" + "implementation\n"); + 
return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + if ((function != ISCSI_TM_FUNC_ABORT_TASK) && + (hdr->refcmdsn != ISCSI_RESERVED_TAG)) + hdr->refcmdsn = ISCSI_RESERVED_TAG; + + cmd = iscsit_allocate_se_cmd_for_tmr(conn, function); + if (!cmd) + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, buf, conn); + + cmd->iscsi_opcode = ISCSI_OP_SCSI_TMFUNC; + cmd->i_state = ISTATE_SEND_TASKMGTRSP; + cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0); + cmd->init_task_tag = hdr->itt; + cmd->targ_xfer_tag = 0xFFFFFFFF; + cmd->cmd_sn = hdr->cmdsn; + cmd->exp_stat_sn = hdr->exp_statsn; + se_tmr = cmd->se_cmd.se_tmr_req; + tmr_req = cmd->tmr_req; + /* + * Locate the struct se_lun for all TMRs not related to ERL=2 TASK_REASSIGN + */ + if (function != ISCSI_TM_FUNC_TASK_REASSIGN) { + ret = iscsit_get_lun_for_tmr(cmd, + get_unaligned_le64(&hdr->lun)); + if (ret < 0) { + cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + se_tmr->response = ISCSI_TMF_RSP_NO_LUN; + goto attach; + } + } + + switch (function) { + case ISCSI_TM_FUNC_ABORT_TASK: + se_tmr->response = iscsit_tmr_abort_task(cmd, buf); + if (se_tmr->response != ISCSI_TMF_RSP_COMPLETE) { + cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + goto attach; + } + break; + case ISCSI_TM_FUNC_ABORT_TASK_SET: + case ISCSI_TM_FUNC_CLEAR_ACA: + case ISCSI_TM_FUNC_CLEAR_TASK_SET: + case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET: + break; + case ISCSI_TM_FUNC_TARGET_WARM_RESET: + if (iscsit_tmr_task_warm_reset(conn, tmr_req, buf) < 0) { + cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + se_tmr->response = ISCSI_TMF_RSP_AUTH_FAILED; + goto attach; + } + break; + case ISCSI_TM_FUNC_TARGET_COLD_RESET: + if (iscsit_tmr_task_cold_reset(conn, tmr_req, buf) < 0) { + cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + se_tmr->response = ISCSI_TMF_RSP_AUTH_FAILED; + goto attach; + } + break; + case ISCSI_TM_FUNC_TASK_REASSIGN: + se_tmr->response = iscsit_tmr_task_reassign(cmd, buf); 
+ /* + * Perform sanity checks on the ExpDataSN only if the + * TASK_REASSIGN was successful. + */ + if (se_tmr->response != ISCSI_TMF_RSP_COMPLETE) + break; + + if (iscsit_check_task_reassign_expdatasn(tmr_req, conn) < 0) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_BOOKMARK_INVALID, 1, 1, + buf, cmd); + break; + default: + pr_err("Unknown TMR function: 0x%02x, protocol" + " error.\n", function); + cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + se_tmr->response = ISCSI_TMF_RSP_NOT_SUPPORTED; + goto attach; + } + + if ((function != ISCSI_TM_FUNC_TASK_REASSIGN) && + (se_tmr->response == ISCSI_TMF_RSP_COMPLETE)) + se_tmr->call_transport = 1; +attach: + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + + if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) { + int cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + if (cmdsn_ret == CMDSN_HIGHER_THAN_EXP) + out_of_order_cmdsn = 1; + else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { + return 0; + } else { /* (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) */ + return iscsit_add_reject_from_cmd( + ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + } + } + iscsit_ack_from_expstatsn(conn, hdr->exp_statsn); + + if (out_of_order_cmdsn) + return 0; + /* + * Found the referenced task, send to transport for processing. + */ + if (se_tmr->call_transport) + return transport_generic_handle_tmr(&cmd->se_cmd); + + /* + * Could not find the referenced LUN, task, or Task Management + * command not authorized or supported. Change state and + * let the tx_thread send the response. + * + * For connection recovery, this is also the default action for + * TMR TASK_REASSIGN. 
+ */ + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + return 0; +} + +/* #warning FIXME: Support Text Command parameters besides SendTargets */ +static int iscsit_handle_text_cmd( + struct iscsi_conn *conn, + unsigned char *buf) +{ + char *text_ptr, *text_in; + int cmdsn_ret, niov = 0, rx_got, rx_size; + u32 checksum = 0, data_crc = 0, payload_length; + u32 padding = 0, text_length = 0; + struct iscsi_cmd *cmd; + struct kvec iov[3]; + struct iscsi_text *hdr; + + hdr = (struct iscsi_text *) buf; + payload_length = ntoh24(hdr->dlength); + hdr->itt = be32_to_cpu(hdr->itt); + hdr->ttt = be32_to_cpu(hdr->ttt); + hdr->cmdsn = be32_to_cpu(hdr->cmdsn); + hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); + + if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) { + pr_err("Unable to accept text parameter length: %u" + "greater than MaxRecvDataSegmentLength %u.\n", + payload_length, conn->conn_ops->MaxRecvDataSegmentLength); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + pr_debug("Got Text Request: ITT: 0x%08x, CmdSN: 0x%08x," + " ExpStatSN: 0x%08x, Length: %u\n", hdr->itt, hdr->cmdsn, + hdr->exp_statsn, payload_length); + + rx_size = text_length = payload_length; + if (text_length) { + text_in = kzalloc(text_length, GFP_KERNEL); + if (!text_in) { + pr_err("Unable to allocate memory for" + " incoming text parameters\n"); + return -1; + } + + memset(iov, 0, 3 * sizeof(struct kvec)); + iov[niov].iov_base = text_in; + iov[niov++].iov_len = text_length; + + padding = ((-payload_length) & 3); + if (padding != 0) { + iov[niov].iov_base = cmd->pad_bytes; + iov[niov++].iov_len = padding; + rx_size += padding; + pr_debug("Receiving %u additional bytes" + " for padding.\n", padding); + } + if (conn->conn_ops->DataDigest) { + iov[niov].iov_base = &checksum; + iov[niov++].iov_len = ISCSI_CRC_LEN; + rx_size += ISCSI_CRC_LEN; + } + + rx_got = rx_data(conn, &iov[0], niov, rx_size); + if (rx_got != rx_size) { + kfree(text_in); + return 
-1; + } + + if (conn->conn_ops->DataDigest) { + iscsit_do_crypto_hash_buf(&conn->conn_rx_hash, + text_in, text_length, + padding, cmd->pad_bytes, + (u8 *)&data_crc); + + if (checksum != data_crc) { + pr_err("Text data CRC32C DataDigest" + " 0x%08x does not match computed" + " 0x%08x\n", checksum, data_crc); + if (!conn->sess->sess_ops->ErrorRecoveryLevel) { + pr_err("Unable to recover from" + " Text Data digest failure while in" + " ERL=0.\n"); + kfree(text_in); + return -1; + } else { + /* + * Silently drop this PDU and let the + * initiator plug the CmdSN gap. + */ + pr_debug("Dropping Text" + " Command CmdSN: 0x%08x due to" + " DataCRC error.\n", hdr->cmdsn); + kfree(text_in); + return 0; + } + } else { + pr_debug("Got CRC32C DataDigest" + " 0x%08x for %u bytes of text data.\n", + checksum, text_length); + } + } + text_in[text_length - 1] = '\0'; + pr_debug("Successfully read %d bytes of text" + " data.\n", text_length); + + if (strncmp("SendTargets", text_in, 11) != 0) { + pr_err("Received Text Data that is not" + " SendTargets, cannot continue.\n"); + kfree(text_in); + return -1; + } + text_ptr = strchr(text_in, '='); + if (!text_ptr) { + pr_err("No \"=\" separator found in Text Data," + " cannot continue.\n"); + kfree(text_in); + return -1; + } + if (strncmp("=All", text_ptr, 4) != 0) { + pr_err("Unable to locate All value for" + " SendTargets key, cannot continue.\n"); + kfree(text_in); + return -1; + } +/*#warning Support SendTargets=(iSCSI Target Name/Nothing) values. */ + kfree(text_in); + } + + cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + if (!cmd) + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, buf, conn); + + cmd->iscsi_opcode = ISCSI_OP_TEXT; + cmd->i_state = ISTATE_SEND_TEXTRSP; + cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 
1 : 0); + conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt; + cmd->targ_xfer_tag = 0xFFFFFFFF; + cmd->cmd_sn = hdr->cmdsn; + cmd->exp_stat_sn = hdr->exp_statsn; + cmd->data_direction = DMA_NONE; + + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + + iscsit_ack_from_expstatsn(conn, hdr->exp_statsn); + + if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) { + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + + return 0; + } + + return iscsit_execute_cmd(cmd, 0); +} + +int iscsit_logout_closesession(struct iscsi_cmd *cmd, struct iscsi_conn *conn) +{ + struct iscsi_conn *conn_p; + struct iscsi_session *sess = conn->sess; + + pr_debug("Received logout request CLOSESESSION on CID: %hu" + " for SID: %u.\n", conn->cid, conn->sess->sid); + + atomic_set(&sess->session_logout, 1); + atomic_set(&conn->conn_logout_remove, 1); + conn->conn_logout_reason = ISCSI_LOGOUT_REASON_CLOSE_SESSION; + + iscsit_inc_conn_usage_count(conn); + iscsit_inc_session_usage_count(sess); + + spin_lock_bh(&sess->conn_lock); + list_for_each_entry(conn_p, &sess->sess_conn_list, conn_list) { + if (conn_p->conn_state != TARG_CONN_STATE_LOGGED_IN) + continue; + + pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n"); + conn_p->conn_state = TARG_CONN_STATE_IN_LOGOUT; + } + spin_unlock_bh(&sess->conn_lock); + + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + + return 0; +} + +int iscsit_logout_closeconnection(struct iscsi_cmd *cmd, struct iscsi_conn *conn) +{ + struct iscsi_conn *l_conn; + struct iscsi_session *sess = conn->sess; + + pr_debug("Received logout request CLOSECONNECTION for CID:" + " %hu on CID: %hu.\n", cmd->logout_cid, conn->cid); + + /* + * A Logout Request with a CLOSECONNECTION reason code for a CID + * can arrive on a connection with a differing CID. 
+ */ + if (conn->cid == cmd->logout_cid) { + spin_lock_bh(&conn->state_lock); + pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n"); + conn->conn_state = TARG_CONN_STATE_IN_LOGOUT; + + atomic_set(&conn->conn_logout_remove, 1); + conn->conn_logout_reason = ISCSI_LOGOUT_REASON_CLOSE_CONNECTION; + iscsit_inc_conn_usage_count(conn); + + spin_unlock_bh(&conn->state_lock); + } else { + /* + * Handle all different cid CLOSECONNECTION requests in + * iscsit_logout_post_handler_diffcid() as to give enough + * time for any non immediate command's CmdSN to be + * acknowledged on the connection in question. + * + * Here we simply make sure the CID is still around. + */ + l_conn = iscsit_get_conn_from_cid(sess, + cmd->logout_cid); + if (!l_conn) { + cmd->logout_response = ISCSI_LOGOUT_CID_NOT_FOUND; + iscsit_add_cmd_to_response_queue(cmd, conn, + cmd->i_state); + return 0; + } + + iscsit_dec_conn_usage_count(l_conn); + } + + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + + return 0; +} + +int iscsit_logout_removeconnforrecovery(struct iscsi_cmd *cmd, struct iscsi_conn *conn) +{ + struct iscsi_session *sess = conn->sess; + + pr_debug("Received explicit REMOVECONNFORRECOVERY logout for" + " CID: %hu on CID: %hu.\n", cmd->logout_cid, conn->cid); + + if (sess->sess_ops->ErrorRecoveryLevel != 2) { + pr_err("Received Logout Request REMOVECONNFORRECOVERY" + " while ERL!=2.\n"); + cmd->logout_response = ISCSI_LOGOUT_RECOVERY_UNSUPPORTED; + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + return 0; + } + + if (conn->cid == cmd->logout_cid) { + pr_err("Received Logout Request REMOVECONNFORRECOVERY" + " with CID: %hu on CID: %hu, implementation error.\n", + cmd->logout_cid, conn->cid); + cmd->logout_response = ISCSI_LOGOUT_CLEANUP_FAILED; + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + return 0; + } + + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + + return 0; +} + +static int iscsit_handle_logout_cmd( + struct iscsi_conn *conn, + 
unsigned char *buf) +{ + int cmdsn_ret, logout_remove = 0; + u8 reason_code = 0; + struct iscsi_cmd *cmd; + struct iscsi_logout *hdr; + struct iscsi_tiqn *tiqn = iscsit_snmp_get_tiqn(conn); + + hdr = (struct iscsi_logout *) buf; + reason_code = (hdr->flags & 0x7f); + hdr->itt = be32_to_cpu(hdr->itt); + hdr->cid = be16_to_cpu(hdr->cid); + hdr->cmdsn = be32_to_cpu(hdr->cmdsn); + hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); + + if (tiqn) { + spin_lock(&tiqn->logout_stats.lock); + if (reason_code == ISCSI_LOGOUT_REASON_CLOSE_SESSION) + tiqn->logout_stats.normal_logouts++; + else + tiqn->logout_stats.abnormal_logouts++; + spin_unlock(&tiqn->logout_stats.lock); + } + + pr_debug("Got Logout Request ITT: 0x%08x CmdSN: 0x%08x" + " ExpStatSN: 0x%08x Reason: 0x%02x CID: %hu on CID: %hu\n", + hdr->itt, hdr->cmdsn, hdr->exp_statsn, reason_code, + hdr->cid, conn->cid); + + if (conn->conn_state != TARG_CONN_STATE_LOGGED_IN) { + pr_err("Received logout request on connection that" + " is not in logged in state, ignoring request.\n"); + return 0; + } + + cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + if (!cmd) + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1, + buf, conn); + + cmd->iscsi_opcode = ISCSI_OP_LOGOUT; + cmd->i_state = ISTATE_SEND_LOGOUTRSP; + cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0); + conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt; + cmd->targ_xfer_tag = 0xFFFFFFFF; + cmd->cmd_sn = hdr->cmdsn; + cmd->exp_stat_sn = hdr->exp_statsn; + cmd->logout_cid = hdr->cid; + cmd->logout_reason = reason_code; + cmd->data_direction = DMA_NONE; + + /* + * We need to sleep in these cases (by returning 1) until the Logout + * Response gets sent in the tx thread. 
+ */ + if ((reason_code == ISCSI_LOGOUT_REASON_CLOSE_SESSION) || + ((reason_code == ISCSI_LOGOUT_REASON_CLOSE_CONNECTION) && + (hdr->cid == conn->cid))) + logout_remove = 1; + + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + + if (reason_code != ISCSI_LOGOUT_REASON_RECOVERY) + iscsit_ack_from_expstatsn(conn, hdr->exp_statsn); + + /* + * Immediate commands are executed, well, immediately. + * Non-Immediate Logout Commands are executed in CmdSN order. + */ + if (hdr->opcode & ISCSI_OP_IMMEDIATE) { + int ret = iscsit_execute_cmd(cmd, 0); + + if (ret < 0) + return ret; + } else { + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { + logout_remove = 0; + } else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) { + return iscsit_add_reject_from_cmd( + ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + } + } + + return logout_remove; +} + +static int iscsit_handle_snack( + struct iscsi_conn *conn, + unsigned char *buf) +{ + u32 unpacked_lun; + u64 lun; + struct iscsi_snack *hdr; + + hdr = (struct iscsi_snack *) buf; + hdr->flags &= ~ISCSI_FLAG_CMD_FINAL; + lun = get_unaligned_le64(&hdr->lun); + unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun); + hdr->itt = be32_to_cpu(hdr->itt); + hdr->ttt = be32_to_cpu(hdr->ttt); + hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); + hdr->begrun = be32_to_cpu(hdr->begrun); + hdr->runlength = be32_to_cpu(hdr->runlength); + + pr_debug("Got ISCSI_INIT_SNACK, ITT: 0x%08x, ExpStatSN:" + " 0x%08x, Type: 0x%02x, BegRun: 0x%08x, RunLength: 0x%08x," + " CID: %hu\n", hdr->itt, hdr->exp_statsn, hdr->flags, + hdr->begrun, hdr->runlength, conn->cid); + + if (!conn->sess->sess_ops->ErrorRecoveryLevel) { + pr_err("Initiator sent SNACK request while in" + " ErrorRecoveryLevel=0.\n"); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + /* + * SNACK_DATA and SNACK_R2T are both 0, so check which function to + * 
call from inside iscsi_send_recovery_datain_or_r2t(). + */ + switch (hdr->flags & ISCSI_FLAG_SNACK_TYPE_MASK) { + case 0: + return iscsit_handle_recovery_datain_or_r2t(conn, buf, + hdr->itt, hdr->ttt, hdr->begrun, hdr->runlength); + return 0; + case ISCSI_FLAG_SNACK_TYPE_STATUS: + return iscsit_handle_status_snack(conn, hdr->itt, hdr->ttt, + hdr->begrun, hdr->runlength); + case ISCSI_FLAG_SNACK_TYPE_DATA_ACK: + return iscsit_handle_data_ack(conn, hdr->ttt, hdr->begrun, + hdr->runlength); + case ISCSI_FLAG_SNACK_TYPE_RDATA: + /* FIXME: Support R-Data SNACK */ + pr_err("R-Data SNACK Not Supported.\n"); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + default: + pr_err("Unknown SNACK type 0x%02x, protocol" + " error.\n", hdr->flags & 0x0f); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + return 0; +} + +static void iscsit_rx_thread_wait_for_tcp(struct iscsi_conn *conn) +{ + if ((conn->sock->sk->sk_shutdown & SEND_SHUTDOWN) || + (conn->sock->sk->sk_shutdown & RCV_SHUTDOWN)) { + wait_for_completion_interruptible_timeout( + &conn->rx_half_close_comp, + ISCSI_RX_THREAD_TCP_TIMEOUT * HZ); + } +} + +static int iscsit_handle_immediate_data( + struct iscsi_cmd *cmd, + unsigned char *buf, + u32 length) +{ + int iov_ret, rx_got = 0, rx_size = 0; + u32 checksum, iov_count = 0, padding = 0; + struct iscsi_conn *conn = cmd->conn; + struct kvec *iov; + + iov_ret = iscsit_map_iovec(cmd, cmd->iov_data, cmd->write_data_done, length); + if (iov_ret < 0) + return IMMEDIATE_DATA_CANNOT_RECOVER; + + rx_size = length; + iov_count = iov_ret; + iov = &cmd->iov_data[0]; + + padding = ((-length) & 3); + if (padding != 0) { + iov[iov_count].iov_base = cmd->pad_bytes; + iov[iov_count++].iov_len = padding; + rx_size += padding; + } + + if (conn->conn_ops->DataDigest) { + iov[iov_count].iov_base = &checksum; + iov[iov_count++].iov_len = ISCSI_CRC_LEN; + rx_size += ISCSI_CRC_LEN; + } + + rx_got = rx_data(conn, &cmd->iov_data[0], iov_count, 
rx_size); + + iscsit_unmap_iovec(cmd); + + if (rx_got != rx_size) { + iscsit_rx_thread_wait_for_tcp(conn); + return IMMEDIATE_DATA_CANNOT_RECOVER; + } + + if (conn->conn_ops->DataDigest) { + u32 data_crc; + + data_crc = iscsit_do_crypto_hash_sg(&conn->conn_rx_hash, cmd, + cmd->write_data_done, length, padding, + cmd->pad_bytes); + + if (checksum != data_crc) { + pr_err("ImmediateData CRC32C DataDigest 0x%08x" + " does not match computed 0x%08x\n", checksum, + data_crc); + + if (!conn->sess->sess_ops->ErrorRecoveryLevel) { + pr_err("Unable to recover from" + " Immediate Data digest failure while" + " in ERL=0.\n"); + iscsit_add_reject_from_cmd( + ISCSI_REASON_DATA_DIGEST_ERROR, + 1, 0, buf, cmd); + return IMMEDIATE_DATA_CANNOT_RECOVER; + } else { + iscsit_add_reject_from_cmd( + ISCSI_REASON_DATA_DIGEST_ERROR, + 0, 0, buf, cmd); + return IMMEDIATE_DATA_ERL1_CRC_FAILURE; + } + } else { + pr_debug("Got CRC32C DataDigest 0x%08x for" + " %u bytes of Immediate Data\n", checksum, + length); + } + } + + cmd->write_data_done += length; + + if (cmd->write_data_done == cmd->data_length) { + spin_lock_bh(&cmd->istate_lock); + cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT; + cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT; + spin_unlock_bh(&cmd->istate_lock); + } + + return IMMEDIATE_DATA_NORMAL_OPERATION; +} + +/* + * Called with sess->conn_lock held. + */ +/* #warning iscsi_build_conn_drop_async_message() only sends out on connections + with active network interface */ +static void iscsit_build_conn_drop_async_message(struct iscsi_conn *conn) +{ + struct iscsi_cmd *cmd; + struct iscsi_conn *conn_p; + + /* + * Only send a Asynchronous Message on connections whos network + * interface is still functional. 
+ */ + list_for_each_entry(conn_p, &conn->sess->sess_conn_list, conn_list) { + if (conn_p->conn_state == TARG_CONN_STATE_LOGGED_IN) { + iscsit_inc_conn_usage_count(conn_p); + break; + } + } + + if (!conn_p) + return; + + cmd = iscsit_allocate_cmd(conn_p, GFP_KERNEL); + if (!cmd) { + iscsit_dec_conn_usage_count(conn_p); + return; + } + + cmd->logout_cid = conn->cid; + cmd->iscsi_opcode = ISCSI_OP_ASYNC_EVENT; + cmd->i_state = ISTATE_SEND_ASYNCMSG; + + spin_lock_bh(&conn_p->cmd_lock); + list_add_tail(&cmd->i_list, &conn_p->conn_cmd_list); + spin_unlock_bh(&conn_p->cmd_lock); + + iscsit_add_cmd_to_response_queue(cmd, conn_p, cmd->i_state); + iscsit_dec_conn_usage_count(conn_p); +} + +static int iscsit_send_conn_drop_async_message( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + struct iscsi_async *hdr; + + cmd->tx_size = ISCSI_HDR_LEN; + cmd->iscsi_opcode = ISCSI_OP_ASYNC_EVENT; + + hdr = (struct iscsi_async *) cmd->pdu; + hdr->opcode = ISCSI_OP_ASYNC_EVENT; + hdr->flags = ISCSI_FLAG_CMD_FINAL; + cmd->init_task_tag = 0xFFFFFFFF; + cmd->targ_xfer_tag = 0xFFFFFFFF; + put_unaligned_be64(0xFFFFFFFFFFFFFFFFULL, &hdr->rsvd4[0]); + cmd->stat_sn = conn->stat_sn++; + hdr->statsn = cpu_to_be32(cmd->stat_sn); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + hdr->async_event = ISCSI_ASYNC_MSG_DROPPING_CONNECTION; + hdr->param1 = cpu_to_be16(cmd->logout_cid); + hdr->param2 = cpu_to_be16(conn->sess->sess_ops->DefaultTime2Wait); + hdr->param3 = cpu_to_be16(conn->sess->sess_ops->DefaultTime2Retain); + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + cmd->tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32C HeaderDigest to" + " Async Message 0x%08x\n", *header_digest); + } + + cmd->iov_misc[0].iov_base = cmd->pdu; + cmd->iov_misc[0].iov_len 
= cmd->tx_size; + cmd->iov_misc_count = 1; + + pr_debug("Sending Connection Dropped Async Message StatSN:" + " 0x%08x, for CID: %hu on CID: %hu\n", cmd->stat_sn, + cmd->logout_cid, conn->cid); + return 0; +} + +static int iscsit_send_data_in( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn, + int *eodr) +{ + int iov_ret = 0, set_statsn = 0; + u32 iov_count = 0, tx_size = 0; + struct iscsi_datain datain; + struct iscsi_datain_req *dr; + struct iscsi_data_rsp *hdr; + struct kvec *iov; + + memset(&datain, 0, sizeof(struct iscsi_datain)); + dr = iscsit_get_datain_values(cmd, &datain); + if (!dr) { + pr_err("iscsit_get_datain_values failed for ITT: 0x%08x\n", + cmd->init_task_tag); + return -1; + } + + /* + * Be paranoid and double check the logic for now. + */ + if ((datain.offset + datain.length) > cmd->data_length) { + pr_err("Command ITT: 0x%08x, datain.offset: %u and" + " datain.length: %u exceeds cmd->data_length: %u\n", + cmd->init_task_tag, datain.offset, datain.length, + cmd->data_length); + return -1; + } + + spin_lock_bh(&conn->sess->session_stats_lock); + conn->sess->tx_data_octets += datain.length; + if (conn->sess->se_sess->se_node_acl) { + spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock); + conn->sess->se_sess->se_node_acl->read_bytes += datain.length; + spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock); + } + spin_unlock_bh(&conn->sess->session_stats_lock); + /* + * Special case for successfully execution w/ both DATAIN + * and Sense Data. 
+ */ + if ((datain.flags & ISCSI_FLAG_DATA_STATUS) && + (cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)) + datain.flags &= ~ISCSI_FLAG_DATA_STATUS; + else { + if ((dr->dr_complete == DATAIN_COMPLETE_NORMAL) || + (dr->dr_complete == DATAIN_COMPLETE_CONNECTION_RECOVERY)) { + iscsit_increment_maxcmdsn(cmd, conn->sess); + cmd->stat_sn = conn->stat_sn++; + set_statsn = 1; + } else if (dr->dr_complete == + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY) + set_statsn = 1; + } + + hdr = (struct iscsi_data_rsp *) cmd->pdu; + memset(hdr, 0, ISCSI_HDR_LEN); + hdr->opcode = ISCSI_OP_SCSI_DATA_IN; + hdr->flags = datain.flags; + if (hdr->flags & ISCSI_FLAG_DATA_STATUS) { + if (cmd->se_cmd.se_cmd_flags & SCF_OVERFLOW_BIT) { + hdr->flags |= ISCSI_FLAG_DATA_OVERFLOW; + hdr->residual_count = cpu_to_be32(cmd->residual_count); + } else if (cmd->se_cmd.se_cmd_flags & SCF_UNDERFLOW_BIT) { + hdr->flags |= ISCSI_FLAG_DATA_UNDERFLOW; + hdr->residual_count = cpu_to_be32(cmd->residual_count); + } + } + hton24(hdr->dlength, datain.length); + if (hdr->flags & ISCSI_FLAG_DATA_ACK) + int_to_scsilun(cmd->se_cmd.orig_fe_lun, + (struct scsi_lun *)&hdr->lun); + else + put_unaligned_le64(0xFFFFFFFFFFFFFFFFULL, &hdr->lun); + + hdr->itt = cpu_to_be32(cmd->init_task_tag); + hdr->ttt = (hdr->flags & ISCSI_FLAG_DATA_ACK) ? + cpu_to_be32(cmd->targ_xfer_tag) : + 0xFFFFFFFF; + hdr->statsn = (set_statsn) ? 
cpu_to_be32(cmd->stat_sn) : + 0xFFFFFFFF; + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + hdr->datasn = cpu_to_be32(datain.data_sn); + hdr->offset = cpu_to_be32(datain.offset); + + iov = &cmd->iov_data[0]; + iov[iov_count].iov_base = cmd->pdu; + iov[iov_count++].iov_len = ISCSI_HDR_LEN; + tx_size += ISCSI_HDR_LEN; + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + iov[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + + pr_debug("Attaching CRC32 HeaderDigest" + " for DataIN PDU 0x%08x\n", *header_digest); + } + + iov_ret = iscsit_map_iovec(cmd, &cmd->iov_data[1], datain.offset, datain.length); + if (iov_ret < 0) + return -1; + + iov_count += iov_ret; + tx_size += datain.length; + + cmd->padding = ((-datain.length) & 3); + if (cmd->padding) { + iov[iov_count].iov_base = cmd->pad_bytes; + iov[iov_count++].iov_len = cmd->padding; + tx_size += cmd->padding; + + pr_debug("Attaching %u padding bytes\n", + cmd->padding); + } + if (conn->conn_ops->DataDigest) { + cmd->data_crc = iscsit_do_crypto_hash_sg(&conn->conn_tx_hash, cmd, + datain.offset, datain.length, cmd->padding, cmd->pad_bytes); + + iov[iov_count].iov_base = &cmd->data_crc; + iov[iov_count++].iov_len = ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + + pr_debug("Attached CRC32C DataDigest %d bytes, crc" + " 0x%08x\n", datain.length+cmd->padding, cmd->data_crc); + } + + cmd->iov_data_count = iov_count; + cmd->tx_size = tx_size; + + pr_debug("Built DataIN ITT: 0x%08x, StatSN: 0x%08x," + " DataSN: 0x%08x, Offset: %u, Length: %u, CID: %hu\n", + cmd->init_task_tag, ntohl(hdr->statsn), ntohl(hdr->datasn), + ntohl(hdr->offset), datain.length, conn->cid); + + if (dr->dr_complete) { + *eodr = (cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ? 
+ 2 : 1; + iscsit_free_datain_req(cmd, dr); + } + + return 0; +} + +static int iscsit_send_logout_response( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + int niov = 0, tx_size; + struct iscsi_conn *logout_conn = NULL; + struct iscsi_conn_recovery *cr = NULL; + struct iscsi_session *sess = conn->sess; + struct kvec *iov; + struct iscsi_logout_rsp *hdr; + /* + * The actual shutting down of Sessions and/or Connections + * for CLOSESESSION and CLOSECONNECTION Logout Requests + * is done in scsi_logout_post_handler(). + */ + switch (cmd->logout_reason) { + case ISCSI_LOGOUT_REASON_CLOSE_SESSION: + pr_debug("iSCSI session logout successful, setting" + " logout response to ISCSI_LOGOUT_SUCCESS.\n"); + cmd->logout_response = ISCSI_LOGOUT_SUCCESS; + break; + case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION: + if (cmd->logout_response == ISCSI_LOGOUT_CID_NOT_FOUND) + break; + /* + * For CLOSECONNECTION logout requests carrying + * a matching logout CID -> local CID, the reference + * for the local CID will have been incremented in + * iscsi_logout_closeconnection(). + * + * For CLOSECONNECTION logout requests carrying + * a different CID than the connection it arrived + * on, the connection responding to cmd->logout_cid + * is stopped in iscsit_logout_post_handler_diffcid(). + */ + + pr_debug("iSCSI CID: %hu logout on CID: %hu" + " successful.\n", cmd->logout_cid, conn->cid); + cmd->logout_response = ISCSI_LOGOUT_SUCCESS; + break; + case ISCSI_LOGOUT_REASON_RECOVERY: + if ((cmd->logout_response == ISCSI_LOGOUT_RECOVERY_UNSUPPORTED) || + (cmd->logout_response == ISCSI_LOGOUT_CLEANUP_FAILED)) + break; + /* + * If the connection is still active from our point of view + * force connection recovery to occur. 
+ */ + logout_conn = iscsit_get_conn_from_cid_rcfr(sess, + cmd->logout_cid); + if ((logout_conn)) { + iscsit_connection_reinstatement_rcfr(logout_conn); + iscsit_dec_conn_usage_count(logout_conn); + } + + cr = iscsit_get_inactive_connection_recovery_entry( + conn->sess, cmd->logout_cid); + if (!cr) { + pr_err("Unable to locate CID: %hu for" + " REMOVECONNFORRECOVERY Logout Request.\n", + cmd->logout_cid); + cmd->logout_response = ISCSI_LOGOUT_CID_NOT_FOUND; + break; + } + + iscsit_discard_cr_cmds_by_expstatsn(cr, cmd->exp_stat_sn); + + pr_debug("iSCSI REMOVECONNFORRECOVERY logout" + " for recovery for CID: %hu on CID: %hu successful.\n", + cmd->logout_cid, conn->cid); + cmd->logout_response = ISCSI_LOGOUT_SUCCESS; + break; + default: + pr_err("Unknown cmd->logout_reason: 0x%02x\n", + cmd->logout_reason); + return -1; + } + + tx_size = ISCSI_HDR_LEN; + hdr = (struct iscsi_logout_rsp *)cmd->pdu; + memset(hdr, 0, ISCSI_HDR_LEN); + hdr->opcode = ISCSI_OP_LOGOUT_RSP; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + hdr->response = cmd->logout_response; + hdr->itt = cpu_to_be32(cmd->init_task_tag); + cmd->stat_sn = conn->stat_sn++; + hdr->statsn = cpu_to_be32(cmd->stat_sn); + + iscsit_increment_maxcmdsn(cmd, conn->sess); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + + iov = &cmd->iov_misc[0]; + iov[niov].iov_base = cmd->pdu; + iov[niov++].iov_len = ISCSI_HDR_LEN; + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + iov[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32C HeaderDigest to" + " Logout Response 0x%08x\n", *header_digest); + } + cmd->iov_misc_count = niov; + cmd->tx_size = tx_size; + + pr_debug("Sending Logout Response ITT: 0x%08x StatSN:" + " 0x%08x Response: 0x%02x CID: %hu on CID: %hu\n", + 
cmd->init_task_tag, cmd->stat_sn, hdr->response, + cmd->logout_cid, conn->cid); + + return 0; +} + +/* + * Unsolicited NOPIN, either requesting a response or not. + */ +static int iscsit_send_unsolicited_nopin( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn, + int want_response) +{ + int tx_size = ISCSI_HDR_LEN; + struct iscsi_nopin *hdr; + + hdr = (struct iscsi_nopin *) cmd->pdu; + memset(hdr, 0, ISCSI_HDR_LEN); + hdr->opcode = ISCSI_OP_NOOP_IN; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + hdr->itt = cpu_to_be32(cmd->init_task_tag); + hdr->ttt = cpu_to_be32(cmd->targ_xfer_tag); + cmd->stat_sn = conn->stat_sn; + hdr->statsn = cpu_to_be32(cmd->stat_sn); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32C HeaderDigest to" + " NopIN 0x%08x\n", *header_digest); + } + + cmd->iov_misc[0].iov_base = cmd->pdu; + cmd->iov_misc[0].iov_len = tx_size; + cmd->iov_misc_count = 1; + cmd->tx_size = tx_size; + + pr_debug("Sending Unsolicited NOPIN TTT: 0x%08x StatSN:" + " 0x%08x CID: %hu\n", hdr->ttt, cmd->stat_sn, conn->cid); + + return 0; +} + +static int iscsit_send_nopin_response( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + int niov = 0, tx_size; + u32 padding = 0; + struct kvec *iov; + struct iscsi_nopin *hdr; + + tx_size = ISCSI_HDR_LEN; + hdr = (struct iscsi_nopin *) cmd->pdu; + memset(hdr, 0, ISCSI_HDR_LEN); + hdr->opcode = ISCSI_OP_NOOP_IN; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + hton24(hdr->dlength, cmd->buf_ptr_size); + put_unaligned_le64(0xFFFFFFFFFFFFFFFFULL, &hdr->lun); + hdr->itt = cpu_to_be32(cmd->init_task_tag); + hdr->ttt = cpu_to_be32(cmd->targ_xfer_tag); + cmd->stat_sn = conn->stat_sn++; + hdr->statsn = 
cpu_to_be32(cmd->stat_sn); + + iscsit_increment_maxcmdsn(cmd, conn->sess); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + + iov = &cmd->iov_misc[0]; + iov[niov].iov_base = cmd->pdu; + iov[niov++].iov_len = ISCSI_HDR_LEN; + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + iov[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32C HeaderDigest" + " to NopIn 0x%08x\n", *header_digest); + } + + /* + * NOPOUT Ping Data is attached to struct iscsi_cmd->buf_ptr. + * NOPOUT DataSegmentLength is at struct iscsi_cmd->buf_ptr_size. + */ + if (cmd->buf_ptr_size) { + iov[niov].iov_base = cmd->buf_ptr; + iov[niov++].iov_len = cmd->buf_ptr_size; + tx_size += cmd->buf_ptr_size; + + pr_debug("Echoing back %u bytes of ping" + " data.\n", cmd->buf_ptr_size); + + padding = ((-cmd->buf_ptr_size) & 3); + if (padding != 0) { + iov[niov].iov_base = &cmd->pad_bytes; + iov[niov++].iov_len = padding; + tx_size += padding; + pr_debug("Attaching %u additional" + " padding bytes.\n", padding); + } + if (conn->conn_ops->DataDigest) { + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + cmd->buf_ptr, cmd->buf_ptr_size, + padding, (u8 *)&cmd->pad_bytes, + (u8 *)&cmd->data_crc); + + iov[niov].iov_base = &cmd->data_crc; + iov[niov++].iov_len = ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attached DataDigest for %u" + " bytes of ping data, CRC 0x%08x\n", + cmd->buf_ptr_size, cmd->data_crc); + } + } + + cmd->iov_misc_count = niov; + cmd->tx_size = tx_size; + + pr_debug("Sending NOPIN Response ITT: 0x%08x, TTT:" + " 0x%08x, StatSN: 0x%08x, Length %u\n", cmd->init_task_tag, + cmd->targ_xfer_tag, cmd->stat_sn, cmd->buf_ptr_size); + + return 0; +} + +int iscsit_send_r2t( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + int 
tx_size = 0; + struct iscsi_r2t *r2t; + struct iscsi_r2t_rsp *hdr; + + r2t = iscsit_get_r2t_from_list(cmd); + if (!r2t) + return -1; + + hdr = (struct iscsi_r2t_rsp *) cmd->pdu; + memset(hdr, 0, ISCSI_HDR_LEN); + hdr->opcode = ISCSI_OP_R2T; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + int_to_scsilun(cmd->se_cmd.orig_fe_lun, + (struct scsi_lun *)&hdr->lun); + hdr->itt = cpu_to_be32(cmd->init_task_tag); + spin_lock_bh(&conn->sess->ttt_lock); + r2t->targ_xfer_tag = conn->sess->targ_xfer_tag++; + if (r2t->targ_xfer_tag == 0xFFFFFFFF) + r2t->targ_xfer_tag = conn->sess->targ_xfer_tag++; + spin_unlock_bh(&conn->sess->ttt_lock); + hdr->ttt = cpu_to_be32(r2t->targ_xfer_tag); + hdr->statsn = cpu_to_be32(conn->stat_sn); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + hdr->r2tsn = cpu_to_be32(r2t->r2t_sn); + hdr->data_offset = cpu_to_be32(r2t->offset); + hdr->data_length = cpu_to_be32(r2t->xfer_len); + + cmd->iov_misc[0].iov_base = cmd->pdu; + cmd->iov_misc[0].iov_len = ISCSI_HDR_LEN; + tx_size += ISCSI_HDR_LEN; + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + cmd->iov_misc[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32 HeaderDigest for R2T" + " PDU 0x%08x\n", *header_digest); + } + + pr_debug("Built %sR2T, ITT: 0x%08x, TTT: 0x%08x, StatSN:" + " 0x%08x, R2TSN: 0x%08x, Offset: %u, DDTL: %u, CID: %hu\n", + (!r2t->recovery_r2t) ? "" : "Recovery ", cmd->init_task_tag, + r2t->targ_xfer_tag, ntohl(hdr->statsn), r2t->r2t_sn, + r2t->offset, r2t->xfer_len, conn->cid); + + cmd->iov_misc_count = 1; + cmd->tx_size = tx_size; + + spin_lock_bh(&cmd->r2t_lock); + r2t->sent_r2t = 1; + spin_unlock_bh(&cmd->r2t_lock); + + return 0; +} + +/* + * type 0: Normal Operation. + * type 1: Called from Storage Transport. 
+ * type 2: Called from iscsi_task_reassign_complete_write() for + * connection recovery. + */ +int iscsit_build_r2ts_for_cmd( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn, + int type) +{ + int first_r2t = 1; + u32 offset = 0, xfer_len = 0; + + spin_lock_bh(&cmd->r2t_lock); + if (cmd->cmd_flags & ICF_SENT_LAST_R2T) { + spin_unlock_bh(&cmd->r2t_lock); + return 0; + } + + if (conn->sess->sess_ops->DataSequenceInOrder && (type != 2)) + if (cmd->r2t_offset < cmd->write_data_done) + cmd->r2t_offset = cmd->write_data_done; + + while (cmd->outstanding_r2ts < conn->sess->sess_ops->MaxOutstandingR2T) { + if (conn->sess->sess_ops->DataSequenceInOrder) { + offset = cmd->r2t_offset; + + if (first_r2t && (type == 2)) { + xfer_len = ((offset + + (conn->sess->sess_ops->MaxBurstLength - + cmd->next_burst_len) > + cmd->data_length) ? + (cmd->data_length - offset) : + (conn->sess->sess_ops->MaxBurstLength - + cmd->next_burst_len)); + } else { + xfer_len = ((offset + + conn->sess->sess_ops->MaxBurstLength) > + cmd->data_length) ? 
+ (cmd->data_length - offset) : + conn->sess->sess_ops->MaxBurstLength; + } + cmd->r2t_offset += xfer_len; + + if (cmd->r2t_offset == cmd->data_length) + cmd->cmd_flags |= ICF_SENT_LAST_R2T; + } else { + struct iscsi_seq *seq; + + seq = iscsit_get_seq_holder_for_r2t(cmd); + if (!seq) { + spin_unlock_bh(&cmd->r2t_lock); + return -1; + } + + offset = seq->offset; + xfer_len = seq->xfer_len; + + if (cmd->seq_send_order == cmd->seq_count) + cmd->cmd_flags |= ICF_SENT_LAST_R2T; + } + cmd->outstanding_r2ts++; + first_r2t = 0; + + if (iscsit_add_r2t_to_list(cmd, offset, xfer_len, 0, 0) < 0) { + spin_unlock_bh(&cmd->r2t_lock); + return -1; + } + + if (cmd->cmd_flags & ICF_SENT_LAST_R2T) + break; + } + spin_unlock_bh(&cmd->r2t_lock); + + return 0; +} + +static int iscsit_send_status( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + u8 iov_count = 0, recovery; + u32 padding = 0, tx_size = 0; + struct iscsi_scsi_rsp *hdr; + struct kvec *iov; + + recovery = (cmd->i_state != ISTATE_SEND_STATUS); + if (!recovery) + cmd->stat_sn = conn->stat_sn++; + + spin_lock_bh(&conn->sess->session_stats_lock); + conn->sess->rsp_pdus++; + spin_unlock_bh(&conn->sess->session_stats_lock); + + hdr = (struct iscsi_scsi_rsp *) cmd->pdu; + memset(hdr, 0, ISCSI_HDR_LEN); + hdr->opcode = ISCSI_OP_SCSI_CMD_RSP; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + if (cmd->se_cmd.se_cmd_flags & SCF_OVERFLOW_BIT) { + hdr->flags |= ISCSI_FLAG_CMD_OVERFLOW; + hdr->residual_count = cpu_to_be32(cmd->residual_count); + } else if (cmd->se_cmd.se_cmd_flags & SCF_UNDERFLOW_BIT) { + hdr->flags |= ISCSI_FLAG_CMD_UNDERFLOW; + hdr->residual_count = cpu_to_be32(cmd->residual_count); + } + hdr->response = cmd->iscsi_response; + hdr->cmd_status = cmd->se_cmd.scsi_status; + hdr->itt = cpu_to_be32(cmd->init_task_tag); + hdr->statsn = cpu_to_be32(cmd->stat_sn); + + iscsit_increment_maxcmdsn(cmd, conn->sess); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + + iov 
= &cmd->iov_misc[0]; + iov[iov_count].iov_base = cmd->pdu; + iov[iov_count++].iov_len = ISCSI_HDR_LEN; + tx_size += ISCSI_HDR_LEN; + + /* + * Attach SENSE DATA payload to iSCSI Response PDU + */ + if (cmd->se_cmd.sense_buffer && + ((cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) || + (cmd->se_cmd.se_cmd_flags & SCF_EMULATED_TASK_SENSE))) { + padding = -(cmd->se_cmd.scsi_sense_length) & 3; + hton24(hdr->dlength, cmd->se_cmd.scsi_sense_length); + iov[iov_count].iov_base = cmd->se_cmd.sense_buffer; + iov[iov_count++].iov_len = + (cmd->se_cmd.scsi_sense_length + padding); + tx_size += cmd->se_cmd.scsi_sense_length; + + if (padding) { + memset(cmd->se_cmd.sense_buffer + + cmd->se_cmd.scsi_sense_length, 0, padding); + tx_size += padding; + pr_debug("Adding %u bytes of padding to" + " SENSE.\n", padding); + } + + if (conn->conn_ops->DataDigest) { + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + cmd->se_cmd.sense_buffer, + (cmd->se_cmd.scsi_sense_length + padding), + 0, NULL, (u8 *)&cmd->data_crc); + + iov[iov_count].iov_base = &cmd->data_crc; + iov[iov_count++].iov_len = ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + + pr_debug("Attaching CRC32 DataDigest for" + " SENSE, %u bytes CRC 0x%08x\n", + (cmd->se_cmd.scsi_sense_length + padding), + cmd->data_crc); + } + + pr_debug("Attaching SENSE DATA: %u bytes to iSCSI" + " Response PDU\n", + cmd->se_cmd.scsi_sense_length); + } + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + iov[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32 HeaderDigest for Response" + " PDU 0x%08x\n", *header_digest); + } + + cmd->iov_misc_count = iov_count; + cmd->tx_size = tx_size; + + pr_debug("Built %sSCSI Response, ITT: 0x%08x, StatSN: 0x%08x," + " Response: 0x%02x, SAM Status: 0x%02x, CID: %hu\n", + (!recovery) ? 
"" : "Recovery ", cmd->init_task_tag, + cmd->stat_sn, 0x00, cmd->se_cmd.scsi_status, conn->cid); + + return 0; +} + +static u8 iscsit_convert_tcm_tmr_rsp(struct se_tmr_req *se_tmr) +{ + switch (se_tmr->response) { + case TMR_FUNCTION_COMPLETE: + return ISCSI_TMF_RSP_COMPLETE; + case TMR_TASK_DOES_NOT_EXIST: + return ISCSI_TMF_RSP_NO_TASK; + case TMR_LUN_DOES_NOT_EXIST: + return ISCSI_TMF_RSP_NO_LUN; + case TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED: + return ISCSI_TMF_RSP_NOT_SUPPORTED; + case TMR_FUNCTION_AUTHORIZATION_FAILED: + return ISCSI_TMF_RSP_AUTH_FAILED; + case TMR_FUNCTION_REJECTED: + default: + return ISCSI_TMF_RSP_REJECTED; + } +} + +static int iscsit_send_task_mgt_rsp( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req; + struct iscsi_tm_rsp *hdr; + u32 tx_size = 0; + + hdr = (struct iscsi_tm_rsp *) cmd->pdu; + memset(hdr, 0, ISCSI_HDR_LEN); + hdr->opcode = ISCSI_OP_SCSI_TMFUNC_RSP; + hdr->response = iscsit_convert_tcm_tmr_rsp(se_tmr); + hdr->itt = cpu_to_be32(cmd->init_task_tag); + cmd->stat_sn = conn->stat_sn++; + hdr->statsn = cpu_to_be32(cmd->stat_sn); + + iscsit_increment_maxcmdsn(cmd, conn->sess); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + + cmd->iov_misc[0].iov_base = cmd->pdu; + cmd->iov_misc[0].iov_len = ISCSI_HDR_LEN; + tx_size += ISCSI_HDR_LEN; + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + cmd->iov_misc[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32 HeaderDigest for Task" + " Mgmt Response PDU 0x%08x\n", *header_digest); + } + + cmd->iov_misc_count = 1; + cmd->tx_size = tx_size; + + pr_debug("Built Task Management Response ITT: 0x%08x," + " StatSN: 0x%08x, Response: 0x%02x, CID: %hu\n", + cmd->init_task_tag, 
cmd->stat_sn, hdr->response, conn->cid); + + return 0; +} + +static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) +{ + char *payload = NULL; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_portal_group *tpg; + struct iscsi_tiqn *tiqn; + struct iscsi_tpg_np *tpg_np; + int buffer_len, end_of_buf = 0, len = 0, payload_len = 0; + unsigned char buf[256]; + + buffer_len = (conn->conn_ops->MaxRecvDataSegmentLength > 32768) ? + 32768 : conn->conn_ops->MaxRecvDataSegmentLength; + + memset(buf, 0, 256); + + payload = kzalloc(buffer_len, GFP_KERNEL); + if (!payload) { + pr_err("Unable to allocate memory for sendtargets" + " response.\n"); + return -ENOMEM; + } + + spin_lock(&tiqn_lock); + list_for_each_entry(tiqn, &g_tiqn_list, tiqn_list) { + len = sprintf(buf, "TargetName=%s", tiqn->tiqn); + len += 1; + + if ((len + payload_len) > buffer_len) { + spin_unlock(&tiqn->tiqn_tpg_lock); + end_of_buf = 1; + goto eob; + } + memcpy((void *)payload + payload_len, buf, len); + payload_len += len; + + spin_lock(&tiqn->tiqn_tpg_lock); + list_for_each_entry(tpg, &tiqn->tiqn_tpg_list, tpg_list) { + + spin_lock(&tpg->tpg_state_lock); + if ((tpg->tpg_state == TPG_STATE_FREE) || + (tpg->tpg_state == TPG_STATE_INACTIVE)) { + spin_unlock(&tpg->tpg_state_lock); + continue; + } + spin_unlock(&tpg->tpg_state_lock); + + spin_lock(&tpg->tpg_np_lock); + list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, + tpg_np_list) { + len = sprintf(buf, "TargetAddress=" + "%s%s%s:%hu,%hu", + (tpg_np->tpg_np->np_sockaddr.ss_family == AF_INET6) ? + "[" : "", tpg_np->tpg_np->np_ip, + (tpg_np->tpg_np->np_sockaddr.ss_family == AF_INET6) ? 
+ "]" : "", tpg_np->tpg_np->np_port, + tpg->tpgt); + len += 1; + + if ((len + payload_len) > buffer_len) { + spin_unlock(&tpg->tpg_np_lock); + spin_unlock(&tiqn->tiqn_tpg_lock); + end_of_buf = 1; + goto eob; + } + memcpy((void *)payload + payload_len, buf, len); + payload_len += len; + } + spin_unlock(&tpg->tpg_np_lock); + } + spin_unlock(&tiqn->tiqn_tpg_lock); +eob: + if (end_of_buf) + break; + } + spin_unlock(&tiqn_lock); + + cmd->buf_ptr = payload; + + return payload_len; +} + +/* + * FIXME: Add support for F_BIT and C_BIT when the length is longer than + * MaxRecvDataSegmentLength. + */ +static int iscsit_send_text_rsp( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + struct iscsi_text_rsp *hdr; + struct kvec *iov; + u32 padding = 0, tx_size = 0; + int text_length, iov_count = 0; + + text_length = iscsit_build_sendtargets_response(cmd); + if (text_length < 0) + return text_length; + + padding = ((-text_length) & 3); + if (padding != 0) { + memset(cmd->buf_ptr + text_length, 0, padding); + pr_debug("Attaching %u additional bytes for" + " padding.\n", padding); + } + + hdr = (struct iscsi_text_rsp *) cmd->pdu; + memset(hdr, 0, ISCSI_HDR_LEN); + hdr->opcode = ISCSI_OP_TEXT_RSP; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + hton24(hdr->dlength, text_length); + hdr->itt = cpu_to_be32(cmd->init_task_tag); + hdr->ttt = cpu_to_be32(cmd->targ_xfer_tag); + cmd->stat_sn = conn->stat_sn++; + hdr->statsn = cpu_to_be32(cmd->stat_sn); + + iscsit_increment_maxcmdsn(cmd, conn->sess); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + + iov = &cmd->iov_misc[0]; + + iov[iov_count].iov_base = cmd->pdu; + iov[iov_count++].iov_len = ISCSI_HDR_LEN; + iov[iov_count].iov_base = cmd->buf_ptr; + iov[iov_count++].iov_len = text_length + padding; + + tx_size += (ISCSI_HDR_LEN + text_length + padding); + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + 
iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + iov[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32 HeaderDigest for" + " Text Response PDU 0x%08x\n", *header_digest); + } + + if (conn->conn_ops->DataDigest) { + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + cmd->buf_ptr, (text_length + padding), + 0, NULL, (u8 *)&cmd->data_crc); + + iov[iov_count].iov_base = &cmd->data_crc; + iov[iov_count++].iov_len = ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + + pr_debug("Attaching DataDigest for %u bytes of text" + " data, CRC 0x%08x\n", (text_length + padding), + cmd->data_crc); + } + + cmd->iov_misc_count = iov_count; + cmd->tx_size = tx_size; + + pr_debug("Built Text Response: ITT: 0x%08x, StatSN: 0x%08x," + " Length: %u, CID: %hu\n", cmd->init_task_tag, cmd->stat_sn, + text_length, conn->cid); + return 0; +} + +static int iscsit_send_reject( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + u32 iov_count = 0, tx_size = 0; + struct iscsi_reject *hdr; + struct kvec *iov; + + hdr = (struct iscsi_reject *) cmd->pdu; + hdr->opcode = ISCSI_OP_REJECT; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + hton24(hdr->dlength, ISCSI_HDR_LEN); + cmd->stat_sn = conn->stat_sn++; + hdr->statsn = cpu_to_be32(cmd->stat_sn); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + + iov = &cmd->iov_misc[0]; + + iov[iov_count].iov_base = cmd->pdu; + iov[iov_count++].iov_len = ISCSI_HDR_LEN; + iov[iov_count].iov_base = cmd->buf_ptr; + iov[iov_count++].iov_len = ISCSI_HDR_LEN; + + tx_size = (ISCSI_HDR_LEN + ISCSI_HDR_LEN); + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + iov[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32 
HeaderDigest for" + " REJECT PDU 0x%08x\n", *header_digest); + } + + if (conn->conn_ops->DataDigest) { + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)cmd->buf_ptr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)&cmd->data_crc); + + iov[iov_count].iov_base = &cmd->data_crc; + iov[iov_count++].iov_len = ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32 DataDigest for REJECT" + " PDU 0x%08x\n", cmd->data_crc); + } + + cmd->iov_misc_count = iov_count; + cmd->tx_size = tx_size; + + pr_debug("Built Reject PDU StatSN: 0x%08x, Reason: 0x%02x," + " CID: %hu\n", ntohl(hdr->statsn), hdr->reason, conn->cid); + + return 0; +} + +static void iscsit_tx_thread_wait_for_tcp(struct iscsi_conn *conn) +{ + if ((conn->sock->sk->sk_shutdown & SEND_SHUTDOWN) || + (conn->sock->sk->sk_shutdown & RCV_SHUTDOWN)) { + wait_for_completion_interruptible_timeout( + &conn->tx_half_close_comp, + ISCSI_TX_THREAD_TCP_TIMEOUT * HZ); + } +} + +#ifdef CONFIG_SMP + +void iscsit_thread_get_cpumask(struct iscsi_conn *conn) +{ + struct iscsi_thread_set *ts = conn->thread_set; + int ord, cpu; + /* + * thread_id is assigned from iscsit_global->ts_bitmap from + * within iscsi_thread_set.c:iscsi_allocate_thread_sets() + * + * Here we use thread_id to determine which CPU that this + * iSCSI connection's iscsi_thread_set will be scheduled to + * execute upon. + */ + ord = ts->thread_id % cpumask_weight(cpu_online_mask); +#if 0 + pr_debug(">>>>>>>>>>>>>>>>>>>> Generated ord: %d from" + " thread_id: %d\n", ord, ts->thread_id); +#endif + for_each_online_cpu(cpu) { + if (ord-- == 0) { + cpumask_set_cpu(cpu, conn->conn_cpumask); + return; + } + } + /* + * This should never be reached.. + */ + dump_stack(); + cpumask_setall(conn->conn_cpumask); +} + +static inline void iscsit_thread_check_cpumask( + struct iscsi_conn *conn, + struct task_struct *p, + int mode) +{ + char buf[128]; + /* + * mode == 1 signals iscsi_target_tx_thread() usage. + * mode == 0 signals iscsi_target_rx_thread() usage. 
+ */ + if (mode == 1) { + if (!conn->conn_tx_reset_cpumask) + return; + conn->conn_tx_reset_cpumask = 0; + } else { + if (!conn->conn_rx_reset_cpumask) + return; + conn->conn_rx_reset_cpumask = 0; + } + /* + * Update the CPU mask for this single kthread so that + * both TX and RX kthreads are scheduled to run on the + * same CPU. + */ + memset(buf, 0, 128); + cpumask_scnprintf(buf, 128, conn->conn_cpumask); +#if 0 + pr_debug(">>>>>>>>>>>>>> Calling set_cpus_allowed_ptr():" + " %s for %s\n", buf, p->comm); +#endif + set_cpus_allowed_ptr(p, conn->conn_cpumask); +} + +#else +#define iscsit_thread_get_cpumask(X) ({}) +#define iscsit_thread_check_cpumask(X, Y, Z) ({}) +#endif /* CONFIG_SMP */ + +int iscsi_target_tx_thread(void *arg) +{ + u8 state; + int eodr = 0; + int ret = 0; + int sent_status = 0; + int use_misc = 0; + int map_sg = 0; + struct iscsi_cmd *cmd = NULL; + struct iscsi_conn *conn; + struct iscsi_queue_req *qr = NULL; + struct se_cmd *se_cmd; + struct iscsi_thread_set *ts = (struct iscsi_thread_set *)arg; + /* + * Allow ourselves to be interrupted by SIGINT so that a + * connection recovery / failure event can be triggered externally. + */ + allow_signal(SIGINT); + +restart: + conn = iscsi_tx_thread_pre_handler(ts); + if (!conn) + goto out; + + eodr = map_sg = ret = sent_status = use_misc = 0; + + while (!kthread_should_stop()) { + /* + * Ensure that both TX and RX per connection kthreads + * are scheduled to run on the same CPU. 
+ */ + iscsit_thread_check_cpumask(conn, current, 1); + + schedule_timeout_interruptible(MAX_SCHEDULE_TIMEOUT); + + if ((ts->status == ISCSI_THREAD_SET_RESET) || + signal_pending(current)) + goto transport_err; + +get_immediate: + qr = iscsit_get_cmd_from_immediate_queue(conn); + if (qr) { + atomic_set(&conn->check_immediate_queue, 0); + cmd = qr->cmd; + state = qr->state; + kmem_cache_free(lio_qr_cache, qr); + + spin_lock_bh(&cmd->istate_lock); + switch (state) { + case ISTATE_SEND_R2T: + spin_unlock_bh(&cmd->istate_lock); + ret = iscsit_send_r2t(cmd, conn); + break; + case ISTATE_REMOVE: + spin_unlock_bh(&cmd->istate_lock); + + if (cmd->data_direction == DMA_TO_DEVICE) + iscsit_stop_dataout_timer(cmd); + + spin_lock_bh(&conn->cmd_lock); + list_del(&cmd->i_list); + spin_unlock_bh(&conn->cmd_lock); + /* + * Determine if a struct se_cmd is assoicated with + * this struct iscsi_cmd. + */ + if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) && + !(cmd->tmr_req)) + iscsit_release_cmd(cmd); + else + transport_generic_free_cmd(&cmd->se_cmd, + 1, 0); + goto get_immediate; + case ISTATE_SEND_NOPIN_WANT_RESPONSE: + spin_unlock_bh(&cmd->istate_lock); + iscsit_mod_nopin_response_timer(conn); + ret = iscsit_send_unsolicited_nopin(cmd, + conn, 1); + break; + case ISTATE_SEND_NOPIN_NO_RESPONSE: + spin_unlock_bh(&cmd->istate_lock); + ret = iscsit_send_unsolicited_nopin(cmd, + conn, 0); + break; + default: + pr_err("Unknown Opcode: 0x%02x ITT:" + " 0x%08x, i_state: %d on CID: %hu\n", + cmd->iscsi_opcode, cmd->init_task_tag, state, + conn->cid); + spin_unlock_bh(&cmd->istate_lock); + goto transport_err; + } + if (ret < 0) { + conn->tx_immediate_queue = 0; + goto transport_err; + } + + if (iscsit_send_tx_data(cmd, conn, 1) < 0) { + conn->tx_immediate_queue = 0; + iscsit_tx_thread_wait_for_tcp(conn); + goto transport_err; + } + + spin_lock_bh(&cmd->istate_lock); + switch (state) { + case ISTATE_SEND_R2T: + spin_unlock_bh(&cmd->istate_lock); + spin_lock_bh(&cmd->dataout_timeout_lock); + 
iscsit_start_dataout_timer(cmd, conn); + spin_unlock_bh(&cmd->dataout_timeout_lock); + break; + case ISTATE_SEND_NOPIN_WANT_RESPONSE: + cmd->i_state = ISTATE_SENT_NOPIN_WANT_RESPONSE; + spin_unlock_bh(&cmd->istate_lock); + break; + case ISTATE_SEND_NOPIN_NO_RESPONSE: + cmd->i_state = ISTATE_SENT_STATUS; + spin_unlock_bh(&cmd->istate_lock); + break; + default: + pr_err("Unknown Opcode: 0x%02x ITT:" + " 0x%08x, i_state: %d on CID: %hu\n", + cmd->iscsi_opcode, cmd->init_task_tag, + state, conn->cid); + spin_unlock_bh(&cmd->istate_lock); + goto transport_err; + } + goto get_immediate; + } else + conn->tx_immediate_queue = 0; + +get_response: + qr = iscsit_get_cmd_from_response_queue(conn); + if (qr) { + cmd = qr->cmd; + state = qr->state; + kmem_cache_free(lio_qr_cache, qr); + + spin_lock_bh(&cmd->istate_lock); +check_rsp_state: + switch (state) { + case ISTATE_SEND_DATAIN: + spin_unlock_bh(&cmd->istate_lock); + ret = iscsit_send_data_in(cmd, conn, + &eodr); + map_sg = 1; + break; + case ISTATE_SEND_STATUS: + case ISTATE_SEND_STATUS_RECOVERY: + spin_unlock_bh(&cmd->istate_lock); + use_misc = 1; + ret = iscsit_send_status(cmd, conn); + break; + case ISTATE_SEND_LOGOUTRSP: + spin_unlock_bh(&cmd->istate_lock); + use_misc = 1; + ret = iscsit_send_logout_response(cmd, conn); + break; + case ISTATE_SEND_ASYNCMSG: + spin_unlock_bh(&cmd->istate_lock); + use_misc = 1; + ret = iscsit_send_conn_drop_async_message( + cmd, conn); + break; + case ISTATE_SEND_NOPIN: + spin_unlock_bh(&cmd->istate_lock); + use_misc = 1; + ret = iscsit_send_nopin_response(cmd, conn); + break; + case ISTATE_SEND_REJECT: + spin_unlock_bh(&cmd->istate_lock); + use_misc = 1; + ret = iscsit_send_reject(cmd, conn); + break; + case ISTATE_SEND_TASKMGTRSP: + spin_unlock_bh(&cmd->istate_lock); + use_misc = 1; + ret = iscsit_send_task_mgt_rsp(cmd, conn); + if (ret != 0) + break; + ret = iscsit_tmr_post_handler(cmd, conn); + if (ret != 0) + iscsit_fall_back_to_erl0(conn->sess); + break; + case ISTATE_SEND_TEXTRSP: 
+ spin_unlock_bh(&cmd->istate_lock); + use_misc = 1; + ret = iscsit_send_text_rsp(cmd, conn); + break; + default: + pr_err("Unknown Opcode: 0x%02x ITT:" + " 0x%08x, i_state: %d on CID: %hu\n", + cmd->iscsi_opcode, cmd->init_task_tag, + state, conn->cid); + spin_unlock_bh(&cmd->istate_lock); + goto transport_err; + } + if (ret < 0) { + conn->tx_response_queue = 0; + goto transport_err; + } + + se_cmd = &cmd->se_cmd; + + if (map_sg && !conn->conn_ops->IFMarker) { + if (iscsit_fe_sendpage_sg(cmd, conn) < 0) { + conn->tx_response_queue = 0; + iscsit_tx_thread_wait_for_tcp(conn); + iscsit_unmap_iovec(cmd); + goto transport_err; + } + } else { + if (iscsit_send_tx_data(cmd, conn, use_misc) < 0) { + conn->tx_response_queue = 0; + iscsit_tx_thread_wait_for_tcp(conn); + iscsit_unmap_iovec(cmd); + goto transport_err; + } + } + map_sg = 0; + iscsit_unmap_iovec(cmd); + + spin_lock_bh(&cmd->istate_lock); + switch (state) { + case ISTATE_SEND_DATAIN: + if (!eodr) + goto check_rsp_state; + + if (eodr == 1) { + cmd->i_state = ISTATE_SENT_LAST_DATAIN; + sent_status = 1; + eodr = use_misc = 0; + } else if (eodr == 2) { + cmd->i_state = state = + ISTATE_SEND_STATUS; + sent_status = 0; + eodr = use_misc = 0; + goto check_rsp_state; + } + break; + case ISTATE_SEND_STATUS: + use_misc = 0; + sent_status = 1; + break; + case ISTATE_SEND_ASYNCMSG: + case ISTATE_SEND_NOPIN: + case ISTATE_SEND_STATUS_RECOVERY: + case ISTATE_SEND_TEXTRSP: + use_misc = 0; + sent_status = 1; + break; + case ISTATE_SEND_REJECT: + use_misc = 0; + if (cmd->cmd_flags & ICF_REJECT_FAIL_CONN) { + cmd->cmd_flags &= ~ICF_REJECT_FAIL_CONN; + spin_unlock_bh(&cmd->istate_lock); + complete(&cmd->reject_comp); + goto transport_err; + } + complete(&cmd->reject_comp); + break; + case ISTATE_SEND_TASKMGTRSP: + use_misc = 0; + sent_status = 1; + break; + case ISTATE_SEND_LOGOUTRSP: + spin_unlock_bh(&cmd->istate_lock); + if (!iscsit_logout_post_handler(cmd, conn)) + goto restart; + spin_lock_bh(&cmd->istate_lock); + use_misc = 
0; + sent_status = 1; + break; + default: + pr_err("Unknown Opcode: 0x%02x ITT:" + " 0x%08x, i_state: %d on CID: %hu\n", + cmd->iscsi_opcode, cmd->init_task_tag, + cmd->i_state, conn->cid); + spin_unlock_bh(&cmd->istate_lock); + goto transport_err; + } + + if (sent_status) { + cmd->i_state = ISTATE_SENT_STATUS; + sent_status = 0; + } + spin_unlock_bh(&cmd->istate_lock); + + if (atomic_read(&conn->check_immediate_queue)) + goto get_immediate; + + goto get_response; + } else + conn->tx_response_queue = 0; + } + +transport_err: + iscsit_take_action_for_connection_exit(conn); + goto restart; +out: + return 0; +} + +int iscsi_target_rx_thread(void *arg) +{ + int ret; + u8 buffer[ISCSI_HDR_LEN], opcode; + u32 checksum = 0, digest = 0; + struct iscsi_conn *conn = NULL; + struct iscsi_thread_set *ts = (struct iscsi_thread_set *)arg; + struct kvec iov; + /* + * Allow ourselves to be interrupted by SIGINT so that a + * connection recovery / failure event can be triggered externally. + */ + allow_signal(SIGINT); + +restart: + conn = iscsi_rx_thread_pre_handler(ts); + if (!conn) + goto out; + + while (!kthread_should_stop()) { + /* + * Ensure that both TX and RX per connection kthreads + * are scheduled to run on the same CPU. + */ + iscsit_thread_check_cpumask(conn, current, 0); + + memset(buffer, 0, ISCSI_HDR_LEN); + memset(&iov, 0, sizeof(struct kvec)); + + iov.iov_base = buffer; + iov.iov_len = ISCSI_HDR_LEN; + + ret = rx_data(conn, &iov, 1, ISCSI_HDR_LEN); + if (ret != ISCSI_HDR_LEN) { + iscsit_rx_thread_wait_for_tcp(conn); + goto transport_err; + } + + /* + * Set conn->bad_hdr for use with REJECT PDUs. 
+ */ + memcpy(&conn->bad_hdr, &buffer, ISCSI_HDR_LEN); + + if (conn->conn_ops->HeaderDigest) { + iov.iov_base = &digest; + iov.iov_len = ISCSI_CRC_LEN; + + ret = rx_data(conn, &iov, 1, ISCSI_CRC_LEN); + if (ret != ISCSI_CRC_LEN) { + iscsit_rx_thread_wait_for_tcp(conn); + goto transport_err; + } + + iscsit_do_crypto_hash_buf(&conn->conn_rx_hash, + buffer, ISCSI_HDR_LEN, + 0, NULL, (u8 *)&checksum); + + if (digest != checksum) { + pr_err("HeaderDigest CRC32C failed," + " received 0x%08x, computed 0x%08x\n", + digest, checksum); + /* + * Set the PDU to 0xff so it will intentionally + * hit default in the switch below. + */ + memset(buffer, 0xff, ISCSI_HDR_LEN); + spin_lock_bh(&conn->sess->session_stats_lock); + conn->sess->conn_digest_errors++; + spin_unlock_bh(&conn->sess->session_stats_lock); + } else { + pr_debug("Got HeaderDigest CRC32C" + " 0x%08x\n", checksum); + } + } + + if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) + goto transport_err; + + opcode = buffer[0] & ISCSI_OPCODE_MASK; + + if (conn->sess->sess_ops->SessionType && + ((!(opcode & ISCSI_OP_TEXT)) || + (!(opcode & ISCSI_OP_LOGOUT)))) { + pr_err("Received illegal iSCSI Opcode: 0x%02x" + " while in Discovery Session, rejecting.\n", opcode); + iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buffer, conn); + goto transport_err; + } + + switch (opcode) { + case ISCSI_OP_SCSI_CMD: + if (iscsit_handle_scsi_cmd(conn, buffer) < 0) + goto transport_err; + break; + case ISCSI_OP_SCSI_DATA_OUT: + if (iscsit_handle_data_out(conn, buffer) < 0) + goto transport_err; + break; + case ISCSI_OP_NOOP_OUT: + if (iscsit_handle_nop_out(conn, buffer) < 0) + goto transport_err; + break; + case ISCSI_OP_SCSI_TMFUNC: + if (iscsit_handle_task_mgt_cmd(conn, buffer) < 0) + goto transport_err; + break; + case ISCSI_OP_TEXT: + if (iscsit_handle_text_cmd(conn, buffer) < 0) + goto transport_err; + break; + case ISCSI_OP_LOGOUT: + ret = iscsit_handle_logout_cmd(conn, buffer); + if (ret > 0) { + 
wait_for_completion_timeout(&conn->conn_logout_comp, + SECONDS_FOR_LOGOUT_COMP * HZ); + goto transport_err; + } else if (ret < 0) + goto transport_err; + break; + case ISCSI_OP_SNACK: + if (iscsit_handle_snack(conn, buffer) < 0) + goto transport_err; + break; + default: + pr_err("Got unknown iSCSI OpCode: 0x%02x\n", + opcode); + if (!conn->sess->sess_ops->ErrorRecoveryLevel) { + pr_err("Cannot recover from unknown" + " opcode while ERL=0, closing iSCSI connection" + ".\n"); + goto transport_err; + } + if (!conn->conn_ops->OFMarker) { + pr_err("Unable to recover from unknown" + " opcode while OFMarker=No, closing iSCSI" + " connection.\n"); + goto transport_err; + } + if (iscsit_recover_from_unknown_opcode(conn) < 0) { + pr_err("Unable to recover from unknown" + " opcode, closing iSCSI connection.\n"); + goto transport_err; + } + break; + } + } + +transport_err: + if (!signal_pending(current)) + atomic_set(&conn->transport_failed, 1); + iscsit_take_action_for_connection_exit(conn); + goto restart; +out: + return 0; +} + +static void iscsit_release_commands_from_conn(struct iscsi_conn *conn) +{ + struct iscsi_cmd *cmd = NULL, *cmd_tmp = NULL; + struct iscsi_session *sess = conn->sess; + struct se_cmd *se_cmd; + /* + * We expect this function to only ever be called from either RX or TX + * thread context via iscsit_close_connection() once the other context + * has been reset -> returned sleeping pre-handler state. + */ + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_list) { + if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD)) { + + list_del(&cmd->i_list); + spin_unlock_bh(&conn->cmd_lock); + iscsit_increment_maxcmdsn(cmd, sess); + se_cmd = &cmd->se_cmd; + /* + * Special cases for active iSCSI TMR, and + * transport_lookup_cmd_lun() failing from + * iscsit_get_lun_for_cmd() in iscsit_handle_scsi_cmd(). 
+ */ + if (cmd->tmr_req && se_cmd->transport_wait_for_tasks) + se_cmd->transport_wait_for_tasks(se_cmd, 1, 1); + else if (cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) + transport_release_cmd(se_cmd); + else + iscsit_release_cmd(cmd); + + spin_lock_bh(&conn->cmd_lock); + continue; + } + list_del(&cmd->i_list); + spin_unlock_bh(&conn->cmd_lock); + + iscsit_increment_maxcmdsn(cmd, sess); + se_cmd = &cmd->se_cmd; + + if (se_cmd->transport_wait_for_tasks) + se_cmd->transport_wait_for_tasks(se_cmd, 1, 1); + + spin_lock_bh(&conn->cmd_lock); + } + spin_unlock_bh(&conn->cmd_lock); +} + +static void iscsit_stop_timers_for_cmds( + struct iscsi_conn *conn) +{ + struct iscsi_cmd *cmd; + + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) { + if (cmd->data_direction == DMA_TO_DEVICE) + iscsit_stop_dataout_timer(cmd); + } + spin_unlock_bh(&conn->cmd_lock); +} + +int iscsit_close_connection( + struct iscsi_conn *conn) +{ + int conn_logout = (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT); + struct iscsi_session *sess = conn->sess; + + pr_debug("Closing iSCSI connection CID %hu on SID:" + " %u\n", conn->cid, sess->sid); + /* + * Always up conn_logout_comp just in case the RX Thread is sleeping + * and the logout response never got sent because the connection + * failed. + */ + complete(&conn->conn_logout_comp); + + iscsi_release_thread_set(conn); + + iscsit_stop_timers_for_cmds(conn); + iscsit_stop_nopin_response_timer(conn); + iscsit_stop_nopin_timer(conn); + iscsit_free_queue_reqs_for_conn(conn); + + /* + * During Connection recovery drop unacknowledged out of order + * commands for this connection, and prepare the other commands + * for realligence. + * + * During normal operation clear the out of order commands (but + * do not free the struct iscsi_ooo_cmdsn's) and release all + * struct iscsi_cmds. 
+ */ + if (atomic_read(&conn->connection_recovery)) { + iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(conn); + iscsit_prepare_cmds_for_realligance(conn); + } else { + iscsit_clear_ooo_cmdsns_for_conn(conn); + iscsit_release_commands_from_conn(conn); + } + + /* + * Handle decrementing session or connection usage count if + * a logout response was not able to be sent because the + * connection failed. Fall back to Session Recovery here. + */ + if (atomic_read(&conn->conn_logout_remove)) { + if (conn->conn_logout_reason == ISCSI_LOGOUT_REASON_CLOSE_SESSION) { + iscsit_dec_conn_usage_count(conn); + iscsit_dec_session_usage_count(sess); + } + if (conn->conn_logout_reason == ISCSI_LOGOUT_REASON_CLOSE_CONNECTION) + iscsit_dec_conn_usage_count(conn); + + atomic_set(&conn->conn_logout_remove, 0); + atomic_set(&sess->session_reinstatement, 0); + atomic_set(&sess->session_fall_back_to_erl0, 1); + } + + spin_lock_bh(&sess->conn_lock); + list_del(&conn->conn_list); + + /* + * Attempt to let the Initiator know this connection failed by + * sending an Connection Dropped Async Message on another + * active connection. + */ + if (atomic_read(&conn->connection_recovery)) + iscsit_build_conn_drop_async_message(conn); + + spin_unlock_bh(&sess->conn_lock); + + /* + * If connection reinstatement is being performed on this connection, + * up the connection reinstatement semaphore that is being blocked on + * in iscsit_cause_connection_reinstatement(). + */ + spin_lock_bh(&conn->state_lock); + if (atomic_read(&conn->sleep_on_conn_wait_comp)) { + spin_unlock_bh(&conn->state_lock); + complete(&conn->conn_wait_comp); + wait_for_completion(&conn->conn_post_wait_comp); + spin_lock_bh(&conn->state_lock); + } + + /* + * If connection reinstatement is being performed on this connection + * by receiving a REMOVECONNFORRECOVERY logout request, up the + * connection wait rcfr semaphore that is being blocked on + * an iscsit_connection_reinstatement_rcfr(). 
+ */ + if (atomic_read(&conn->connection_wait_rcfr)) { + spin_unlock_bh(&conn->state_lock); + complete(&conn->conn_wait_rcfr_comp); + wait_for_completion(&conn->conn_post_wait_comp); + spin_lock_bh(&conn->state_lock); + } + atomic_set(&conn->connection_reinstatement, 1); + spin_unlock_bh(&conn->state_lock); + + /* + * If any other processes are accessing this connection pointer we + * must wait until they have completed. + */ + iscsit_check_conn_usage_count(conn); + + if (conn->conn_rx_hash.tfm) + crypto_free_hash(conn->conn_rx_hash.tfm); + if (conn->conn_tx_hash.tfm) + crypto_free_hash(conn->conn_tx_hash.tfm); + + if (conn->conn_cpumask) + free_cpumask_var(conn->conn_cpumask); + + kfree(conn->conn_ops); + conn->conn_ops = NULL; + + if (conn->sock) { + if (conn->conn_flags & CONNFLAG_SCTP_STRUCT_FILE) { + kfree(conn->sock->file); + conn->sock->file = NULL; + } + sock_release(conn->sock); + } + conn->thread_set = NULL; + + pr_debug("Moving to TARG_CONN_STATE_FREE.\n"); + conn->conn_state = TARG_CONN_STATE_FREE; + kfree(conn); + + spin_lock_bh(&sess->conn_lock); + atomic_dec(&sess->nconn); + pr_debug("Decremented iSCSI connection count to %hu from node:" + " %s\n", atomic_read(&sess->nconn), + sess->sess_ops->InitiatorName); + /* + * Make sure that if one connection fails in an non ERL=2 iSCSI + * Session that they all fail. + */ + if ((sess->sess_ops->ErrorRecoveryLevel != 2) && !conn_logout && + !atomic_read(&sess->session_logout)) + atomic_set(&sess->session_fall_back_to_erl0, 1); + + /* + * If this was not the last connection in the session, and we are + * performing session reinstatement or falling back to ERL=0, call + * iscsit_stop_session() without sleeping to shutdown the other + * active connections. 
+ */ + if (atomic_read(&sess->nconn)) { + if (!atomic_read(&sess->session_reinstatement) && + !atomic_read(&sess->session_fall_back_to_erl0)) { + spin_unlock_bh(&sess->conn_lock); + return 0; + } + if (!atomic_read(&sess->session_stop_active)) { + atomic_set(&sess->session_stop_active, 1); + spin_unlock_bh(&sess->conn_lock); + iscsit_stop_session(sess, 0, 0); + return 0; + } + spin_unlock_bh(&sess->conn_lock); + return 0; + } + + /* + * If this was the last connection in the session and one of the + * following is occurring: + * + * Session Reinstatement is not being performed, and are falling back + * to ERL=0 call iscsit_close_session(). + * + * Session Logout was requested. iscsit_close_session() will be called + * elsewhere. + * + * Session Continuation is not being performed, start the Time2Retain + * handler and check if sleep_on_sess_wait_sem is active. + */ + if (!atomic_read(&sess->session_reinstatement) && + atomic_read(&sess->session_fall_back_to_erl0)) { + spin_unlock_bh(&sess->conn_lock); + iscsit_close_session(sess); + + return 0; + } else if (atomic_read(&sess->session_logout)) { + pr_debug("Moving to TARG_SESS_STATE_FREE.\n"); + sess->session_state = TARG_SESS_STATE_FREE; + spin_unlock_bh(&sess->conn_lock); + + if (atomic_read(&sess->sleep_on_sess_wait_comp)) + complete(&sess->session_wait_comp); + + return 0; + } else { + pr_debug("Moving to TARG_SESS_STATE_FAILED.\n"); + sess->session_state = TARG_SESS_STATE_FAILED; + + if (!atomic_read(&sess->session_continuation)) { + spin_unlock_bh(&sess->conn_lock); + iscsit_start_time2retain_handler(sess); + } else + spin_unlock_bh(&sess->conn_lock); + + if (atomic_read(&sess->sleep_on_sess_wait_comp)) + complete(&sess->session_wait_comp); + + return 0; + } + spin_unlock_bh(&sess->conn_lock); + + return 0; +} + +int iscsit_close_session(struct iscsi_session *sess) +{ + struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess); + struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; + + if (atomic_read(&sess->nconn)) { 
+ pr_err("%d connection(s) still exist for iSCSI session" + " to %s\n", atomic_read(&sess->nconn), + sess->sess_ops->InitiatorName); + BUG(); + } + + spin_lock_bh(&se_tpg->session_lock); + atomic_set(&sess->session_logout, 1); + atomic_set(&sess->session_reinstatement, 1); + iscsit_stop_time2retain_timer(sess); + spin_unlock_bh(&se_tpg->session_lock); + + /* + * transport_deregister_session_configfs() will clear the + * struct se_node_acl->nacl_sess pointer now as a iscsi_np process context + * can be setting it again with __transport_register_session() in + * iscsi_post_login_handler() again after the iscsit_stop_session() + * completes in iscsi_np context. + */ + transport_deregister_session_configfs(sess->se_sess); + + /* + * If any other processes are accessing this session pointer we must + * wait until they have completed. If we are in an interrupt (the + * time2retain handler) and contain and active session usage count we + * restart the timer and exit. + */ + if (!in_interrupt()) { + if (iscsit_check_session_usage_count(sess) == 1) + iscsit_stop_session(sess, 1, 1); + } else { + if (iscsit_check_session_usage_count(sess) == 2) { + atomic_set(&sess->session_logout, 0); + iscsit_start_time2retain_handler(sess); + return 0; + } + } + + transport_deregister_session(sess->se_sess); + + if (sess->sess_ops->ErrorRecoveryLevel == 2) + iscsit_free_connection_recovery_entires(sess); + + iscsit_free_all_ooo_cmdsns(sess); + + spin_lock_bh(&se_tpg->session_lock); + pr_debug("Moving to TARG_SESS_STATE_FREE.\n"); + sess->session_state = TARG_SESS_STATE_FREE; + pr_debug("Released iSCSI session from node: %s\n", + sess->sess_ops->InitiatorName); + tpg->nsessions--; + if (tpg->tpg_tiqn) + tpg->tpg_tiqn->tiqn_nsessions--; + + pr_debug("Decremented number of active iSCSI Sessions on" + " iSCSI TPG: %hu to %u\n", tpg->tpgt, tpg->nsessions); + + spin_lock(&sess_idr_lock); + idr_remove(&sess_idr, sess->session_index); + spin_unlock(&sess_idr_lock); + + kfree(sess->sess_ops); + 
sess->sess_ops = NULL; + spin_unlock_bh(&se_tpg->session_lock); + + kfree(sess); + return 0; +} + +static void iscsit_logout_post_handler_closesession( + struct iscsi_conn *conn) +{ + struct iscsi_session *sess = conn->sess; + + iscsi_set_thread_clear(conn, ISCSI_CLEAR_TX_THREAD); + iscsi_set_thread_set_signal(conn, ISCSI_SIGNAL_TX_THREAD); + + atomic_set(&conn->conn_logout_remove, 0); + complete(&conn->conn_logout_comp); + + iscsit_dec_conn_usage_count(conn); + iscsit_stop_session(sess, 1, 1); + iscsit_dec_session_usage_count(sess); + iscsit_close_session(sess); +} + +static void iscsit_logout_post_handler_samecid( + struct iscsi_conn *conn) +{ + iscsi_set_thread_clear(conn, ISCSI_CLEAR_TX_THREAD); + iscsi_set_thread_set_signal(conn, ISCSI_SIGNAL_TX_THREAD); + + atomic_set(&conn->conn_logout_remove, 0); + complete(&conn->conn_logout_comp); + + iscsit_cause_connection_reinstatement(conn, 1); + iscsit_dec_conn_usage_count(conn); +} + +static void iscsit_logout_post_handler_diffcid( + struct iscsi_conn *conn, + u16 cid) +{ + struct iscsi_conn *l_conn; + struct iscsi_session *sess = conn->sess; + + if (!sess) + return; + + spin_lock_bh(&sess->conn_lock); + list_for_each_entry(l_conn, &sess->sess_conn_list, conn_list) { + if (l_conn->cid == cid) { + iscsit_inc_conn_usage_count(l_conn); + break; + } + } + spin_unlock_bh(&sess->conn_lock); + + if (!l_conn) + return; + + if (l_conn->sock) + l_conn->sock->ops->shutdown(l_conn->sock, RCV_SHUTDOWN); + + spin_lock_bh(&l_conn->state_lock); + pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n"); + l_conn->conn_state = TARG_CONN_STATE_IN_LOGOUT; + spin_unlock_bh(&l_conn->state_lock); + + iscsit_cause_connection_reinstatement(l_conn, 1); + iscsit_dec_conn_usage_count(l_conn); +} + +/* + * Return of 0 causes the TX thread to restart. 
+ */ +static int iscsit_logout_post_handler( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + int ret = 0; + + switch (cmd->logout_reason) { + case ISCSI_LOGOUT_REASON_CLOSE_SESSION: + switch (cmd->logout_response) { + case ISCSI_LOGOUT_SUCCESS: + case ISCSI_LOGOUT_CLEANUP_FAILED: + default: + iscsit_logout_post_handler_closesession(conn); + break; + } + ret = 0; + break; + case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION: + if (conn->cid == cmd->logout_cid) { + switch (cmd->logout_response) { + case ISCSI_LOGOUT_SUCCESS: + case ISCSI_LOGOUT_CLEANUP_FAILED: + default: + iscsit_logout_post_handler_samecid(conn); + break; + } + ret = 0; + } else { + switch (cmd->logout_response) { + case ISCSI_LOGOUT_SUCCESS: + iscsit_logout_post_handler_diffcid(conn, + cmd->logout_cid); + break; + case ISCSI_LOGOUT_CID_NOT_FOUND: + case ISCSI_LOGOUT_CLEANUP_FAILED: + default: + break; + } + ret = 1; + } + break; + case ISCSI_LOGOUT_REASON_RECOVERY: + switch (cmd->logout_response) { + case ISCSI_LOGOUT_SUCCESS: + case ISCSI_LOGOUT_CID_NOT_FOUND: + case ISCSI_LOGOUT_RECOVERY_UNSUPPORTED: + case ISCSI_LOGOUT_CLEANUP_FAILED: + default: + break; + } + ret = 1; + break; + default: + break; + + } + return ret; +} + +void iscsit_fail_session(struct iscsi_session *sess) +{ + struct iscsi_conn *conn; + + spin_lock_bh(&sess->conn_lock); + list_for_each_entry(conn, &sess->sess_conn_list, conn_list) { + pr_debug("Moving to TARG_CONN_STATE_CLEANUP_WAIT.\n"); + conn->conn_state = TARG_CONN_STATE_CLEANUP_WAIT; + } + spin_unlock_bh(&sess->conn_lock); + + pr_debug("Moving to TARG_SESS_STATE_FAILED.\n"); + sess->session_state = TARG_SESS_STATE_FAILED; +} + +int iscsit_free_session(struct iscsi_session *sess) +{ + u16 conn_count = atomic_read(&sess->nconn); + struct iscsi_conn *conn, *conn_tmp = NULL; + int is_last; + + spin_lock_bh(&sess->conn_lock); + atomic_set(&sess->sleep_on_sess_wait_comp, 1); + + list_for_each_entry_safe(conn, conn_tmp, &sess->sess_conn_list, + conn_list) { + if (conn_count == 
0) + break; + + if (list_is_last(&conn->conn_list, &sess->sess_conn_list)) { + is_last = 1; + } else { + iscsit_inc_conn_usage_count(conn_tmp); + is_last = 0; + } + iscsit_inc_conn_usage_count(conn); + + spin_unlock_bh(&sess->conn_lock); + iscsit_cause_connection_reinstatement(conn, 1); + spin_lock_bh(&sess->conn_lock); + + iscsit_dec_conn_usage_count(conn); + if (is_last == 0) + iscsit_dec_conn_usage_count(conn_tmp); + + conn_count--; + } + + if (atomic_read(&sess->nconn)) { + spin_unlock_bh(&sess->conn_lock); + wait_for_completion(&sess->session_wait_comp); + } else + spin_unlock_bh(&sess->conn_lock); + + iscsit_close_session(sess); + return 0; +} + +void iscsit_stop_session( + struct iscsi_session *sess, + int session_sleep, + int connection_sleep) +{ + u16 conn_count = atomic_read(&sess->nconn); + struct iscsi_conn *conn, *conn_tmp = NULL; + int is_last; + + spin_lock_bh(&sess->conn_lock); + if (session_sleep) + atomic_set(&sess->sleep_on_sess_wait_comp, 1); + + if (connection_sleep) { + list_for_each_entry_safe(conn, conn_tmp, &sess->sess_conn_list, + conn_list) { + if (conn_count == 0) + break; + + if (list_is_last(&conn->conn_list, &sess->sess_conn_list)) { + is_last = 1; + } else { + iscsit_inc_conn_usage_count(conn_tmp); + is_last = 0; + } + iscsit_inc_conn_usage_count(conn); + + spin_unlock_bh(&sess->conn_lock); + iscsit_cause_connection_reinstatement(conn, 1); + spin_lock_bh(&sess->conn_lock); + + iscsit_dec_conn_usage_count(conn); + if (is_last == 0) + iscsit_dec_conn_usage_count(conn_tmp); + conn_count--; + } + } else { + list_for_each_entry(conn, &sess->sess_conn_list, conn_list) + iscsit_cause_connection_reinstatement(conn, 0); + } + + if (session_sleep && atomic_read(&sess->nconn)) { + spin_unlock_bh(&sess->conn_lock); + wait_for_completion(&sess->session_wait_comp); + } else + spin_unlock_bh(&sess->conn_lock); +} + +int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force) +{ + struct iscsi_session *sess; + struct 
se_portal_group *se_tpg = &tpg->tpg_se_tpg; + struct se_session *se_sess, *se_sess_tmp; + int session_count = 0; + + spin_lock_bh(&se_tpg->session_lock); + if (tpg->nsessions && !force) { + spin_unlock_bh(&se_tpg->session_lock); + return -1; + } + + list_for_each_entry_safe(se_sess, se_sess_tmp, &se_tpg->tpg_sess_list, + sess_list) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + + spin_lock(&sess->conn_lock); + if (atomic_read(&sess->session_fall_back_to_erl0) || + atomic_read(&sess->session_logout) || + (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) { + spin_unlock(&sess->conn_lock); + continue; + } + atomic_set(&sess->session_reinstatement, 1); + spin_unlock(&sess->conn_lock); + spin_unlock_bh(&se_tpg->session_lock); + + iscsit_free_session(sess); + spin_lock_bh(&se_tpg->session_lock); + + session_count++; + } + spin_unlock_bh(&se_tpg->session_lock); + + pr_debug("Released %d iSCSI Session(s) from Target Portal" + " Group: %hu\n", session_count, tpg->tpgt); + return 0; +} + +MODULE_DESCRIPTION("iSCSI-Target Driver for mainline target infrastructure"); +MODULE_VERSION("4.1.x"); +MODULE_AUTHOR("nab@Linux-iSCSI.org"); +MODULE_LICENSE("GPL"); + +module_init(iscsi_target_init_module); +module_exit(iscsi_target_cleanup_module); diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h new file mode 100644 index 000000000000..5db2ddeed5eb --- /dev/null +++ b/drivers/target/iscsi/iscsi_target.h @@ -0,0 +1,42 @@ +#ifndef ISCSI_TARGET_H +#define ISCSI_TARGET_H + +extern struct iscsi_tiqn *iscsit_get_tiqn_for_login(unsigned char *); +extern struct iscsi_tiqn *iscsit_get_tiqn(unsigned char *, int); +extern void iscsit_put_tiqn_for_login(struct iscsi_tiqn *); +extern struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *); +extern void iscsit_del_tiqn(struct iscsi_tiqn *); +extern int iscsit_access_np(struct iscsi_np *, struct iscsi_portal_group *); +extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *); 
+extern struct iscsi_np *iscsit_add_np(struct __kernel_sockaddr_storage *, + char *, int); +extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *, + struct iscsi_portal_group *); +extern int iscsit_del_np(struct iscsi_np *); +extern int iscsit_add_reject_from_cmd(u8, int, int, unsigned char *, struct iscsi_cmd *); +extern int iscsit_logout_closesession(struct iscsi_cmd *, struct iscsi_conn *); +extern int iscsit_logout_closeconnection(struct iscsi_cmd *, struct iscsi_conn *); +extern int iscsit_logout_removeconnforrecovery(struct iscsi_cmd *, struct iscsi_conn *); +extern int iscsit_send_async_msg(struct iscsi_conn *, u16, u8, u8); +extern int iscsit_send_r2t(struct iscsi_cmd *, struct iscsi_conn *); +extern int iscsit_build_r2ts_for_cmd(struct iscsi_cmd *, struct iscsi_conn *, int); +extern void iscsit_thread_get_cpumask(struct iscsi_conn *); +extern int iscsi_target_tx_thread(void *); +extern int iscsi_target_rx_thread(void *); +extern int iscsit_close_connection(struct iscsi_conn *); +extern int iscsit_close_session(struct iscsi_session *); +extern void iscsit_fail_session(struct iscsi_session *); +extern int iscsit_free_session(struct iscsi_session *); +extern void iscsit_stop_session(struct iscsi_session *, int, int); +extern int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *, int); + +extern struct iscsit_global *iscsit_global; +extern struct target_fabric_configfs *lio_target_fabric_configfs; + +extern struct kmem_cache *lio_dr_cache; +extern struct kmem_cache *lio_ooo_cache; +extern struct kmem_cache *lio_cmd_cache; +extern struct kmem_cache *lio_qr_cache; +extern struct kmem_cache *lio_r2t_cache; + +#endif /*** ISCSI_TARGET_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c new file mode 100644 index 000000000000..11fd74307811 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -0,0 +1,490 @@ 
+/******************************************************************************* + * This file houses the main functions for the iSCSI CHAP support + * + * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + ******************************************************************************/ + +#include <linux/string.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/scatterlist.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_nego.h" +#include "iscsi_target_auth.h" + +static unsigned char chap_asciihex_to_binaryhex(unsigned char val[2]) +{ + unsigned char result = 0; + /* + * MSB + */ + if ((val[0] >= 'a') && (val[0] <= 'f')) + result = ((val[0] - 'a' + 10) & 0xf) << 4; + else + if ((val[0] >= 'A') && (val[0] <= 'F')) + result = ((val[0] - 'A' + 10) & 0xf) << 4; + else /* digit */ + result = ((val[0] - '0') & 0xf) << 4; + /* + * LSB + */ + if ((val[1] >= 'a') && (val[1] <= 'f')) + result |= ((val[1] - 'a' + 10) & 0xf); + else + if ((val[1] >= 'A') && (val[1] <= 'F')) + result |= ((val[1] - 'A' + 10) & 0xf); + else /* digit */ + result |= ((val[1] - '0') & 0xf); + + return result; +} + +static int chap_string_to_hex(unsigned char *dst, unsigned char *src, int len) +{ + int i, j = 0; + + for (i = 0; i < len; i += 2) { + dst[j++] = (unsigned char) chap_asciihex_to_binaryhex(&src[i]); + } 
+ + dst[j] = '\0'; + return j; +} + +static void chap_binaryhex_to_asciihex(char *dst, char *src, int src_len) +{ + int i; + + for (i = 0; i < src_len; i++) { + sprintf(&dst[i*2], "%02x", (int) src[i] & 0xff); + } +} + +static void chap_set_random(char *data, int length) +{ + long r; + unsigned n; + + while (length > 0) { + get_random_bytes(&r, sizeof(long)); + r = r ^ (r >> 8); + r = r ^ (r >> 4); + n = r & 0x7; + + get_random_bytes(&r, sizeof(long)); + r = r ^ (r >> 8); + r = r ^ (r >> 5); + n = (n << 3) | (r & 0x7); + + get_random_bytes(&r, sizeof(long)); + r = r ^ (r >> 8); + r = r ^ (r >> 5); + n = (n << 2) | (r & 0x3); + + *data++ = n; + length--; + } +} + +static void chap_gen_challenge( + struct iscsi_conn *conn, + int caller, + char *c_str, + unsigned int *c_len) +{ + unsigned char challenge_asciihex[CHAP_CHALLENGE_LENGTH * 2 + 1]; + struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol; + + memset(challenge_asciihex, 0, CHAP_CHALLENGE_LENGTH * 2 + 1); + + chap_set_random(chap->challenge, CHAP_CHALLENGE_LENGTH); + chap_binaryhex_to_asciihex(challenge_asciihex, chap->challenge, + CHAP_CHALLENGE_LENGTH); + /* + * Set CHAP_C, and copy the generated challenge into c_str. + */ + *c_len += sprintf(c_str + *c_len, "CHAP_C=0x%s", challenge_asciihex); + *c_len += 1; + + pr_debug("[%s] Sending CHAP_C=0x%s\n\n", (caller) ? "server" : "client", + challenge_asciihex); +} + + +static struct iscsi_chap *chap_server_open( + struct iscsi_conn *conn, + struct iscsi_node_auth *auth, + const char *a_str, + char *aic_str, + unsigned int *aic_len) +{ + struct iscsi_chap *chap; + + if (!(auth->naf_flags & NAF_USERID_SET) || + !(auth->naf_flags & NAF_PASSWORD_SET)) { + pr_err("CHAP user or password not set for" + " Initiator ACL\n"); + return NULL; + } + + conn->auth_protocol = kzalloc(sizeof(struct iscsi_chap), GFP_KERNEL); + if (!conn->auth_protocol) + return NULL; + + chap = (struct iscsi_chap *) conn->auth_protocol; + /* + * We only support MD5 MDA presently. 
+ */ + if (strncmp(a_str, "CHAP_A=5", 8)) { + pr_err("CHAP_A is not MD5.\n"); + return NULL; + } + pr_debug("[server] Got CHAP_A=5\n"); + /* + * Send back CHAP_A set to MD5. + */ + *aic_len = sprintf(aic_str, "CHAP_A=5"); + *aic_len += 1; + chap->digest_type = CHAP_DIGEST_MD5; + pr_debug("[server] Sending CHAP_A=%d\n", chap->digest_type); + /* + * Set Identifier. + */ + chap->id = ISCSI_TPG_C(conn)->tpg_chap_id++; + *aic_len += sprintf(aic_str + *aic_len, "CHAP_I=%d", chap->id); + *aic_len += 1; + pr_debug("[server] Sending CHAP_I=%d\n", chap->id); + /* + * Generate Challenge. + */ + chap_gen_challenge(conn, 1, aic_str, aic_len); + + return chap; +} + +static void chap_close(struct iscsi_conn *conn) +{ + kfree(conn->auth_protocol); + conn->auth_protocol = NULL; +} + +static int chap_server_compute_md5( + struct iscsi_conn *conn, + struct iscsi_node_auth *auth, + char *nr_in_ptr, + char *nr_out_ptr, + unsigned int *nr_out_len) +{ + char *endptr; + unsigned char id, digest[MD5_SIGNATURE_SIZE]; + unsigned char type, response[MD5_SIGNATURE_SIZE * 2 + 2]; + unsigned char identifier[10], *challenge = NULL; + unsigned char *challenge_binhex = NULL; + unsigned char client_digest[MD5_SIGNATURE_SIZE]; + unsigned char server_digest[MD5_SIGNATURE_SIZE]; + unsigned char chap_n[MAX_CHAP_N_SIZE], chap_r[MAX_RESPONSE_LENGTH]; + struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol; + struct crypto_hash *tfm; + struct hash_desc desc; + struct scatterlist sg; + int auth_ret = -1, ret, challenge_len; + + memset(identifier, 0, 10); + memset(chap_n, 0, MAX_CHAP_N_SIZE); + memset(chap_r, 0, MAX_RESPONSE_LENGTH); + memset(digest, 0, MD5_SIGNATURE_SIZE); + memset(response, 0, MD5_SIGNATURE_SIZE * 2 + 2); + memset(client_digest, 0, MD5_SIGNATURE_SIZE); + memset(server_digest, 0, MD5_SIGNATURE_SIZE); + + challenge = kzalloc(CHAP_CHALLENGE_STR_LEN, GFP_KERNEL); + if (!challenge) { + pr_err("Unable to allocate challenge buffer\n"); + goto out; + } + + challenge_binhex = 
kzalloc(CHAP_CHALLENGE_STR_LEN, GFP_KERNEL); + if (!challenge_binhex) { + pr_err("Unable to allocate challenge_binhex buffer\n"); + goto out; + } + /* + * Extract CHAP_N. + */ + if (extract_param(nr_in_ptr, "CHAP_N", MAX_CHAP_N_SIZE, chap_n, + &type) < 0) { + pr_err("Could not find CHAP_N.\n"); + goto out; + } + if (type == HEX) { + pr_err("Could not find CHAP_N.\n"); + goto out; + } + + if (memcmp(chap_n, auth->userid, strlen(auth->userid)) != 0) { + pr_err("CHAP_N values do not match!\n"); + goto out; + } + pr_debug("[server] Got CHAP_N=%s\n", chap_n); + /* + * Extract CHAP_R. + */ + if (extract_param(nr_in_ptr, "CHAP_R", MAX_RESPONSE_LENGTH, chap_r, + &type) < 0) { + pr_err("Could not find CHAP_R.\n"); + goto out; + } + if (type != HEX) { + pr_err("Could not find CHAP_R.\n"); + goto out; + } + + pr_debug("[server] Got CHAP_R=%s\n", chap_r); + chap_string_to_hex(client_digest, chap_r, strlen(chap_r)); + + tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) { + pr_err("Unable to allocate struct crypto_hash\n"); + goto out; + } + desc.tfm = tfm; + desc.flags = 0; + + ret = crypto_hash_init(&desc); + if (ret < 0) { + pr_err("crypto_hash_init() failed\n"); + crypto_free_hash(tfm); + goto out; + } + + sg_init_one(&sg, (void *)&chap->id, 1); + ret = crypto_hash_update(&desc, &sg, 1); + if (ret < 0) { + pr_err("crypto_hash_update() failed for id\n"); + crypto_free_hash(tfm); + goto out; + } + + sg_init_one(&sg, (void *)&auth->password, strlen(auth->password)); + ret = crypto_hash_update(&desc, &sg, strlen(auth->password)); + if (ret < 0) { + pr_err("crypto_hash_update() failed for password\n"); + crypto_free_hash(tfm); + goto out; + } + + sg_init_one(&sg, (void *)chap->challenge, CHAP_CHALLENGE_LENGTH); + ret = crypto_hash_update(&desc, &sg, CHAP_CHALLENGE_LENGTH); + if (ret < 0) { + pr_err("crypto_hash_update() failed for challenge\n"); + crypto_free_hash(tfm); + goto out; + } + + ret = crypto_hash_final(&desc, server_digest); + if (ret < 0) { + 
pr_err("crypto_hash_final() failed for server digest\n"); + crypto_free_hash(tfm); + goto out; + } + crypto_free_hash(tfm); + + chap_binaryhex_to_asciihex(response, server_digest, MD5_SIGNATURE_SIZE); + pr_debug("[server] MD5 Server Digest: %s\n", response); + + if (memcmp(server_digest, client_digest, MD5_SIGNATURE_SIZE) != 0) { + pr_debug("[server] MD5 Digests do not match!\n\n"); + goto out; + } else + pr_debug("[server] MD5 Digests match, CHAP connetication" + " successful.\n\n"); + /* + * One way authentication has succeeded, return now if mutual + * authentication is not enabled. + */ + if (!auth->authenticate_target) { + kfree(challenge); + kfree(challenge_binhex); + return 0; + } + /* + * Get CHAP_I. + */ + if (extract_param(nr_in_ptr, "CHAP_I", 10, identifier, &type) < 0) { + pr_err("Could not find CHAP_I.\n"); + goto out; + } + + if (type == HEX) + id = (unsigned char)simple_strtoul((char *)&identifier[2], + &endptr, 0); + else + id = (unsigned char)simple_strtoul(identifier, &endptr, 0); + /* + * RFC 1994 says Identifier is no more than octet (8 bits). + */ + pr_debug("[server] Got CHAP_I=%d\n", id); + /* + * Get CHAP_C. + */ + if (extract_param(nr_in_ptr, "CHAP_C", CHAP_CHALLENGE_STR_LEN, + challenge, &type) < 0) { + pr_err("Could not find CHAP_C.\n"); + goto out; + } + + if (type != HEX) { + pr_err("Could not find CHAP_C.\n"); + goto out; + } + pr_debug("[server] Got CHAP_C=%s\n", challenge); + challenge_len = chap_string_to_hex(challenge_binhex, challenge, + strlen(challenge)); + if (!challenge_len) { + pr_err("Unable to convert incoming challenge\n"); + goto out; + } + /* + * Generate CHAP_N and CHAP_R for mutual authentication. 
+ */ + tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) { + pr_err("Unable to allocate struct crypto_hash\n"); + goto out; + } + desc.tfm = tfm; + desc.flags = 0; + + ret = crypto_hash_init(&desc); + if (ret < 0) { + pr_err("crypto_hash_init() failed\n"); + crypto_free_hash(tfm); + goto out; + } + + sg_init_one(&sg, (void *)&id, 1); + ret = crypto_hash_update(&desc, &sg, 1); + if (ret < 0) { + pr_err("crypto_hash_update() failed for id\n"); + crypto_free_hash(tfm); + goto out; + } + + sg_init_one(&sg, (void *)auth->password_mutual, + strlen(auth->password_mutual)); + ret = crypto_hash_update(&desc, &sg, strlen(auth->password_mutual)); + if (ret < 0) { + pr_err("crypto_hash_update() failed for" + " password_mutual\n"); + crypto_free_hash(tfm); + goto out; + } + /* + * Convert received challenge to binary hex. + */ + sg_init_one(&sg, (void *)challenge_binhex, challenge_len); + ret = crypto_hash_update(&desc, &sg, challenge_len); + if (ret < 0) { + pr_err("crypto_hash_update() failed for ma challenge\n"); + crypto_free_hash(tfm); + goto out; + } + + ret = crypto_hash_final(&desc, digest); + if (ret < 0) { + pr_err("crypto_hash_final() failed for ma digest\n"); + crypto_free_hash(tfm); + goto out; + } + crypto_free_hash(tfm); + /* + * Generate CHAP_N and CHAP_R. + */ + *nr_out_len = sprintf(nr_out_ptr, "CHAP_N=%s", auth->userid_mutual); + *nr_out_len += 1; + pr_debug("[server] Sending CHAP_N=%s\n", auth->userid_mutual); + /* + * Convert response from binary hex to ascii hext. 
+ */ + chap_binaryhex_to_asciihex(response, digest, MD5_SIGNATURE_SIZE); + *nr_out_len += sprintf(nr_out_ptr + *nr_out_len, "CHAP_R=0x%s", + response); + *nr_out_len += 1; + pr_debug("[server] Sending CHAP_R=0x%s\n", response); + auth_ret = 0; +out: + kfree(challenge); + kfree(challenge_binhex); + return auth_ret; +} + +static int chap_got_response( + struct iscsi_conn *conn, + struct iscsi_node_auth *auth, + char *nr_in_ptr, + char *nr_out_ptr, + unsigned int *nr_out_len) +{ + struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol; + + switch (chap->digest_type) { + case CHAP_DIGEST_MD5: + if (chap_server_compute_md5(conn, auth, nr_in_ptr, + nr_out_ptr, nr_out_len) < 0) + return -1; + return 0; + default: + pr_err("Unknown CHAP digest type %d!\n", + chap->digest_type); + return -1; + } +} + +u32 chap_main_loop( + struct iscsi_conn *conn, + struct iscsi_node_auth *auth, + char *in_text, + char *out_text, + int *in_len, + int *out_len) +{ + struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol; + + if (!chap) { + chap = chap_server_open(conn, auth, in_text, out_text, out_len); + if (!chap) + return 2; + chap->chap_state = CHAP_STAGE_SERVER_AIC; + return 0; + } else if (chap->chap_state == CHAP_STAGE_SERVER_AIC) { + convert_null_to_semi(in_text, *in_len); + if (chap_got_response(conn, auth, in_text, out_text, + out_len) < 0) { + chap_close(conn); + return 2; + } + if (auth->authenticate_target) + chap->chap_state = CHAP_STAGE_SERVER_NR; + else + *out_len = 0; + chap_close(conn); + return 1; + } + + return 2; +} diff --git a/drivers/target/iscsi/iscsi_target_auth.h b/drivers/target/iscsi/iscsi_target_auth.h new file mode 100644 index 000000000000..2f463c09626d --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_auth.h @@ -0,0 +1,31 @@ +#ifndef _ISCSI_CHAP_H_ +#define _ISCSI_CHAP_H_ + +#define CHAP_DIGEST_MD5 5 +#define CHAP_DIGEST_SHA 6 + +#define CHAP_CHALLENGE_LENGTH 16 +#define CHAP_CHALLENGE_STR_LEN 4096 +#define MAX_RESPONSE_LENGTH 64 
/*
 * Per-connection CHAP state.  chap_server_open() allocates it into
 * conn->auth_protocol and chap_close() frees it.
 */
struct iscsi_chap {
	/* Digest algorithm; chap_server_open() sets CHAP_DIGEST_MD5. */
	unsigned char	digest_type;
	/* Identifier sent as CHAP_I and hashed first into the expected response. */
	unsigned char	id;
	/* Raw challenge bytes; sent hex-encoded as CHAP_C. */
	unsigned char	challenge[CHAP_CHALLENGE_LENGTH];
	/* NOTE(review): no reader or writer of this field is visible in this chunk -- confirm it is used. */
	unsigned int	authenticate_target;
	/* One of the CHAP_STAGE_* values; tested and advanced by chap_main_loop(). */
	unsigned int	chap_state;
} ____cacheline_aligned;
+ ****************************************************************************/ + +#include <linux/configfs.h> +#include <target/target_core_base.h> +#include <target/target_core_transport.h> +#include <target/target_core_fabric_ops.h> +#include <target/target_core_fabric_configfs.h> +#include <target/target_core_fabric_lib.h> +#include <target/target_core_device.h> +#include <target/target_core_tpg.h> +#include <target/target_core_configfs.h> +#include <target/configfs_macros.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_parameters.h" +#include "iscsi_target_device.h" +#include "iscsi_target_erl0.h" +#include "iscsi_target_nodeattrib.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_util.h" +#include "iscsi_target.h" +#include "iscsi_target_stat.h" +#include "iscsi_target_configfs.h" + +struct target_fabric_configfs *lio_target_fabric_configfs; + +struct lio_target_configfs_attribute { + struct configfs_attribute attr; + ssize_t (*show)(void *, char *); + ssize_t (*store)(void *, const char *, size_t); +}; + +struct iscsi_portal_group *lio_get_tpg_from_tpg_item( + struct config_item *item, + struct iscsi_tiqn **tiqn_out) +{ + struct se_portal_group *se_tpg = container_of(to_config_group(item), + struct se_portal_group, tpg_group); + struct iscsi_portal_group *tpg = + (struct iscsi_portal_group *)se_tpg->se_tpg_fabric_ptr; + int ret; + + if (!tpg) { + pr_err("Unable to locate struct iscsi_portal_group " + "pointer\n"); + return NULL; + } + ret = iscsit_get_tpg(tpg); + if (ret < 0) + return NULL; + + *tiqn_out = tpg->tpg_tiqn; + return tpg; +} + +/* Start items for lio_target_portal_cit */ + +static ssize_t lio_target_np_show_sctp( + struct se_tpg_np *se_tpg_np, + char *page) +{ + struct iscsi_tpg_np *tpg_np = container_of(se_tpg_np, + struct iscsi_tpg_np, se_tpg_np); + struct iscsi_tpg_np *tpg_np_sctp; + ssize_t rb; + + tpg_np_sctp = iscsit_tpg_locate_child_np(tpg_np, ISCSI_SCTP_TCP); + if (tpg_np_sctp) + rb = sprintf(page, "1\n"); + else + 
rb = sprintf(page, "0\n"); + + return rb; +} + +static ssize_t lio_target_np_store_sctp( + struct se_tpg_np *se_tpg_np, + const char *page, + size_t count) +{ + struct iscsi_np *np; + struct iscsi_portal_group *tpg; + struct iscsi_tpg_np *tpg_np = container_of(se_tpg_np, + struct iscsi_tpg_np, se_tpg_np); + struct iscsi_tpg_np *tpg_np_sctp = NULL; + char *endptr; + u32 op; + int ret; + + op = simple_strtoul(page, &endptr, 0); + if ((op != 1) && (op != 0)) { + pr_err("Illegal value for tpg_enable: %u\n", op); + return -EINVAL; + } + np = tpg_np->tpg_np; + if (!np) { + pr_err("Unable to locate struct iscsi_np from" + " struct iscsi_tpg_np\n"); + return -EINVAL; + } + + tpg = tpg_np->tpg; + if (iscsit_get_tpg(tpg) < 0) + return -EINVAL; + + if (op) { + /* + * Use existing np->np_sockaddr for SCTP network portal reference + */ + tpg_np_sctp = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr, + np->np_ip, tpg_np, ISCSI_SCTP_TCP); + if (!tpg_np_sctp || IS_ERR(tpg_np_sctp)) + goto out; + } else { + tpg_np_sctp = iscsit_tpg_locate_child_np(tpg_np, ISCSI_SCTP_TCP); + if (!tpg_np_sctp) + goto out; + + ret = iscsit_tpg_del_network_portal(tpg, tpg_np_sctp); + if (ret < 0) + goto out; + } + + iscsit_put_tpg(tpg); + return count; +out: + iscsit_put_tpg(tpg); + return -EINVAL; +} + +TF_NP_BASE_ATTR(lio_target, sctp, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *lio_target_portal_attrs[] = { + &lio_target_np_sctp.attr, + NULL, +}; + +/* Stop items for lio_target_portal_cit */ + +/* Start items for lio_target_np_cit */ + +#define MAX_PORTAL_LEN 256 + +struct se_tpg_np *lio_target_call_addnptotpg( + struct se_portal_group *se_tpg, + struct config_group *group, + const char *name) +{ + struct iscsi_portal_group *tpg; + struct iscsi_tpg_np *tpg_np; + char *str, *str2, *ip_str, *port_str; + struct __kernel_sockaddr_storage sockaddr; + struct sockaddr_in *sock_in; + struct sockaddr_in6 *sock_in6; + unsigned long port; + int ret; + char buf[MAX_PORTAL_LEN + 1]; + + if 
(strlen(name) > MAX_PORTAL_LEN) { + pr_err("strlen(name): %d exceeds MAX_PORTAL_LEN: %d\n", + (int)strlen(name), MAX_PORTAL_LEN); + return ERR_PTR(-EOVERFLOW); + } + memset(buf, 0, MAX_PORTAL_LEN + 1); + snprintf(buf, MAX_PORTAL_LEN, "%s", name); + + memset(&sockaddr, 0, sizeof(struct __kernel_sockaddr_storage)); + + str = strstr(buf, "["); + if (str) { + const char *end; + + str2 = strstr(str, "]"); + if (!str2) { + pr_err("Unable to locate trailing \"]\"" + " in IPv6 iSCSI network portal address\n"); + return ERR_PTR(-EINVAL); + } + str++; /* Skip over leading "[" */ + *str2 = '\0'; /* Terminate the IPv6 address */ + str2++; /* Skip over the "]" */ + port_str = strstr(str2, ":"); + if (!port_str) { + pr_err("Unable to locate \":port\"" + " in IPv6 iSCSI network portal address\n"); + return ERR_PTR(-EINVAL); + } + *port_str = '\0'; /* Terminate string for IP */ + port_str++; /* Skip over ":" */ + + ret = strict_strtoul(port_str, 0, &port); + if (ret < 0) { + pr_err("strict_strtoul() failed for port_str: %d\n", ret); + return ERR_PTR(ret); + } + sock_in6 = (struct sockaddr_in6 *)&sockaddr; + sock_in6->sin6_family = AF_INET6; + sock_in6->sin6_port = htons((unsigned short)port); + ret = in6_pton(str, IPV6_ADDRESS_SPACE, + (void *)&sock_in6->sin6_addr.in6_u, -1, &end); + if (ret <= 0) { + pr_err("in6_pton returned: %d\n", ret); + return ERR_PTR(-EINVAL); + } + } else { + str = ip_str = &buf[0]; + port_str = strstr(ip_str, ":"); + if (!port_str) { + pr_err("Unable to locate \":port\"" + " in IPv4 iSCSI network portal address\n"); + return ERR_PTR(-EINVAL); + } + *port_str = '\0'; /* Terminate string for IP */ + port_str++; /* Skip over ":" */ + + ret = strict_strtoul(port_str, 0, &port); + if (ret < 0) { + pr_err("strict_strtoul() failed for port_str: %d\n", ret); + return ERR_PTR(ret); + } + sock_in = (struct sockaddr_in *)&sockaddr; + sock_in->sin_family = AF_INET; + sock_in->sin_port = htons((unsigned short)port); + sock_in->sin_addr.s_addr = in_aton(ip_str); + } + 
tpg = container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg); + ret = iscsit_get_tpg(tpg); + if (ret < 0) + return ERR_PTR(-EINVAL); + + pr_debug("LIO_Target_ConfigFS: REGISTER -> %s TPGT: %hu" + " PORTAL: %s\n", + config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item), + tpg->tpgt, name); + /* + * Assume ISCSI_TCP by default. Other network portals for other + * iSCSI fabrics: + * + * Traditional iSCSI over SCTP (initial support) + * iSER/TCP (TODO, hardware available) + * iSER/SCTP (TODO, software emulation with osc-iwarp) + * iSER/IB (TODO, hardware available) + * + * can be enabled with atributes under + * sys/kernel/config/iscsi/$IQN/$TPG/np/$IP:$PORT/ + * + */ + tpg_np = iscsit_tpg_add_network_portal(tpg, &sockaddr, str, NULL, + ISCSI_TCP); + if (IS_ERR(tpg_np)) { + iscsit_put_tpg(tpg); + return ERR_PTR(PTR_ERR(tpg_np)); + } + pr_debug("LIO_Target_ConfigFS: addnptotpg done!\n"); + + iscsit_put_tpg(tpg); + return &tpg_np->se_tpg_np; +} + +static void lio_target_call_delnpfromtpg( + struct se_tpg_np *se_tpg_np) +{ + struct iscsi_portal_group *tpg; + struct iscsi_tpg_np *tpg_np; + struct se_portal_group *se_tpg; + int ret; + + tpg_np = container_of(se_tpg_np, struct iscsi_tpg_np, se_tpg_np); + tpg = tpg_np->tpg; + ret = iscsit_get_tpg(tpg); + if (ret < 0) + return; + + se_tpg = &tpg->tpg_se_tpg; + pr_debug("LIO_Target_ConfigFS: DEREGISTER -> %s TPGT: %hu" + " PORTAL: %s:%hu\n", config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item), + tpg->tpgt, tpg_np->tpg_np->np_ip, tpg_np->tpg_np->np_port); + + ret = iscsit_tpg_del_network_portal(tpg, tpg_np); + if (ret < 0) + goto out; + + pr_debug("LIO_Target_ConfigFS: delnpfromtpg done!\n"); +out: + iscsit_put_tpg(tpg); +} + +/* End items for lio_target_np_cit */ + +/* Start items for lio_target_nacl_attrib_cit */ + +#define DEF_NACL_ATTRIB(name) \ +static ssize_t iscsi_nacl_attrib_show_##name( \ + struct se_node_acl *se_nacl, \ + char *page) \ +{ \ + struct iscsi_node_acl *nacl = container_of(se_nacl, struct 
iscsi_node_acl, \ + se_node_acl); \ + \ + return sprintf(page, "%u\n", ISCSI_NODE_ATTRIB(nacl)->name); \ +} \ + \ +static ssize_t iscsi_nacl_attrib_store_##name( \ + struct se_node_acl *se_nacl, \ + const char *page, \ + size_t count) \ +{ \ + struct iscsi_node_acl *nacl = container_of(se_nacl, struct iscsi_node_acl, \ + se_node_acl); \ + char *endptr; \ + u32 val; \ + int ret; \ + \ + val = simple_strtoul(page, &endptr, 0); \ + ret = iscsit_na_##name(nacl, val); \ + if (ret < 0) \ + return ret; \ + \ + return count; \ +} + +#define NACL_ATTR(_name, _mode) TF_NACL_ATTRIB_ATTR(iscsi, _name, _mode); +/* + * Define iscsi_node_attrib_s_dataout_timeout + */ +DEF_NACL_ATTRIB(dataout_timeout); +NACL_ATTR(dataout_timeout, S_IRUGO | S_IWUSR); +/* + * Define iscsi_node_attrib_s_dataout_timeout_retries + */ +DEF_NACL_ATTRIB(dataout_timeout_retries); +NACL_ATTR(dataout_timeout_retries, S_IRUGO | S_IWUSR); +/* + * Define iscsi_node_attrib_s_default_erl + */ +DEF_NACL_ATTRIB(default_erl); +NACL_ATTR(default_erl, S_IRUGO | S_IWUSR); +/* + * Define iscsi_node_attrib_s_nopin_timeout + */ +DEF_NACL_ATTRIB(nopin_timeout); +NACL_ATTR(nopin_timeout, S_IRUGO | S_IWUSR); +/* + * Define iscsi_node_attrib_s_nopin_response_timeout + */ +DEF_NACL_ATTRIB(nopin_response_timeout); +NACL_ATTR(nopin_response_timeout, S_IRUGO | S_IWUSR); +/* + * Define iscsi_node_attrib_s_random_datain_pdu_offsets + */ +DEF_NACL_ATTRIB(random_datain_pdu_offsets); +NACL_ATTR(random_datain_pdu_offsets, S_IRUGO | S_IWUSR); +/* + * Define iscsi_node_attrib_s_random_datain_seq_offsets + */ +DEF_NACL_ATTRIB(random_datain_seq_offsets); +NACL_ATTR(random_datain_seq_offsets, S_IRUGO | S_IWUSR); +/* + * Define iscsi_node_attrib_s_random_r2t_offsets + */ +DEF_NACL_ATTRIB(random_r2t_offsets); +NACL_ATTR(random_r2t_offsets, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *lio_target_nacl_attrib_attrs[] = { + &iscsi_nacl_attrib_dataout_timeout.attr, + &iscsi_nacl_attrib_dataout_timeout_retries.attr, + 
/*
 * Generate show/store handlers for one string member of
 * struct iscsi_node_auth (nacl->node_auth.<name>).  Both handlers require
 * CAP_SYS_ADMIN.
 *
 * show:  prints the stored string followed by a newline.
 * store: copies the written text into the member.  Input that does not
 *        fit, terminator included, is rejected with -EINVAL rather than
 *        copied with a PAGE_SIZE bound that has nothing to do with the
 *        destination.  A value starting with "NULL" clears <flags> in
 *        naf_flags, anything else sets it; authenticate_target is then
 *        recomputed as "both NAF_USERID_IN_SET and NAF_PASSWORD_IN_SET
 *        are set".
 *
 * NOTE(review): sizeof(auth->name) assumes the members are char arrays
 * embedded in struct iscsi_node_auth (they are written in place here and
 * never allocated) -- confirm against the struct definition.
 */
#define __DEF_NACL_AUTH_STR(prefix, name, flags)			\
static ssize_t __iscsi_##prefix##_show_##name(				\
	struct iscsi_node_acl *nacl,					\
	char *page)							\
{									\
	struct iscsi_node_auth *auth = &nacl->node_auth;		\
									\
	if (!capable(CAP_SYS_ADMIN))					\
		return -EPERM;						\
	return snprintf(page, PAGE_SIZE, "%s\n", auth->name);		\
}									\
									\
static ssize_t __iscsi_##prefix##_store_##name(				\
	struct iscsi_node_acl *nacl,					\
	const char *page,						\
	size_t count)							\
{									\
	struct iscsi_node_auth *auth = &nacl->node_auth;		\
									\
	if (!capable(CAP_SYS_ADMIN))					\
		return -EPERM;						\
	if (count >= sizeof(auth->name))				\
		return -EINVAL;						\
									\
	snprintf(auth->name, sizeof(auth->name), "%s", page);		\
	if (!strncmp("NULL", auth->name, 4))				\
		auth->naf_flags &= ~flags;				\
	else								\
		auth->naf_flags |= flags;				\
									\
	if ((auth->naf_flags & NAF_USERID_IN_SET) &&			\
	    (auth->naf_flags & NAF_PASSWORD_IN_SET))			\
		auth->authenticate_target = 1;				\
	else								\
		auth->authenticate_target = 0;				\
									\
	return count;							\
}
/*
 * Generate the read-only configfs show handler for an integer member of
 * the node's auth settings.  It expands __DEF_NACL_AUTH_INT() for the
 * "nacl_auth" prefix and adds a wrapper that converts the generic
 * struct se_node_acl into the struct iscsi_node_acl embedding it.
 */
#define DEF_NACL_AUTH_INT(name)						\
	__DEF_NACL_AUTH_INT(nacl_auth, name)				\
static ssize_t iscsi_nacl_auth_show_##name(				\
	struct se_node_acl *nacl,					\
	char *page)							\
{									\
	struct iscsi_node_acl *iscsi_nacl = container_of(nacl,		\
			struct iscsi_node_acl, se_node_acl);		\
									\
	return __iscsi_nacl_auth_show_##name(iscsi_nacl, page);	\
}
if (!se_sess) { \ + rb = snprintf(page, PAGE_SIZE, \ + "No Active iSCSI Session\n"); \ + } else { \ + sess = se_sess->fabric_sess_ptr; \ + rb = snprintf(page, PAGE_SIZE, "%u\n", \ + (u32)sess->sess_ops->name); \ + } \ + spin_unlock_bh(&se_nacl->nacl_sess_lock); \ + \ + return rb; \ +} + +#define NACL_PARAM_ATTR(_name) TF_NACL_PARAM_ATTR_RO(iscsi, _name); + +DEF_NACL_PARAM(MaxConnections); +NACL_PARAM_ATTR(MaxConnections); + +DEF_NACL_PARAM(InitialR2T); +NACL_PARAM_ATTR(InitialR2T); + +DEF_NACL_PARAM(ImmediateData); +NACL_PARAM_ATTR(ImmediateData); + +DEF_NACL_PARAM(MaxBurstLength); +NACL_PARAM_ATTR(MaxBurstLength); + +DEF_NACL_PARAM(FirstBurstLength); +NACL_PARAM_ATTR(FirstBurstLength); + +DEF_NACL_PARAM(DefaultTime2Wait); +NACL_PARAM_ATTR(DefaultTime2Wait); + +DEF_NACL_PARAM(DefaultTime2Retain); +NACL_PARAM_ATTR(DefaultTime2Retain); + +DEF_NACL_PARAM(MaxOutstandingR2T); +NACL_PARAM_ATTR(MaxOutstandingR2T); + +DEF_NACL_PARAM(DataPDUInOrder); +NACL_PARAM_ATTR(DataPDUInOrder); + +DEF_NACL_PARAM(DataSequenceInOrder); +NACL_PARAM_ATTR(DataSequenceInOrder); + +DEF_NACL_PARAM(ErrorRecoveryLevel); +NACL_PARAM_ATTR(ErrorRecoveryLevel); + +static struct configfs_attribute *lio_target_nacl_param_attrs[] = { + &iscsi_nacl_param_MaxConnections.attr, + &iscsi_nacl_param_InitialR2T.attr, + &iscsi_nacl_param_ImmediateData.attr, + &iscsi_nacl_param_MaxBurstLength.attr, + &iscsi_nacl_param_FirstBurstLength.attr, + &iscsi_nacl_param_DefaultTime2Wait.attr, + &iscsi_nacl_param_DefaultTime2Retain.attr, + &iscsi_nacl_param_MaxOutstandingR2T.attr, + &iscsi_nacl_param_DataPDUInOrder.attr, + &iscsi_nacl_param_DataSequenceInOrder.attr, + &iscsi_nacl_param_ErrorRecoveryLevel.attr, + NULL, +}; + +/* End items for lio_target_nacl_param_cit */ + +/* Start items for lio_target_acl_cit */ + +static ssize_t lio_target_nacl_show_info( + struct se_node_acl *se_nacl, + char *page) +{ + struct iscsi_session *sess; + struct iscsi_conn *conn; + struct se_session *se_sess; + ssize_t rb = 0; + + 
spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (!se_sess) { + rb += sprintf(page+rb, "No active iSCSI Session for Initiator" + " Endpoint: %s\n", se_nacl->initiatorname); + } else { + sess = se_sess->fabric_sess_ptr; + + if (sess->sess_ops->InitiatorName) + rb += sprintf(page+rb, "InitiatorName: %s\n", + sess->sess_ops->InitiatorName); + if (sess->sess_ops->InitiatorAlias) + rb += sprintf(page+rb, "InitiatorAlias: %s\n", + sess->sess_ops->InitiatorAlias); + + rb += sprintf(page+rb, "LIO Session ID: %u " + "ISID: 0x%02x %02x %02x %02x %02x %02x " + "TSIH: %hu ", sess->sid, + sess->isid[0], sess->isid[1], sess->isid[2], + sess->isid[3], sess->isid[4], sess->isid[5], + sess->tsih); + rb += sprintf(page+rb, "SessionType: %s\n", + (sess->sess_ops->SessionType) ? + "Discovery" : "Normal"); + rb += sprintf(page+rb, "Session State: "); + switch (sess->session_state) { + case TARG_SESS_STATE_FREE: + rb += sprintf(page+rb, "TARG_SESS_FREE\n"); + break; + case TARG_SESS_STATE_ACTIVE: + rb += sprintf(page+rb, "TARG_SESS_STATE_ACTIVE\n"); + break; + case TARG_SESS_STATE_LOGGED_IN: + rb += sprintf(page+rb, "TARG_SESS_STATE_LOGGED_IN\n"); + break; + case TARG_SESS_STATE_FAILED: + rb += sprintf(page+rb, "TARG_SESS_STATE_FAILED\n"); + break; + case TARG_SESS_STATE_IN_CONTINUE: + rb += sprintf(page+rb, "TARG_SESS_STATE_IN_CONTINUE\n"); + break; + default: + rb += sprintf(page+rb, "ERROR: Unknown Session" + " State!\n"); + break; + } + + rb += sprintf(page+rb, "---------------------[iSCSI Session" + " Values]-----------------------\n"); + rb += sprintf(page+rb, " CmdSN/WR : CmdSN/WC : ExpCmdSN" + " : MaxCmdSN : ITT : TTT\n"); + rb += sprintf(page+rb, " 0x%08x 0x%08x 0x%08x 0x%08x" + " 0x%08x 0x%08x\n", + sess->cmdsn_window, + (sess->max_cmd_sn - sess->exp_cmd_sn) + 1, + sess->exp_cmd_sn, sess->max_cmd_sn, + sess->init_task_tag, sess->targ_xfer_tag); + rb += sprintf(page+rb, "----------------------[iSCSI" + " Connections]-------------------------\n"); + + 
spin_lock(&sess->conn_lock); + list_for_each_entry(conn, &sess->sess_conn_list, conn_list) { + rb += sprintf(page+rb, "CID: %hu Connection" + " State: ", conn->cid); + switch (conn->conn_state) { + case TARG_CONN_STATE_FREE: + rb += sprintf(page+rb, + "TARG_CONN_STATE_FREE\n"); + break; + case TARG_CONN_STATE_XPT_UP: + rb += sprintf(page+rb, + "TARG_CONN_STATE_XPT_UP\n"); + break; + case TARG_CONN_STATE_IN_LOGIN: + rb += sprintf(page+rb, + "TARG_CONN_STATE_IN_LOGIN\n"); + break; + case TARG_CONN_STATE_LOGGED_IN: + rb += sprintf(page+rb, + "TARG_CONN_STATE_LOGGED_IN\n"); + break; + case TARG_CONN_STATE_IN_LOGOUT: + rb += sprintf(page+rb, + "TARG_CONN_STATE_IN_LOGOUT\n"); + break; + case TARG_CONN_STATE_LOGOUT_REQUESTED: + rb += sprintf(page+rb, + "TARG_CONN_STATE_LOGOUT_REQUESTED\n"); + break; + case TARG_CONN_STATE_CLEANUP_WAIT: + rb += sprintf(page+rb, + "TARG_CONN_STATE_CLEANUP_WAIT\n"); + break; + default: + rb += sprintf(page+rb, + "ERROR: Unknown Connection State!\n"); + break; + } + + rb += sprintf(page+rb, " Address %s %s", conn->login_ip, + (conn->network_transport == ISCSI_TCP) ? 
+ "TCP" : "SCTP"); + rb += sprintf(page+rb, " StatSN: 0x%08x\n", + conn->stat_sn); + } + spin_unlock(&sess->conn_lock); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return rb; +} + +TF_NACL_BASE_ATTR_RO(lio_target, info); + +static ssize_t lio_target_nacl_show_cmdsn_depth( + struct se_node_acl *se_nacl, + char *page) +{ + return sprintf(page, "%u\n", se_nacl->queue_depth); +} + +static ssize_t lio_target_nacl_store_cmdsn_depth( + struct se_node_acl *se_nacl, + const char *page, + size_t count) +{ + struct se_portal_group *se_tpg = se_nacl->se_tpg; + struct iscsi_portal_group *tpg = container_of(se_tpg, + struct iscsi_portal_group, tpg_se_tpg); + struct config_item *acl_ci, *tpg_ci, *wwn_ci; + char *endptr; + u32 cmdsn_depth = 0; + int ret; + + cmdsn_depth = simple_strtoul(page, &endptr, 0); + if (cmdsn_depth > TA_DEFAULT_CMDSN_DEPTH_MAX) { + pr_err("Passed cmdsn_depth: %u exceeds" + " TA_DEFAULT_CMDSN_DEPTH_MAX: %u\n", cmdsn_depth, + TA_DEFAULT_CMDSN_DEPTH_MAX); + return -EINVAL; + } + acl_ci = &se_nacl->acl_group.cg_item; + if (!acl_ci) { + pr_err("Unable to locatel acl_ci\n"); + return -EINVAL; + } + tpg_ci = &acl_ci->ci_parent->ci_group->cg_item; + if (!tpg_ci) { + pr_err("Unable to locate tpg_ci\n"); + return -EINVAL; + } + wwn_ci = &tpg_ci->ci_group->cg_item; + if (!wwn_ci) { + pr_err("Unable to locate config_item wwn_ci\n"); + return -EINVAL; + } + + if (iscsit_get_tpg(tpg) < 0) + return -EINVAL; + /* + * iscsit_tpg_set_initiator_node_queue_depth() assumes force=1 + */ + ret = iscsit_tpg_set_initiator_node_queue_depth(tpg, + config_item_name(acl_ci), cmdsn_depth, 1); + + pr_debug("LIO_Target_ConfigFS: %s/%s Set CmdSN Window: %u for" + "InitiatorName: %s\n", config_item_name(wwn_ci), + config_item_name(tpg_ci), cmdsn_depth, + config_item_name(acl_ci)); + + iscsit_put_tpg(tpg); + return (!ret) ? 
count : (ssize_t)ret; +} + +TF_NACL_BASE_ATTR(lio_target, cmdsn_depth, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *lio_target_initiator_attrs[] = { + &lio_target_nacl_info.attr, + &lio_target_nacl_cmdsn_depth.attr, + NULL, +}; + +static struct se_node_acl *lio_tpg_alloc_fabric_acl( + struct se_portal_group *se_tpg) +{ + struct iscsi_node_acl *acl; + + acl = kzalloc(sizeof(struct iscsi_node_acl), GFP_KERNEL); + if (!acl) { + pr_err("Unable to allocate memory for struct iscsi_node_acl\n"); + return NULL; + } + + return &acl->se_node_acl; +} + +static struct se_node_acl *lio_target_make_nodeacl( + struct se_portal_group *se_tpg, + struct config_group *group, + const char *name) +{ + struct config_group *stats_cg; + struct iscsi_node_acl *acl; + struct se_node_acl *se_nacl_new, *se_nacl; + struct iscsi_portal_group *tpg = container_of(se_tpg, + struct iscsi_portal_group, tpg_se_tpg); + u32 cmdsn_depth; + + se_nacl_new = lio_tpg_alloc_fabric_acl(se_tpg); + if (!se_nacl_new) + return ERR_PTR(-ENOMEM); + + acl = container_of(se_nacl_new, struct iscsi_node_acl, + se_node_acl); + + cmdsn_depth = ISCSI_TPG_ATTRIB(tpg)->default_cmdsn_depth; + /* + * se_nacl_new may be released by core_tpg_add_initiator_node_acl() + * when converting a NdoeACL from demo mode -> explict + */ + se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new, + name, cmdsn_depth); + if (IS_ERR(se_nacl)) + return se_nacl; + + stats_cg = &acl->se_node_acl.acl_fabric_stat_group; + + stats_cg->default_groups = kzalloc(sizeof(struct config_group) * 2, + GFP_KERNEL); + if (!stats_cg->default_groups) { + pr_err("Unable to allocate memory for" + " stats_cg->default_groups\n"); + core_tpg_del_initiator_node_acl(se_tpg, se_nacl, 1); + kfree(acl); + return ERR_PTR(-ENOMEM); + } + + stats_cg->default_groups[0] = &NODE_STAT_GRPS(acl)->iscsi_sess_stats_group; + stats_cg->default_groups[1] = NULL; + config_group_init_type_name(&NODE_STAT_GRPS(acl)->iscsi_sess_stats_group, + "iscsi_sess_stats", 
&iscsi_stat_sess_cit); + + return se_nacl; +} + +static void lio_target_drop_nodeacl( + struct se_node_acl *se_nacl) +{ + struct se_portal_group *se_tpg = se_nacl->se_tpg; + struct iscsi_node_acl *acl = container_of(se_nacl, + struct iscsi_node_acl, se_node_acl); + struct config_item *df_item; + struct config_group *stats_cg; + int i; + + stats_cg = &acl->se_node_acl.acl_fabric_stat_group; + for (i = 0; stats_cg->default_groups[i]; i++) { + df_item = &stats_cg->default_groups[i]->cg_item; + stats_cg->default_groups[i] = NULL; + config_item_put(df_item); + } + kfree(stats_cg->default_groups); + + core_tpg_del_initiator_node_acl(se_tpg, se_nacl, 1); + kfree(acl); +} + +/* End items for lio_target_acl_cit */ + +/* Start items for lio_target_tpg_attrib_cit */ + +#define DEF_TPG_ATTRIB(name) \ + \ +static ssize_t iscsi_tpg_attrib_show_##name( \ + struct se_portal_group *se_tpg, \ + char *page) \ +{ \ + struct iscsi_portal_group *tpg = container_of(se_tpg, \ + struct iscsi_portal_group, tpg_se_tpg); \ + ssize_t rb; \ + \ + if (iscsit_get_tpg(tpg) < 0) \ + return -EINVAL; \ + \ + rb = sprintf(page, "%u\n", ISCSI_TPG_ATTRIB(tpg)->name); \ + iscsit_put_tpg(tpg); \ + return rb; \ +} \ + \ +static ssize_t iscsi_tpg_attrib_store_##name( \ + struct se_portal_group *se_tpg, \ + const char *page, \ + size_t count) \ +{ \ + struct iscsi_portal_group *tpg = container_of(se_tpg, \ + struct iscsi_portal_group, tpg_se_tpg); \ + char *endptr; \ + u32 val; \ + int ret; \ + \ + if (iscsit_get_tpg(tpg) < 0) \ + return -EINVAL; \ + \ + val = simple_strtoul(page, &endptr, 0); \ + ret = iscsit_ta_##name(tpg, val); \ + if (ret < 0) \ + goto out; \ + \ + iscsit_put_tpg(tpg); \ + return count; \ +out: \ + iscsit_put_tpg(tpg); \ + return ret; \ +} + +#define TPG_ATTR(_name, _mode) TF_TPG_ATTRIB_ATTR(iscsi, _name, _mode); + +/* + * Define iscsi_tpg_attrib_s_authentication + */ +DEF_TPG_ATTRIB(authentication); +TPG_ATTR(authentication, S_IRUGO | S_IWUSR); +/* + * Define 
iscsi_tpg_attrib_s_login_timeout + */ +DEF_TPG_ATTRIB(login_timeout); +TPG_ATTR(login_timeout, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_netif_timeout + */ +DEF_TPG_ATTRIB(netif_timeout); +TPG_ATTR(netif_timeout, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_generate_node_acls + */ +DEF_TPG_ATTRIB(generate_node_acls); +TPG_ATTR(generate_node_acls, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_default_cmdsn_depth + */ +DEF_TPG_ATTRIB(default_cmdsn_depth); +TPG_ATTR(default_cmdsn_depth, S_IRUGO | S_IWUSR); +/* + Define iscsi_tpg_attrib_s_cache_dynamic_acls + */ +DEF_TPG_ATTRIB(cache_dynamic_acls); +TPG_ATTR(cache_dynamic_acls, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_demo_mode_write_protect + */ +DEF_TPG_ATTRIB(demo_mode_write_protect); +TPG_ATTR(demo_mode_write_protect, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_prod_mode_write_protect + */ +DEF_TPG_ATTRIB(prod_mode_write_protect); +TPG_ATTR(prod_mode_write_protect, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { + &iscsi_tpg_attrib_authentication.attr, + &iscsi_tpg_attrib_login_timeout.attr, + &iscsi_tpg_attrib_netif_timeout.attr, + &iscsi_tpg_attrib_generate_node_acls.attr, + &iscsi_tpg_attrib_default_cmdsn_depth.attr, + &iscsi_tpg_attrib_cache_dynamic_acls.attr, + &iscsi_tpg_attrib_demo_mode_write_protect.attr, + &iscsi_tpg_attrib_prod_mode_write_protect.attr, + NULL, +}; + +/* End items for lio_target_tpg_attrib_cit */ + +/* Start items for lio_target_tpg_param_cit */ + +#define DEF_TPG_PARAM(name) \ +static ssize_t iscsi_tpg_param_show_##name( \ + struct se_portal_group *se_tpg, \ + char *page) \ +{ \ + struct iscsi_portal_group *tpg = container_of(se_tpg, \ + struct iscsi_portal_group, tpg_se_tpg); \ + struct iscsi_param *param; \ + ssize_t rb; \ + \ + if (iscsit_get_tpg(tpg) < 0) \ + return -EINVAL; \ + \ + param = iscsi_find_param_from_key(__stringify(name), \ + tpg->param_list); \ + if (!param) { \ + 
iscsit_put_tpg(tpg); \ + return -EINVAL; \ + } \ + rb = snprintf(page, PAGE_SIZE, "%s\n", param->value); \ + \ + iscsit_put_tpg(tpg); \ + return rb; \ +} \ +static ssize_t iscsi_tpg_param_store_##name( \ + struct se_portal_group *se_tpg, \ + const char *page, \ + size_t count) \ +{ \ + struct iscsi_portal_group *tpg = container_of(se_tpg, \ + struct iscsi_portal_group, tpg_se_tpg); \ + char *buf; \ + int ret; \ + \ + buf = kzalloc(PAGE_SIZE, GFP_KERNEL); \ + if (!buf) \ + return -ENOMEM; \ + snprintf(buf, PAGE_SIZE, "%s=%s", __stringify(name), page); \ + buf[strlen(buf)-1] = '\0'; /* Kill newline */ \ + \ + if (iscsit_get_tpg(tpg) < 0) { \ + kfree(buf); \ + return -EINVAL; \ + } \ + \ + ret = iscsi_change_param_value(buf, tpg->param_list, 1); \ + if (ret < 0) \ + goto out; \ + \ + kfree(buf); \ + iscsit_put_tpg(tpg); \ + return count; \ +out: \ + kfree(buf); \ + iscsit_put_tpg(tpg); \ + return -EINVAL; \ +} + +#define TPG_PARAM_ATTR(_name, _mode) TF_TPG_PARAM_ATTR(iscsi, _name, _mode); + +DEF_TPG_PARAM(AuthMethod); +TPG_PARAM_ATTR(AuthMethod, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(HeaderDigest); +TPG_PARAM_ATTR(HeaderDigest, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(DataDigest); +TPG_PARAM_ATTR(DataDigest, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(MaxConnections); +TPG_PARAM_ATTR(MaxConnections, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(TargetAlias); +TPG_PARAM_ATTR(TargetAlias, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(InitialR2T); +TPG_PARAM_ATTR(InitialR2T, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(ImmediateData); +TPG_PARAM_ATTR(ImmediateData, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(MaxRecvDataSegmentLength); +TPG_PARAM_ATTR(MaxRecvDataSegmentLength, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(MaxBurstLength); +TPG_PARAM_ATTR(MaxBurstLength, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(FirstBurstLength); +TPG_PARAM_ATTR(FirstBurstLength, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(DefaultTime2Wait); +TPG_PARAM_ATTR(DefaultTime2Wait, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(DefaultTime2Retain); 
+TPG_PARAM_ATTR(DefaultTime2Retain, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(MaxOutstandingR2T); +TPG_PARAM_ATTR(MaxOutstandingR2T, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(DataPDUInOrder); +TPG_PARAM_ATTR(DataPDUInOrder, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(DataSequenceInOrder); +TPG_PARAM_ATTR(DataSequenceInOrder, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(ErrorRecoveryLevel); +TPG_PARAM_ATTR(ErrorRecoveryLevel, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(IFMarker); +TPG_PARAM_ATTR(IFMarker, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(OFMarker); +TPG_PARAM_ATTR(OFMarker, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(IFMarkInt); +TPG_PARAM_ATTR(IFMarkInt, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(OFMarkInt); +TPG_PARAM_ATTR(OFMarkInt, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *lio_target_tpg_param_attrs[] = { + &iscsi_tpg_param_AuthMethod.attr, + &iscsi_tpg_param_HeaderDigest.attr, + &iscsi_tpg_param_DataDigest.attr, + &iscsi_tpg_param_MaxConnections.attr, + &iscsi_tpg_param_TargetAlias.attr, + &iscsi_tpg_param_InitialR2T.attr, + &iscsi_tpg_param_ImmediateData.attr, + &iscsi_tpg_param_MaxRecvDataSegmentLength.attr, + &iscsi_tpg_param_MaxBurstLength.attr, + &iscsi_tpg_param_FirstBurstLength.attr, + &iscsi_tpg_param_DefaultTime2Wait.attr, + &iscsi_tpg_param_DefaultTime2Retain.attr, + &iscsi_tpg_param_MaxOutstandingR2T.attr, + &iscsi_tpg_param_DataPDUInOrder.attr, + &iscsi_tpg_param_DataSequenceInOrder.attr, + &iscsi_tpg_param_ErrorRecoveryLevel.attr, + &iscsi_tpg_param_IFMarker.attr, + &iscsi_tpg_param_OFMarker.attr, + &iscsi_tpg_param_IFMarkInt.attr, + &iscsi_tpg_param_OFMarkInt.attr, + NULL, +}; + +/* End items for lio_target_tpg_param_cit */ + +/* Start items for lio_target_tpg_cit */ + +static ssize_t lio_target_tpg_show_enable( + struct se_portal_group *se_tpg, + char *page) +{ + struct iscsi_portal_group *tpg = container_of(se_tpg, + struct iscsi_portal_group, tpg_se_tpg); + ssize_t len; + + spin_lock(&tpg->tpg_state_lock); + len = sprintf(page, "%d\n", + (tpg->tpg_state == 
TPG_STATE_ACTIVE) ? 1 : 0); + spin_unlock(&tpg->tpg_state_lock); + + return len; +} + +static ssize_t lio_target_tpg_store_enable( + struct se_portal_group *se_tpg, + const char *page, + size_t count) +{ + struct iscsi_portal_group *tpg = container_of(se_tpg, + struct iscsi_portal_group, tpg_se_tpg); + char *endptr; + u32 op; + int ret = 0; + + op = simple_strtoul(page, &endptr, 0); + if ((op != 1) && (op != 0)) { + pr_err("Illegal value for tpg_enable: %u\n", op); + return -EINVAL; + } + + ret = iscsit_get_tpg(tpg); + if (ret < 0) + return -EINVAL; + + if (op) { + ret = iscsit_tpg_enable_portal_group(tpg); + if (ret < 0) + goto out; + } else { + /* + * iscsit_tpg_disable_portal_group() assumes force=1 + */ + ret = iscsit_tpg_disable_portal_group(tpg, 1); + if (ret < 0) + goto out; + } + + iscsit_put_tpg(tpg); + return count; +out: + iscsit_put_tpg(tpg); + return -EINVAL; +} + +TF_TPG_BASE_ATTR(lio_target, enable, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *lio_target_tpg_attrs[] = { + &lio_target_tpg_enable.attr, + NULL, +}; + +/* End items for lio_target_tpg_cit */ + +/* Start items for lio_target_tiqn_cit */ + +struct se_portal_group *lio_target_tiqn_addtpg( + struct se_wwn *wwn, + struct config_group *group, + const char *name) +{ + struct iscsi_portal_group *tpg; + struct iscsi_tiqn *tiqn; + char *tpgt_str, *end_ptr; + int ret = 0; + unsigned short int tpgt; + + tiqn = container_of(wwn, struct iscsi_tiqn, tiqn_wwn); + /* + * Only tpgt_# directory groups can be created below + * target/iscsi/iqn.superturodiskarry/ + */ + tpgt_str = strstr(name, "tpgt_"); + if (!tpgt_str) { + pr_err("Unable to locate \"tpgt_#\" directory" + " group\n"); + return NULL; + } + tpgt_str += 5; /* Skip ahead of "tpgt_" */ + tpgt = (unsigned short int) simple_strtoul(tpgt_str, &end_ptr, 0); + + tpg = iscsit_alloc_portal_group(tiqn, tpgt); + if (!tpg) + return NULL; + + ret = core_tpg_register( + &lio_target_fabric_configfs->tf_ops, + wwn, &tpg->tpg_se_tpg, (void *)tpg, + 
TRANSPORT_TPG_TYPE_NORMAL); + if (ret < 0) + return NULL; + + ret = iscsit_tpg_add_portal_group(tiqn, tpg); + if (ret != 0) + goto out; + + pr_debug("LIO_Target_ConfigFS: REGISTER -> %s\n", tiqn->tiqn); + pr_debug("LIO_Target_ConfigFS: REGISTER -> Allocated TPG: %s\n", + name); + return &tpg->tpg_se_tpg; +out: + core_tpg_deregister(&tpg->tpg_se_tpg); + kfree(tpg); + return NULL; +} + +void lio_target_tiqn_deltpg(struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg; + struct iscsi_tiqn *tiqn; + + tpg = container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg); + tiqn = tpg->tpg_tiqn; + /* + * iscsit_tpg_del_portal_group() assumes force=1 + */ + pr_debug("LIO_Target_ConfigFS: DEREGISTER -> Releasing TPG\n"); + iscsit_tpg_del_portal_group(tiqn, tpg, 1); +} + +/* End items for lio_target_tiqn_cit */ + +/* Start LIO-Target TIQN struct contig_item lio_target_cit */ + +static ssize_t lio_target_wwn_show_attr_lio_version( + struct target_fabric_configfs *tf, + char *page) +{ + return sprintf(page, "RisingTide Systems Linux-iSCSI Target "ISCSIT_VERSION"\n"); +} + +TF_WWN_ATTR_RO(lio_target, lio_version); + +static struct configfs_attribute *lio_target_wwn_attrs[] = { + &lio_target_wwn_lio_version.attr, + NULL, +}; + +struct se_wwn *lio_target_call_coreaddtiqn( + struct target_fabric_configfs *tf, + struct config_group *group, + const char *name) +{ + struct config_group *stats_cg; + struct iscsi_tiqn *tiqn; + + tiqn = iscsit_add_tiqn((unsigned char *)name); + if (IS_ERR(tiqn)) + return ERR_PTR(PTR_ERR(tiqn)); + /* + * Setup struct iscsi_wwn_stat_grps for se_wwn->fabric_stat_group. 
+ */ + stats_cg = &tiqn->tiqn_wwn.fabric_stat_group; + + stats_cg->default_groups = kzalloc(sizeof(struct config_group) * 6, + GFP_KERNEL); + if (!stats_cg->default_groups) { + pr_err("Unable to allocate memory for" + " stats_cg->default_groups\n"); + iscsit_del_tiqn(tiqn); + return ERR_PTR(-ENOMEM); + } + + stats_cg->default_groups[0] = &WWN_STAT_GRPS(tiqn)->iscsi_instance_group; + stats_cg->default_groups[1] = &WWN_STAT_GRPS(tiqn)->iscsi_sess_err_group; + stats_cg->default_groups[2] = &WWN_STAT_GRPS(tiqn)->iscsi_tgt_attr_group; + stats_cg->default_groups[3] = &WWN_STAT_GRPS(tiqn)->iscsi_login_stats_group; + stats_cg->default_groups[4] = &WWN_STAT_GRPS(tiqn)->iscsi_logout_stats_group; + stats_cg->default_groups[5] = NULL; + config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_instance_group, + "iscsi_instance", &iscsi_stat_instance_cit); + config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_sess_err_group, + "iscsi_sess_err", &iscsi_stat_sess_err_cit); + config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_tgt_attr_group, + "iscsi_tgt_attr", &iscsi_stat_tgt_attr_cit); + config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_login_stats_group, + "iscsi_login_stats", &iscsi_stat_login_cit); + config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_logout_stats_group, + "iscsi_logout_stats", &iscsi_stat_logout_cit); + + pr_debug("LIO_Target_ConfigFS: REGISTER -> %s\n", tiqn->tiqn); + pr_debug("LIO_Target_ConfigFS: REGISTER -> Allocated Node:" + " %s\n", name); + return &tiqn->tiqn_wwn; +} + +void lio_target_call_coredeltiqn( + struct se_wwn *wwn) +{ + struct iscsi_tiqn *tiqn = container_of(wwn, struct iscsi_tiqn, tiqn_wwn); + struct config_item *df_item; + struct config_group *stats_cg; + int i; + + stats_cg = &tiqn->tiqn_wwn.fabric_stat_group; + for (i = 0; stats_cg->default_groups[i]; i++) { + df_item = &stats_cg->default_groups[i]->cg_item; + stats_cg->default_groups[i] = NULL; + config_item_put(df_item); + } + kfree(stats_cg->default_groups); + + 
pr_debug("LIO_Target_ConfigFS: DEREGISTER -> %s\n", + tiqn->tiqn); + iscsit_del_tiqn(tiqn); +} + +/* End LIO-Target TIQN struct contig_lio_target_cit */ + +/* Start lio_target_discovery_auth_cit */ + +#define DEF_DISC_AUTH_STR(name, flags) \ + __DEF_NACL_AUTH_STR(disc, name, flags) \ +static ssize_t iscsi_disc_show_##name( \ + struct target_fabric_configfs *tf, \ + char *page) \ +{ \ + return __iscsi_disc_show_##name(&iscsit_global->discovery_acl, \ + page); \ +} \ +static ssize_t iscsi_disc_store_##name( \ + struct target_fabric_configfs *tf, \ + const char *page, \ + size_t count) \ +{ \ + return __iscsi_disc_store_##name(&iscsit_global->discovery_acl, \ + page, count); \ +} + +#define DEF_DISC_AUTH_INT(name) \ + __DEF_NACL_AUTH_INT(disc, name) \ +static ssize_t iscsi_disc_show_##name( \ + struct target_fabric_configfs *tf, \ + char *page) \ +{ \ + return __iscsi_disc_show_##name(&iscsit_global->discovery_acl, \ + page); \ +} + +#define DISC_AUTH_ATTR(_name, _mode) TF_DISC_ATTR(iscsi, _name, _mode) +#define DISC_AUTH_ATTR_RO(_name) TF_DISC_ATTR_RO(iscsi, _name) + +/* + * One-way authentication userid + */ +DEF_DISC_AUTH_STR(userid, NAF_USERID_SET); +DISC_AUTH_ATTR(userid, S_IRUGO | S_IWUSR); +/* + * One-way authentication password + */ +DEF_DISC_AUTH_STR(password, NAF_PASSWORD_SET); +DISC_AUTH_ATTR(password, S_IRUGO | S_IWUSR); +/* + * Enforce mutual authentication + */ +DEF_DISC_AUTH_INT(authenticate_target); +DISC_AUTH_ATTR_RO(authenticate_target); +/* + * Mutual authentication userid + */ +DEF_DISC_AUTH_STR(userid_mutual, NAF_USERID_IN_SET); +DISC_AUTH_ATTR(userid_mutual, S_IRUGO | S_IWUSR); +/* + * Mutual authentication password + */ +DEF_DISC_AUTH_STR(password_mutual, NAF_PASSWORD_IN_SET); +DISC_AUTH_ATTR(password_mutual, S_IRUGO | S_IWUSR); + +/* + * enforce_discovery_auth + */ +static ssize_t iscsi_disc_show_enforce_discovery_auth( + struct target_fabric_configfs *tf, + char *page) +{ + struct iscsi_node_auth *discovery_auth = 
&iscsit_global->discovery_acl.node_auth; + + return sprintf(page, "%d\n", discovery_auth->enforce_discovery_auth); +} + +static ssize_t iscsi_disc_store_enforce_discovery_auth( + struct target_fabric_configfs *tf, + const char *page, + size_t count) +{ + struct iscsi_param *param; + struct iscsi_portal_group *discovery_tpg = iscsit_global->discovery_tpg; + char *endptr; + u32 op; + + op = simple_strtoul(page, &endptr, 0); + if ((op != 1) && (op != 0)) { + pr_err("Illegal value for enforce_discovery_auth:" + " %u\n", op); + return -EINVAL; + } + + if (!discovery_tpg) { + pr_err("iscsit_global->discovery_tpg is NULL\n"); + return -EINVAL; + } + + param = iscsi_find_param_from_key(AUTHMETHOD, + discovery_tpg->param_list); + if (!param) + return -EINVAL; + + if (op) { + /* + * Reset the AuthMethod key to CHAP. + */ + if (iscsi_update_param_value(param, CHAP) < 0) + return -EINVAL; + + discovery_tpg->tpg_attrib.authentication = 1; + iscsit_global->discovery_acl.node_auth.enforce_discovery_auth = 1; + pr_debug("LIO-CORE[0] Successfully enabled" + " authentication enforcement for iSCSI" + " Discovery TPG\n"); + } else { + /* + * Reset the AuthMethod key to CHAP,None + */ + if (iscsi_update_param_value(param, "CHAP,None") < 0) + return -EINVAL; + + discovery_tpg->tpg_attrib.authentication = 0; + iscsit_global->discovery_acl.node_auth.enforce_discovery_auth = 0; + pr_debug("LIO-CORE[0] Successfully disabled" + " authentication enforcement for iSCSI" + " Discovery TPG\n"); + } + + return count; +} + +DISC_AUTH_ATTR(enforce_discovery_auth, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *lio_target_discovery_auth_attrs[] = { + &iscsi_disc_userid.attr, + &iscsi_disc_password.attr, + &iscsi_disc_authenticate_target.attr, + &iscsi_disc_userid_mutual.attr, + &iscsi_disc_password_mutual.attr, + &iscsi_disc_enforce_discovery_auth.attr, + NULL, +}; + +/* End lio_target_discovery_auth_cit */ + +/* Start functions for target_core_fabric_ops */ + +static char 
*iscsi_get_fabric_name(void) +{ + return "iSCSI"; +} + +static u32 iscsi_get_task_tag(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + return cmd->init_task_tag; +} + +static int iscsi_get_cmd_state(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + return cmd->i_state; +} + +static int iscsi_is_state_remove(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + return (cmd->i_state == ISTATE_REMOVE); +} + +static int lio_sess_logged_in(struct se_session *se_sess) +{ + struct iscsi_session *sess = se_sess->fabric_sess_ptr; + int ret; + /* + * Called with spin_lock_bh(&tpg_lock); and + * spin_lock(&se_tpg->session_lock); held. + */ + spin_lock(&sess->conn_lock); + ret = (sess->session_state != TARG_SESS_STATE_LOGGED_IN); + spin_unlock(&sess->conn_lock); + + return ret; +} + +static u32 lio_sess_get_index(struct se_session *se_sess) +{ + struct iscsi_session *sess = se_sess->fabric_sess_ptr; + + return sess->session_index; +} + +static u32 lio_sess_get_initiator_sid( + struct se_session *se_sess, + unsigned char *buf, + u32 size) +{ + struct iscsi_session *sess = se_sess->fabric_sess_ptr; + /* + * iSCSI Initiator Session Identifier from RFC-3720. 
+ */ + return snprintf(buf, size, "%02x%02x%02x%02x%02x%02x", + sess->isid[0], sess->isid[1], sess->isid[2], + sess->isid[3], sess->isid[4], sess->isid[5]); +} + +static int lio_queue_data_in(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + cmd->i_state = ISTATE_SEND_DATAIN; + iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); + return 0; +} + +static int lio_write_pending(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + if (!cmd->immediate_data && !cmd->unsolicited_data) + return iscsit_build_r2ts_for_cmd(cmd, cmd->conn, 1); + + return 0; +} + +static int lio_write_pending_status(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + int ret; + + spin_lock_bh(&cmd->istate_lock); + ret = !(cmd->cmd_flags & ICF_GOT_LAST_DATAOUT); + spin_unlock_bh(&cmd->istate_lock); + + return ret; +} + +static int lio_queue_status(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + cmd->i_state = ISTATE_SEND_STATUS; + iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); + return 0; +} + +static u16 lio_set_fabric_sense_len(struct se_cmd *se_cmd, u32 sense_length) +{ + unsigned char *buffer = se_cmd->sense_buffer; + /* + * From RFC-3720 10.4.7. Data Segment - Sense and Response Data Segment + * 16-bit SenseLength. + */ + buffer[0] = ((sense_length >> 8) & 0xff); + buffer[1] = (sense_length & 0xff); + /* + * Return two byte offset into allocated sense_buffer. + */ + return 2; +} + +static u16 lio_get_fabric_sense_len(void) +{ + /* + * Return two byte offset into allocated sense_buffer. 
+ */ + return 2; +} + +static int lio_queue_tm_rsp(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + cmd->i_state = ISTATE_SEND_TASKMGTRSP; + iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); + return 0; +} + +static char *lio_tpg_get_endpoint_wwn(struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + + return &tpg->tpg_tiqn->tiqn[0]; +} + +static u16 lio_tpg_get_tag(struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + + return tpg->tpgt; +} + +static u32 lio_tpg_get_default_depth(struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + + return ISCSI_TPG_ATTRIB(tpg)->default_cmdsn_depth; +} + +static int lio_tpg_check_demo_mode(struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + + return ISCSI_TPG_ATTRIB(tpg)->generate_node_acls; +} + +static int lio_tpg_check_demo_mode_cache(struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + + return ISCSI_TPG_ATTRIB(tpg)->cache_dynamic_acls; +} + +static int lio_tpg_check_demo_mode_write_protect( + struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + + return ISCSI_TPG_ATTRIB(tpg)->demo_mode_write_protect; +} + +static int lio_tpg_check_prod_mode_write_protect( + struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + + return ISCSI_TPG_ATTRIB(tpg)->prod_mode_write_protect; +} + +static void lio_tpg_release_fabric_acl( + struct se_portal_group *se_tpg, + struct se_node_acl *se_acl) +{ + struct iscsi_node_acl *acl = container_of(se_acl, + struct iscsi_node_acl, se_node_acl); + kfree(acl); +} + +/* + * Called with spin_lock_bh(struct se_portal_group->session_lock) held.. 
+ * + * Also, this function calls iscsit_inc_session_usage_count() on the + * struct iscsi_session in question. + */ +static int lio_tpg_shutdown_session(struct se_session *se_sess) +{ + struct iscsi_session *sess = se_sess->fabric_sess_ptr; + + spin_lock(&sess->conn_lock); + if (atomic_read(&sess->session_fall_back_to_erl0) || + atomic_read(&sess->session_logout) || + (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) { + spin_unlock(&sess->conn_lock); + return 0; + } + atomic_set(&sess->session_reinstatement, 1); + spin_unlock(&sess->conn_lock); + + iscsit_inc_session_usage_count(sess); + iscsit_stop_time2retain_timer(sess); + + return 1; +} + +/* + * Calls iscsit_dec_session_usage_count() as inverse of + * lio_tpg_shutdown_session() + */ +static void lio_tpg_close_session(struct se_session *se_sess) +{ + struct iscsi_session *sess = se_sess->fabric_sess_ptr; + /* + * If the iSCSI Session for the iSCSI Initiator Node exists, + * forcefully shutdown the iSCSI NEXUS. + */ + iscsit_stop_session(sess, 1, 1); + iscsit_dec_session_usage_count(sess); + iscsit_close_session(sess); +} + +static void lio_tpg_stop_session( + struct se_session *se_sess, + int sess_sleep, + int conn_sleep) +{ + struct iscsi_session *sess = se_sess->fabric_sess_ptr; + + iscsit_stop_session(sess, sess_sleep, conn_sleep); +} + +static void lio_tpg_fall_back_to_erl0(struct se_session *se_sess) +{ + struct iscsi_session *sess = se_sess->fabric_sess_ptr; + + iscsit_fall_back_to_erl0(sess); +} + +static u32 lio_tpg_get_inst_index(struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + + return tpg->tpg_tiqn->tiqn_index; +} + +static void lio_set_default_node_attributes(struct se_node_acl *se_acl) +{ + struct iscsi_node_acl *acl = container_of(se_acl, struct iscsi_node_acl, + se_node_acl); + + ISCSI_NODE_ATTRIB(acl)->nacl = acl; + iscsit_set_default_node_attribues(acl); +} + +static void lio_release_cmd(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = 
container_of(se_cmd, struct iscsi_cmd, se_cmd); + + iscsit_release_cmd(cmd); +} + +/* End functions for target_core_fabric_ops */ + +int iscsi_target_register_configfs(void) +{ + struct target_fabric_configfs *fabric; + int ret; + + lio_target_fabric_configfs = NULL; + fabric = target_fabric_configfs_init(THIS_MODULE, "iscsi"); + if (IS_ERR(fabric)) { + pr_err("target_fabric_configfs_init() for" + " LIO-Target failed!\n"); + return PTR_ERR(fabric); + } + /* + * Setup the fabric API of function pointers used by target_core_mod.. + */ + fabric->tf_ops.get_fabric_name = &iscsi_get_fabric_name; + fabric->tf_ops.get_fabric_proto_ident = &iscsi_get_fabric_proto_ident; + fabric->tf_ops.tpg_get_wwn = &lio_tpg_get_endpoint_wwn; + fabric->tf_ops.tpg_get_tag = &lio_tpg_get_tag; + fabric->tf_ops.tpg_get_default_depth = &lio_tpg_get_default_depth; + fabric->tf_ops.tpg_get_pr_transport_id = &iscsi_get_pr_transport_id; + fabric->tf_ops.tpg_get_pr_transport_id_len = + &iscsi_get_pr_transport_id_len; + fabric->tf_ops.tpg_parse_pr_out_transport_id = + &iscsi_parse_pr_out_transport_id; + fabric->tf_ops.tpg_check_demo_mode = &lio_tpg_check_demo_mode; + fabric->tf_ops.tpg_check_demo_mode_cache = + &lio_tpg_check_demo_mode_cache; + fabric->tf_ops.tpg_check_demo_mode_write_protect = + &lio_tpg_check_demo_mode_write_protect; + fabric->tf_ops.tpg_check_prod_mode_write_protect = + &lio_tpg_check_prod_mode_write_protect; + fabric->tf_ops.tpg_alloc_fabric_acl = &lio_tpg_alloc_fabric_acl; + fabric->tf_ops.tpg_release_fabric_acl = &lio_tpg_release_fabric_acl; + fabric->tf_ops.tpg_get_inst_index = &lio_tpg_get_inst_index; + fabric->tf_ops.release_cmd = &lio_release_cmd; + fabric->tf_ops.shutdown_session = &lio_tpg_shutdown_session; + fabric->tf_ops.close_session = &lio_tpg_close_session; + fabric->tf_ops.stop_session = &lio_tpg_stop_session; + fabric->tf_ops.fall_back_to_erl0 = &lio_tpg_fall_back_to_erl0; + fabric->tf_ops.sess_logged_in = &lio_sess_logged_in; + fabric->tf_ops.sess_get_index = 
&lio_sess_get_index; + fabric->tf_ops.sess_get_initiator_sid = &lio_sess_get_initiator_sid; + fabric->tf_ops.write_pending = &lio_write_pending; + fabric->tf_ops.write_pending_status = &lio_write_pending_status; + fabric->tf_ops.set_default_node_attributes = + &lio_set_default_node_attributes; + fabric->tf_ops.get_task_tag = &iscsi_get_task_tag; + fabric->tf_ops.get_cmd_state = &iscsi_get_cmd_state; + fabric->tf_ops.queue_data_in = &lio_queue_data_in; + fabric->tf_ops.queue_status = &lio_queue_status; + fabric->tf_ops.queue_tm_rsp = &lio_queue_tm_rsp; + fabric->tf_ops.set_fabric_sense_len = &lio_set_fabric_sense_len; + fabric->tf_ops.get_fabric_sense_len = &lio_get_fabric_sense_len; + fabric->tf_ops.is_state_remove = &iscsi_is_state_remove; + /* + * Setup function pointers for generic logic in target_core_fabric_configfs.c + */ + fabric->tf_ops.fabric_make_wwn = &lio_target_call_coreaddtiqn; + fabric->tf_ops.fabric_drop_wwn = &lio_target_call_coredeltiqn; + fabric->tf_ops.fabric_make_tpg = &lio_target_tiqn_addtpg; + fabric->tf_ops.fabric_drop_tpg = &lio_target_tiqn_deltpg; + fabric->tf_ops.fabric_post_link = NULL; + fabric->tf_ops.fabric_pre_unlink = NULL; + fabric->tf_ops.fabric_make_np = &lio_target_call_addnptotpg; + fabric->tf_ops.fabric_drop_np = &lio_target_call_delnpfromtpg; + fabric->tf_ops.fabric_make_nodeacl = &lio_target_make_nodeacl; + fabric->tf_ops.fabric_drop_nodeacl = &lio_target_drop_nodeacl; + /* + * Setup default attribute lists for various fabric->tf_cit_tmpl + * sturct config_item_type's + */ + TF_CIT_TMPL(fabric)->tfc_discovery_cit.ct_attrs = lio_target_discovery_auth_attrs; + TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = lio_target_wwn_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = lio_target_tpg_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = lio_target_tpg_attrib_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = lio_target_tpg_param_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = 
lio_target_portal_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = lio_target_initiator_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = lio_target_nacl_attrib_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = lio_target_nacl_auth_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = lio_target_nacl_param_attrs; + + ret = target_fabric_configfs_register(fabric); + if (ret < 0) { + pr_err("target_fabric_configfs_register() for" + " LIO-Target failed!\n"); + target_fabric_configfs_free(fabric); + return ret; + } + + lio_target_fabric_configfs = fabric; + pr_debug("LIO_TARGET[0] - Set fabric ->" + " lio_target_fabric_configfs\n"); + return 0; +} + + +void iscsi_target_deregister_configfs(void) +{ + if (!lio_target_fabric_configfs) + return; + /* + * Shutdown discovery sessions and disable discovery TPG + */ + if (iscsit_global->discovery_tpg) + iscsit_tpg_disable_portal_group(iscsit_global->discovery_tpg, 1); + + target_fabric_configfs_deregister(lio_target_fabric_configfs); + lio_target_fabric_configfs = NULL; + pr_debug("LIO_TARGET[0] - Cleared" + " lio_target_fabric_configfs\n"); +} diff --git a/drivers/target/iscsi/iscsi_target_configfs.h b/drivers/target/iscsi/iscsi_target_configfs.h new file mode 100644 index 000000000000..8cd5a63c4edc --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_configfs.h @@ -0,0 +1,7 @@ +#ifndef ISCSI_TARGET_CONFIGFS_H +#define ISCSI_TARGET_CONFIGFS_H + +extern int iscsi_target_register_configfs(void); +extern void iscsi_target_deregister_configfs(void); + +#endif /* ISCSI_TARGET_CONFIGFS_H */ diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h new file mode 100644 index 000000000000..470ed551eeb5 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_core.h @@ -0,0 +1,859 @@ +#ifndef ISCSI_TARGET_CORE_H +#define ISCSI_TARGET_CORE_H + +#include <linux/in.h> +#include <linux/configfs.h> +#include <net/sock.h> +#include <net/tcp.h> 
+#include <scsi/scsi_cmnd.h> +#include <scsi/iscsi_proto.h> +#include <target/target_core_base.h> + +#define ISCSIT_VERSION "v4.1.0-rc1" +#define ISCSI_MAX_DATASN_MISSING_COUNT 16 +#define ISCSI_TX_THREAD_TCP_TIMEOUT 2 +#define ISCSI_RX_THREAD_TCP_TIMEOUT 2 +#define SECONDS_FOR_ASYNC_LOGOUT 10 +#define SECONDS_FOR_ASYNC_TEXT 10 +#define SECONDS_FOR_LOGOUT_COMP 15 +#define WHITE_SPACE " \t\v\f\n\r" + +/* struct iscsi_node_attrib sanity values */ +#define NA_DATAOUT_TIMEOUT 3 +#define NA_DATAOUT_TIMEOUT_MAX 60 +#define NA_DATAOUT_TIMEOUT_MIX 2 +#define NA_DATAOUT_TIMEOUT_RETRIES 5 +#define NA_DATAOUT_TIMEOUT_RETRIES_MAX 15 +#define NA_DATAOUT_TIMEOUT_RETRIES_MIN 1 +#define NA_NOPIN_TIMEOUT 5 +#define NA_NOPIN_TIMEOUT_MAX 60 +#define NA_NOPIN_TIMEOUT_MIN 3 +#define NA_NOPIN_RESPONSE_TIMEOUT 5 +#define NA_NOPIN_RESPONSE_TIMEOUT_MAX 60 +#define NA_NOPIN_RESPONSE_TIMEOUT_MIN 3 +#define NA_RANDOM_DATAIN_PDU_OFFSETS 0 +#define NA_RANDOM_DATAIN_SEQ_OFFSETS 0 +#define NA_RANDOM_R2T_OFFSETS 0 +#define NA_DEFAULT_ERL 0 +#define NA_DEFAULT_ERL_MAX 2 +#define NA_DEFAULT_ERL_MIN 0 + +/* struct iscsi_tpg_attrib sanity values */ +#define TA_AUTHENTICATION 1 +#define TA_LOGIN_TIMEOUT 15 +#define TA_LOGIN_TIMEOUT_MAX 30 +#define TA_LOGIN_TIMEOUT_MIN 5 +#define TA_NETIF_TIMEOUT 2 +#define TA_NETIF_TIMEOUT_MAX 15 +#define TA_NETIF_TIMEOUT_MIN 2 +#define TA_GENERATE_NODE_ACLS 0 +#define TA_DEFAULT_CMDSN_DEPTH 16 +#define TA_DEFAULT_CMDSN_DEPTH_MAX 512 +#define TA_DEFAULT_CMDSN_DEPTH_MIN 1 +#define TA_CACHE_DYNAMIC_ACLS 0 +/* Enabled by default in demo mode (generic_node_acls=1) */ +#define TA_DEMO_MODE_WRITE_PROTECT 1 +/* Disabled by default in production mode w/ explict ACLs */ +#define TA_PROD_MODE_WRITE_PROTECT 0 +#define TA_CACHE_CORE_NPS 0 + +enum tpg_np_network_transport_table { + ISCSI_TCP = 0, + ISCSI_SCTP_TCP = 1, + ISCSI_SCTP_UDP = 2, + ISCSI_IWARP_TCP = 3, + ISCSI_IWARP_SCTP = 4, + ISCSI_INFINIBAND = 5, +}; + +/* RFC-3720 7.1.4 Standard Connection State Diagram for a Target 
*/ +enum target_conn_state_table { + TARG_CONN_STATE_FREE = 0x1, + TARG_CONN_STATE_XPT_UP = 0x3, + TARG_CONN_STATE_IN_LOGIN = 0x4, + TARG_CONN_STATE_LOGGED_IN = 0x5, + TARG_CONN_STATE_IN_LOGOUT = 0x6, + TARG_CONN_STATE_LOGOUT_REQUESTED = 0x7, + TARG_CONN_STATE_CLEANUP_WAIT = 0x8, +}; + +/* RFC-3720 7.3.2 Session State Diagram for a Target */ +enum target_sess_state_table { + TARG_SESS_STATE_FREE = 0x1, + TARG_SESS_STATE_ACTIVE = 0x2, + TARG_SESS_STATE_LOGGED_IN = 0x3, + TARG_SESS_STATE_FAILED = 0x4, + TARG_SESS_STATE_IN_CONTINUE = 0x5, +}; + +/* struct iscsi_data_count->type */ +enum data_count_type { + ISCSI_RX_DATA = 1, + ISCSI_TX_DATA = 2, +}; + +/* struct iscsi_datain_req->dr_complete */ +enum datain_req_comp_table { + DATAIN_COMPLETE_NORMAL = 1, + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY = 2, + DATAIN_COMPLETE_CONNECTION_RECOVERY = 3, +}; + +/* struct iscsi_datain_req->recovery */ +enum datain_req_rec_table { + DATAIN_WITHIN_COMMAND_RECOVERY = 1, + DATAIN_CONNECTION_RECOVERY = 2, +}; + +/* struct iscsi_portal_group->state */ +enum tpg_state_table { + TPG_STATE_FREE = 0, + TPG_STATE_ACTIVE = 1, + TPG_STATE_INACTIVE = 2, + TPG_STATE_COLD_RESET = 3, +}; + +/* struct iscsi_tiqn->tiqn_state */ +enum tiqn_state_table { + TIQN_STATE_ACTIVE = 1, + TIQN_STATE_SHUTDOWN = 2, +}; + +/* struct iscsi_cmd->cmd_flags */ +enum cmd_flags_table { + ICF_GOT_LAST_DATAOUT = 0x00000001, + ICF_GOT_DATACK_SNACK = 0x00000002, + ICF_NON_IMMEDIATE_UNSOLICITED_DATA = 0x00000004, + ICF_SENT_LAST_R2T = 0x00000008, + ICF_WITHIN_COMMAND_RECOVERY = 0x00000010, + ICF_CONTIG_MEMORY = 0x00000020, + ICF_ATTACHED_TO_RQUEUE = 0x00000040, + ICF_OOO_CMDSN = 0x00000080, + ICF_REJECT_FAIL_CONN = 0x00000100, +}; + +/* struct iscsi_cmd->i_state */ +enum cmd_i_state_table { + ISTATE_NO_STATE = 0, + ISTATE_NEW_CMD = 1, + ISTATE_DEFERRED_CMD = 2, + ISTATE_UNSOLICITED_DATA = 3, + ISTATE_RECEIVE_DATAOUT = 4, + ISTATE_RECEIVE_DATAOUT_RECOVERY = 5, + ISTATE_RECEIVED_LAST_DATAOUT = 6, + 
ISTATE_WITHIN_DATAOUT_RECOVERY = 7, + ISTATE_IN_CONNECTION_RECOVERY = 8, + ISTATE_RECEIVED_TASKMGT = 9, + ISTATE_SEND_ASYNCMSG = 10, + ISTATE_SENT_ASYNCMSG = 11, + ISTATE_SEND_DATAIN = 12, + ISTATE_SEND_LAST_DATAIN = 13, + ISTATE_SENT_LAST_DATAIN = 14, + ISTATE_SEND_LOGOUTRSP = 15, + ISTATE_SENT_LOGOUTRSP = 16, + ISTATE_SEND_NOPIN = 17, + ISTATE_SENT_NOPIN = 18, + ISTATE_SEND_REJECT = 19, + ISTATE_SENT_REJECT = 20, + ISTATE_SEND_R2T = 21, + ISTATE_SENT_R2T = 22, + ISTATE_SEND_R2T_RECOVERY = 23, + ISTATE_SENT_R2T_RECOVERY = 24, + ISTATE_SEND_LAST_R2T = 25, + ISTATE_SENT_LAST_R2T = 26, + ISTATE_SEND_LAST_R2T_RECOVERY = 27, + ISTATE_SENT_LAST_R2T_RECOVERY = 28, + ISTATE_SEND_STATUS = 29, + ISTATE_SEND_STATUS_BROKEN_PC = 30, + ISTATE_SENT_STATUS = 31, + ISTATE_SEND_STATUS_RECOVERY = 32, + ISTATE_SENT_STATUS_RECOVERY = 33, + ISTATE_SEND_TASKMGTRSP = 34, + ISTATE_SENT_TASKMGTRSP = 35, + ISTATE_SEND_TEXTRSP = 36, + ISTATE_SENT_TEXTRSP = 37, + ISTATE_SEND_NOPIN_WANT_RESPONSE = 38, + ISTATE_SENT_NOPIN_WANT_RESPONSE = 39, + ISTATE_SEND_NOPIN_NO_RESPONSE = 40, + ISTATE_REMOVE = 41, + ISTATE_FREE = 42, +}; + +/* Used for iscsi_recover_cmdsn() return values */ +enum recover_cmdsn_ret_table { + CMDSN_ERROR_CANNOT_RECOVER = -1, + CMDSN_NORMAL_OPERATION = 0, + CMDSN_LOWER_THAN_EXP = 1, + CMDSN_HIGHER_THAN_EXP = 2, +}; + +/* Used for iscsi_handle_immediate_data() return values */ +enum immedate_data_ret_table { + IMMEDIATE_DATA_CANNOT_RECOVER = -1, + IMMEDIATE_DATA_NORMAL_OPERATION = 0, + IMMEDIATE_DATA_ERL1_CRC_FAILURE = 1, +}; + +/* Used for iscsi_decide_dataout_action() return values */ +enum dataout_action_ret_table { + DATAOUT_CANNOT_RECOVER = -1, + DATAOUT_NORMAL = 0, + DATAOUT_SEND_R2T = 1, + DATAOUT_SEND_TO_TRANSPORT = 2, + DATAOUT_WITHIN_COMMAND_RECOVERY = 3, +}; + +/* Used for struct iscsi_node_auth->naf_flags */ +enum naf_flags_table { + NAF_USERID_SET = 0x01, + NAF_PASSWORD_SET = 0x02, + NAF_USERID_IN_SET = 0x04, + NAF_PASSWORD_IN_SET = 0x08, +}; + +/* Used by various 
struct timer_list to manage iSCSI specific state */ +enum iscsi_timer_flags_table { + ISCSI_TF_RUNNING = 0x01, + ISCSI_TF_STOP = 0x02, + ISCSI_TF_EXPIRED = 0x04, +}; + +/* Used for struct iscsi_np->np_flags */ +enum np_flags_table { + NPF_IP_NETWORK = 0x00, + NPF_SCTP_STRUCT_FILE = 0x01 /* Bugfix */ +}; + +/* Used for struct iscsi_np->np_thread_state */ +enum np_thread_state_table { + ISCSI_NP_THREAD_ACTIVE = 1, + ISCSI_NP_THREAD_INACTIVE = 2, + ISCSI_NP_THREAD_RESET = 3, + ISCSI_NP_THREAD_SHUTDOWN = 4, + ISCSI_NP_THREAD_EXIT = 5, +}; + +struct iscsi_conn_ops { + u8 HeaderDigest; /* [0,1] == [None,CRC32C] */ + u8 DataDigest; /* [0,1] == [None,CRC32C] */ + u32 MaxRecvDataSegmentLength; /* [512..2**24-1] */ + u8 OFMarker; /* [0,1] == [No,Yes] */ + u8 IFMarker; /* [0,1] == [No,Yes] */ + u32 OFMarkInt; /* [1..65535] */ + u32 IFMarkInt; /* [1..65535] */ +}; + +struct iscsi_sess_ops { + char InitiatorName[224]; + char InitiatorAlias[256]; + char TargetName[224]; + char TargetAlias[256]; + char TargetAddress[256]; + u16 TargetPortalGroupTag; /* [0..65535] */ + u16 MaxConnections; /* [1..65535] */ + u8 InitialR2T; /* [0,1] == [No,Yes] */ + u8 ImmediateData; /* [0,1] == [No,Yes] */ + u32 MaxBurstLength; /* [512..2**24-1] */ + u32 FirstBurstLength; /* [512..2**24-1] */ + u16 DefaultTime2Wait; /* [0..3600] */ + u16 DefaultTime2Retain; /* [0..3600] */ + u16 MaxOutstandingR2T; /* [1..65535] */ + u8 DataPDUInOrder; /* [0,1] == [No,Yes] */ + u8 DataSequenceInOrder; /* [0,1] == [No,Yes] */ + u8 ErrorRecoveryLevel; /* [0..2] */ + u8 SessionType; /* [0,1] == [Normal,Discovery]*/ +}; + +struct iscsi_queue_req { + int state; + struct iscsi_cmd *cmd; + struct list_head qr_list; +}; + +struct iscsi_data_count { + int data_length; + int sync_and_steering; + enum data_count_type type; + u32 iov_count; + u32 ss_iov_count; + u32 ss_marker_count; + struct kvec *iov; +}; + +struct iscsi_param_list { + struct list_head param_list; + struct list_head extra_response_list; +}; + +struct 
iscsi_datain_req { + enum datain_req_comp_table dr_complete; + int generate_recovery_values; + enum datain_req_rec_table recovery; + u32 begrun; + u32 runlength; + u32 data_length; + u32 data_offset; + u32 data_offset_end; + u32 data_sn; + u32 next_burst_len; + u32 read_data_done; + u32 seq_send_order; + struct list_head dr_list; +} ____cacheline_aligned; + +struct iscsi_ooo_cmdsn { + u16 cid; + u32 batch_count; + u32 cmdsn; + u32 exp_cmdsn; + struct iscsi_cmd *cmd; + struct list_head ooo_list; +} ____cacheline_aligned; + +struct iscsi_datain { + u8 flags; + u32 data_sn; + u32 length; + u32 offset; +} ____cacheline_aligned; + +struct iscsi_r2t { + int seq_complete; + int recovery_r2t; + int sent_r2t; + u32 r2t_sn; + u32 offset; + u32 targ_xfer_tag; + u32 xfer_len; + struct list_head r2t_list; +} ____cacheline_aligned; + +struct iscsi_cmd { + enum iscsi_timer_flags_table dataout_timer_flags; + /* DataOUT timeout retries */ + u8 dataout_timeout_retries; + /* Within command recovery count */ + u8 error_recovery_count; + /* iSCSI dependent state for out of order CmdSNs */ + enum cmd_i_state_table deferred_i_state; + /* iSCSI dependent state */ + enum cmd_i_state_table i_state; + /* Command is an immediate command (ISCSI_OP_IMMEDIATE set) */ + u8 immediate_cmd; + /* Immediate data present */ + u8 immediate_data; + /* iSCSI Opcode */ + u8 iscsi_opcode; + /* iSCSI Response Code */ + u8 iscsi_response; + /* Logout reason when iscsi_opcode == ISCSI_INIT_LOGOUT_CMND */ + u8 logout_reason; + /* Logout response code when iscsi_opcode == ISCSI_INIT_LOGOUT_CMND */ + u8 logout_response; + /* MaxCmdSN has been incremented */ + u8 maxcmdsn_inc; + /* Immediate Unsolicited Dataout */ + u8 unsolicited_data; + /* CID contained in logout PDU when opcode == ISCSI_INIT_LOGOUT_CMND */ + u16 logout_cid; + /* Command flags */ + enum cmd_flags_table cmd_flags; + /* Initiator Task Tag assigned from Initiator */ + u32 init_task_tag; + /* Target Transfer Tag assigned from Target */ + u32
targ_xfer_tag; + /* CmdSN assigned from Initiator */ + u32 cmd_sn; + /* ExpStatSN assigned from Initiator */ + u32 exp_stat_sn; + /* StatSN assigned to this ITT */ + u32 stat_sn; + /* DataSN Counter */ + u32 data_sn; + /* R2TSN Counter */ + u32 r2t_sn; + /* Last DataSN acknowledged via DataAck SNACK */ + u32 acked_data_sn; + /* Used for echoing NOPOUT ping data */ + u32 buf_ptr_size; + /* Used to store DataDigest */ + u32 data_crc; + /* Total size in bytes associated with command */ + u32 data_length; + /* Counter for MaxOutstandingR2T */ + u32 outstanding_r2ts; + /* Next R2T Offset when DataSequenceInOrder=Yes */ + u32 r2t_offset; + /* Iovec current and orig count for iscsi_cmd->iov_data */ + u32 iov_data_count; + u32 orig_iov_data_count; + /* Number of miscellaneous iovecs used for IP stack calls */ + u32 iov_misc_count; + /* Number of struct iscsi_pdu in struct iscsi_cmd->pdu_list */ + u32 pdu_count; + /* Next struct iscsi_pdu to send in struct iscsi_cmd->pdu_list */ + u32 pdu_send_order; + /* Current struct iscsi_pdu in struct iscsi_cmd->pdu_list */ + u32 pdu_start; + u32 residual_count; + /* Next struct iscsi_seq to send in struct iscsi_cmd->seq_list */ + u32 seq_send_order; + /* Number of struct iscsi_seq in struct iscsi_cmd->seq_list */ + u32 seq_count; + /* Current struct iscsi_seq in struct iscsi_cmd->seq_list */ + u32 seq_no; + /* Lowest offset in current DataOUT sequence */ + u32 seq_start_offset; + /* Highest offset in current DataOUT sequence */ + u32 seq_end_offset; + /* Total size in bytes received so far of READ data */ + u32 read_data_done; + /* Total size in bytes received so far of WRITE data */ + u32 write_data_done; + /* Counter for FirstBurstLength key */ + u32 first_burst_len; + /* Counter for MaxBurstLength key */ + u32 next_burst_len; + /* Transfer size used for IP stack calls */ + u32 tx_size; + /* Buffer used for various purposes */ + void *buf_ptr; + /* See include/linux/dma-mapping.h */ + enum dma_data_direction data_direction; + /* 
iSCSI PDU Header + CRC */ + unsigned char pdu[ISCSI_HDR_LEN + ISCSI_CRC_LEN]; + /* Number of times struct iscsi_cmd is present in immediate queue */ + atomic_t immed_queue_count; + atomic_t response_queue_count; + atomic_t transport_sent; + spinlock_t datain_lock; + spinlock_t dataout_timeout_lock; + /* spinlock for protecting struct iscsi_cmd->i_state */ + spinlock_t istate_lock; + /* spinlock for adding within command recovery entries */ + spinlock_t error_lock; + /* spinlock for adding R2Ts */ + spinlock_t r2t_lock; + /* DataIN List */ + struct list_head datain_list; + /* R2T List */ + struct list_head cmd_r2t_list; + struct completion reject_comp; + /* Timer for DataOUT */ + struct timer_list dataout_timer; + /* Iovecs for SCSI data payload RX/TX w/ kernel level sockets */ + struct kvec *iov_data; + /* Iovecs for miscellaneous purposes */ +#define ISCSI_MISC_IOVECS 5 + struct kvec iov_misc[ISCSI_MISC_IOVECS]; + /* Array of struct iscsi_pdu used for DataPDUInOrder=No */ + struct iscsi_pdu *pdu_list; + /* Current struct iscsi_pdu used for DataPDUInOrder=No */ + struct iscsi_pdu *pdu_ptr; + /* Array of struct iscsi_seq used for DataSequenceInOrder=No */ + struct iscsi_seq *seq_list; + /* Current struct iscsi_seq used for DataSequenceInOrder=No */ + struct iscsi_seq *seq_ptr; + /* TMR Request when iscsi_opcode == ISCSI_OP_SCSI_TMFUNC */ + struct iscsi_tmr_req *tmr_req; + /* Connection this command is allegiant to */ + struct iscsi_conn *conn; + /* Pointer to connection recovery entry */ + struct iscsi_conn_recovery *cr; + /* Session the command is part of, used for connection recovery */ + struct iscsi_session *sess; + /* list_head for connection list */ + struct list_head i_list; + /* The TCM I/O descriptor that is accessed via container_of() */ + struct se_cmd se_cmd; + /* Sense buffer that will be mapped into outgoing status */ +#define ISCSI_SENSE_BUFFER_LEN (TRANSPORT_SENSE_BUFFER + 2) + unsigned char sense_buffer[ISCSI_SENSE_BUFFER_LEN]; + + struct
scatterlist *t_mem_sg; + u32 t_mem_sg_nents; + + u32 padding; + u8 pad_bytes[4]; + + struct scatterlist *first_data_sg; + u32 first_data_sg_off; + u32 kmapped_nents; + +} ____cacheline_aligned; + +struct iscsi_tmr_req { + bool task_reassign:1; + u32 ref_cmd_sn; + u32 exp_data_sn; + struct iscsi_conn_recovery *conn_recovery; + struct se_tmr_req *se_tmr_req; +}; + +struct iscsi_conn { + /* Authentication Successful for this connection */ + u8 auth_complete; + /* State connection is currently in */ + u8 conn_state; + u8 conn_logout_reason; + u8 network_transport; + enum iscsi_timer_flags_table nopin_timer_flags; + enum iscsi_timer_flags_table nopin_response_timer_flags; + u8 tx_immediate_queue; + u8 tx_response_queue; + /* Used to know what thread encountered a transport failure */ + u8 which_thread; + /* connection id assigned by the Initiator */ + u16 cid; + /* Remote TCP Port */ + u16 login_port; + int net_size; + u32 auth_id; +#define CONNFLAG_SCTP_STRUCT_FILE 0x01 + u32 conn_flags; + /* Used for iscsi_tx_login_rsp() */ + u32 login_itt; + u32 exp_statsn; + /* Per connection status sequence number */ + u32 stat_sn; + /* IFMarkInt's Current Value */ + u32 if_marker; + /* OFMarkInt's Current Value */ + u32 of_marker; + /* Used for calculating OFMarker offset to next PDU */ + u32 of_marker_offset; + /* Complete Bad PDU for sending reject */ + unsigned char bad_hdr[ISCSI_HDR_LEN]; +#define IPV6_ADDRESS_SPACE 48 + unsigned char login_ip[IPV6_ADDRESS_SPACE]; + int conn_usage_count; + int conn_waiting_on_uc; + atomic_t check_immediate_queue; + atomic_t conn_logout_remove; + atomic_t connection_exit; + atomic_t connection_recovery; + atomic_t connection_reinstatement; + atomic_t connection_wait; + atomic_t connection_wait_rcfr; + atomic_t sleep_on_conn_wait_comp; + atomic_t transport_failed; + struct completion conn_post_wait_comp; + struct completion conn_wait_comp; + struct completion conn_wait_rcfr_comp; + struct completion conn_waiting_on_uc_comp; + struct completion 
conn_logout_comp; + struct completion tx_half_close_comp; + struct completion rx_half_close_comp; + /* socket used by this connection */ + struct socket *sock; + struct timer_list nopin_timer; + struct timer_list nopin_response_timer; + struct timer_list transport_timer; + /* Spinlock used for add/deleting cmd's from conn_cmd_list */ + spinlock_t cmd_lock; + spinlock_t conn_usage_lock; + spinlock_t immed_queue_lock; + spinlock_t nopin_timer_lock; + spinlock_t response_queue_lock; + spinlock_t state_lock; + /* libcrypto RX and TX contexts for crc32c */ + struct hash_desc conn_rx_hash; + struct hash_desc conn_tx_hash; + /* Used for scheduling TX and RX connection kthreads */ + cpumask_var_t conn_cpumask; + unsigned int conn_rx_reset_cpumask:1; /* 1-bit flag: unsigned so it reads back 0 or 1, never -1 */ + unsigned int conn_tx_reset_cpumask:1; /* 1-bit flag: unsigned so it reads back 0 or 1, never -1 */ + /* list_head of struct iscsi_cmd for this connection */ + struct list_head conn_cmd_list; + struct list_head immed_queue_list; + struct list_head response_queue_list; + struct iscsi_conn_ops *conn_ops; + struct iscsi_param_list *param_list; + /* Used for per connection auth state machine */ + void *auth_protocol; + struct iscsi_login_thread_s *login_thread; + struct iscsi_portal_group *tpg; + /* Pointer to parent session */ + struct iscsi_session *sess; + /* Pointer to thread_set in use for this conn's threads */ + struct iscsi_thread_set *thread_set; + /* list_head for session connection list */ + struct list_head conn_list; +} ____cacheline_aligned; + +struct iscsi_conn_recovery { + u16 cid; + u32 cmd_count; + u32 maxrecvdatasegmentlength; + int ready_for_reallegiance; + struct list_head conn_recovery_cmd_list; + spinlock_t conn_recovery_cmd_lock; + struct timer_list time2retain_timer; + struct iscsi_session *sess; + struct list_head cr_list; +} ____cacheline_aligned; + +struct iscsi_session { + u8 initiator_vendor; + u8 isid[6]; + enum iscsi_timer_flags_table time2retain_timer_flags; + u8 version_active; + u16 cid_called; + u16 conn_recovery_count; + u16 tsih; + /* state session is currently in */ +
u32 session_state; + /* session wide counter: initiator assigned task tag */ + u32 init_task_tag; + /* session wide counter: target assigned task tag */ + u32 targ_xfer_tag; + u32 cmdsn_window; + + /* protects cmdsn values */ + struct mutex cmdsn_mutex; + /* session wide counter: expected command sequence number */ + u32 exp_cmd_sn; + /* session wide counter: maximum allowed command sequence number */ + u32 max_cmd_sn; + struct list_head sess_ooo_cmdsn_list; + + /* LIO specific session ID */ + u32 sid; + char auth_type[8]; + /* unique within the target */ + int session_index; + /* Used for session reference counting */ + int session_usage_count; + int session_waiting_on_uc; + u32 cmd_pdus; + u32 rsp_pdus; + u64 tx_data_octets; + u64 rx_data_octets; + u32 conn_digest_errors; + u32 conn_timeout_errors; + u64 creation_time; + spinlock_t session_stats_lock; + /* Number of active connections */ + atomic_t nconn; + atomic_t session_continuation; + atomic_t session_fall_back_to_erl0; + atomic_t session_logout; + atomic_t session_reinstatement; + atomic_t session_stop_active; + atomic_t sleep_on_sess_wait_comp; + atomic_t transport_wait_cmds; + /* connection list */ + struct list_head sess_conn_list; + struct list_head cr_active_list; + struct list_head cr_inactive_list; + spinlock_t conn_lock; + spinlock_t cr_a_lock; + spinlock_t cr_i_lock; + spinlock_t session_usage_lock; + spinlock_t ttt_lock; + struct completion async_msg_comp; + struct completion reinstatement_comp; + struct completion session_wait_comp; + struct completion session_waiting_on_uc_comp; + struct timer_list time2retain_timer; + struct iscsi_sess_ops *sess_ops; + struct se_session *se_sess; + struct iscsi_portal_group *tpg; +} ____cacheline_aligned; + +struct iscsi_login { + u8 auth_complete; + u8 checked_for_existing; + u8 current_stage; + u8 leading_connection; + u8 first_request; + u8 version_min; + u8 version_max; + char isid[6]; + u32 cmd_sn; + u32 init_task_tag; + u32 initial_exp_statsn; + u32 
rsp_length; + u16 cid; + u16 tsih; + char *req; + char *rsp; + char *req_buf; + char *rsp_buf; +} ____cacheline_aligned; + +struct iscsi_node_attrib { + u32 dataout_timeout; + u32 dataout_timeout_retries; + u32 default_erl; + u32 nopin_timeout; + u32 nopin_response_timeout; + u32 random_datain_pdu_offsets; + u32 random_datain_seq_offsets; + u32 random_r2t_offsets; + u32 tmr_cold_reset; + u32 tmr_warm_reset; + struct iscsi_node_acl *nacl; +}; + +struct se_dev_entry_s; + +struct iscsi_node_auth { + enum naf_flags_table naf_flags; + int authenticate_target; + /* Used for iscsit_global->discovery_auth, + * set to zero (auth disabled) by default */ + int enforce_discovery_auth; +#define MAX_USER_LEN 256 +#define MAX_PASS_LEN 256 + char userid[MAX_USER_LEN]; + char password[MAX_PASS_LEN]; + char userid_mutual[MAX_USER_LEN]; + char password_mutual[MAX_PASS_LEN]; +}; + +#include "iscsi_target_stat.h" + +struct iscsi_node_stat_grps { + struct config_group iscsi_sess_stats_group; + struct config_group iscsi_conn_stats_group; +}; + +struct iscsi_node_acl { + struct iscsi_node_attrib node_attrib; + struct iscsi_node_auth node_auth; + struct iscsi_node_stat_grps node_stat_grps; + struct se_node_acl se_node_acl; +}; + +#define NODE_STAT_GRPS(nacl) (&(nacl)->node_stat_grps) + +#define ISCSI_NODE_ATTRIB(t) (&(t)->node_attrib) +#define ISCSI_NODE_AUTH(t) (&(t)->node_auth) + +struct iscsi_tpg_attrib { + u32 authentication; + u32 login_timeout; + u32 netif_timeout; + u32 generate_node_acls; + u32 cache_dynamic_acls; + u32 default_cmdsn_depth; + u32 demo_mode_write_protect; + u32 prod_mode_write_protect; + struct iscsi_portal_group *tpg; +}; + +struct iscsi_np { + int np_network_transport; + int np_ip_proto; + int np_sock_type; + enum np_thread_state_table np_thread_state; + enum iscsi_timer_flags_table np_login_timer_flags; + u32 np_exports; + enum np_flags_table np_flags; + unsigned char np_ip[IPV6_ADDRESS_SPACE]; + u16 np_port; + spinlock_t np_thread_lock; + struct completion 
np_restart_comp; + struct socket *np_socket; + struct __kernel_sockaddr_storage np_sockaddr; + struct task_struct *np_thread; + struct timer_list np_login_timer; + struct iscsi_portal_group *np_login_tpg; + struct list_head np_list; +} ____cacheline_aligned; + +struct iscsi_tpg_np { + struct iscsi_np *tpg_np; + struct iscsi_portal_group *tpg; + struct iscsi_tpg_np *tpg_np_parent; + struct list_head tpg_np_list; + struct list_head tpg_np_child_list; + struct list_head tpg_np_parent_list; + struct se_tpg_np se_tpg_np; + spinlock_t tpg_np_parent_lock; +}; + +struct iscsi_portal_group { + unsigned char tpg_chap_id; + /* TPG State */ + enum tpg_state_table tpg_state; + /* Target Portal Group Tag */ + u16 tpgt; + /* Id assigned to target sessions */ + u16 ntsih; + /* Number of active sessions */ + u32 nsessions; + /* Number of Network Portals available for this TPG */ + u32 num_tpg_nps; + /* Per TPG LIO specific session ID. */ + u32 sid; + /* Spinlock for adding/removing Network Portals */ + spinlock_t tpg_np_lock; + spinlock_t tpg_state_lock; + struct se_portal_group tpg_se_tpg; + struct mutex tpg_access_lock; + struct mutex np_login_lock; + struct iscsi_tpg_attrib tpg_attrib; + /* Pointer to default list of iSCSI parameters for TPG */ + struct iscsi_param_list *param_list; + struct iscsi_tiqn *tpg_tiqn; + struct list_head tpg_gnp_list; + struct list_head tpg_list; +} ____cacheline_aligned; + +#define ISCSI_TPG_C(c) ((struct iscsi_portal_group *)(c)->tpg) +#define ISCSI_TPG_LUN(c, l) ((iscsi_tpg_list_t *)(c)->tpg->tpg_lun_list_t[l]) +#define ISCSI_TPG_S(s) ((struct iscsi_portal_group *)(s)->tpg) +#define ISCSI_TPG_ATTRIB(t) (&(t)->tpg_attrib) +#define SE_TPG(tpg) (&(tpg)->tpg_se_tpg) + +struct iscsi_wwn_stat_grps { + struct config_group iscsi_stat_group; + struct config_group iscsi_instance_group; + struct config_group iscsi_sess_err_group; + struct config_group iscsi_tgt_attr_group; + struct config_group iscsi_login_stats_group; + struct config_group 
iscsi_logout_stats_group; +}; + +struct iscsi_tiqn { +#define ISCSI_IQN_LEN 224 + unsigned char tiqn[ISCSI_IQN_LEN]; + enum tiqn_state_table tiqn_state; + int tiqn_access_count; + u32 tiqn_active_tpgs; + u32 tiqn_ntpgs; + u32 tiqn_num_tpg_nps; + u32 tiqn_nsessions; + struct list_head tiqn_list; + struct list_head tiqn_tpg_list; + spinlock_t tiqn_state_lock; + spinlock_t tiqn_tpg_lock; + struct se_wwn tiqn_wwn; + struct iscsi_wwn_stat_grps tiqn_stat_grps; + int tiqn_index; + struct iscsi_sess_err_stats sess_err_stats; + struct iscsi_login_stats login_stats; + struct iscsi_logout_stats logout_stats; +} ____cacheline_aligned; + +#define WWN_STAT_GRPS(tiqn) (&(tiqn)->tiqn_stat_grps) + +struct iscsit_global { + /* In core shutdown */ + u32 in_shutdown; + u32 active_ts; + /* Unique identifier used for the authentication daemon */ + u32 auth_id; + u32 inactive_ts; + /* Thread Set bitmap count */ + int ts_bitmap_count; + /* Thread Set bitmap pointer */ + unsigned long *ts_bitmap; + /* Used for iSCSI discovery session authentication */ + struct iscsi_node_acl discovery_acl; + struct iscsi_portal_group *discovery_tpg; +}; + +#endif /* ISCSI_TARGET_CORE_H */ diff --git a/drivers/target/iscsi/iscsi_target_datain_values.c b/drivers/target/iscsi/iscsi_target_datain_values.c new file mode 100644 index 000000000000..8c0495129513 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_datain_values.c @@ -0,0 +1,531 @@ +/******************************************************************************* + * This file contains the iSCSI Target DataIN value generation functions. + * + * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. 
Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + ******************************************************************************/ + +#include <scsi/iscsi_proto.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_seq_pdu_list.h" +#include "iscsi_target_erl1.h" +#include "iscsi_target_util.h" +#include "iscsi_target.h" +#include "iscsi_target_datain_values.h" + +struct iscsi_datain_req *iscsit_allocate_datain_req(void) +{ + struct iscsi_datain_req *dr; + + dr = kmem_cache_zalloc(lio_dr_cache, GFP_ATOMIC); + if (!dr) { + pr_err("Unable to allocate memory for" + " struct iscsi_datain_req\n"); + return NULL; + } + INIT_LIST_HEAD(&dr->dr_list); + + return dr; +} + +void iscsit_attach_datain_req(struct iscsi_cmd *cmd, struct iscsi_datain_req *dr) +{ + spin_lock(&cmd->datain_lock); + list_add_tail(&dr->dr_list, &cmd->datain_list); + spin_unlock(&cmd->datain_lock); +} + +void iscsit_free_datain_req(struct iscsi_cmd *cmd, struct iscsi_datain_req *dr) +{ + spin_lock(&cmd->datain_lock); + list_del(&dr->dr_list); + spin_unlock(&cmd->datain_lock); + + kmem_cache_free(lio_dr_cache, dr); +} + +void iscsit_free_all_datain_reqs(struct iscsi_cmd *cmd) +{ + struct iscsi_datain_req *dr, *dr_tmp; + + spin_lock(&cmd->datain_lock); + list_for_each_entry_safe(dr, dr_tmp, &cmd->datain_list, dr_list) { + list_del(&dr->dr_list); + kmem_cache_free(lio_dr_cache, dr); + } + spin_unlock(&cmd->datain_lock); +} + +struct iscsi_datain_req *iscsit_get_datain_req(struct iscsi_cmd *cmd) +{ + struct 
iscsi_datain_req *dr; + + if (list_empty(&cmd->datain_list)) { + pr_err("cmd->datain_list is empty for ITT:" + " 0x%08x\n", cmd->init_task_tag); + return NULL; + } + /* Non-empty (checked above): return the first entry on the list */ + dr = list_first_entry(&cmd->datain_list, + struct iscsi_datain_req, dr_list); + return dr; +} + +/* + * For Normal and Recovery DataSequenceInOrder=Yes and DataPDUInOrder=Yes. + */ +static struct iscsi_datain_req *iscsit_set_datain_values_yes_and_yes( + struct iscsi_cmd *cmd, + struct iscsi_datain *datain) +{ + u32 next_burst_len, read_data_done, read_data_left; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_datain_req *dr; + + dr = iscsit_get_datain_req(cmd); + if (!dr) + return NULL; + + if (dr->recovery && dr->generate_recovery_values) { + if (iscsit_create_recovery_datain_values_datasequenceinorder_yes( + cmd, dr) < 0) + return NULL; + + dr->generate_recovery_values = 0; + } + + next_burst_len = (!dr->recovery) ? + cmd->next_burst_len : dr->next_burst_len; + read_data_done = (!dr->recovery) ? + cmd->read_data_done : dr->read_data_done; + + read_data_left = (cmd->data_length - read_data_done); + if (!read_data_left) { + pr_err("ITT: 0x%08x read_data_left is zero!\n", + cmd->init_task_tag); + return NULL; + } + + if ((read_data_left <= conn->conn_ops->MaxRecvDataSegmentLength) && + (read_data_left <= (conn->sess->sess_ops->MaxBurstLength - + next_burst_len))) { + datain->length = read_data_left; + + datain->flags |= (ISCSI_FLAG_CMD_FINAL | ISCSI_FLAG_DATA_STATUS); + if (conn->sess->sess_ops->ErrorRecoveryLevel > 0) + datain->flags |= ISCSI_FLAG_DATA_ACK; + } else { + if ((next_burst_len + + conn->conn_ops->MaxRecvDataSegmentLength) < + conn->sess->sess_ops->MaxBurstLength) { + datain->length = + conn->conn_ops->MaxRecvDataSegmentLength; + next_burst_len += datain->length; + } else { + datain->length = (conn->sess->sess_ops->MaxBurstLength - + next_burst_len); + next_burst_len = 0; + + datain->flags |= ISCSI_FLAG_CMD_FINAL; + if (conn->sess->sess_ops->ErrorRecoveryLevel > 0) + datain->flags |=
ISCSI_FLAG_DATA_ACK; + } + } + + datain->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++; + datain->offset = read_data_done; + + if (!dr->recovery) { + cmd->next_burst_len = next_burst_len; + cmd->read_data_done += datain->length; + } else { + dr->next_burst_len = next_burst_len; + dr->read_data_done += datain->length; + } + + if (!dr->recovery) { + if (datain->flags & ISCSI_FLAG_DATA_STATUS) + dr->dr_complete = DATAIN_COMPLETE_NORMAL; + + return dr; + } + + if (!dr->runlength) { + if (datain->flags & ISCSI_FLAG_DATA_STATUS) { + dr->dr_complete = + (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ? + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY : + DATAIN_COMPLETE_CONNECTION_RECOVERY; + } + } else { + if ((dr->begrun + dr->runlength) == dr->data_sn) { + dr->dr_complete = + (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ? + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY : + DATAIN_COMPLETE_CONNECTION_RECOVERY; + } + } + + return dr; +} + +/* + * For Normal and Recovery DataSequenceInOrder=No and DataPDUInOrder=Yes. + */ +static struct iscsi_datain_req *iscsit_set_datain_values_no_and_yes( + struct iscsi_cmd *cmd, + struct iscsi_datain *datain) +{ + u32 offset, read_data_done, read_data_left, seq_send_order; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_datain_req *dr; + struct iscsi_seq *seq; + + dr = iscsit_get_datain_req(cmd); + if (!dr) + return NULL; + + if (dr->recovery && dr->generate_recovery_values) { + if (iscsit_create_recovery_datain_values_datasequenceinorder_no( + cmd, dr) < 0) + return NULL; + + dr->generate_recovery_values = 0; + } + + read_data_done = (!dr->recovery) ? + cmd->read_data_done : dr->read_data_done; + seq_send_order = (!dr->recovery) ? 
+ cmd->seq_send_order : dr->seq_send_order; + + read_data_left = (cmd->data_length - read_data_done); + if (!read_data_left) { + pr_err("ITT: 0x%08x read_data_left is zero!\n", + cmd->init_task_tag); + return NULL; + } + + seq = iscsit_get_seq_holder_for_datain(cmd, seq_send_order); + if (!seq) + return NULL; + + seq->sent = 1; + + if (!dr->recovery && !seq->next_burst_len) + seq->first_datasn = cmd->data_sn; + + offset = (seq->offset + seq->next_burst_len); + + if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) >= + cmd->data_length) { + datain->length = (cmd->data_length - offset); + datain->offset = offset; + + datain->flags |= ISCSI_FLAG_CMD_FINAL; + if (conn->sess->sess_ops->ErrorRecoveryLevel > 0) + datain->flags |= ISCSI_FLAG_DATA_ACK; + + seq->next_burst_len = 0; + seq_send_order++; + } else { + if ((seq->next_burst_len + + conn->conn_ops->MaxRecvDataSegmentLength) < + conn->sess->sess_ops->MaxBurstLength) { + datain->length = + conn->conn_ops->MaxRecvDataSegmentLength; + datain->offset = (seq->offset + seq->next_burst_len); + + seq->next_burst_len += datain->length; + } else { + datain->length = (conn->sess->sess_ops->MaxBurstLength - + seq->next_burst_len); + datain->offset = (seq->offset + seq->next_burst_len); + + datain->flags |= ISCSI_FLAG_CMD_FINAL; + if (conn->sess->sess_ops->ErrorRecoveryLevel > 0) + datain->flags |= ISCSI_FLAG_DATA_ACK; + + seq->next_burst_len = 0; + seq_send_order++; + } + } + + if ((read_data_done + datain->length) == cmd->data_length) + datain->flags |= ISCSI_FLAG_DATA_STATUS; + + datain->data_sn = (!dr->recovery) ? 
cmd->data_sn++ : dr->data_sn++; + if (!dr->recovery) { + cmd->seq_send_order = seq_send_order; + cmd->read_data_done += datain->length; + } else { + dr->seq_send_order = seq_send_order; + dr->read_data_done += datain->length; + } + + if (!dr->recovery) { + if (datain->flags & ISCSI_FLAG_CMD_FINAL) + seq->last_datasn = datain->data_sn; + if (datain->flags & ISCSI_FLAG_DATA_STATUS) + dr->dr_complete = DATAIN_COMPLETE_NORMAL; + + return dr; + } + + if (!dr->runlength) { + if (datain->flags & ISCSI_FLAG_DATA_STATUS) { + dr->dr_complete = + (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ? + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY : + DATAIN_COMPLETE_CONNECTION_RECOVERY; + } + } else { + if ((dr->begrun + dr->runlength) == dr->data_sn) { + dr->dr_complete = + (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ? + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY : + DATAIN_COMPLETE_CONNECTION_RECOVERY; + } + } + + return dr; +} + +/* + * For Normal and Recovery DataSequenceInOrder=Yes and DataPDUInOrder=No. + */ +static struct iscsi_datain_req *iscsit_set_datain_values_yes_and_no( + struct iscsi_cmd *cmd, + struct iscsi_datain *datain) +{ + u32 next_burst_len, read_data_done, read_data_left; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_datain_req *dr; + struct iscsi_pdu *pdu; + + dr = iscsit_get_datain_req(cmd); + if (!dr) + return NULL; + + if (dr->recovery && dr->generate_recovery_values) { + if (iscsit_create_recovery_datain_values_datasequenceinorder_yes( + cmd, dr) < 0) + return NULL; + + dr->generate_recovery_values = 0; + } + + next_burst_len = (!dr->recovery) ? + cmd->next_burst_len : dr->next_burst_len; + read_data_done = (!dr->recovery) ? 
+ cmd->read_data_done : dr->read_data_done; + + read_data_left = (cmd->data_length - read_data_done); + if (!read_data_left) { + pr_err("ITT: 0x%08x read_data_left is zero!\n", + cmd->init_task_tag); + return NULL; /* *datain not filled in: fail like the other three helpers */ + } + + pdu = iscsit_get_pdu_holder_for_seq(cmd, NULL); + if (!pdu) + return NULL; /* no PDU holder, *datain not filled in */ + + if ((read_data_done + pdu->length) == cmd->data_length) { + pdu->flags |= (ISCSI_FLAG_CMD_FINAL | ISCSI_FLAG_DATA_STATUS); + if (conn->sess->sess_ops->ErrorRecoveryLevel > 0) + pdu->flags |= ISCSI_FLAG_DATA_ACK; + + next_burst_len = 0; + } else { + if ((next_burst_len + conn->conn_ops->MaxRecvDataSegmentLength) < + conn->sess->sess_ops->MaxBurstLength) + next_burst_len += pdu->length; + else { + pdu->flags |= ISCSI_FLAG_CMD_FINAL; + if (conn->sess->sess_ops->ErrorRecoveryLevel > 0) + pdu->flags |= ISCSI_FLAG_DATA_ACK; + + next_burst_len = 0; + } + } + + pdu->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++; + if (!dr->recovery) { + cmd->next_burst_len = next_burst_len; + cmd->read_data_done += pdu->length; + } else { + dr->next_burst_len = next_burst_len; + dr->read_data_done += pdu->length; + } + + datain->flags = pdu->flags; + datain->length = pdu->length; + datain->offset = pdu->offset; + datain->data_sn = pdu->data_sn; + + if (!dr->recovery) { + if (datain->flags & ISCSI_FLAG_DATA_STATUS) + dr->dr_complete = DATAIN_COMPLETE_NORMAL; + + return dr; + } + + if (!dr->runlength) { + if (datain->flags & ISCSI_FLAG_DATA_STATUS) { + dr->dr_complete = + (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ? + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY : + DATAIN_COMPLETE_CONNECTION_RECOVERY; + } + } else { + if ((dr->begrun + dr->runlength) == dr->data_sn) { + dr->dr_complete = + (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ? + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY : + DATAIN_COMPLETE_CONNECTION_RECOVERY; + } + } + + return dr; +} + +/* + * For Normal and Recovery DataSequenceInOrder=No and DataPDUInOrder=No.
+ */ +static struct iscsi_datain_req *iscsit_set_datain_values_no_and_no( + struct iscsi_cmd *cmd, + struct iscsi_datain *datain) +{ + u32 read_data_done, read_data_left, seq_send_order; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_datain_req *dr; + struct iscsi_pdu *pdu; + struct iscsi_seq *seq = NULL; + + dr = iscsit_get_datain_req(cmd); + if (!dr) + return NULL; + + if (dr->recovery && dr->generate_recovery_values) { + if (iscsit_create_recovery_datain_values_datasequenceinorder_no( + cmd, dr) < 0) + return NULL; + + dr->generate_recovery_values = 0; + } + + read_data_done = (!dr->recovery) ? + cmd->read_data_done : dr->read_data_done; + seq_send_order = (!dr->recovery) ? + cmd->seq_send_order : dr->seq_send_order; + + read_data_left = (cmd->data_length - read_data_done); + if (!read_data_left) { + pr_err("ITT: 0x%08x read_data_left is zero!\n", + cmd->init_task_tag); + return NULL; + } + + seq = iscsit_get_seq_holder_for_datain(cmd, seq_send_order); + if (!seq) + return NULL; + + seq->sent = 1; + + if (!dr->recovery && !seq->next_burst_len) + seq->first_datasn = cmd->data_sn; + + pdu = iscsit_get_pdu_holder_for_seq(cmd, seq); + if (!pdu) + return NULL; + + if (seq->pdu_send_order == seq->pdu_count) { + pdu->flags |= ISCSI_FLAG_CMD_FINAL; + if (conn->sess->sess_ops->ErrorRecoveryLevel > 0) + pdu->flags |= ISCSI_FLAG_DATA_ACK; + + seq->next_burst_len = 0; + seq_send_order++; + } else + seq->next_burst_len += pdu->length; + + if ((read_data_done + pdu->length) == cmd->data_length) + pdu->flags |= ISCSI_FLAG_DATA_STATUS; + + pdu->data_sn = (!dr->recovery) ? 
cmd->data_sn++ : dr->data_sn++; + if (!dr->recovery) { + cmd->seq_send_order = seq_send_order; + cmd->read_data_done += pdu->length; + } else { + dr->seq_send_order = seq_send_order; + dr->read_data_done += pdu->length; + } + + datain->flags = pdu->flags; + datain->length = pdu->length; + datain->offset = pdu->offset; + datain->data_sn = pdu->data_sn; + + if (!dr->recovery) { + if (datain->flags & ISCSI_FLAG_CMD_FINAL) + seq->last_datasn = datain->data_sn; + if (datain->flags & ISCSI_FLAG_DATA_STATUS) + dr->dr_complete = DATAIN_COMPLETE_NORMAL; + + return dr; + } + + if (!dr->runlength) { + if (datain->flags & ISCSI_FLAG_DATA_STATUS) { + dr->dr_complete = + (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ? + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY : + DATAIN_COMPLETE_CONNECTION_RECOVERY; + } + } else { + if ((dr->begrun + dr->runlength) == dr->data_sn) { + dr->dr_complete = + (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ? + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY : + DATAIN_COMPLETE_CONNECTION_RECOVERY; + } + } + + return dr; +} + +struct iscsi_datain_req *iscsit_get_datain_values( + struct iscsi_cmd *cmd, + struct iscsi_datain *datain) +{ + struct iscsi_conn *conn = cmd->conn; + + if (conn->sess->sess_ops->DataSequenceInOrder && + conn->sess->sess_ops->DataPDUInOrder) + return iscsit_set_datain_values_yes_and_yes(cmd, datain); + else if (!conn->sess->sess_ops->DataSequenceInOrder && + conn->sess->sess_ops->DataPDUInOrder) + return iscsit_set_datain_values_no_and_yes(cmd, datain); + else if (conn->sess->sess_ops->DataSequenceInOrder && + !conn->sess->sess_ops->DataPDUInOrder) + return iscsit_set_datain_values_yes_and_no(cmd, datain); + else if (!conn->sess->sess_ops->DataSequenceInOrder && + !conn->sess->sess_ops->DataPDUInOrder) + return iscsit_set_datain_values_no_and_no(cmd, datain); + + return NULL; +} diff --git a/drivers/target/iscsi/iscsi_target_datain_values.h b/drivers/target/iscsi/iscsi_target_datain_values.h new file mode 100644 index 
000000000000..646429ac5a02 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_datain_values.h @@ -0,0 +1,12 @@ +#ifndef ISCSI_TARGET_DATAIN_VALUES_H +#define ISCSI_TARGET_DATAIN_VALUES_H + +extern struct iscsi_datain_req *iscsit_allocate_datain_req(void); +extern void iscsit_attach_datain_req(struct iscsi_cmd *, struct iscsi_datain_req *); +extern void iscsit_free_datain_req(struct iscsi_cmd *, struct iscsi_datain_req *); +extern void iscsit_free_all_datain_reqs(struct iscsi_cmd *); +extern struct iscsi_datain_req *iscsit_get_datain_req(struct iscsi_cmd *); +extern struct iscsi_datain_req *iscsit_get_datain_values(struct iscsi_cmd *, + struct iscsi_datain *); + +#endif /*** ISCSI_TARGET_DATAIN_VALUES_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_device.c b/drivers/target/iscsi/iscsi_target_device.c new file mode 100644 index 000000000000..a19fa5eea88e --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_device.c @@ -0,0 +1,87 @@ +/******************************************************************************* + * This file contains the iSCSI Virtual Device and Disk Transport + * agnostic related functions. + * + \u00a9 Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ ******************************************************************************/ + +#include <scsi/scsi_device.h> +#include <target/target_core_base.h> +#include <target/target_core_device.h> +#include <target/target_core_transport.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_device.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_util.h" + +int iscsit_get_lun_for_tmr( + struct iscsi_cmd *cmd, + u64 lun) +{ + u32 unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun); + + return transport_lookup_tmr_lun(&cmd->se_cmd, unpacked_lun); +} + +int iscsit_get_lun_for_cmd( + struct iscsi_cmd *cmd, + unsigned char *cdb, + u64 lun) +{ + u32 unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun); + + return transport_lookup_cmd_lun(&cmd->se_cmd, unpacked_lun); +} + +void iscsit_determine_maxcmdsn(struct iscsi_session *sess) +{ + struct se_node_acl *se_nacl; + + /* + * This is a discovery session, the single queue slot was already + * assigned in iscsi_login_zero_tsih(). Since only Logout and + * Text Opcodes are allowed during discovery we do not have to worry + * about the HBA's queue depth here. + */ + if (sess->sess_ops->SessionType) + return; + + se_nacl = sess->se_sess->se_node_acl; + + /* + * This is a normal session, set the Session's CmdSN window to the + * struct se_node_acl->queue_depth. The value in struct se_node_acl->queue_depth + * has already been validated as a legal value in + * core_set_queue_depth_for_node(). 
+ */ + sess->cmdsn_window = se_nacl->queue_depth; + sess->max_cmd_sn = (sess->max_cmd_sn + se_nacl->queue_depth) - 1; +} + +void iscsit_increment_maxcmdsn(struct iscsi_cmd *cmd, struct iscsi_session *sess) +{ + if (cmd->immediate_cmd || cmd->maxcmdsn_inc) + return; + + cmd->maxcmdsn_inc = 1; + + mutex_lock(&sess->cmdsn_mutex); + sess->max_cmd_sn += 1; + pr_debug("Updated MaxCmdSN to 0x%08x\n", sess->max_cmd_sn); + mutex_unlock(&sess->cmdsn_mutex); +} diff --git a/drivers/target/iscsi/iscsi_target_device.h b/drivers/target/iscsi/iscsi_target_device.h new file mode 100644 index 000000000000..bef1cada15f8 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_device.h @@ -0,0 +1,9 @@ +#ifndef ISCSI_TARGET_DEVICE_H +#define ISCSI_TARGET_DEVICE_H + +extern int iscsit_get_lun_for_tmr(struct iscsi_cmd *, u64); +extern int iscsit_get_lun_for_cmd(struct iscsi_cmd *, unsigned char *, u64); +extern void iscsit_determine_maxcmdsn(struct iscsi_session *); +extern void iscsit_increment_maxcmdsn(struct iscsi_cmd *, struct iscsi_session *); + +#endif /* ISCSI_TARGET_DEVICE_H */ diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c new file mode 100644 index 000000000000..b7ffc3cd40cc --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -0,0 +1,1004 @@ +/****************************************************************************** + * This file contains error recovery level zero functions used by + * the iSCSI Target driver. + * + * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+
+/*
+ * Used to set values in struct iscsi_cmd that iscsit_dataout_check_sequence()
+ * checks against to determine a PDU's Offset+Length is within the current
+ * DataOUT Sequence. Used for DataSequenceInOrder=Yes only, except that the
+ * window is always set while cmd->unsolicited_data is set.
+ *
+ * Writes cmd->seq_start_offset and cmd->seq_end_offset; returns nothing.
+ */
+void iscsit_set_dataout_sequence_values(
+	struct iscsi_cmd *cmd)
+{
+	struct iscsi_conn *conn = cmd->conn;
+	/*
+	 * Still set seq_start_offset and seq_end_offset for Unsolicited
+	 * DataOUT, even if DataSequenceInOrder=No.
+	 */
+	if (cmd->unsolicited_data) {
+		cmd->seq_start_offset = cmd->write_data_done;
+		/*
+		 * The unsolicited window ends at the smaller of the expected
+		 * transfer length and FirstBurstLength. This used to read
+		 * "(write_data_done + (data_length > FBL) ? FBL : data_length)":
+		 * '+' binds tighter than '?:', so any non-zero
+		 * write_data_done selected FirstBurstLength even when
+		 * data_length was smaller.
+		 */
+		cmd->seq_end_offset = (cmd->data_length >
+			conn->sess->sess_ops->FirstBurstLength) ?
+			conn->sess->sess_ops->FirstBurstLength : cmd->data_length;
+		return;
+	}
+
+	if (!conn->sess->sess_ops->DataSequenceInOrder)
+		return;
+
+	if (!cmd->seq_start_offset && !cmd->seq_end_offset) {
+		cmd->seq_start_offset = cmd->write_data_done;
+		cmd->seq_end_offset = (cmd->data_length >
+			conn->sess->sess_ops->MaxBurstLength) ?
+			(cmd->write_data_done +
+			conn->sess->sess_ops->MaxBurstLength) : cmd->data_length;
+	} else {
+		cmd->seq_start_offset = cmd->seq_end_offset;
+		cmd->seq_end_offset = ((cmd->seq_end_offset +
+			conn->sess->sess_ops->MaxBurstLength) >=
+			cmd->data_length) ? cmd->data_length :
+			(cmd->seq_end_offset +
+			conn->sess->sess_ops->MaxBurstLength);
+	}
+}
+
+static int iscsit_dataout_within_command_recovery_check(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_data *hdr = (struct iscsi_data *) buf;
+	u32 payload_length = ntoh24(hdr->dlength);
+
+	/*
+	 * We do the within-command recovery checks here as it is
+	 * the first function called in iscsi_check_pre_dataout().
+	 * Basically, if we are in within-command recovery and
+	 * the PDU does not contain the offset the sequence needs,
+	 * dump the payload.
+	 *
+	 * This only applies to DataPDUInOrder=Yes, for
+	 * DataPDUInOrder=No we only re-request the failed PDU
+	 * and check that all PDUs in a sequence are received
+	 * upon end of sequence.
+	 */
+	if (conn->sess->sess_ops->DataSequenceInOrder) {
+		if ((cmd->cmd_flags & ICF_WITHIN_COMMAND_RECOVERY) &&
+		    (cmd->write_data_done != hdr->offset))
+			goto dump;
+
+		cmd->cmd_flags &= ~ICF_WITHIN_COMMAND_RECOVERY;
+	} else {
+		struct iscsi_seq *seq;
+
+		seq = iscsit_get_seq_holder(cmd, hdr->offset, payload_length);
+		if (!seq)
+			return DATAOUT_CANNOT_RECOVER;
+		/*
+		 * Set the struct iscsi_seq pointer to reuse later.
+ */ + cmd->seq_ptr = seq; + + if (conn->sess->sess_ops->DataPDUInOrder) { + if ((seq->status == + DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY) && + ((seq->offset != hdr->offset) || + (seq->data_sn != hdr->datasn))) + goto dump; + } else { + if ((seq->status == + DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY) && + (seq->data_sn != hdr->datasn)) + goto dump; + } + + if (seq->status == DATAOUT_SEQUENCE_COMPLETE) + goto dump; + + if (seq->status != DATAOUT_SEQUENCE_COMPLETE) + seq->status = 0; + } + + return DATAOUT_NORMAL; + +dump: + pr_err("Dumping DataOUT PDU Offset: %u Length: %d DataSN:" + " 0x%08x\n", hdr->offset, payload_length, hdr->datasn); + return iscsit_dump_data_payload(conn, payload_length, 1); +} + +static int iscsit_dataout_check_unsolicited_sequence( + struct iscsi_cmd *cmd, + unsigned char *buf) +{ + u32 first_burst_len; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_data *hdr = (struct iscsi_data *) buf; + u32 payload_length = ntoh24(hdr->dlength); + + + if ((hdr->offset < cmd->seq_start_offset) || + ((hdr->offset + payload_length) > cmd->seq_end_offset)) { + pr_err("Command ITT: 0x%08x with Offset: %u," + " Length: %u outside of Unsolicited Sequence %u:%u while" + " DataSequenceInOrder=Yes.\n", cmd->init_task_tag, + hdr->offset, payload_length, cmd->seq_start_offset, + cmd->seq_end_offset); + return DATAOUT_CANNOT_RECOVER; + } + + first_burst_len = (cmd->first_burst_len + payload_length); + + if (first_burst_len > conn->sess->sess_ops->FirstBurstLength) { + pr_err("Total %u bytes exceeds FirstBurstLength: %u" + " for this Unsolicited DataOut Burst.\n", + first_burst_len, conn->sess->sess_ops->FirstBurstLength); + transport_send_check_condition_and_sense(&cmd->se_cmd, + TCM_INCORRECT_AMOUNT_OF_DATA, 0); + return DATAOUT_CANNOT_RECOVER; + } + + /* + * Perform various MaxBurstLength and ISCSI_FLAG_CMD_FINAL sanity + * checks for the current Unsolicited DataOUT Sequence. 
+	 */
+	if (hdr->flags & ISCSI_FLAG_CMD_FINAL) {
+		/*
+		 * Ignore ISCSI_FLAG_CMD_FINAL checks while DataPDUInOrder=No, end of
+		 * sequence checks are handled in
+		 * iscsit_dataout_datapduinorder_no_fbit().
+		 */
+		if (!conn->sess->sess_ops->DataPDUInOrder)
+			goto out;
+
+		if ((first_burst_len != cmd->data_length) &&
+		    (first_burst_len != conn->sess->sess_ops->FirstBurstLength)) {
+			pr_err("Unsolicited non-immediate data"
+			" received %u does not equal FirstBurstLength: %u, and"
+			" does not equal ExpXferLen %u.\n", first_burst_len,
+				conn->sess->sess_ops->FirstBurstLength,
+				cmd->data_length);
+			transport_send_check_condition_and_sense(&cmd->se_cmd,
+					TCM_INCORRECT_AMOUNT_OF_DATA, 0);
+			return DATAOUT_CANNOT_RECOVER;
+		}
+	} else {
+		if (first_burst_len == conn->sess->sess_ops->FirstBurstLength) {
+			pr_err("Command ITT: 0x%08x reached"
+			" FirstBurstLength: %u, but ISCSI_FLAG_CMD_FINAL is not set. protocol"
+				" error.\n", cmd->init_task_tag,
+				conn->sess->sess_ops->FirstBurstLength);
+			return DATAOUT_CANNOT_RECOVER;
+		}
+		if (first_burst_len == cmd->data_length) {
+			pr_err("Command ITT: 0x%08x reached"
+			" ExpXferLen: %u, but ISCSI_FLAG_CMD_FINAL is not set. protocol"
+			" error.\n", cmd->init_task_tag, cmd->data_length);
+			return DATAOUT_CANNOT_RECOVER;
+		}
+	}
+
+out:
+	return DATAOUT_NORMAL;
+}
+
+/*
+ * Validate a solicited DataOUT PDU against the current sequence window.
+ *
+ * @cmd: command the PDU belongs to
+ * @buf: PDU header, interpreted as struct iscsi_data
+ *
+ * Returns DATAOUT_NORMAL when the PDU is acceptable,
+ * DATAOUT_WITHIN_COMMAND_RECOVERY when its payload was dumped, or
+ * DATAOUT_CANNOT_RECOVER on a protocol error or a failed dump. For
+ * DataSequenceInOrder=No the matching struct iscsi_seq is cached in
+ * cmd->seq_ptr.
+ */
+static int iscsit_dataout_check_sequence(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	u32 next_burst_len;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_seq *seq = NULL;
+	struct iscsi_data *hdr = (struct iscsi_data *) buf;
+	u32 payload_length = ntoh24(hdr->dlength);
+
+	/*
+	 * For DataSequenceInOrder=Yes: Check that the offset and offset+length
+	 * is within range as defined by iscsit_set_dataout_sequence_values().
+	 *
+	 * For DataSequenceInOrder=No: Check that a struct iscsi_seq exists for
+	 * offset+length tuple.
+	 */
+	if (conn->sess->sess_ops->DataSequenceInOrder) {
+		/*
+		 * Due to possibility of recovery DataOUT sent by the initiator
+		 * fulfilling a Recovery R2T, it's best to just dump the
+		 * payload here, instead of erroring out.
+		 */
+		if ((hdr->offset < cmd->seq_start_offset) ||
+		   ((hdr->offset + payload_length) > cmd->seq_end_offset)) {
+			pr_err("Command ITT: 0x%08x with Offset: %u,"
+			" Length: %u outside of Sequence %u:%u while"
+			" DataSequenceInOrder=Yes.\n", cmd->init_task_tag,
+			hdr->offset, payload_length, cmd->seq_start_offset,
+				cmd->seq_end_offset);
+
+			if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+				return DATAOUT_CANNOT_RECOVER;
+			return DATAOUT_WITHIN_COMMAND_RECOVERY;
+		}
+
+		next_burst_len = (cmd->next_burst_len + payload_length);
+	} else {
+		seq = iscsit_get_seq_holder(cmd, hdr->offset, payload_length);
+		if (!seq)
+			return DATAOUT_CANNOT_RECOVER;
+		/*
+		 * Set the struct iscsi_seq pointer to reuse later.
+		 */
+		cmd->seq_ptr = seq;
+
+		if (seq->status == DATAOUT_SEQUENCE_COMPLETE) {
+			if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+				return DATAOUT_CANNOT_RECOVER;
+			return DATAOUT_WITHIN_COMMAND_RECOVERY;
+		}
+
+		next_burst_len = (seq->next_burst_len + payload_length);
+	}
+
+	if (next_burst_len > conn->sess->sess_ops->MaxBurstLength) {
+		pr_err("Command ITT: 0x%08x, NextBurstLength: %u and"
+			" Length: %u exceeds MaxBurstLength: %u. protocol"
+			" error.\n", cmd->init_task_tag,
+			(next_burst_len - payload_length),
+			payload_length, conn->sess->sess_ops->MaxBurstLength);
+		return DATAOUT_CANNOT_RECOVER;
+	}
+
+	/*
+	 * Perform various MaxBurstLength and ISCSI_FLAG_CMD_FINAL sanity
+	 * checks for the current DataOUT Sequence.
+	 */
+	if (hdr->flags & ISCSI_FLAG_CMD_FINAL) {
+		/*
+		 * Ignore ISCSI_FLAG_CMD_FINAL checks while DataPDUInOrder=No, end of
+		 * sequence checks are handled in
+		 * iscsit_dataout_datapduinorder_no_fbit().
+		 */
+		if (!conn->sess->sess_ops->DataPDUInOrder)
+			goto out;
+
+		if (conn->sess->sess_ops->DataSequenceInOrder) {
+			if ((next_burst_len <
+			     conn->sess->sess_ops->MaxBurstLength) &&
+			   ((cmd->write_data_done + payload_length) <
+			     cmd->data_length)) {
+				pr_err("Command ITT: 0x%08x set ISCSI_FLAG_CMD_FINAL"
+				" before end of DataOUT sequence, protocol"
+				" error.\n", cmd->init_task_tag);
+				return DATAOUT_CANNOT_RECOVER;
+			}
+		} else {
+			if (next_burst_len < seq->xfer_len) {
+				pr_err("Command ITT: 0x%08x set ISCSI_FLAG_CMD_FINAL"
+				" before end of DataOUT sequence, protocol"
+				" error.\n", cmd->init_task_tag);
+				return DATAOUT_CANNOT_RECOVER;
+			}
+		}
+	} else {
+		if (conn->sess->sess_ops->DataSequenceInOrder) {
+			if (next_burst_len ==
+					conn->sess->sess_ops->MaxBurstLength) {
+				/* Newline added: every sibling message ends in one. */
+				pr_err("Command ITT: 0x%08x reached"
+				" MaxBurstLength: %u, but ISCSI_FLAG_CMD_FINAL is"
+				" not set, protocol error.\n", cmd->init_task_tag,
+					conn->sess->sess_ops->MaxBurstLength);
+				return DATAOUT_CANNOT_RECOVER;
+			}
+			if ((cmd->write_data_done + payload_length) ==
+					cmd->data_length) {
+				pr_err("Command ITT: 0x%08x reached"
+				" last DataOUT PDU in sequence but ISCSI_FLAG_"
+				"CMD_FINAL is not set, protocol error.\n",
+					cmd->init_task_tag);
+				return DATAOUT_CANNOT_RECOVER;
+			}
+		} else {
+			if (next_burst_len == seq->xfer_len) {
+				pr_err("Command ITT: 0x%08x reached"
+				" last DataOUT PDU in sequence but ISCSI_FLAG_"
+				"CMD_FINAL is not set, protocol error.\n",
+					cmd->init_task_tag);
+				return DATAOUT_CANNOT_RECOVER;
+			}
+		}
+	}
+
+out:
+	return DATAOUT_NORMAL;
+}
+
+static int iscsit_dataout_check_datasn(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	int dump = 0, recovery = 0;
+	u32 data_sn = 0;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_data *hdr = (struct iscsi_data *) buf;
+	u32 payload_length = ntoh24(hdr->dlength);
+
+	/*
+	 * Considering the target has no method of re-requesting DataOUT
+	 * by DataSN, if we receieve a greater DataSN than expected we
+	 * assume the
functions for DataPDUInOrder=[Yes,No] below will + * handle it. + * + * If the DataSN is less than expected, dump the payload. + */ + if (conn->sess->sess_ops->DataSequenceInOrder) + data_sn = cmd->data_sn; + else { + struct iscsi_seq *seq = cmd->seq_ptr; + data_sn = seq->data_sn; + } + + if (hdr->datasn > data_sn) { + pr_err("Command ITT: 0x%08x, received DataSN: 0x%08x" + " higher than expected 0x%08x.\n", cmd->init_task_tag, + hdr->datasn, data_sn); + recovery = 1; + goto recover; + } else if (hdr->datasn < data_sn) { + pr_err("Command ITT: 0x%08x, received DataSN: 0x%08x" + " lower than expected 0x%08x, discarding payload.\n", + cmd->init_task_tag, hdr->datasn, data_sn); + dump = 1; + goto dump; + } + + return DATAOUT_NORMAL; + +recover: + if (!conn->sess->sess_ops->ErrorRecoveryLevel) { + pr_err("Unable to perform within-command recovery" + " while ERL=0.\n"); + return DATAOUT_CANNOT_RECOVER; + } +dump: + if (iscsit_dump_data_payload(conn, payload_length, 1) < 0) + return DATAOUT_CANNOT_RECOVER; + + return (recovery || dump) ? DATAOUT_WITHIN_COMMAND_RECOVERY : + DATAOUT_NORMAL; +} + +static int iscsit_dataout_pre_datapduinorder_yes( + struct iscsi_cmd *cmd, + unsigned char *buf) +{ + int dump = 0, recovery = 0; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_data *hdr = (struct iscsi_data *) buf; + u32 payload_length = ntoh24(hdr->dlength); + + /* + * For DataSequenceInOrder=Yes: If the offset is greater than the global + * DataPDUInOrder=Yes offset counter in struct iscsi_cmd a protcol error has + * occured and fail the connection. + * + * For DataSequenceInOrder=No: If the offset is greater than the per + * sequence DataPDUInOrder=Yes offset counter in struct iscsi_seq a protocol + * error has occured and fail the connection. 
+ */ + if (conn->sess->sess_ops->DataSequenceInOrder) { + if (hdr->offset != cmd->write_data_done) { + pr_err("Command ITT: 0x%08x, received offset" + " %u different than expected %u.\n", cmd->init_task_tag, + hdr->offset, cmd->write_data_done); + recovery = 1; + goto recover; + } + } else { + struct iscsi_seq *seq = cmd->seq_ptr; + + if (hdr->offset > seq->offset) { + pr_err("Command ITT: 0x%08x, received offset" + " %u greater than expected %u.\n", cmd->init_task_tag, + hdr->offset, seq->offset); + recovery = 1; + goto recover; + } else if (hdr->offset < seq->offset) { + pr_err("Command ITT: 0x%08x, received offset" + " %u less than expected %u, discarding payload.\n", + cmd->init_task_tag, hdr->offset, seq->offset); + dump = 1; + goto dump; + } + } + + return DATAOUT_NORMAL; + +recover: + if (!conn->sess->sess_ops->ErrorRecoveryLevel) { + pr_err("Unable to perform within-command recovery" + " while ERL=0.\n"); + return DATAOUT_CANNOT_RECOVER; + } +dump: + if (iscsit_dump_data_payload(conn, payload_length, 1) < 0) + return DATAOUT_CANNOT_RECOVER; + + return (recovery) ? iscsit_recover_dataout_sequence(cmd, + hdr->offset, payload_length) : + (dump) ? 
DATAOUT_WITHIN_COMMAND_RECOVERY : DATAOUT_NORMAL; +} + +static int iscsit_dataout_pre_datapduinorder_no( + struct iscsi_cmd *cmd, + unsigned char *buf) +{ + struct iscsi_pdu *pdu; + struct iscsi_data *hdr = (struct iscsi_data *) buf; + u32 payload_length = ntoh24(hdr->dlength); + + pdu = iscsit_get_pdu_holder(cmd, hdr->offset, payload_length); + if (!pdu) + return DATAOUT_CANNOT_RECOVER; + + cmd->pdu_ptr = pdu; + + switch (pdu->status) { + case ISCSI_PDU_NOT_RECEIVED: + case ISCSI_PDU_CRC_FAILED: + case ISCSI_PDU_TIMED_OUT: + break; + case ISCSI_PDU_RECEIVED_OK: + pr_err("Command ITT: 0x%08x received already gotten" + " Offset: %u, Length: %u\n", cmd->init_task_tag, + hdr->offset, payload_length); + return iscsit_dump_data_payload(cmd->conn, payload_length, 1); + default: + return DATAOUT_CANNOT_RECOVER; + } + + return DATAOUT_NORMAL; +} + +static int iscsit_dataout_update_r2t(struct iscsi_cmd *cmd, u32 offset, u32 length) +{ + struct iscsi_r2t *r2t; + + if (cmd->unsolicited_data) + return 0; + + r2t = iscsit_get_r2t_for_eos(cmd, offset, length); + if (!r2t) + return -1; + + spin_lock_bh(&cmd->r2t_lock); + r2t->seq_complete = 1; + cmd->outstanding_r2ts--; + spin_unlock_bh(&cmd->r2t_lock); + + return 0; +} + +static int iscsit_dataout_update_datapduinorder_no( + struct iscsi_cmd *cmd, + u32 data_sn, + int f_bit) +{ + int ret = 0; + struct iscsi_pdu *pdu = cmd->pdu_ptr; + + pdu->data_sn = data_sn; + + switch (pdu->status) { + case ISCSI_PDU_NOT_RECEIVED: + pdu->status = ISCSI_PDU_RECEIVED_OK; + break; + case ISCSI_PDU_CRC_FAILED: + pdu->status = ISCSI_PDU_RECEIVED_OK; + break; + case ISCSI_PDU_TIMED_OUT: + pdu->status = ISCSI_PDU_RECEIVED_OK; + break; + default: + return DATAOUT_CANNOT_RECOVER; + } + + if (f_bit) { + ret = iscsit_dataout_datapduinorder_no_fbit(cmd, pdu); + if (ret == DATAOUT_CANNOT_RECOVER) + return ret; + } + + return DATAOUT_NORMAL; +} + +static int iscsit_dataout_post_crc_passed( + struct iscsi_cmd *cmd, + unsigned char *buf) +{ + int ret, 
send_r2t = 0; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_seq *seq = NULL; + struct iscsi_data *hdr = (struct iscsi_data *) buf; + u32 payload_length = ntoh24(hdr->dlength); + + if (cmd->unsolicited_data) { + if ((cmd->first_burst_len + payload_length) == + conn->sess->sess_ops->FirstBurstLength) { + if (iscsit_dataout_update_r2t(cmd, hdr->offset, + payload_length) < 0) + return DATAOUT_CANNOT_RECOVER; + send_r2t = 1; + } + + if (!conn->sess->sess_ops->DataPDUInOrder) { + ret = iscsit_dataout_update_datapduinorder_no(cmd, + hdr->datasn, (hdr->flags & ISCSI_FLAG_CMD_FINAL)); + if (ret == DATAOUT_CANNOT_RECOVER) + return ret; + } + + cmd->first_burst_len += payload_length; + + if (conn->sess->sess_ops->DataSequenceInOrder) + cmd->data_sn++; + else { + seq = cmd->seq_ptr; + seq->data_sn++; + seq->offset += payload_length; + } + + if (send_r2t) { + if (seq) + seq->status = DATAOUT_SEQUENCE_COMPLETE; + cmd->first_burst_len = 0; + cmd->unsolicited_data = 0; + } + } else { + if (conn->sess->sess_ops->DataSequenceInOrder) { + if ((cmd->next_burst_len + payload_length) == + conn->sess->sess_ops->MaxBurstLength) { + if (iscsit_dataout_update_r2t(cmd, hdr->offset, + payload_length) < 0) + return DATAOUT_CANNOT_RECOVER; + send_r2t = 1; + } + + if (!conn->sess->sess_ops->DataPDUInOrder) { + ret = iscsit_dataout_update_datapduinorder_no( + cmd, hdr->datasn, + (hdr->flags & ISCSI_FLAG_CMD_FINAL)); + if (ret == DATAOUT_CANNOT_RECOVER) + return ret; + } + + cmd->next_burst_len += payload_length; + cmd->data_sn++; + + if (send_r2t) + cmd->next_burst_len = 0; + } else { + seq = cmd->seq_ptr; + + if ((seq->next_burst_len + payload_length) == + seq->xfer_len) { + if (iscsit_dataout_update_r2t(cmd, hdr->offset, + payload_length) < 0) + return DATAOUT_CANNOT_RECOVER; + send_r2t = 1; + } + + if (!conn->sess->sess_ops->DataPDUInOrder) { + ret = iscsit_dataout_update_datapduinorder_no( + cmd, hdr->datasn, + (hdr->flags & ISCSI_FLAG_CMD_FINAL)); + if (ret == DATAOUT_CANNOT_RECOVER) 
+ return ret; + } + + seq->data_sn++; + seq->offset += payload_length; + seq->next_burst_len += payload_length; + + if (send_r2t) { + seq->next_burst_len = 0; + seq->status = DATAOUT_SEQUENCE_COMPLETE; + } + } + } + + if (send_r2t && conn->sess->sess_ops->DataSequenceInOrder) + cmd->data_sn = 0; + + cmd->write_data_done += payload_length; + + return (cmd->write_data_done == cmd->data_length) ? + DATAOUT_SEND_TO_TRANSPORT : (send_r2t) ? + DATAOUT_SEND_R2T : DATAOUT_NORMAL; +} + +static int iscsit_dataout_post_crc_failed( + struct iscsi_cmd *cmd, + unsigned char *buf) +{ + struct iscsi_conn *conn = cmd->conn; + struct iscsi_pdu *pdu; + struct iscsi_data *hdr = (struct iscsi_data *) buf; + u32 payload_length = ntoh24(hdr->dlength); + + if (conn->sess->sess_ops->DataPDUInOrder) + goto recover; + /* + * The rest of this function is only called when DataPDUInOrder=No. + */ + pdu = cmd->pdu_ptr; + + switch (pdu->status) { + case ISCSI_PDU_NOT_RECEIVED: + pdu->status = ISCSI_PDU_CRC_FAILED; + break; + case ISCSI_PDU_CRC_FAILED: + break; + case ISCSI_PDU_TIMED_OUT: + pdu->status = ISCSI_PDU_CRC_FAILED; + break; + default: + return DATAOUT_CANNOT_RECOVER; + } + +recover: + return iscsit_recover_dataout_sequence(cmd, hdr->offset, payload_length); +} + +/* + * Called from iscsit_handle_data_out() before DataOUT Payload is received + * and CRC computed. 
+ */ +extern int iscsit_check_pre_dataout( + struct iscsi_cmd *cmd, + unsigned char *buf) +{ + int ret; + struct iscsi_conn *conn = cmd->conn; + + ret = iscsit_dataout_within_command_recovery_check(cmd, buf); + if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) || + (ret == DATAOUT_CANNOT_RECOVER)) + return ret; + + ret = iscsit_dataout_check_datasn(cmd, buf); + if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) || + (ret == DATAOUT_CANNOT_RECOVER)) + return ret; + + if (cmd->unsolicited_data) { + ret = iscsit_dataout_check_unsolicited_sequence(cmd, buf); + if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) || + (ret == DATAOUT_CANNOT_RECOVER)) + return ret; + } else { + ret = iscsit_dataout_check_sequence(cmd, buf); + if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) || + (ret == DATAOUT_CANNOT_RECOVER)) + return ret; + } + + return (conn->sess->sess_ops->DataPDUInOrder) ? + iscsit_dataout_pre_datapduinorder_yes(cmd, buf) : + iscsit_dataout_pre_datapduinorder_no(cmd, buf); +} + +/* + * Called from iscsit_handle_data_out() after DataOUT Payload is received + * and CRC computed. 
+ */ +int iscsit_check_post_dataout( + struct iscsi_cmd *cmd, + unsigned char *buf, + u8 data_crc_failed) +{ + struct iscsi_conn *conn = cmd->conn; + + cmd->dataout_timeout_retries = 0; + + if (!data_crc_failed) + return iscsit_dataout_post_crc_passed(cmd, buf); + else { + if (!conn->sess->sess_ops->ErrorRecoveryLevel) { + pr_err("Unable to recover from DataOUT CRC" + " failure while ERL=0, closing session.\n"); + iscsit_add_reject_from_cmd(ISCSI_REASON_DATA_DIGEST_ERROR, + 1, 0, buf, cmd); + return DATAOUT_CANNOT_RECOVER; + } + + iscsit_add_reject_from_cmd(ISCSI_REASON_DATA_DIGEST_ERROR, + 0, 0, buf, cmd); + return iscsit_dataout_post_crc_failed(cmd, buf); + } +} + +static void iscsit_handle_time2retain_timeout(unsigned long data) +{ + struct iscsi_session *sess = (struct iscsi_session *) data; + struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess); + struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; + + spin_lock_bh(&se_tpg->session_lock); + if (sess->time2retain_timer_flags & ISCSI_TF_STOP) { + spin_unlock_bh(&se_tpg->session_lock); + return; + } + if (atomic_read(&sess->session_reinstatement)) { + pr_err("Exiting Time2Retain handler because" + " session_reinstatement=1\n"); + spin_unlock_bh(&se_tpg->session_lock); + return; + } + sess->time2retain_timer_flags |= ISCSI_TF_EXPIRED; + + pr_err("Time2Retain timer expired for SID: %u, cleaning up" + " iSCSI session.\n", sess->sid); + { + struct iscsi_tiqn *tiqn = tpg->tpg_tiqn; + + if (tiqn) { + spin_lock(&tiqn->sess_err_stats.lock); + strcpy(tiqn->sess_err_stats.last_sess_fail_rem_name, + (void *)sess->sess_ops->InitiatorName); + tiqn->sess_err_stats.last_sess_failure_type = + ISCSI_SESS_ERR_CXN_TIMEOUT; + tiqn->sess_err_stats.cxn_timeout_errors++; + sess->conn_timeout_errors++; + spin_unlock(&tiqn->sess_err_stats.lock); + } + } + + spin_unlock_bh(&se_tpg->session_lock); + iscsit_close_session(sess); +} + +extern void iscsit_start_time2retain_handler(struct iscsi_session *sess) +{ + int tpg_active; + /* + * Only 
start Time2Retain timer when the assoicated TPG is still in + * an ACTIVE (eg: not disabled or shutdown) state. + */ + spin_lock(&ISCSI_TPG_S(sess)->tpg_state_lock); + tpg_active = (ISCSI_TPG_S(sess)->tpg_state == TPG_STATE_ACTIVE); + spin_unlock(&ISCSI_TPG_S(sess)->tpg_state_lock); + + if (!tpg_active) + return; + + if (sess->time2retain_timer_flags & ISCSI_TF_RUNNING) + return; + + pr_debug("Starting Time2Retain timer for %u seconds on" + " SID: %u\n", sess->sess_ops->DefaultTime2Retain, sess->sid); + + init_timer(&sess->time2retain_timer); + sess->time2retain_timer.expires = + (get_jiffies_64() + sess->sess_ops->DefaultTime2Retain * HZ); + sess->time2retain_timer.data = (unsigned long)sess; + sess->time2retain_timer.function = iscsit_handle_time2retain_timeout; + sess->time2retain_timer_flags &= ~ISCSI_TF_STOP; + sess->time2retain_timer_flags |= ISCSI_TF_RUNNING; + add_timer(&sess->time2retain_timer); +} + +/* + * Called with spin_lock_bh(&struct se_portal_group->session_lock) held + */ +extern int iscsit_stop_time2retain_timer(struct iscsi_session *sess) +{ + struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess); + struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; + + if (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED) + return -1; + + if (!(sess->time2retain_timer_flags & ISCSI_TF_RUNNING)) + return 0; + + sess->time2retain_timer_flags |= ISCSI_TF_STOP; + spin_unlock_bh(&se_tpg->session_lock); + + del_timer_sync(&sess->time2retain_timer); + + spin_lock_bh(&se_tpg->session_lock); + sess->time2retain_timer_flags &= ~ISCSI_TF_RUNNING; + pr_debug("Stopped Time2Retain Timer for SID: %u\n", + sess->sid); + return 0; +} + +void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *conn) +{ + spin_lock_bh(&conn->state_lock); + if (atomic_read(&conn->connection_exit)) { + spin_unlock_bh(&conn->state_lock); + goto sleep; + } + + if (atomic_read(&conn->transport_failed)) { + spin_unlock_bh(&conn->state_lock); + goto sleep; + } + spin_unlock_bh(&conn->state_lock); + + 
iscsi_thread_set_force_reinstatement(conn); + +sleep: + wait_for_completion(&conn->conn_wait_rcfr_comp); + complete(&conn->conn_post_wait_comp); +} + +void iscsit_cause_connection_reinstatement(struct iscsi_conn *conn, int sleep) +{ + spin_lock_bh(&conn->state_lock); + if (atomic_read(&conn->connection_exit)) { + spin_unlock_bh(&conn->state_lock); + return; + } + + if (atomic_read(&conn->transport_failed)) { + spin_unlock_bh(&conn->state_lock); + return; + } + + if (atomic_read(&conn->connection_reinstatement)) { + spin_unlock_bh(&conn->state_lock); + return; + } + + if (iscsi_thread_set_force_reinstatement(conn) < 0) { + spin_unlock_bh(&conn->state_lock); + return; + } + + atomic_set(&conn->connection_reinstatement, 1); + if (!sleep) { + spin_unlock_bh(&conn->state_lock); + return; + } + + atomic_set(&conn->sleep_on_conn_wait_comp, 1); + spin_unlock_bh(&conn->state_lock); + + wait_for_completion(&conn->conn_wait_comp); + complete(&conn->conn_post_wait_comp); +} + +void iscsit_fall_back_to_erl0(struct iscsi_session *sess) +{ + pr_debug("Falling back to ErrorRecoveryLevel=0 for SID:" + " %u\n", sess->sid); + + atomic_set(&sess->session_fall_back_to_erl0, 1); +} + +static void iscsit_handle_connection_cleanup(struct iscsi_conn *conn) +{ + struct iscsi_session *sess = conn->sess; + + if ((sess->sess_ops->ErrorRecoveryLevel == 2) && + !atomic_read(&sess->session_reinstatement) && + !atomic_read(&sess->session_fall_back_to_erl0)) + iscsit_connection_recovery_transport_reset(conn); + else { + pr_debug("Performing cleanup for failed iSCSI" + " Connection ID: %hu from %s\n", conn->cid, + sess->sess_ops->InitiatorName); + iscsit_close_connection(conn); + } +} + +extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn) +{ + spin_lock_bh(&conn->state_lock); + if (atomic_read(&conn->connection_exit)) { + spin_unlock_bh(&conn->state_lock); + return; + } + atomic_set(&conn->connection_exit, 1); + + if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) { + 
spin_unlock_bh(&conn->state_lock); + iscsit_close_connection(conn); + return; + } + + if (conn->conn_state == TARG_CONN_STATE_CLEANUP_WAIT) { + spin_unlock_bh(&conn->state_lock); + return; + } + + pr_debug("Moving to TARG_CONN_STATE_CLEANUP_WAIT.\n"); + conn->conn_state = TARG_CONN_STATE_CLEANUP_WAIT; + spin_unlock_bh(&conn->state_lock); + + iscsit_handle_connection_cleanup(conn); +} + +/* + * This is the simple function that makes the magic of + * sync and steering happen in the follow paradoxical order: + * + * 0) Receive conn->of_marker (bytes left until next OFMarker) + * bytes into an offload buffer. When we pass the exact number + * of bytes in conn->of_marker, iscsit_dump_data_payload() and hence + * rx_data() will automatically receive the identical u32 marker + * values and store it in conn->of_marker_offset; + * 1) Now conn->of_marker_offset will contain the offset to the start + * of the next iSCSI PDU. Dump these remaining bytes into another + * offload buffer. + * 2) We are done! + * Next byte in the TCP stream will contain the next iSCSI PDU! + * Cool Huh?! + */ +int iscsit_recover_from_unknown_opcode(struct iscsi_conn *conn) +{ + /* + * Make sure the remaining bytes to next maker is a sane value. + */ + if (conn->of_marker > (conn->conn_ops->OFMarkInt * 4)) { + pr_err("Remaining bytes to OFMarker: %u exceeds" + " OFMarkInt bytes: %u.\n", conn->of_marker, + conn->conn_ops->OFMarkInt * 4); + return -1; + } + + pr_debug("Advancing %u bytes in TCP stream to get to the" + " next OFMarker.\n", conn->of_marker); + + if (iscsit_dump_data_payload(conn, conn->of_marker, 0) < 0) + return -1; + + /* + * Make sure the offset marker we retrived is a valid value. 
+ */ + if (conn->of_marker_offset > (ISCSI_HDR_LEN + (ISCSI_CRC_LEN * 2) + + conn->conn_ops->MaxRecvDataSegmentLength)) { + pr_err("OfMarker offset value: %u exceeds limit.\n", + conn->of_marker_offset); + return -1; + } + + pr_debug("Discarding %u bytes of TCP stream to get to the" + " next iSCSI Opcode.\n", conn->of_marker_offset); + + if (iscsit_dump_data_payload(conn, conn->of_marker_offset, 0) < 0) + return -1; + + return 0; +} diff --git a/drivers/target/iscsi/iscsi_target_erl0.h b/drivers/target/iscsi/iscsi_target_erl0.h new file mode 100644 index 000000000000..21acc9a06376 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_erl0.h @@ -0,0 +1,15 @@ +#ifndef ISCSI_TARGET_ERL0_H +#define ISCSI_TARGET_ERL0_H + +extern void iscsit_set_dataout_sequence_values(struct iscsi_cmd *); +extern int iscsit_check_pre_dataout(struct iscsi_cmd *, unsigned char *); +extern int iscsit_check_post_dataout(struct iscsi_cmd *, unsigned char *, u8); +extern void iscsit_start_time2retain_handler(struct iscsi_session *); +extern int iscsit_stop_time2retain_timer(struct iscsi_session *); +extern void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *); +extern void iscsit_cause_connection_reinstatement(struct iscsi_conn *, int); +extern void iscsit_fall_back_to_erl0(struct iscsi_session *); +extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *); +extern int iscsit_recover_from_unknown_opcode(struct iscsi_conn *); + +#endif /*** ISCSI_TARGET_ERL0_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c new file mode 100644 index 000000000000..980650792cf6 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_erl1.c @@ -0,0 +1,1299 @@ +/******************************************************************************* + * This file contains error recovery level one used by the iSCSI Target driver. + * + * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. 
+ * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + ******************************************************************************/ + +#include <linux/list.h> +#include <scsi/iscsi_proto.h> +#include <target/target_core_base.h> +#include <target/target_core_transport.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_seq_pdu_list.h" +#include "iscsi_target_datain_values.h" +#include "iscsi_target_device.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_util.h" +#include "iscsi_target_erl0.h" +#include "iscsi_target_erl1.h" +#include "iscsi_target_erl2.h" +#include "iscsi_target.h" + +#define OFFLOAD_BUF_SIZE 32768 + +/* + * Used to dump excess datain payload for certain error recovery + * situations. Receive in OFFLOAD_BUF_SIZE max of datain per rx_data(). + * + * dump_padding_digest denotes if padding and data digests need + * to be dumped. + */ +int iscsit_dump_data_payload( + struct iscsi_conn *conn, + u32 buf_len, + int dump_padding_digest) +{ + char *buf, pad_bytes[4]; + int ret = DATAOUT_WITHIN_COMMAND_RECOVERY, rx_got; + u32 length, padding, offset = 0, size; + struct kvec iov; + + length = (buf_len > OFFLOAD_BUF_SIZE) ? 
OFFLOAD_BUF_SIZE : buf_len; + + buf = kzalloc(length, GFP_ATOMIC); + if (!buf) { + pr_err("Unable to allocate %u bytes for offload" + " buffer.\n", length); + return -1; + } + memset(&iov, 0, sizeof(struct kvec)); + + while (offset < buf_len) { + size = ((offset + length) > buf_len) ? + (buf_len - offset) : length; + + iov.iov_len = size; + iov.iov_base = buf; + + rx_got = rx_data(conn, &iov, 1, size); + if (rx_got != size) { + ret = DATAOUT_CANNOT_RECOVER; + goto out; + } + + offset += size; + } + + if (!dump_padding_digest) + goto out; + + padding = ((-buf_len) & 3); + if (padding != 0) { + iov.iov_len = padding; + iov.iov_base = pad_bytes; + + rx_got = rx_data(conn, &iov, 1, padding); + if (rx_got != padding) { + ret = DATAOUT_CANNOT_RECOVER; + goto out; + } + } + + if (conn->conn_ops->DataDigest) { + u32 data_crc; + + iov.iov_len = ISCSI_CRC_LEN; + iov.iov_base = &data_crc; + + rx_got = rx_data(conn, &iov, 1, ISCSI_CRC_LEN); + if (rx_got != ISCSI_CRC_LEN) { + ret = DATAOUT_CANNOT_RECOVER; + goto out; + } + } + +out: + kfree(buf); + return ret; +} + +/* + * Used for retransmitting R2Ts from a R2T SNACK request. + */ +static int iscsit_send_recovery_r2t_for_snack( + struct iscsi_cmd *cmd, + struct iscsi_r2t *r2t) +{ + /* + * If the struct iscsi_r2t has not been sent yet, we can safely + * ignore retransmission + * of the R2TSN in question. + */ + spin_lock_bh(&cmd->r2t_lock); + if (!r2t->sent_r2t) { + spin_unlock_bh(&cmd->r2t_lock); + return 0; + } + r2t->sent_r2t = 0; + spin_unlock_bh(&cmd->r2t_lock); + + iscsit_add_cmd_to_immediate_queue(cmd, cmd->conn, ISTATE_SEND_R2T); + + return 0; +} + +static int iscsit_handle_r2t_snack( + struct iscsi_cmd *cmd, + unsigned char *buf, + u32 begrun, + u32 runlength) +{ + u32 last_r2tsn; + struct iscsi_r2t *r2t; + + /* + * Make sure the initiator is not requesting retransmission + * of R2TSNs already acknowledged by a TMR TASK_REASSIGN. 
+ */ + if ((cmd->cmd_flags & ICF_GOT_DATACK_SNACK) && + (begrun <= cmd->acked_data_sn)) { + pr_err("ITT: 0x%08x, R2T SNACK requesting" + " retransmission of R2TSN: 0x%08x to 0x%08x but already" + " acked to R2TSN: 0x%08x by TMR TASK_REASSIGN," + " protocol error.\n", cmd->init_task_tag, begrun, + (begrun + runlength), cmd->acked_data_sn); + + return iscsit_add_reject_from_cmd( + ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + } + + if (runlength) { + if ((begrun + runlength) > cmd->r2t_sn) { + pr_err("Command ITT: 0x%08x received R2T SNACK" + " with BegRun: 0x%08x, RunLength: 0x%08x, exceeds" + " current R2TSN: 0x%08x, protocol error.\n", + cmd->init_task_tag, begrun, runlength, cmd->r2t_sn); + return iscsit_add_reject_from_cmd( + ISCSI_REASON_BOOKMARK_INVALID, 1, 0, buf, cmd); + } + last_r2tsn = (begrun + runlength); + } else + last_r2tsn = cmd->r2t_sn; + + while (begrun < last_r2tsn) { + r2t = iscsit_get_holder_for_r2tsn(cmd, begrun); + if (!r2t) + return -1; + if (iscsit_send_recovery_r2t_for_snack(cmd, r2t) < 0) + return -1; + + begrun++; + } + + return 0; +} + +/* + * Generates Offsets and NextBurstLength based on Begrun and Runlength + * carried in a Data SNACK or ExpDataSN in TMR TASK_REASSIGN. + * + * For DataSequenceInOrder=Yes and DataPDUInOrder=[Yes,No] only. + * + * FIXME: How is this handled for a RData SNACK? 
+ */ +int iscsit_create_recovery_datain_values_datasequenceinorder_yes( + struct iscsi_cmd *cmd, + struct iscsi_datain_req *dr) +{ + u32 data_sn = 0, data_sn_count = 0; + u32 pdu_start = 0, seq_no = 0; + u32 begrun = dr->begrun; + struct iscsi_conn *conn = cmd->conn; + + while (begrun > data_sn++) { + data_sn_count++; + if ((dr->next_burst_len + + conn->conn_ops->MaxRecvDataSegmentLength) < + conn->sess->sess_ops->MaxBurstLength) { + dr->read_data_done += + conn->conn_ops->MaxRecvDataSegmentLength; + dr->next_burst_len += + conn->conn_ops->MaxRecvDataSegmentLength; + } else { + dr->read_data_done += + (conn->sess->sess_ops->MaxBurstLength - + dr->next_burst_len); + dr->next_burst_len = 0; + pdu_start += data_sn_count; + data_sn_count = 0; + seq_no++; + } + } + + if (!conn->sess->sess_ops->DataPDUInOrder) { + cmd->seq_no = seq_no; + cmd->pdu_start = pdu_start; + cmd->pdu_send_order = data_sn_count; + } + + return 0; +} + +/* + * Generates Offsets and NextBurstLength based on Begrun and Runlength + * carried in a Data SNACK or ExpDataSN in TMR TASK_REASSIGN. + * + * For DataSequenceInOrder=No and DataPDUInOrder=[Yes,No] only. + * + * FIXME: How is this handled for a RData SNACK? + */ +int iscsit_create_recovery_datain_values_datasequenceinorder_no( + struct iscsi_cmd *cmd, + struct iscsi_datain_req *dr) +{ + int found_seq = 0, i; + u32 data_sn, read_data_done = 0, seq_send_order = 0; + u32 begrun = dr->begrun; + u32 runlength = dr->runlength; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_seq *first_seq = NULL, *seq = NULL; + + if (!cmd->seq_list) { + pr_err("struct iscsi_cmd->seq_list is NULL!\n"); + return -1; + } + + /* + * Calculate read_data_done for all sequences containing a + * first_datasn and last_datasn less than the BegRun. + * + * Locate the struct iscsi_seq the BegRun lies within and calculate + * NextBurstLenghth up to the DataSN based on MaxRecvDataSegmentLength. + * + * Also use struct iscsi_seq->seq_send_order to determine where to start. 
+ */ + for (i = 0; i < cmd->seq_count; i++) { + seq = &cmd->seq_list[i]; + + if (!seq->seq_send_order) + first_seq = seq; + + /* + * No data has been transferred for this DataIN sequence, so the + * seq->first_datasn and seq->last_datasn have not been set. + */ + if (!seq->sent) { +#if 0 + pr_err("Ignoring non-sent sequence 0x%08x ->" + " 0x%08x\n\n", seq->first_datasn, + seq->last_datasn); +#endif + continue; + } + + /* + * This DataIN sequence is precedes the received BegRun, add the + * total xfer_len of the sequence to read_data_done and reset + * seq->pdu_send_order. + */ + if ((seq->first_datasn < begrun) && + (seq->last_datasn < begrun)) { +#if 0 + pr_err("Pre BegRun sequence 0x%08x ->" + " 0x%08x\n", seq->first_datasn, + seq->last_datasn); +#endif + read_data_done += cmd->seq_list[i].xfer_len; + seq->next_burst_len = seq->pdu_send_order = 0; + continue; + } + + /* + * The BegRun lies within this DataIN sequence. + */ + if ((seq->first_datasn <= begrun) && + (seq->last_datasn >= begrun)) { +#if 0 + pr_err("Found sequence begrun: 0x%08x in" + " 0x%08x -> 0x%08x\n", begrun, + seq->first_datasn, seq->last_datasn); +#endif + seq_send_order = seq->seq_send_order; + data_sn = seq->first_datasn; + seq->next_burst_len = seq->pdu_send_order = 0; + found_seq = 1; + + /* + * For DataPDUInOrder=Yes, while the first DataSN of + * the sequence is less than the received BegRun, add + * the MaxRecvDataSegmentLength to read_data_done and + * to the sequence's next_burst_len; + * + * For DataPDUInOrder=No, while the first DataSN of the + * sequence is less than the received BegRun, find the + * struct iscsi_pdu of the DataSN in question and add the + * MaxRecvDataSegmentLength to read_data_done and to the + * sequence's next_burst_len; + */ + if (conn->sess->sess_ops->DataPDUInOrder) { + while (data_sn < begrun) { + seq->pdu_send_order++; + read_data_done += + conn->conn_ops->MaxRecvDataSegmentLength; + seq->next_burst_len += + conn->conn_ops->MaxRecvDataSegmentLength; + 
data_sn++; + } + } else { + int j; + struct iscsi_pdu *pdu; + + while (data_sn < begrun) { + seq->pdu_send_order++; + + for (j = 0; j < seq->pdu_count; j++) { + pdu = &cmd->pdu_list[ + seq->pdu_start + j]; + if (pdu->data_sn == data_sn) { + read_data_done += + pdu->length; + seq->next_burst_len += + pdu->length; + } + } + data_sn++; + } + } + continue; + } + + /* + * This DataIN sequence is larger than the received BegRun, + * reset seq->pdu_send_order and continue. + */ + if ((seq->first_datasn > begrun) || + (seq->last_datasn > begrun)) { +#if 0 + pr_err("Post BegRun sequence 0x%08x -> 0x%08x\n", + seq->first_datasn, seq->last_datasn); +#endif + seq->next_burst_len = seq->pdu_send_order = 0; + continue; + } + } + + if (!found_seq) { + if (!begrun) { + if (!first_seq) { + pr_err("ITT: 0x%08x, Begrun: 0x%08x" + " but first_seq is NULL\n", + cmd->init_task_tag, begrun); + return -1; + } + seq_send_order = first_seq->seq_send_order; + seq->next_burst_len = seq->pdu_send_order = 0; + goto done; + } + + pr_err("Unable to locate struct iscsi_seq for ITT: 0x%08x," + " BegRun: 0x%08x, RunLength: 0x%08x while" + " DataSequenceInOrder=No and DataPDUInOrder=%s.\n", + cmd->init_task_tag, begrun, runlength, + (conn->sess->sess_ops->DataPDUInOrder) ? "Yes" : "No"); + return -1; + } + +done: + dr->read_data_done = read_data_done; + dr->seq_send_order = seq_send_order; + + return 0; +} + +static int iscsit_handle_recovery_datain( + struct iscsi_cmd *cmd, + unsigned char *buf, + u32 begrun, + u32 runlength) +{ + struct iscsi_conn *conn = cmd->conn; + struct iscsi_datain_req *dr; + struct se_cmd *se_cmd = &cmd->se_cmd; + + if (!atomic_read(&se_cmd->t_transport_complete)) { + pr_err("Ignoring ITT: 0x%08x Data SNACK\n", + cmd->init_task_tag); + return 0; + } + + /* + * Make sure the initiator is not requesting retransmission + * of DataSNs already acknowledged by a Data ACK SNACK. 
+ */ + if ((cmd->cmd_flags & ICF_GOT_DATACK_SNACK) && + (begrun <= cmd->acked_data_sn)) { + pr_err("ITT: 0x%08x, Data SNACK requesting" + " retransmission of DataSN: 0x%08x to 0x%08x but" + " already acked to DataSN: 0x%08x by Data ACK SNACK," + " protocol error.\n", cmd->init_task_tag, begrun, + (begrun + runlength), cmd->acked_data_sn); + + return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + } + + /* + * Make sure BegRun and RunLength in the Data SNACK are sane. + * Note: (cmd->data_sn - 1) will carry the maximum DataSN sent. + */ + if ((begrun + runlength) > (cmd->data_sn - 1)) { + pr_err("Initiator requesting BegRun: 0x%08x, RunLength" + ": 0x%08x greater than maximum DataSN: 0x%08x.\n", + begrun, runlength, (cmd->data_sn - 1)); + return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, + 1, 0, buf, cmd); + } + + dr = iscsit_allocate_datain_req(); + if (!dr) + return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, 0, buf, cmd); + + dr->data_sn = dr->begrun = begrun; + dr->runlength = runlength; + dr->generate_recovery_values = 1; + dr->recovery = DATAIN_WITHIN_COMMAND_RECOVERY; + + iscsit_attach_datain_req(cmd, dr); + + cmd->i_state = ISTATE_SEND_DATAIN; + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + + return 0; +} + +int iscsit_handle_recovery_datain_or_r2t( + struct iscsi_conn *conn, + unsigned char *buf, + u32 init_task_tag, + u32 targ_xfer_tag, + u32 begrun, + u32 runlength) +{ + struct iscsi_cmd *cmd; + + cmd = iscsit_find_cmd_from_itt(conn, init_task_tag); + if (!cmd) + return 0; + + /* + * FIXME: This will not work for bidi commands. 
+ */ + switch (cmd->data_direction) { + case DMA_TO_DEVICE: + return iscsit_handle_r2t_snack(cmd, buf, begrun, runlength); + case DMA_FROM_DEVICE: + return iscsit_handle_recovery_datain(cmd, buf, begrun, + runlength); + default: + pr_err("Unknown cmd->data_direction: 0x%02x\n", + cmd->data_direction); + return -1; + } + + return 0; +} + +/* #warning FIXME: Status SNACK needs to be dependent on OPCODE!!! */ +int iscsit_handle_status_snack( + struct iscsi_conn *conn, + u32 init_task_tag, + u32 targ_xfer_tag, + u32 begrun, + u32 runlength) +{ + struct iscsi_cmd *cmd = NULL; + u32 last_statsn; + int found_cmd; + + if (conn->exp_statsn > begrun) { + pr_err("Got Status SNACK Begrun: 0x%08x, RunLength:" + " 0x%08x but already got ExpStatSN: 0x%08x on CID:" + " %hu.\n", begrun, runlength, conn->exp_statsn, + conn->cid); + return 0; + } + + last_statsn = (!runlength) ? conn->stat_sn : (begrun + runlength); + + while (begrun < last_statsn) { + found_cmd = 0; + + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) { + if (cmd->stat_sn == begrun) { + found_cmd = 1; + break; + } + } + spin_unlock_bh(&conn->cmd_lock); + + if (!found_cmd) { + pr_err("Unable to find StatSN: 0x%08x for" + " a Status SNACK, assuming this was a" + " protactic SNACK for an untransmitted" + " StatSN, ignoring.\n", begrun); + begrun++; + continue; + } + + spin_lock_bh(&cmd->istate_lock); + if (cmd->i_state == ISTATE_SEND_DATAIN) { + spin_unlock_bh(&cmd->istate_lock); + pr_err("Ignoring Status SNACK for BegRun:" + " 0x%08x, RunLength: 0x%08x, assuming this was" + " a protactic SNACK for an untransmitted" + " StatSN\n", begrun, runlength); + begrun++; + continue; + } + spin_unlock_bh(&cmd->istate_lock); + + cmd->i_state = ISTATE_SEND_STATUS_RECOVERY; + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + begrun++; + } + + return 0; +} + +int iscsit_handle_data_ack( + struct iscsi_conn *conn, + u32 targ_xfer_tag, + u32 begrun, + u32 runlength) +{ + struct 
iscsi_cmd *cmd = NULL; + + cmd = iscsit_find_cmd_from_ttt(conn, targ_xfer_tag); + if (!cmd) { + pr_err("Data ACK SNACK for TTT: 0x%08x is" + " invalid.\n", targ_xfer_tag); + return -1; + } + + if (begrun <= cmd->acked_data_sn) { + pr_err("ITT: 0x%08x Data ACK SNACK BegRUN: 0x%08x is" + " less than the already acked DataSN: 0x%08x.\n", + cmd->init_task_tag, begrun, cmd->acked_data_sn); + return -1; + } + + /* + * For Data ACK SNACK, BegRun is the next expected DataSN. + * (see iSCSI v19: 10.16.6) + */ + cmd->cmd_flags |= ICF_GOT_DATACK_SNACK; + cmd->acked_data_sn = (begrun - 1); + + pr_debug("Received Data ACK SNACK for ITT: 0x%08x," + " updated acked DataSN to 0x%08x.\n", + cmd->init_task_tag, cmd->acked_data_sn); + + return 0; +} + +static int iscsit_send_recovery_r2t( + struct iscsi_cmd *cmd, + u32 offset, + u32 xfer_len) +{ + int ret; + + spin_lock_bh(&cmd->r2t_lock); + ret = iscsit_add_r2t_to_list(cmd, offset, xfer_len, 1, 0); + spin_unlock_bh(&cmd->r2t_lock); + + return ret; +} + +int iscsit_dataout_datapduinorder_no_fbit( + struct iscsi_cmd *cmd, + struct iscsi_pdu *pdu) +{ + int i, send_recovery_r2t = 0, recovery = 0; + u32 length = 0, offset = 0, pdu_count = 0, xfer_len = 0; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_pdu *first_pdu = NULL; + + /* + * Get an struct iscsi_pdu pointer to the first PDU, and total PDU count + * of the DataOUT sequence. + */ + if (conn->sess->sess_ops->DataSequenceInOrder) { + for (i = 0; i < cmd->pdu_count; i++) { + if (cmd->pdu_list[i].seq_no == pdu->seq_no) { + if (!first_pdu) + first_pdu = &cmd->pdu_list[i]; + xfer_len += cmd->pdu_list[i].length; + pdu_count++; + } else if (pdu_count) + break; + } + } else { + struct iscsi_seq *seq = cmd->seq_ptr; + + first_pdu = &cmd->pdu_list[seq->pdu_start]; + pdu_count = seq->pdu_count; + } + + if (!first_pdu || !pdu_count) + return DATAOUT_CANNOT_RECOVER; + + /* + * Loop through the ending DataOUT Sequence checking each struct iscsi_pdu. 
+ * The following ugly logic does batching of not received PDUs. + */ + for (i = 0; i < pdu_count; i++) { + if (first_pdu[i].status == ISCSI_PDU_RECEIVED_OK) { + if (!send_recovery_r2t) + continue; + + if (iscsit_send_recovery_r2t(cmd, offset, length) < 0) + return DATAOUT_CANNOT_RECOVER; + + send_recovery_r2t = length = offset = 0; + continue; + } + /* + * Set recovery = 1 for any missing, CRC failed, or timed + * out PDUs to let the DataOUT logic know that this sequence + * has not been completed yet. + * + * Also, only send a Recovery R2T for ISCSI_PDU_NOT_RECEIVED. + * We assume if the PDU either failed CRC or timed out + * that a Recovery R2T has already been sent. + */ + recovery = 1; + + if (first_pdu[i].status != ISCSI_PDU_NOT_RECEIVED) + continue; + + if (!offset) + offset = first_pdu[i].offset; + length += first_pdu[i].length; + + send_recovery_r2t = 1; + } + + if (send_recovery_r2t) + if (iscsit_send_recovery_r2t(cmd, offset, length) < 0) + return DATAOUT_CANNOT_RECOVER; + + return (!recovery) ? 
DATAOUT_NORMAL : DATAOUT_WITHIN_COMMAND_RECOVERY; +} + +static int iscsit_recalculate_dataout_values( + struct iscsi_cmd *cmd, + u32 pdu_offset, + u32 pdu_length, + u32 *r2t_offset, + u32 *r2t_length) +{ + int i; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_pdu *pdu = NULL; + + if (conn->sess->sess_ops->DataSequenceInOrder) { + cmd->data_sn = 0; + + if (conn->sess->sess_ops->DataPDUInOrder) { + *r2t_offset = cmd->write_data_done; + *r2t_length = (cmd->seq_end_offset - + cmd->write_data_done); + return 0; + } + + *r2t_offset = cmd->seq_start_offset; + *r2t_length = (cmd->seq_end_offset - cmd->seq_start_offset); + + for (i = 0; i < cmd->pdu_count; i++) { + pdu = &cmd->pdu_list[i]; + + if (pdu->status != ISCSI_PDU_RECEIVED_OK) + continue; + + if ((pdu->offset >= cmd->seq_start_offset) && + ((pdu->offset + pdu->length) <= + cmd->seq_end_offset)) { + if (!cmd->unsolicited_data) + cmd->next_burst_len -= pdu->length; + else + cmd->first_burst_len -= pdu->length; + + cmd->write_data_done -= pdu->length; + pdu->status = ISCSI_PDU_NOT_RECEIVED; + } + } + } else { + struct iscsi_seq *seq = NULL; + + seq = iscsit_get_seq_holder(cmd, pdu_offset, pdu_length); + if (!seq) + return -1; + + *r2t_offset = seq->orig_offset; + *r2t_length = seq->xfer_len; + + cmd->write_data_done -= (seq->offset - seq->orig_offset); + if (cmd->immediate_data) + cmd->first_burst_len = cmd->write_data_done; + + seq->data_sn = 0; + seq->offset = seq->orig_offset; + seq->next_burst_len = 0; + seq->status = DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY; + + if (conn->sess->sess_ops->DataPDUInOrder) + return 0; + + for (i = 0; i < seq->pdu_count; i++) { + pdu = &cmd->pdu_list[i+seq->pdu_start]; + + if (pdu->status != ISCSI_PDU_RECEIVED_OK) + continue; + + pdu->status = ISCSI_PDU_NOT_RECEIVED; + } + } + + return 0; +} + +int iscsit_recover_dataout_sequence( + struct iscsi_cmd *cmd, + u32 pdu_offset, + u32 pdu_length) +{ + u32 r2t_length = 0, r2t_offset = 0; + + spin_lock_bh(&cmd->istate_lock); + 
cmd->cmd_flags |= ICF_WITHIN_COMMAND_RECOVERY; + spin_unlock_bh(&cmd->istate_lock); + + if (iscsit_recalculate_dataout_values(cmd, pdu_offset, pdu_length, + &r2t_offset, &r2t_length) < 0) + return DATAOUT_CANNOT_RECOVER; + + iscsit_send_recovery_r2t(cmd, r2t_offset, r2t_length); + + return DATAOUT_WITHIN_COMMAND_RECOVERY; +} + +static struct iscsi_ooo_cmdsn *iscsit_allocate_ooo_cmdsn(void) +{ + struct iscsi_ooo_cmdsn *ooo_cmdsn = NULL; + + ooo_cmdsn = kmem_cache_zalloc(lio_ooo_cache, GFP_ATOMIC); + if (!ooo_cmdsn) { + pr_err("Unable to allocate memory for" + " struct iscsi_ooo_cmdsn.\n"); + return NULL; + } + INIT_LIST_HEAD(&ooo_cmdsn->ooo_list); + + return ooo_cmdsn; +} + +/* + * Called with sess->cmdsn_mutex held. + */ +static int iscsit_attach_ooo_cmdsn( + struct iscsi_session *sess, + struct iscsi_ooo_cmdsn *ooo_cmdsn) +{ + struct iscsi_ooo_cmdsn *ooo_tail, *ooo_tmp; + /* + * We attach the struct iscsi_ooo_cmdsn entry to the out of order + * list in increasing CmdSN order. + * This allows iscsi_execute_ooo_cmdsns() to detect any + * additional CmdSN holes while performing delayed execution. + */ + if (list_empty(&sess->sess_ooo_cmdsn_list)) + list_add_tail(&ooo_cmdsn->ooo_list, + &sess->sess_ooo_cmdsn_list); + else { + ooo_tail = list_entry(sess->sess_ooo_cmdsn_list.prev, + typeof(*ooo_tail), ooo_list); + /* + * CmdSN is greater than the tail of the list. + */ + if (ooo_tail->cmdsn < ooo_cmdsn->cmdsn) + list_add_tail(&ooo_cmdsn->ooo_list, + &sess->sess_ooo_cmdsn_list); + else { + /* + * CmdSN is either lower than the head, or somewhere + * in the middle. + */ + list_for_each_entry(ooo_tmp, &sess->sess_ooo_cmdsn_list, + ooo_list) { + while (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn) + continue; + + list_add(&ooo_cmdsn->ooo_list, + &ooo_tmp->ooo_list); + break; + } + } + } + + return 0; +} + +/* + * Removes an struct iscsi_ooo_cmdsn from a session's list, + * called with struct iscsi_session->cmdsn_mutex held. 
+ */ +void iscsit_remove_ooo_cmdsn( + struct iscsi_session *sess, + struct iscsi_ooo_cmdsn *ooo_cmdsn) +{ + list_del(&ooo_cmdsn->ooo_list); + kmem_cache_free(lio_ooo_cache, ooo_cmdsn); +} + +void iscsit_clear_ooo_cmdsns_for_conn(struct iscsi_conn *conn) +{ + struct iscsi_ooo_cmdsn *ooo_cmdsn; + struct iscsi_session *sess = conn->sess; + + mutex_lock(&sess->cmdsn_mutex); + list_for_each_entry(ooo_cmdsn, &sess->sess_ooo_cmdsn_list, ooo_list) { + if (ooo_cmdsn->cid != conn->cid) + continue; + + ooo_cmdsn->cmd = NULL; + } + mutex_unlock(&sess->cmdsn_mutex); +} + +/* + * Called with sess->cmdsn_mutex held. + */ +int iscsit_execute_ooo_cmdsns(struct iscsi_session *sess) +{ + int ooo_count = 0; + struct iscsi_cmd *cmd = NULL; + struct iscsi_ooo_cmdsn *ooo_cmdsn, *ooo_cmdsn_tmp; + + list_for_each_entry_safe(ooo_cmdsn, ooo_cmdsn_tmp, + &sess->sess_ooo_cmdsn_list, ooo_list) { + if (ooo_cmdsn->cmdsn != sess->exp_cmd_sn) + continue; + + if (!ooo_cmdsn->cmd) { + sess->exp_cmd_sn++; + iscsit_remove_ooo_cmdsn(sess, ooo_cmdsn); + continue; + } + + cmd = ooo_cmdsn->cmd; + cmd->i_state = cmd->deferred_i_state; + ooo_count++; + sess->exp_cmd_sn++; + pr_debug("Executing out of order CmdSN: 0x%08x," + " incremented ExpCmdSN to 0x%08x.\n", + cmd->cmd_sn, sess->exp_cmd_sn); + + iscsit_remove_ooo_cmdsn(sess, ooo_cmdsn); + + if (iscsit_execute_cmd(cmd, 1) < 0) + return -1; + + continue; + } + + return ooo_count; +} + +/* + * Called either: + * + * 1. With sess->cmdsn_mutex held from iscsi_execute_ooo_cmdsns() + * or iscsi_check_received_cmdsn(). + * 2. With no locks held directly from iscsi_handle_XXX_pdu() functions + * for immediate commands. 
+ */ +int iscsit_execute_cmd(struct iscsi_cmd *cmd, int ooo) +{ + struct se_cmd *se_cmd = &cmd->se_cmd; + int lr = 0; + + spin_lock_bh(&cmd->istate_lock); + if (ooo) + cmd->cmd_flags &= ~ICF_OOO_CMDSN; + + switch (cmd->iscsi_opcode) { + case ISCSI_OP_SCSI_CMD: + /* + * Go ahead and send the CHECK_CONDITION status for + * any SCSI CDB exceptions that may have occurred, also + * handle the SCF_SCSI_RESERVATION_CONFLICT case here as well. + */ + if (se_cmd->se_cmd_flags & SCF_SCSI_CDB_EXCEPTION) { + if (se_cmd->se_cmd_flags & + SCF_SCSI_RESERVATION_CONFLICT) { + cmd->i_state = ISTATE_SEND_STATUS; + spin_unlock_bh(&cmd->istate_lock); + iscsit_add_cmd_to_response_queue(cmd, cmd->conn, + cmd->i_state); + return 0; + } + spin_unlock_bh(&cmd->istate_lock); + /* + * Determine if delayed TASK_ABORTED status for WRITEs + * should be sent now if no unsolicited data out + * payloads are expected, or if the delayed status + * should be sent after unsolicited data out with + * ISCSI_FLAG_CMD_FINAL set in iscsi_handle_data_out() + */ + if (transport_check_aborted_status(se_cmd, + (cmd->unsolicited_data == 0)) != 0) + return 0; + /* + * Otherwise send CHECK_CONDITION and sense for + * exception + */ + return transport_send_check_condition_and_sense(se_cmd, + se_cmd->scsi_sense_reason, 0); + } + /* + * Special case for delayed CmdSN with Immediate + * Data and/or Unsolicited Data Out attached. + */ + if (cmd->immediate_data) { + if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) { + spin_unlock_bh(&cmd->istate_lock); + return transport_generic_handle_data( + &cmd->se_cmd); + } + spin_unlock_bh(&cmd->istate_lock); + + if (!(cmd->cmd_flags & + ICF_NON_IMMEDIATE_UNSOLICITED_DATA)) { + /* + * Send the delayed TASK_ABORTED status for + * WRITEs if no more unsolicitied data is + * expected. 
+ */ + if (transport_check_aborted_status(se_cmd, 1) + != 0) + return 0; + + iscsit_set_dataout_sequence_values(cmd); + iscsit_build_r2ts_for_cmd(cmd, cmd->conn, 0); + } + return 0; + } + /* + * The default handler. + */ + spin_unlock_bh(&cmd->istate_lock); + + if ((cmd->data_direction == DMA_TO_DEVICE) && + !(cmd->cmd_flags & ICF_NON_IMMEDIATE_UNSOLICITED_DATA)) { + /* + * Send the delayed TASK_ABORTED status for WRITEs if + * no more nsolicitied data is expected. + */ + if (transport_check_aborted_status(se_cmd, 1) != 0) + return 0; + + iscsit_set_dataout_sequence_values(cmd); + spin_lock_bh(&cmd->dataout_timeout_lock); + iscsit_start_dataout_timer(cmd, cmd->conn); + spin_unlock_bh(&cmd->dataout_timeout_lock); + } + return transport_handle_cdb_direct(&cmd->se_cmd); + + case ISCSI_OP_NOOP_OUT: + case ISCSI_OP_TEXT: + spin_unlock_bh(&cmd->istate_lock); + iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); + break; + case ISCSI_OP_SCSI_TMFUNC: + if (se_cmd->se_cmd_flags & SCF_SCSI_CDB_EXCEPTION) { + spin_unlock_bh(&cmd->istate_lock); + iscsit_add_cmd_to_response_queue(cmd, cmd->conn, + cmd->i_state); + return 0; + } + spin_unlock_bh(&cmd->istate_lock); + + return transport_generic_handle_tmr(&cmd->se_cmd); + case ISCSI_OP_LOGOUT: + spin_unlock_bh(&cmd->istate_lock); + switch (cmd->logout_reason) { + case ISCSI_LOGOUT_REASON_CLOSE_SESSION: + lr = iscsit_logout_closesession(cmd, cmd->conn); + break; + case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION: + lr = iscsit_logout_closeconnection(cmd, cmd->conn); + break; + case ISCSI_LOGOUT_REASON_RECOVERY: + lr = iscsit_logout_removeconnforrecovery(cmd, cmd->conn); + break; + default: + pr_err("Unknown iSCSI Logout Request Code:" + " 0x%02x\n", cmd->logout_reason); + return -1; + } + + return lr; + default: + spin_unlock_bh(&cmd->istate_lock); + pr_err("Cannot perform out of order execution for" + " unknown iSCSI Opcode: 0x%02x\n", cmd->iscsi_opcode); + return -1; + } + + return 0; +} + +void 
iscsit_free_all_ooo_cmdsns(struct iscsi_session *sess) +{ + struct iscsi_ooo_cmdsn *ooo_cmdsn, *ooo_cmdsn_tmp; + + mutex_lock(&sess->cmdsn_mutex); + list_for_each_entry_safe(ooo_cmdsn, ooo_cmdsn_tmp, + &sess->sess_ooo_cmdsn_list, ooo_list) { + + list_del(&ooo_cmdsn->ooo_list); + kmem_cache_free(lio_ooo_cache, ooo_cmdsn); + } + mutex_unlock(&sess->cmdsn_mutex); +} + +int iscsit_handle_ooo_cmdsn( + struct iscsi_session *sess, + struct iscsi_cmd *cmd, + u32 cmdsn) +{ + int batch = 0; + struct iscsi_ooo_cmdsn *ooo_cmdsn = NULL, *ooo_tail = NULL; + + cmd->deferred_i_state = cmd->i_state; + cmd->i_state = ISTATE_DEFERRED_CMD; + cmd->cmd_flags |= ICF_OOO_CMDSN; + + if (list_empty(&sess->sess_ooo_cmdsn_list)) + batch = 1; + else { + ooo_tail = list_entry(sess->sess_ooo_cmdsn_list.prev, + typeof(*ooo_tail), ooo_list); + if (ooo_tail->cmdsn != (cmdsn - 1)) + batch = 1; + } + + ooo_cmdsn = iscsit_allocate_ooo_cmdsn(); + if (!ooo_cmdsn) + return CMDSN_ERROR_CANNOT_RECOVER; + + ooo_cmdsn->cmd = cmd; + ooo_cmdsn->batch_count = (batch) ? + (cmdsn - sess->exp_cmd_sn) : 1; + ooo_cmdsn->cid = cmd->conn->cid; + ooo_cmdsn->exp_cmdsn = sess->exp_cmd_sn; + ooo_cmdsn->cmdsn = cmdsn; + + if (iscsit_attach_ooo_cmdsn(sess, ooo_cmdsn) < 0) { + kmem_cache_free(lio_ooo_cache, ooo_cmdsn); + return CMDSN_ERROR_CANNOT_RECOVER; + } + + return CMDSN_HIGHER_THAN_EXP; +} + +static int iscsit_set_dataout_timeout_values( + struct iscsi_cmd *cmd, + u32 *offset, + u32 *length) +{ + struct iscsi_conn *conn = cmd->conn; + struct iscsi_r2t *r2t; + + if (cmd->unsolicited_data) { + *offset = 0; + *length = (conn->sess->sess_ops->FirstBurstLength > + cmd->data_length) ? 
+ cmd->data_length : + conn->sess->sess_ops->FirstBurstLength; + return 0; + } + + spin_lock_bh(&cmd->r2t_lock); + if (list_empty(&cmd->cmd_r2t_list)) { + pr_err("cmd->cmd_r2t_list is empty!\n"); + spin_unlock_bh(&cmd->r2t_lock); + return -1; + } + + list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) { + if (r2t->sent_r2t && !r2t->recovery_r2t && !r2t->seq_complete) { + *offset = r2t->offset; + *length = r2t->xfer_len; + spin_unlock_bh(&cmd->r2t_lock); + return 0; + } + } + spin_unlock_bh(&cmd->r2t_lock); + + pr_err("Unable to locate any incomplete DataOUT" + " sequences for ITT: 0x%08x.\n", cmd->init_task_tag); + + return -1; +} + +/* + * NOTE: Called from interrupt (timer) context. + */ +static void iscsit_handle_dataout_timeout(unsigned long data) +{ + u32 pdu_length = 0, pdu_offset = 0; + u32 r2t_length = 0, r2t_offset = 0; + struct iscsi_cmd *cmd = (struct iscsi_cmd *) data; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_session *sess = NULL; + struct iscsi_node_attrib *na; + + iscsit_inc_conn_usage_count(conn); + + spin_lock_bh(&cmd->dataout_timeout_lock); + if (cmd->dataout_timer_flags & ISCSI_TF_STOP) { + spin_unlock_bh(&cmd->dataout_timeout_lock); + iscsit_dec_conn_usage_count(conn); + return; + } + cmd->dataout_timer_flags &= ~ISCSI_TF_RUNNING; + sess = conn->sess; + na = iscsit_tpg_get_node_attrib(sess); + + if (!sess->sess_ops->ErrorRecoveryLevel) { + pr_debug("Unable to recover from DataOut timeout while" + " in ERL=0.\n"); + goto failure; + } + + if (++cmd->dataout_timeout_retries == na->dataout_timeout_retries) { + pr_debug("Command ITT: 0x%08x exceeded max retries" + " for DataOUT timeout %u, closing iSCSI connection.\n", + cmd->init_task_tag, na->dataout_timeout_retries); + goto failure; + } + + cmd->cmd_flags |= ICF_WITHIN_COMMAND_RECOVERY; + + if (conn->sess->sess_ops->DataSequenceInOrder) { + if (conn->sess->sess_ops->DataPDUInOrder) { + pdu_offset = cmd->write_data_done; + if ((pdu_offset + (conn->sess->sess_ops->MaxBurstLength - + 
cmd->next_burst_len)) > cmd->data_length) + pdu_length = (cmd->data_length - + cmd->write_data_done); + else + pdu_length = (conn->sess->sess_ops->MaxBurstLength - + cmd->next_burst_len); + } else { + pdu_offset = cmd->seq_start_offset; + pdu_length = (cmd->seq_end_offset - + cmd->seq_start_offset); + } + } else { + if (iscsit_set_dataout_timeout_values(cmd, &pdu_offset, + &pdu_length) < 0) + goto failure; + } + + if (iscsit_recalculate_dataout_values(cmd, pdu_offset, pdu_length, + &r2t_offset, &r2t_length) < 0) + goto failure; + + pr_debug("Command ITT: 0x%08x timed out waiting for" + " completion of %sDataOUT Sequence Offset: %u, Length: %u\n", + cmd->init_task_tag, (cmd->unsolicited_data) ? "Unsolicited " : + "", r2t_offset, r2t_length); + + if (iscsit_send_recovery_r2t(cmd, r2t_offset, r2t_length) < 0) + goto failure; + + iscsit_start_dataout_timer(cmd, conn); + spin_unlock_bh(&cmd->dataout_timeout_lock); + iscsit_dec_conn_usage_count(conn); + + return; + +failure: + spin_unlock_bh(&cmd->dataout_timeout_lock); + iscsit_cause_connection_reinstatement(conn, 0); + iscsit_dec_conn_usage_count(conn); +} + +void iscsit_mod_dataout_timer(struct iscsi_cmd *cmd) +{ + struct iscsi_conn *conn = cmd->conn; + struct iscsi_session *sess = conn->sess; + struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess); + + spin_lock_bh(&cmd->dataout_timeout_lock); + if (!(cmd->dataout_timer_flags & ISCSI_TF_RUNNING)) { + spin_unlock_bh(&cmd->dataout_timeout_lock); + return; + } + + mod_timer(&cmd->dataout_timer, + (get_jiffies_64() + na->dataout_timeout * HZ)); + pr_debug("Updated DataOUT timer for ITT: 0x%08x\n", + cmd->init_task_tag); + spin_unlock_bh(&cmd->dataout_timeout_lock); +} + +/* + * Called with cmd->dataout_timeout_lock held. 
+ */ +void iscsit_start_dataout_timer( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + struct iscsi_session *sess = conn->sess; + struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess); + + if (cmd->dataout_timer_flags & ISCSI_TF_RUNNING) + return; + + pr_debug("Starting DataOUT timer for ITT: 0x%08x on" + " CID: %hu.\n", cmd->init_task_tag, conn->cid); + + init_timer(&cmd->dataout_timer); + cmd->dataout_timer.expires = (get_jiffies_64() + na->dataout_timeout * HZ); + cmd->dataout_timer.data = (unsigned long)cmd; + cmd->dataout_timer.function = iscsit_handle_dataout_timeout; + cmd->dataout_timer_flags &= ~ISCSI_TF_STOP; + cmd->dataout_timer_flags |= ISCSI_TF_RUNNING; + add_timer(&cmd->dataout_timer); +} + +void iscsit_stop_dataout_timer(struct iscsi_cmd *cmd) +{ + spin_lock_bh(&cmd->dataout_timeout_lock); + if (!(cmd->dataout_timer_flags & ISCSI_TF_RUNNING)) { + spin_unlock_bh(&cmd->dataout_timeout_lock); + return; + } + cmd->dataout_timer_flags |= ISCSI_TF_STOP; + spin_unlock_bh(&cmd->dataout_timeout_lock); + + del_timer_sync(&cmd->dataout_timer); + + spin_lock_bh(&cmd->dataout_timeout_lock); + cmd->dataout_timer_flags &= ~ISCSI_TF_RUNNING; + pr_debug("Stopped DataOUT Timer for ITT: 0x%08x\n", + cmd->init_task_tag); + spin_unlock_bh(&cmd->dataout_timeout_lock); +} diff --git a/drivers/target/iscsi/iscsi_target_erl1.h b/drivers/target/iscsi/iscsi_target_erl1.h new file mode 100644 index 000000000000..85e67e29de6b --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_erl1.h @@ -0,0 +1,26 @@ +#ifndef ISCSI_TARGET_ERL1_H +#define ISCSI_TARGET_ERL1_H + +extern int iscsit_dump_data_payload(struct iscsi_conn *, u32, int); +extern int iscsit_create_recovery_datain_values_datasequenceinorder_yes( + struct iscsi_cmd *, struct iscsi_datain_req *); +extern int iscsit_create_recovery_datain_values_datasequenceinorder_no( + struct iscsi_cmd *, struct iscsi_datain_req *); +extern int iscsit_handle_recovery_datain_or_r2t(struct iscsi_conn *, unsigned char 
*, + u32, u32, u32, u32); +extern int iscsit_handle_status_snack(struct iscsi_conn *, u32, u32, + u32, u32); +extern int iscsit_handle_data_ack(struct iscsi_conn *, u32, u32, u32); +extern int iscsit_dataout_datapduinorder_no_fbit(struct iscsi_cmd *, struct iscsi_pdu *); +extern int iscsit_recover_dataout_sequence(struct iscsi_cmd *, u32, u32); +extern void iscsit_clear_ooo_cmdsns_for_conn(struct iscsi_conn *); +extern void iscsit_free_all_ooo_cmdsns(struct iscsi_session *); +extern int iscsit_execute_ooo_cmdsns(struct iscsi_session *); +extern int iscsit_execute_cmd(struct iscsi_cmd *, int); +extern int iscsit_handle_ooo_cmdsn(struct iscsi_session *, struct iscsi_cmd *, u32); +extern void iscsit_remove_ooo_cmdsn(struct iscsi_session *, struct iscsi_ooo_cmdsn *); +extern void iscsit_mod_dataout_timer(struct iscsi_cmd *); +extern void iscsit_start_dataout_timer(struct iscsi_cmd *, struct iscsi_conn *); +extern void iscsit_stop_dataout_timer(struct iscsi_cmd *); + +#endif /* ISCSI_TARGET_ERL1_H */ diff --git a/drivers/target/iscsi/iscsi_target_erl2.c b/drivers/target/iscsi/iscsi_target_erl2.c new file mode 100644 index 000000000000..91a4d170bda4 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_erl2.c @@ -0,0 +1,474 @@ +/******************************************************************************* + * This file contains error recovery level two functions used by + * the iSCSI Target driver. + * + * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + ******************************************************************************/ + +#include <scsi/iscsi_proto.h> +#include <target/target_core_base.h> +#include <target/target_core_transport.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_datain_values.h" +#include "iscsi_target_util.h" +#include "iscsi_target_erl0.h" +#include "iscsi_target_erl1.h" +#include "iscsi_target_erl2.h" +#include "iscsi_target.h" + +/* + * FIXME: Does RData SNACK apply here as well? + */ +void iscsit_create_conn_recovery_datain_values( + struct iscsi_cmd *cmd, + u32 exp_data_sn) +{ + u32 data_sn = 0; + struct iscsi_conn *conn = cmd->conn; + + cmd->next_burst_len = 0; + cmd->read_data_done = 0; + + while (exp_data_sn > data_sn) { + if ((cmd->next_burst_len + + conn->conn_ops->MaxRecvDataSegmentLength) < + conn->sess->sess_ops->MaxBurstLength) { + cmd->read_data_done += + conn->conn_ops->MaxRecvDataSegmentLength; + cmd->next_burst_len += + conn->conn_ops->MaxRecvDataSegmentLength; + } else { + cmd->read_data_done += + (conn->sess->sess_ops->MaxBurstLength - + cmd->next_burst_len); + cmd->next_burst_len = 0; + } + data_sn++; + } +} + +void iscsit_create_conn_recovery_dataout_values( + struct iscsi_cmd *cmd) +{ + u32 write_data_done = 0; + struct iscsi_conn *conn = cmd->conn; + + cmd->data_sn = 0; + cmd->next_burst_len = 0; + + while (cmd->write_data_done > write_data_done) { + if ((write_data_done + conn->sess->sess_ops->MaxBurstLength) <= + cmd->write_data_done) + write_data_done += conn->sess->sess_ops->MaxBurstLength; + else + break; + } + + cmd->write_data_done = write_data_done; +} + +static int iscsit_attach_active_connection_recovery_entry( + struct iscsi_session *sess, + struct iscsi_conn_recovery *cr) +{ + 
spin_lock(&sess->cr_a_lock); + list_add_tail(&cr->cr_list, &sess->cr_active_list); + spin_unlock(&sess->cr_a_lock); + + return 0; +} + +static int iscsit_attach_inactive_connection_recovery_entry( + struct iscsi_session *sess, + struct iscsi_conn_recovery *cr) +{ + spin_lock(&sess->cr_i_lock); + list_add_tail(&cr->cr_list, &sess->cr_inactive_list); + + sess->conn_recovery_count++; + pr_debug("Incremented connection recovery count to %u for" + " SID: %u\n", sess->conn_recovery_count, sess->sid); + spin_unlock(&sess->cr_i_lock); + + return 0; +} + +struct iscsi_conn_recovery *iscsit_get_inactive_connection_recovery_entry( + struct iscsi_session *sess, + u16 cid) +{ + struct iscsi_conn_recovery *cr; + + spin_lock(&sess->cr_i_lock); + list_for_each_entry(cr, &sess->cr_inactive_list, cr_list) { + if (cr->cid == cid) { + spin_unlock(&sess->cr_i_lock); + return cr; + } + } + spin_unlock(&sess->cr_i_lock); + + return NULL; +} + +void iscsit_free_connection_recovery_entires(struct iscsi_session *sess) +{ + struct iscsi_cmd *cmd, *cmd_tmp; + struct iscsi_conn_recovery *cr, *cr_tmp; + + spin_lock(&sess->cr_a_lock); + list_for_each_entry_safe(cr, cr_tmp, &sess->cr_active_list, cr_list) { + list_del(&cr->cr_list); + spin_unlock(&sess->cr_a_lock); + + spin_lock(&cr->conn_recovery_cmd_lock); + list_for_each_entry_safe(cmd, cmd_tmp, + &cr->conn_recovery_cmd_list, i_list) { + + list_del(&cmd->i_list); + cmd->conn = NULL; + spin_unlock(&cr->conn_recovery_cmd_lock); + if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) || + !(cmd->se_cmd.transport_wait_for_tasks)) + iscsit_release_cmd(cmd); + else + cmd->se_cmd.transport_wait_for_tasks( + &cmd->se_cmd, 1, 1); + spin_lock(&cr->conn_recovery_cmd_lock); + } + spin_unlock(&cr->conn_recovery_cmd_lock); + spin_lock(&sess->cr_a_lock); + + kfree(cr); + } + spin_unlock(&sess->cr_a_lock); + + spin_lock(&sess->cr_i_lock); + list_for_each_entry_safe(cr, cr_tmp, &sess->cr_inactive_list, cr_list) { + list_del(&cr->cr_list); + 
spin_unlock(&sess->cr_i_lock); + + spin_lock(&cr->conn_recovery_cmd_lock); + list_for_each_entry_safe(cmd, cmd_tmp, + &cr->conn_recovery_cmd_list, i_list) { + + list_del(&cmd->i_list); + cmd->conn = NULL; + spin_unlock(&cr->conn_recovery_cmd_lock); + if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) || + !(cmd->se_cmd.transport_wait_for_tasks)) + iscsit_release_cmd(cmd); + else + cmd->se_cmd.transport_wait_for_tasks( + &cmd->se_cmd, 1, 1); + spin_lock(&cr->conn_recovery_cmd_lock); + } + spin_unlock(&cr->conn_recovery_cmd_lock); + spin_lock(&sess->cr_i_lock); + + kfree(cr); + } + spin_unlock(&sess->cr_i_lock); +} + +int iscsit_remove_active_connection_recovery_entry( + struct iscsi_conn_recovery *cr, + struct iscsi_session *sess) +{ + spin_lock(&sess->cr_a_lock); + list_del(&cr->cr_list); + + sess->conn_recovery_count--; + pr_debug("Decremented connection recovery count to %u for" + " SID: %u\n", sess->conn_recovery_count, sess->sid); + spin_unlock(&sess->cr_a_lock); + + kfree(cr); + + return 0; +} + +int iscsit_remove_inactive_connection_recovery_entry( + struct iscsi_conn_recovery *cr, + struct iscsi_session *sess) +{ + spin_lock(&sess->cr_i_lock); + list_del(&cr->cr_list); + spin_unlock(&sess->cr_i_lock); + + return 0; +} + +/* + * Called with cr->conn_recovery_cmd_lock held. 
+ */ +int iscsit_remove_cmd_from_connection_recovery( + struct iscsi_cmd *cmd, + struct iscsi_session *sess) +{ + struct iscsi_conn_recovery *cr; + + if (!cmd->cr) { + pr_err("struct iscsi_conn_recovery pointer for ITT: 0x%08x" + " is NULL!\n", cmd->init_task_tag); + BUG(); + } + cr = cmd->cr; + + list_del(&cmd->i_list); + return --cr->cmd_count; +} + +void iscsit_discard_cr_cmds_by_expstatsn( + struct iscsi_conn_recovery *cr, + u32 exp_statsn) +{ + u32 dropped_count = 0; + struct iscsi_cmd *cmd, *cmd_tmp; + struct iscsi_session *sess = cr->sess; + + spin_lock(&cr->conn_recovery_cmd_lock); + list_for_each_entry_safe(cmd, cmd_tmp, + &cr->conn_recovery_cmd_list, i_list) { + + if (((cmd->deferred_i_state != ISTATE_SENT_STATUS) && + (cmd->deferred_i_state != ISTATE_REMOVE)) || + (cmd->stat_sn >= exp_statsn)) { + continue; + } + + dropped_count++; + pr_debug("Dropping Acknowledged ITT: 0x%08x, StatSN:" + " 0x%08x, CID: %hu.\n", cmd->init_task_tag, + cmd->stat_sn, cr->cid); + + iscsit_remove_cmd_from_connection_recovery(cmd, sess); + + spin_unlock(&cr->conn_recovery_cmd_lock); + if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) || + !(cmd->se_cmd.transport_wait_for_tasks)) + iscsit_release_cmd(cmd); + else + cmd->se_cmd.transport_wait_for_tasks( + &cmd->se_cmd, 1, 0); + spin_lock(&cr->conn_recovery_cmd_lock); + } + spin_unlock(&cr->conn_recovery_cmd_lock); + + pr_debug("Dropped %u total acknowledged commands on" + " CID: %hu less than old ExpStatSN: 0x%08x\n", + dropped_count, cr->cid, exp_statsn); + + if (!cr->cmd_count) { + pr_debug("No commands to be reassigned for failed" + " connection CID: %hu on SID: %u\n", + cr->cid, sess->sid); + iscsit_remove_inactive_connection_recovery_entry(cr, sess); + iscsit_attach_active_connection_recovery_entry(sess, cr); + pr_debug("iSCSI connection recovery successful for CID:" + " %hu on SID: %u\n", cr->cid, sess->sid); + iscsit_remove_active_connection_recovery_entry(cr, sess); + } else { + 
iscsit_remove_inactive_connection_recovery_entry(cr, sess); + iscsit_attach_active_connection_recovery_entry(sess, cr); + } +} + +int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *conn) +{ + u32 dropped_count = 0; + struct iscsi_cmd *cmd, *cmd_tmp; + struct iscsi_ooo_cmdsn *ooo_cmdsn, *ooo_cmdsn_tmp; + struct iscsi_session *sess = conn->sess; + + mutex_lock(&sess->cmdsn_mutex); + list_for_each_entry_safe(ooo_cmdsn, ooo_cmdsn_tmp, + &sess->sess_ooo_cmdsn_list, ooo_list) { + + if (ooo_cmdsn->cid != conn->cid) + continue; + + dropped_count++; + pr_debug("Dropping unacknowledged CmdSN:" + " 0x%08x during connection recovery on CID: %hu\n", + ooo_cmdsn->cmdsn, conn->cid); + iscsit_remove_ooo_cmdsn(sess, ooo_cmdsn); + } + mutex_unlock(&sess->cmdsn_mutex); + + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_list) { + if (!(cmd->cmd_flags & ICF_OOO_CMDSN)) + continue; + + list_del(&cmd->i_list); + + spin_unlock_bh(&conn->cmd_lock); + if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) || + !(cmd->se_cmd.transport_wait_for_tasks)) + iscsit_release_cmd(cmd); + else + cmd->se_cmd.transport_wait_for_tasks( + &cmd->se_cmd, 1, 1); + spin_lock_bh(&conn->cmd_lock); + } + spin_unlock_bh(&conn->cmd_lock); + + pr_debug("Dropped %u total unacknowledged commands on CID:" + " %hu for ExpCmdSN: 0x%08x.\n", dropped_count, conn->cid, + sess->exp_cmd_sn); + return 0; +} + +int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) +{ + u32 cmd_count = 0; + struct iscsi_cmd *cmd, *cmd_tmp; + struct iscsi_conn_recovery *cr; + + /* + * Allocate an struct iscsi_conn_recovery for this connection. + * Each struct iscsi_cmd contains an struct iscsi_conn_recovery pointer + * (struct iscsi_cmd->cr) so we need to allocate this before preparing the + * connection's command list for connection recovery. 
+ */ + cr = kzalloc(sizeof(struct iscsi_conn_recovery), GFP_KERNEL); + if (!cr) { + pr_err("Unable to allocate memory for" + " struct iscsi_conn_recovery.\n"); + return -1; + } + INIT_LIST_HEAD(&cr->cr_list); + INIT_LIST_HEAD(&cr->conn_recovery_cmd_list); + spin_lock_init(&cr->conn_recovery_cmd_lock); + /* + * Only perform connection recovery on ISCSI_OP_SCSI_CMD or + * ISCSI_OP_NOOP_OUT opcodes. For all other opcodes call + * list_del(&cmd->i_list); to release the command to the + * session pool and remove it from the connection's list. + * + * Also stop the DataOUT timer, which will be restarted after + * sending the TMR response. + */ + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_list) { + + if ((cmd->iscsi_opcode != ISCSI_OP_SCSI_CMD) && + (cmd->iscsi_opcode != ISCSI_OP_NOOP_OUT)) { + pr_debug("Not performing realligence on" + " Opcode: 0x%02x, ITT: 0x%08x, CmdSN: 0x%08x," + " CID: %hu\n", cmd->iscsi_opcode, + cmd->init_task_tag, cmd->cmd_sn, conn->cid); + + list_del(&cmd->i_list); + spin_unlock_bh(&conn->cmd_lock); + + if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) || + !(cmd->se_cmd.transport_wait_for_tasks)) + iscsit_release_cmd(cmd); + else + cmd->se_cmd.transport_wait_for_tasks( + &cmd->se_cmd, 1, 0); + spin_lock_bh(&conn->cmd_lock); + continue; + } + + /* + * Special case where commands greater than or equal to + * the session's ExpCmdSN are attached to the connection + * list but not to the out of order CmdSN list. The one + * obvious case is when a command with immediate data + * attached must only check the CmdSN against ExpCmdSN + * after the data is received. The special case below + * is when the connection fails before data is received, + * but also may apply to other PDUs, so it has been + * made generic here. 
+ */ + if (!(cmd->cmd_flags & ICF_OOO_CMDSN) && !cmd->immediate_cmd && + (cmd->cmd_sn >= conn->sess->exp_cmd_sn)) { + list_del(&cmd->i_list); + spin_unlock_bh(&conn->cmd_lock); + + if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) || + !(cmd->se_cmd.transport_wait_for_tasks)) + iscsit_release_cmd(cmd); + else + cmd->se_cmd.transport_wait_for_tasks( + &cmd->se_cmd, 1, 1); + spin_lock_bh(&conn->cmd_lock); + continue; + } + + cmd_count++; + pr_debug("Preparing Opcode: 0x%02x, ITT: 0x%08x," + " CmdSN: 0x%08x, StatSN: 0x%08x, CID: %hu for" + " realligence.\n", cmd->iscsi_opcode, + cmd->init_task_tag, cmd->cmd_sn, cmd->stat_sn, + conn->cid); + + cmd->deferred_i_state = cmd->i_state; + cmd->i_state = ISTATE_IN_CONNECTION_RECOVERY; + + if (cmd->data_direction == DMA_TO_DEVICE) + iscsit_stop_dataout_timer(cmd); + + cmd->sess = conn->sess; + + list_del(&cmd->i_list); + spin_unlock_bh(&conn->cmd_lock); + + iscsit_free_all_datain_reqs(cmd); + + if ((cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) && + cmd->se_cmd.transport_wait_for_tasks) + cmd->se_cmd.transport_wait_for_tasks(&cmd->se_cmd, + 0, 0); + /* + * Add the struct iscsi_cmd to the connection recovery cmd list + */ + spin_lock(&cr->conn_recovery_cmd_lock); + list_add_tail(&cmd->i_list, &cr->conn_recovery_cmd_list); + spin_unlock(&cr->conn_recovery_cmd_lock); + + spin_lock_bh(&conn->cmd_lock); + cmd->cr = cr; + cmd->conn = NULL; + } + spin_unlock_bh(&conn->cmd_lock); + /* + * Fill in the various values in the preallocated struct iscsi_conn_recovery. 
+ */ + cr->cid = conn->cid; + cr->cmd_count = cmd_count; + cr->maxrecvdatasegmentlength = conn->conn_ops->MaxRecvDataSegmentLength; + cr->sess = conn->sess; + + iscsit_attach_inactive_connection_recovery_entry(conn->sess, cr); + + return 0; +} + +int iscsit_connection_recovery_transport_reset(struct iscsi_conn *conn) +{ + atomic_set(&conn->connection_recovery, 1); + + if (iscsit_close_connection(conn) < 0) + return -1; + + return 0; +} diff --git a/drivers/target/iscsi/iscsi_target_erl2.h b/drivers/target/iscsi/iscsi_target_erl2.h new file mode 100644 index 000000000000..22f8d24780a6 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_erl2.h @@ -0,0 +1,18 @@ +#ifndef ISCSI_TARGET_ERL2_H +#define ISCSI_TARGET_ERL2_H + +extern void iscsit_create_conn_recovery_datain_values(struct iscsi_cmd *, u32); +extern void iscsit_create_conn_recovery_dataout_values(struct iscsi_cmd *); +extern struct iscsi_conn_recovery *iscsit_get_inactive_connection_recovery_entry( + struct iscsi_session *, u16); +extern void iscsit_free_connection_recovery_entires(struct iscsi_session *); +extern int iscsit_remove_active_connection_recovery_entry( + struct iscsi_conn_recovery *, struct iscsi_session *); +extern int iscsit_remove_cmd_from_connection_recovery(struct iscsi_cmd *, + struct iscsi_session *); +extern void iscsit_discard_cr_cmds_by_expstatsn(struct iscsi_conn_recovery *, u32); +extern int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *); +extern int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *); +extern int iscsit_connection_recovery_transport_reset(struct iscsi_conn *); + +#endif /*** ISCSI_TARGET_ERL2_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c new file mode 100644 index 000000000000..bcaf82f47037 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -0,0 +1,1232 @@ +/******************************************************************************* + * This file contains the 
login functions used by the iSCSI Target driver. + * + * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + ******************************************************************************/ + +#include <linux/string.h> +#include <linux/kthread.h> +#include <linux/crypto.h> +#include <scsi/iscsi_proto.h> +#include <target/target_core_base.h> +#include <target/target_core_transport.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_tq.h" +#include "iscsi_target_device.h" +#include "iscsi_target_nego.h" +#include "iscsi_target_erl0.h" +#include "iscsi_target_erl2.h" +#include "iscsi_target_login.h" +#include "iscsi_target_stat.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_util.h" +#include "iscsi_target.h" +#include "iscsi_target_parameters.h" + +extern struct idr sess_idr; +extern struct mutex auth_id_lock; +extern spinlock_t sess_idr_lock; + +static int iscsi_login_init_conn(struct iscsi_conn *conn) +{ + INIT_LIST_HEAD(&conn->conn_list); + INIT_LIST_HEAD(&conn->conn_cmd_list); + INIT_LIST_HEAD(&conn->immed_queue_list); + INIT_LIST_HEAD(&conn->response_queue_list); + init_completion(&conn->conn_post_wait_comp); + init_completion(&conn->conn_wait_comp); + init_completion(&conn->conn_wait_rcfr_comp); + init_completion(&conn->conn_waiting_on_uc_comp); + init_completion(&conn->conn_logout_comp); + 
init_completion(&conn->rx_half_close_comp); + init_completion(&conn->tx_half_close_comp); + spin_lock_init(&conn->cmd_lock); + spin_lock_init(&conn->conn_usage_lock); + spin_lock_init(&conn->immed_queue_lock); + spin_lock_init(&conn->nopin_timer_lock); + spin_lock_init(&conn->response_queue_lock); + spin_lock_init(&conn->state_lock); + + if (!zalloc_cpumask_var(&conn->conn_cpumask, GFP_KERNEL)) { + pr_err("Unable to allocate conn->conn_cpumask\n"); + return -ENOMEM; + } + + return 0; +} + +/* + * Used by iscsi_target_nego.c:iscsi_target_locate_portal() to setup + * per struct iscsi_conn libcrypto contexts for crc32c and crc32-intel + */ +int iscsi_login_setup_crypto(struct iscsi_conn *conn) +{ + /* + * Setup slicing by CRC32C algorithm for RX and TX libcrypto contexts + * which will default to crc32c_intel.ko for cpu_has_xmm4_2, or fallback + * to software 1x8 byte slicing from crc32c.ko + */ + conn->conn_rx_hash.flags = 0; + conn->conn_rx_hash.tfm = crypto_alloc_hash("crc32c", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(conn->conn_rx_hash.tfm)) { + pr_err("crypto_alloc_hash() failed for conn_rx_tfm\n"); + return -ENOMEM; + } + + conn->conn_tx_hash.flags = 0; + conn->conn_tx_hash.tfm = crypto_alloc_hash("crc32c", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(conn->conn_tx_hash.tfm)) { + pr_err("crypto_alloc_hash() failed for conn_tx_tfm\n"); + crypto_free_hash(conn->conn_rx_hash.tfm); + return -ENOMEM; + } + + return 0; +} + +static int iscsi_login_check_initiator_version( + struct iscsi_conn *conn, + u8 version_max, + u8 version_min) +{ + if ((version_max != 0x00) || (version_min != 0x00)) { + pr_err("Unsupported iSCSI IETF Pre-RFC Revision," + " version Min/Max 0x%02x/0x%02x, rejecting login.\n", + version_min, version_max); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_NO_VERSION); + return -1; + } + + return 0; +} + +int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn) +{ + int sessiontype; + struct iscsi_param 
*initiatorname_param = NULL, *sessiontype_param = NULL; + struct iscsi_portal_group *tpg = conn->tpg; + struct iscsi_session *sess = NULL, *sess_p = NULL; + struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; + struct se_session *se_sess, *se_sess_tmp; + + initiatorname_param = iscsi_find_param_from_key( + INITIATORNAME, conn->param_list); + if (!initiatorname_param) + return -1; + + sessiontype_param = iscsi_find_param_from_key( + SESSIONTYPE, conn->param_list); + if (!sessiontype_param) + return -1; + + sessiontype = (strncmp(sessiontype_param->value, NORMAL, 6)) ? 1 : 0; + + spin_lock_bh(&se_tpg->session_lock); + list_for_each_entry_safe(se_sess, se_sess_tmp, &se_tpg->tpg_sess_list, + sess_list) { + + sess_p = (struct iscsi_session *)se_sess->fabric_sess_ptr; + spin_lock(&sess_p->conn_lock); + if (atomic_read(&sess_p->session_fall_back_to_erl0) || + atomic_read(&sess_p->session_logout) || + (sess_p->time2retain_timer_flags & ISCSI_TF_EXPIRED)) { + spin_unlock(&sess_p->conn_lock); + continue; + } + if (!memcmp((void *)sess_p->isid, (void *)conn->sess->isid, 6) && + (!strcmp((void *)sess_p->sess_ops->InitiatorName, + (void *)initiatorname_param->value) && + (sess_p->sess_ops->SessionType == sessiontype))) { + atomic_set(&sess_p->session_reinstatement, 1); + spin_unlock(&sess_p->conn_lock); + iscsit_inc_session_usage_count(sess_p); + iscsit_stop_time2retain_timer(sess_p); + sess = sess_p; + break; + } + spin_unlock(&sess_p->conn_lock); + } + spin_unlock_bh(&se_tpg->session_lock); + /* + * If the Time2Retain handler has expired, the session is already gone. + */ + if (!sess) + return 0; + + pr_debug("%s iSCSI Session SID %u is still active for %s," + " preforming session reinstatement.\n", (sessiontype) ? 
+ "Discovery" : "Normal", sess->sid, + sess->sess_ops->InitiatorName); + + spin_lock_bh(&sess->conn_lock); + if (sess->session_state == TARG_SESS_STATE_FAILED) { + spin_unlock_bh(&sess->conn_lock); + iscsit_dec_session_usage_count(sess); + return iscsit_close_session(sess); + } + spin_unlock_bh(&sess->conn_lock); + + iscsit_stop_session(sess, 1, 1); + iscsit_dec_session_usage_count(sess); + + return iscsit_close_session(sess); +} + +static void iscsi_login_set_conn_values( + struct iscsi_session *sess, + struct iscsi_conn *conn, + u16 cid) +{ + conn->sess = sess; + conn->cid = cid; + /* + * Generate a random Status sequence number (statsn) for the new + * iSCSI connection. + */ + get_random_bytes(&conn->stat_sn, sizeof(u32)); + + mutex_lock(&auth_id_lock); + conn->auth_id = iscsit_global->auth_id++; + mutex_unlock(&auth_id_lock); +} + +/* + * This is the leading connection of a new session, + * or session reinstatement. + */ +static int iscsi_login_zero_tsih_s1( + struct iscsi_conn *conn, + unsigned char *buf) +{ + struct iscsi_session *sess = NULL; + struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf; + + sess = kzalloc(sizeof(struct iscsi_session), GFP_KERNEL); + if (!sess) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + pr_err("Could not allocate memory for session\n"); + return -1; + } + + iscsi_login_set_conn_values(sess, conn, pdu->cid); + sess->init_task_tag = pdu->itt; + memcpy((void *)&sess->isid, (void *)pdu->isid, 6); + sess->exp_cmd_sn = pdu->cmdsn; + INIT_LIST_HEAD(&sess->sess_conn_list); + INIT_LIST_HEAD(&sess->sess_ooo_cmdsn_list); + INIT_LIST_HEAD(&sess->cr_active_list); + INIT_LIST_HEAD(&sess->cr_inactive_list); + init_completion(&sess->async_msg_comp); + init_completion(&sess->reinstatement_comp); + init_completion(&sess->session_wait_comp); + init_completion(&sess->session_waiting_on_uc_comp); + mutex_init(&sess->cmdsn_mutex); + spin_lock_init(&sess->conn_lock); + 
spin_lock_init(&sess->cr_a_lock); + spin_lock_init(&sess->cr_i_lock); + spin_lock_init(&sess->session_usage_lock); + spin_lock_init(&sess->ttt_lock); + + if (!idr_pre_get(&sess_idr, GFP_KERNEL)) { + pr_err("idr_pre_get() for sess_idr failed\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + spin_lock(&sess_idr_lock); + idr_get_new(&sess_idr, NULL, &sess->session_index); + spin_unlock(&sess_idr_lock); + + sess->creation_time = get_jiffies_64(); + spin_lock_init(&sess->session_stats_lock); + /* + * The FFP CmdSN window values will be allocated from the TPG's + * Initiator Node's ACL once the login has been successfully completed. + */ + sess->max_cmd_sn = pdu->cmdsn; + + sess->sess_ops = kzalloc(sizeof(struct iscsi_sess_ops), GFP_KERNEL); + if (!sess->sess_ops) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + pr_err("Unable to allocate memory for" + " struct iscsi_sess_ops.\n"); + return -1; + } + + sess->se_sess = transport_init_session(); + if (!sess->se_sess) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + + return 0; +} + +static int iscsi_login_zero_tsih_s2( + struct iscsi_conn *conn) +{ + struct iscsi_node_attrib *na; + struct iscsi_session *sess = conn->sess; + unsigned char buf[32]; + + sess->tpg = conn->tpg; + + /* + * Assign a new TPG Session Handle. Note this is protected with + * struct iscsi_portal_group->np_login_sem from iscsit_access_np(). + */ + sess->tsih = ++ISCSI_TPG_S(sess)->ntsih; + if (!sess->tsih) + sess->tsih = ++ISCSI_TPG_S(sess)->ntsih; + + /* + * Create the default params from user defined values.. 
+ */ + if (iscsi_copy_param_list(&conn->param_list, + ISCSI_TPG_C(conn)->param_list, 1) < 0) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + + iscsi_set_keys_to_negotiate(0, conn->param_list); + + if (sess->sess_ops->SessionType) + return iscsi_set_keys_irrelevant_for_discovery( + conn->param_list); + + na = iscsit_tpg_get_node_attrib(sess); + + /* + * Need to send TargetPortalGroupTag back in first login response + * on any iSCSI connection where the Initiator provides TargetName. + * See 5.3.1. Login Phase Start + * + * In our case, we have already located the struct iscsi_tiqn at this point. + */ + memset(buf, 0, 32); + sprintf(buf, "TargetPortalGroupTag=%hu", ISCSI_TPG_S(sess)->tpgt); + if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + + /* + * Workaround for Initiators that have broken connection recovery logic. + * + * "We would really like to get rid of this." Linux-iSCSI.org team + */ + memset(buf, 0, 32); + sprintf(buf, "ErrorRecoveryLevel=%d", na->default_erl); + if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + + if (iscsi_login_disable_FIM_keys(conn->param_list, conn) < 0) + return -1; + + return 0; +} + +/* + * Remove PSTATE_NEGOTIATE for the four FIM related keys. + * The Initiator node will be able to enable FIM by proposing them itself. 
+ *
+ * Returns 0 once all four keys have had PSTATE_NEGOTIATE cleared.  If a key
+ * cannot be found in @param_list, the failing key is logged, a login response
+ * with ISCSI_STATUS_CLS_TARGET_ERR / ISCSI_LOGIN_STATUS_NO_RESOURCES is sent
+ * on @conn and -1 is returned.
+ */
+int iscsi_login_disable_FIM_keys(
+	struct iscsi_param_list *param_list,
+	struct iscsi_conn *conn)
+{
+	struct iscsi_param *param;
+
+	param = iscsi_find_param_from_key("OFMarker", param_list);
+	if (!param) {
+		pr_err("iscsi_find_param_from_key() for"
+				" OFMarker failed\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		return -1;
+	}
+	param->state &= ~PSTATE_NEGOTIATE;
+
+	param = iscsi_find_param_from_key("OFMarkInt", param_list);
+	if (!param) {
+		/* Name the key that was actually looked up. */
+		pr_err("iscsi_find_param_from_key() for"
+				" OFMarkInt failed\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		return -1;
+	}
+	param->state &= ~PSTATE_NEGOTIATE;
+
+	param = iscsi_find_param_from_key("IFMarker", param_list);
+	if (!param) {
+		pr_err("iscsi_find_param_from_key() for"
+				" IFMarker failed\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		return -1;
+	}
+	param->state &= ~PSTATE_NEGOTIATE;
+
+	param = iscsi_find_param_from_key("IFMarkInt", param_list);
+	if (!param) {
+		/* Name the key that was actually looked up. */
+		pr_err("iscsi_find_param_from_key() for"
+				" IFMarkInt failed\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		return -1;
+	}
+	param->state &= ~PSTATE_NEGOTIATE;
+
+	return 0;
+}
+
+/*
+ * First step for a login request carrying a non-zero TSIH: hand the CID from
+ * the login request header in @buf to iscsi_login_set_conn_values(), with no
+ * session attached yet (NULL).  Always returns 0.
+ */
+static int iscsi_login_non_zero_tsih_s1(
+	struct iscsi_conn *conn,
+	unsigned char *buf)
+{
+	struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf;
+
+	iscsi_login_set_conn_values(NULL, conn, pdu->cid);
+	return 0;
+}
+
+/*
+ * Add a new connection to an existing session.
+ */ +static int iscsi_login_non_zero_tsih_s2( + struct iscsi_conn *conn, + unsigned char *buf) +{ + struct iscsi_portal_group *tpg = conn->tpg; + struct iscsi_session *sess = NULL, *sess_p = NULL; + struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; + struct se_session *se_sess, *se_sess_tmp; + struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf; + + spin_lock_bh(&se_tpg->session_lock); + list_for_each_entry_safe(se_sess, se_sess_tmp, &se_tpg->tpg_sess_list, + sess_list) { + + sess_p = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (atomic_read(&sess_p->session_fall_back_to_erl0) || + atomic_read(&sess_p->session_logout) || + (sess_p->time2retain_timer_flags & ISCSI_TF_EXPIRED)) + continue; + if (!memcmp((const void *)sess_p->isid, + (const void *)pdu->isid, 6) && + (sess_p->tsih == pdu->tsih)) { + iscsit_inc_session_usage_count(sess_p); + iscsit_stop_time2retain_timer(sess_p); + sess = sess_p; + break; + } + } + spin_unlock_bh(&se_tpg->session_lock); + + /* + * If the Time2Retain handler has expired, the session is already gone. + */ + if (!sess) { + pr_err("Initiator attempting to add a connection to" + " a non-existent session, rejecting iSCSI Login.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_NO_SESSION); + return -1; + } + + /* + * Stop the Time2Retain timer if this is a failed session, we restart + * the timer if the login is not successful. 
+ */ + spin_lock_bh(&sess->conn_lock); + if (sess->session_state == TARG_SESS_STATE_FAILED) + atomic_set(&sess->session_continuation, 1); + spin_unlock_bh(&sess->conn_lock); + + iscsi_login_set_conn_values(sess, conn, pdu->cid); + + if (iscsi_copy_param_list(&conn->param_list, + ISCSI_TPG_C(conn)->param_list, 0) < 0) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + + iscsi_set_keys_to_negotiate(0, conn->param_list); + /* + * Need to send TargetPortalGroupTag back in first login response + * on any iSCSI connection where the Initiator provides TargetName. + * See 5.3.1. Login Phase Start + * + * In our case, we have already located the struct iscsi_tiqn at this point. + */ + memset(buf, 0, 32); + sprintf(buf, "TargetPortalGroupTag=%hu", ISCSI_TPG_S(sess)->tpgt); + if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + + return iscsi_login_disable_FIM_keys(conn->param_list, conn); +} + +int iscsi_login_post_auth_non_zero_tsih( + struct iscsi_conn *conn, + u16 cid, + u32 exp_statsn) +{ + struct iscsi_conn *conn_ptr = NULL; + struct iscsi_conn_recovery *cr = NULL; + struct iscsi_session *sess = conn->sess; + + /* + * By following item 5 in the login table, if we have found + * an existing ISID and a valid/existing TSIH and an existing + * CID we do connection reinstatement. Currently we dont not + * support it so we send back an non-zero status class to the + * initiator and release the new connection. + */ + conn_ptr = iscsit_get_conn_from_cid_rcfr(sess, cid); + if ((conn_ptr)) { + pr_err("Connection exists with CID %hu for %s," + " performing connection reinstatement.\n", + conn_ptr->cid, sess->sess_ops->InitiatorName); + + iscsit_connection_reinstatement_rcfr(conn_ptr); + iscsit_dec_conn_usage_count(conn_ptr); + } + + /* + * Check for any connection recovery entires containing CID. 
+ * We use the original ExpStatSN sent in the first login request + * to acknowledge commands for the failed connection. + * + * Also note that an explict logout may have already been sent, + * but the response may not be sent due to additional connection + * loss. + */ + if (sess->sess_ops->ErrorRecoveryLevel == 2) { + cr = iscsit_get_inactive_connection_recovery_entry( + sess, cid); + if ((cr)) { + pr_debug("Performing implicit logout" + " for connection recovery on CID: %hu\n", + conn->cid); + iscsit_discard_cr_cmds_by_expstatsn(cr, exp_statsn); + } + } + + /* + * Else we follow item 4 from the login table in that we have + * found an existing ISID and a valid/existing TSIH and a new + * CID we go ahead and continue to add a new connection to the + * session. + */ + pr_debug("Adding CID %hu to existing session for %s.\n", + cid, sess->sess_ops->InitiatorName); + + if ((atomic_read(&sess->nconn) + 1) > sess->sess_ops->MaxConnections) { + pr_err("Adding additional connection to this session" + " would exceed MaxConnections %d, login failed.\n", + sess->sess_ops->MaxConnections); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_ISID_ERROR); + return -1; + } + + return 0; +} + +static void iscsi_post_login_start_timers(struct iscsi_conn *conn) +{ + struct iscsi_session *sess = conn->sess; + + if (!sess->sess_ops->SessionType) + iscsit_start_nopin_timer(conn); +} + +static int iscsi_post_login_handler( + struct iscsi_np *np, + struct iscsi_conn *conn, + u8 zero_tsih) +{ + int stop_timer = 0; + struct iscsi_session *sess = conn->sess; + struct se_session *se_sess = sess->se_sess; + struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess); + struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; + struct iscsi_thread_set *ts; + + iscsit_inc_conn_usage_count(conn); + + iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_SUCCESS, + ISCSI_LOGIN_STATUS_ACCEPT); + + pr_debug("Moving to TARG_CONN_STATE_LOGGED_IN.\n"); + conn->conn_state = 
TARG_CONN_STATE_LOGGED_IN; + + iscsi_set_connection_parameters(conn->conn_ops, conn->param_list); + iscsit_set_sync_and_steering_values(conn); + /* + * SCSI Initiator -> SCSI Target Port Mapping + */ + ts = iscsi_get_thread_set(); + if (!zero_tsih) { + iscsi_set_session_parameters(sess->sess_ops, + conn->param_list, 0); + iscsi_release_param_list(conn->param_list); + conn->param_list = NULL; + + spin_lock_bh(&sess->conn_lock); + atomic_set(&sess->session_continuation, 0); + if (sess->session_state == TARG_SESS_STATE_FAILED) { + pr_debug("Moving to" + " TARG_SESS_STATE_LOGGED_IN.\n"); + sess->session_state = TARG_SESS_STATE_LOGGED_IN; + stop_timer = 1; + } + + pr_debug("iSCSI Login successful on CID: %hu from %s to" + " %s:%hu,%hu\n", conn->cid, conn->login_ip, np->np_ip, + np->np_port, tpg->tpgt); + + list_add_tail(&conn->conn_list, &sess->sess_conn_list); + atomic_inc(&sess->nconn); + pr_debug("Incremented iSCSI Connection count to %hu" + " from node: %s\n", atomic_read(&sess->nconn), + sess->sess_ops->InitiatorName); + spin_unlock_bh(&sess->conn_lock); + + iscsi_post_login_start_timers(conn); + iscsi_activate_thread_set(conn, ts); + /* + * Determine CPU mask to ensure connection's RX and TX kthreads + * are scheduled on the same CPU. 
+ */ + iscsit_thread_get_cpumask(conn); + conn->conn_rx_reset_cpumask = 1; + conn->conn_tx_reset_cpumask = 1; + + iscsit_dec_conn_usage_count(conn); + if (stop_timer) { + spin_lock_bh(&se_tpg->session_lock); + iscsit_stop_time2retain_timer(sess); + spin_unlock_bh(&se_tpg->session_lock); + } + iscsit_dec_session_usage_count(sess); + return 0; + } + + iscsi_set_session_parameters(sess->sess_ops, conn->param_list, 1); + iscsi_release_param_list(conn->param_list); + conn->param_list = NULL; + + iscsit_determine_maxcmdsn(sess); + + spin_lock_bh(&se_tpg->session_lock); + __transport_register_session(&sess->tpg->tpg_se_tpg, + se_sess->se_node_acl, se_sess, (void *)sess); + pr_debug("Moving to TARG_SESS_STATE_LOGGED_IN.\n"); + sess->session_state = TARG_SESS_STATE_LOGGED_IN; + + pr_debug("iSCSI Login successful on CID: %hu from %s to %s:%hu,%hu\n", + conn->cid, conn->login_ip, np->np_ip, np->np_port, tpg->tpgt); + + spin_lock_bh(&sess->conn_lock); + list_add_tail(&conn->conn_list, &sess->sess_conn_list); + atomic_inc(&sess->nconn); + pr_debug("Incremented iSCSI Connection count to %hu from node:" + " %s\n", atomic_read(&sess->nconn), + sess->sess_ops->InitiatorName); + spin_unlock_bh(&sess->conn_lock); + + sess->sid = tpg->sid++; + if (!sess->sid) + sess->sid = tpg->sid++; + pr_debug("Established iSCSI session from node: %s\n", + sess->sess_ops->InitiatorName); + + tpg->nsessions++; + if (tpg->tpg_tiqn) + tpg->tpg_tiqn->tiqn_nsessions++; + + pr_debug("Incremented number of active iSCSI sessions to %u on" + " iSCSI Target Portal Group: %hu\n", tpg->nsessions, tpg->tpgt); + spin_unlock_bh(&se_tpg->session_lock); + + iscsi_post_login_start_timers(conn); + iscsi_activate_thread_set(conn, ts); + /* + * Determine CPU mask to ensure connection's RX and TX kthreads + * are scheduled on the same CPU. 
+ */ + iscsit_thread_get_cpumask(conn); + conn->conn_rx_reset_cpumask = 1; + conn->conn_tx_reset_cpumask = 1; + + iscsit_dec_conn_usage_count(conn); + + return 0; +} + +static void iscsi_handle_login_thread_timeout(unsigned long data) +{ + struct iscsi_np *np = (struct iscsi_np *) data; + + spin_lock_bh(&np->np_thread_lock); + pr_err("iSCSI Login timeout on Network Portal %s:%hu\n", + np->np_ip, np->np_port); + + if (np->np_login_timer_flags & ISCSI_TF_STOP) { + spin_unlock_bh(&np->np_thread_lock); + return; + } + + if (np->np_thread) + send_sig(SIGINT, np->np_thread, 1); + + np->np_login_timer_flags &= ~ISCSI_TF_RUNNING; + spin_unlock_bh(&np->np_thread_lock); +} + +static void iscsi_start_login_thread_timer(struct iscsi_np *np) +{ + /* + * This used the TA_LOGIN_TIMEOUT constant because at this + * point we do not have access to ISCSI_TPG_ATTRIB(tpg)->login_timeout + */ + spin_lock_bh(&np->np_thread_lock); + init_timer(&np->np_login_timer); + np->np_login_timer.expires = (get_jiffies_64() + TA_LOGIN_TIMEOUT * HZ); + np->np_login_timer.data = (unsigned long)np; + np->np_login_timer.function = iscsi_handle_login_thread_timeout; + np->np_login_timer_flags &= ~ISCSI_TF_STOP; + np->np_login_timer_flags |= ISCSI_TF_RUNNING; + add_timer(&np->np_login_timer); + + pr_debug("Added timeout timer to iSCSI login request for" + " %u seconds.\n", TA_LOGIN_TIMEOUT); + spin_unlock_bh(&np->np_thread_lock); +} + +static void iscsi_stop_login_thread_timer(struct iscsi_np *np) +{ + spin_lock_bh(&np->np_thread_lock); + if (!(np->np_login_timer_flags & ISCSI_TF_RUNNING)) { + spin_unlock_bh(&np->np_thread_lock); + return; + } + np->np_login_timer_flags |= ISCSI_TF_STOP; + spin_unlock_bh(&np->np_thread_lock); + + del_timer_sync(&np->np_login_timer); + + spin_lock_bh(&np->np_thread_lock); + np->np_login_timer_flags &= ~ISCSI_TF_RUNNING; + spin_unlock_bh(&np->np_thread_lock); +} + +int iscsi_target_setup_login_socket( + struct iscsi_np *np, + struct __kernel_sockaddr_storage *sockaddr) +{ + 
struct socket *sock; + int backlog = 5, ret, opt = 0, len; + + switch (np->np_network_transport) { + case ISCSI_TCP: + np->np_ip_proto = IPPROTO_TCP; + np->np_sock_type = SOCK_STREAM; + break; + case ISCSI_SCTP_TCP: + np->np_ip_proto = IPPROTO_SCTP; + np->np_sock_type = SOCK_STREAM; + break; + case ISCSI_SCTP_UDP: + np->np_ip_proto = IPPROTO_SCTP; + np->np_sock_type = SOCK_SEQPACKET; + break; + case ISCSI_IWARP_TCP: + case ISCSI_IWARP_SCTP: + case ISCSI_INFINIBAND: + default: + pr_err("Unsupported network_transport: %d\n", + np->np_network_transport); + return -EINVAL; + } + + ret = sock_create(sockaddr->ss_family, np->np_sock_type, + np->np_ip_proto, &sock); + if (ret < 0) { + pr_err("sock_create() failed.\n"); + return ret; + } + np->np_socket = sock; + /* + * The SCTP stack needs struct socket->file. + */ + if ((np->np_network_transport == ISCSI_SCTP_TCP) || + (np->np_network_transport == ISCSI_SCTP_UDP)) { + if (!sock->file) { + sock->file = kzalloc(sizeof(struct file), GFP_KERNEL); + if (!sock->file) { + pr_err("Unable to allocate struct" + " file for SCTP\n"); + ret = -ENOMEM; + goto fail; + } + np->np_flags |= NPF_SCTP_STRUCT_FILE; + } + } + /* + * Setup the np->np_sockaddr from the passed sockaddr setup + * in iscsi_target_configfs.c code.. + */ + memcpy((void *)&np->np_sockaddr, (void *)sockaddr, + sizeof(struct __kernel_sockaddr_storage)); + + if (sockaddr->ss_family == AF_INET6) + len = sizeof(struct sockaddr_in6); + else + len = sizeof(struct sockaddr_in); + /* + * Set SO_REUSEADDR, and disable Nagel Algorithm with TCP_NODELAY. 
+ */ + opt = 1; + if (np->np_network_transport == ISCSI_TCP) { + ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, + (char *)&opt, sizeof(opt)); + if (ret < 0) { + pr_err("kernel_setsockopt() for TCP_NODELAY" + " failed: %d\n", ret); + goto fail; + } + } + + ret = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, + (char *)&opt, sizeof(opt)); + if (ret < 0) { + pr_err("kernel_setsockopt() for SO_REUSEADDR" + " failed\n"); + goto fail; + } + + ret = kernel_bind(sock, (struct sockaddr *)&np->np_sockaddr, len); + if (ret < 0) { + pr_err("kernel_bind() failed: %d\n", ret); + goto fail; + } + + ret = kernel_listen(sock, backlog); + if (ret != 0) { + pr_err("kernel_listen() failed: %d\n", ret); + goto fail; + } + + return 0; + +fail: + np->np_socket = NULL; + if (sock) { + if (np->np_flags & NPF_SCTP_STRUCT_FILE) { + kfree(sock->file); + sock->file = NULL; + } + + sock_release(sock); + } + return ret; +} + +static int __iscsi_target_login_thread(struct iscsi_np *np) +{ + u8 buffer[ISCSI_HDR_LEN], iscsi_opcode, zero_tsih = 0; + int err, ret = 0, ip_proto, sock_type, set_sctp_conn_flag, stop; + struct iscsi_conn *conn = NULL; + struct iscsi_login *login; + struct iscsi_portal_group *tpg = NULL; + struct socket *new_sock, *sock; + struct kvec iov; + struct iscsi_login_req *pdu; + struct sockaddr_in sock_in; + struct sockaddr_in6 sock_in6; + + flush_signals(current); + set_sctp_conn_flag = 0; + sock = np->np_socket; + ip_proto = np->np_ip_proto; + sock_type = np->np_sock_type; + + spin_lock_bh(&np->np_thread_lock); + if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; + complete(&np->np_restart_comp); + } else { + np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; + } + spin_unlock_bh(&np->np_thread_lock); + + if (kernel_accept(sock, &new_sock, 0) < 0) { + spin_lock_bh(&np->np_thread_lock); + if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + spin_unlock_bh(&np->np_thread_lock); + complete(&np->np_restart_comp); + /* Get 
another socket */ + return 1; + } + spin_unlock_bh(&np->np_thread_lock); + goto out; + } + /* + * The SCTP stack needs struct socket->file. + */ + if ((np->np_network_transport == ISCSI_SCTP_TCP) || + (np->np_network_transport == ISCSI_SCTP_UDP)) { + if (!new_sock->file) { + new_sock->file = kzalloc( + sizeof(struct file), GFP_KERNEL); + if (!new_sock->file) { + pr_err("Unable to allocate struct" + " file for SCTP\n"); + sock_release(new_sock); + /* Get another socket */ + return 1; + } + set_sctp_conn_flag = 1; + } + } + + iscsi_start_login_thread_timer(np); + + conn = kzalloc(sizeof(struct iscsi_conn), GFP_KERNEL); + if (!conn) { + pr_err("Could not allocate memory for" + " new connection\n"); + if (set_sctp_conn_flag) { + kfree(new_sock->file); + new_sock->file = NULL; + } + sock_release(new_sock); + /* Get another socket */ + return 1; + } + + pr_debug("Moving to TARG_CONN_STATE_FREE.\n"); + conn->conn_state = TARG_CONN_STATE_FREE; + conn->sock = new_sock; + + if (set_sctp_conn_flag) + conn->conn_flags |= CONNFLAG_SCTP_STRUCT_FILE; + + pr_debug("Moving to TARG_CONN_STATE_XPT_UP.\n"); + conn->conn_state = TARG_CONN_STATE_XPT_UP; + + /* + * Allocate conn->conn_ops early as a failure calling + * iscsit_tx_login_rsp() below will call tx_data(). + */ + conn->conn_ops = kzalloc(sizeof(struct iscsi_conn_ops), GFP_KERNEL); + if (!conn->conn_ops) { + pr_err("Unable to allocate memory for" + " struct iscsi_conn_ops.\n"); + goto new_sess_out; + } + /* + * Perform the remaining iSCSI connection initialization items.. 
+ */ + if (iscsi_login_init_conn(conn) < 0) + goto new_sess_out; + + memset(buffer, 0, ISCSI_HDR_LEN); + memset(&iov, 0, sizeof(struct kvec)); + iov.iov_base = buffer; + iov.iov_len = ISCSI_HDR_LEN; + + if (rx_data(conn, &iov, 1, ISCSI_HDR_LEN) <= 0) { + pr_err("rx_data() returned an error.\n"); + goto new_sess_out; + } + + iscsi_opcode = (buffer[0] & ISCSI_OPCODE_MASK); + if (!(iscsi_opcode & ISCSI_OP_LOGIN)) { + pr_err("First opcode is not login request," + " failing login request.\n"); + goto new_sess_out; + } + + pdu = (struct iscsi_login_req *) buffer; + pdu->cid = be16_to_cpu(pdu->cid); + pdu->tsih = be16_to_cpu(pdu->tsih); + pdu->itt = be32_to_cpu(pdu->itt); + pdu->cmdsn = be32_to_cpu(pdu->cmdsn); + pdu->exp_statsn = be32_to_cpu(pdu->exp_statsn); + /* + * Used by iscsit_tx_login_rsp() for Login Resonses PDUs + * when Status-Class != 0. + */ + conn->login_itt = pdu->itt; + + spin_lock_bh(&np->np_thread_lock); + if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) { + spin_unlock_bh(&np->np_thread_lock); + pr_err("iSCSI Network Portal on %s:%hu currently not" + " active.\n", np->np_ip, np->np_port); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE); + goto new_sess_out; + } + spin_unlock_bh(&np->np_thread_lock); + + if (np->np_sockaddr.ss_family == AF_INET6) { + memset(&sock_in6, 0, sizeof(struct sockaddr_in6)); + + if (conn->sock->ops->getname(conn->sock, + (struct sockaddr *)&sock_in6, &err, 1) < 0) { + pr_err("sock_ops->getname() failed.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_TARGET_ERROR); + goto new_sess_out; + } +#if 0 + if (!iscsi_ntop6((const unsigned char *) + &sock_in6.sin6_addr.in6_u, + (char *)&conn->ipv6_login_ip[0], + IPV6_ADDRESS_SPACE)) { + pr_err("iscsi_ntop6() failed\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_TARGET_ERROR); + goto new_sess_out; + } +#else + pr_debug("Skipping iscsi_ntop6()\n"); +#endif + } else { + 
memset(&sock_in, 0, sizeof(struct sockaddr_in)); + + if (conn->sock->ops->getname(conn->sock, + (struct sockaddr *)&sock_in, &err, 1) < 0) { + pr_err("sock_ops->getname() failed.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_TARGET_ERROR); + goto new_sess_out; + } + sprintf(conn->login_ip, "%pI4", &sock_in.sin_addr.s_addr); + conn->login_port = ntohs(sock_in.sin_port); + } + + conn->network_transport = np->np_network_transport; + + pr_debug("Received iSCSI login request from %s on %s Network" + " Portal %s:%hu\n", conn->login_ip, + (conn->network_transport == ISCSI_TCP) ? "TCP" : "SCTP", + np->np_ip, np->np_port); + + pr_debug("Moving to TARG_CONN_STATE_IN_LOGIN.\n"); + conn->conn_state = TARG_CONN_STATE_IN_LOGIN; + + if (iscsi_login_check_initiator_version(conn, pdu->max_version, + pdu->min_version) < 0) + goto new_sess_out; + + zero_tsih = (pdu->tsih == 0x0000); + if ((zero_tsih)) { + /* + * This is the leading connection of a new session. + * We wait until after authentication to check for + * session reinstatement. + */ + if (iscsi_login_zero_tsih_s1(conn, buffer) < 0) + goto new_sess_out; + } else { + /* + * Add a new connection to an existing session. + * We check for a non-existant session in + * iscsi_login_non_zero_tsih_s2() below based + * on ISID/TSIH, but wait until after authentication + * to check for connection reinstatement, etc. + */ + if (iscsi_login_non_zero_tsih_s1(conn, buffer) < 0) + goto new_sess_out; + } + + /* + * This will process the first login request, and call + * iscsi_target_locate_portal(), and return a valid struct iscsi_login. 
+ */ + login = iscsi_target_init_negotiation(np, conn, buffer); + if (!login) { + tpg = conn->tpg; + goto new_sess_out; + } + + tpg = conn->tpg; + if (!tpg) { + pr_err("Unable to locate struct iscsi_conn->tpg\n"); + goto new_sess_out; + } + + if (zero_tsih) { + if (iscsi_login_zero_tsih_s2(conn) < 0) { + iscsi_target_nego_release(login, conn); + goto new_sess_out; + } + } else { + if (iscsi_login_non_zero_tsih_s2(conn, buffer) < 0) { + iscsi_target_nego_release(login, conn); + goto old_sess_out; + } + } + + if (iscsi_target_start_negotiation(login, conn) < 0) + goto new_sess_out; + + if (!conn->sess) { + pr_err("struct iscsi_conn session pointer is NULL!\n"); + goto new_sess_out; + } + + iscsi_stop_login_thread_timer(np); + + if (signal_pending(current)) + goto new_sess_out; + + ret = iscsi_post_login_handler(np, conn, zero_tsih); + + if (ret < 0) + goto new_sess_out; + + iscsit_deaccess_np(np, tpg); + tpg = NULL; + /* Get another socket */ + return 1; + +new_sess_out: + pr_err("iSCSI Login negotiation failed.\n"); + iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_INIT_ERR); + if (!zero_tsih || !conn->sess) + goto old_sess_out; + if (conn->sess->se_sess) + transport_free_session(conn->sess->se_sess); + if (conn->sess->session_index != 0) { + spin_lock_bh(&sess_idr_lock); + idr_remove(&sess_idr, conn->sess->session_index); + spin_unlock_bh(&sess_idr_lock); + } + if (conn->sess->sess_ops) + kfree(conn->sess->sess_ops); + if (conn->sess) + kfree(conn->sess); +old_sess_out: + iscsi_stop_login_thread_timer(np); + /* + * If login negotiation fails check if the Time2Retain timer + * needs to be restarted. 
+ */ + if (!zero_tsih && conn->sess) { + spin_lock_bh(&conn->sess->conn_lock); + if (conn->sess->session_state == TARG_SESS_STATE_FAILED) { + struct se_portal_group *se_tpg = + &ISCSI_TPG_C(conn)->tpg_se_tpg; + + atomic_set(&conn->sess->session_continuation, 0); + spin_unlock_bh(&conn->sess->conn_lock); + spin_lock_bh(&se_tpg->session_lock); + iscsit_start_time2retain_handler(conn->sess); + spin_unlock_bh(&se_tpg->session_lock); + } else + spin_unlock_bh(&conn->sess->conn_lock); + iscsit_dec_session_usage_count(conn->sess); + } + + if (!IS_ERR(conn->conn_rx_hash.tfm)) + crypto_free_hash(conn->conn_rx_hash.tfm); + if (!IS_ERR(conn->conn_tx_hash.tfm)) + crypto_free_hash(conn->conn_tx_hash.tfm); + + if (conn->conn_cpumask) + free_cpumask_var(conn->conn_cpumask); + + kfree(conn->conn_ops); + + if (conn->param_list) { + iscsi_release_param_list(conn->param_list); + conn->param_list = NULL; + } + if (conn->sock) { + if (conn->conn_flags & CONNFLAG_SCTP_STRUCT_FILE) { + kfree(conn->sock->file); + conn->sock->file = NULL; + } + sock_release(conn->sock); + } + kfree(conn); + + if (tpg) { + iscsit_deaccess_np(np, tpg); + tpg = NULL; + } + +out: + stop = kthread_should_stop(); + if (!stop && signal_pending(current)) { + spin_lock_bh(&np->np_thread_lock); + stop = (np->np_thread_state == ISCSI_NP_THREAD_SHUTDOWN); + spin_unlock_bh(&np->np_thread_lock); + } + /* Wait for another socket.. */ + if (!stop) + return 1; + + iscsi_stop_login_thread_timer(np); + spin_lock_bh(&np->np_thread_lock); + np->np_thread_state = ISCSI_NP_THREAD_EXIT; + spin_unlock_bh(&np->np_thread_lock); + return 0; +} + +int iscsi_target_login_thread(void *arg) +{ + struct iscsi_np *np = (struct iscsi_np *)arg; + int ret; + + allow_signal(SIGINT); + + while (!kthread_should_stop()) { + ret = __iscsi_target_login_thread(np); + /* + * We break and exit here unless another sock_accept() call + * is expected. 
+ */ + if (ret != 1) + break; + } + + return 0; +} diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h new file mode 100644 index 000000000000..091dcae2532b --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_login.h @@ -0,0 +1,12 @@ +#ifndef ISCSI_TARGET_LOGIN_H +#define ISCSI_TARGET_LOGIN_H + +extern int iscsi_login_setup_crypto(struct iscsi_conn *); +extern int iscsi_check_for_session_reinstatement(struct iscsi_conn *); +extern int iscsi_login_post_auth_non_zero_tsih(struct iscsi_conn *, u16, u32); +extern int iscsi_target_setup_login_socket(struct iscsi_np *, + struct __kernel_sockaddr_storage *); +extern int iscsi_target_login_thread(void *); +extern int iscsi_login_disable_FIM_keys(struct iscsi_param_list *, struct iscsi_conn *); + +#endif /*** ISCSI_TARGET_LOGIN_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c new file mode 100644 index 000000000000..713a4d23557a --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -0,0 +1,1067 @@ +/******************************************************************************* + * This file contains main functions related to iSCSI Parameter negotiation. + * + * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ ******************************************************************************/
+
+#include <linux/ctype.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_tpg.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_login.h"
+#include "iscsi_target_nego.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_auth.h"
+
+#define MAX_LOGIN_PDUS 7
+#define TEXT_LEN 4096
+
+/*
+ * Replace every NUL byte in the first @len bytes of @buf with ';'.
+ */
+void convert_null_to_semi(char *buf, int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++)
+		if (buf[i] == '\0')
+			buf[i] = ';';
+}
+
+/*
+ * Return the offset of the first ';' in the NUL-terminated string @buf,
+ * or -1 if the terminating NUL is reached before any ';'.
+ */
+int strlen_semi(char *buf)
+{
+	int i = 0;
+
+	while (buf[i] != '\0') {
+		if (buf[i] == ';')
+			return i;
+		i++;
+	}
+
+	return -1;
+}
+
+/*
+ * Copy the value of a "key=value;" item out of @in_buf.
+ *
+ * Locates the first occurrence of @pattern in @in_buf, moves to the character
+ * after the next '=', and copies everything up to (not including) the next
+ * ';' into @out_buf, followed by a terminating NUL.  If the value starts with
+ * "0x"/"0X" the prefix is skipped and *@type is set to HEX, otherwise *@type
+ * is set to DECIMAL.
+ *
+ * Returns 0 on success.  Returns -1 if any pointer argument is NULL, if
+ * @pattern, '=' or the closing ';' cannot be found, or if the value plus its
+ * terminating NUL does not fit into @max_length bytes.
+ *
+ * NOTE(review): assumes callers pass the size of @out_buf as @max_length --
+ * confirm against the call sites.
+ */
+int extract_param(
+	const char *in_buf,
+	const char *pattern,
+	unsigned int max_length,
+	char *out_buf,
+	unsigned char *type)
+{
+	char *ptr;
+	int len;
+
+	if (!in_buf || !pattern || !out_buf || !type)
+		return -1;
+
+	ptr = strstr(in_buf, pattern);
+	if (!ptr)
+		return -1;
+
+	ptr = strstr(ptr, "=");
+	if (!ptr)
+		return -1;
+
+	ptr += 1;
+	if (*ptr == '0' && (*(ptr+1) == 'x' || *(ptr+1) == 'X')) {
+		ptr += 2; /* skip 0x */
+		*type = HEX;
+	} else
+		*type = DECIMAL;
+
+	len = strlen_semi(ptr);
+	if (len < 0)
+		return -1;
+
+	/*
+	 * len + 1 bytes are written below (value plus NUL), so a value of
+	 * exactly max_length characters must be rejected as well.
+	 */
+	if ((unsigned int)len >= max_length) {
+		pr_err("Length of input: %d exeeds max_length:"
+			" %u\n", len, max_length);
+		return -1;
+	}
+	memcpy(out_buf, ptr, len);
+	out_buf[len] = '\0';
+
+	return 0;
+}
+
+static u32 iscsi_handle_authentication( + struct iscsi_conn *conn, + char *in_buf, + char *out_buf, + int in_length, + int *out_length, + unsigned char *authtype) +{ + struct iscsi_session *sess = conn->sess; + struct iscsi_node_auth *auth; + struct iscsi_node_acl *iscsi_nacl; + struct se_node_acl *se_nacl; + + if (!sess->sess_ops->SessionType) { + /* + * For SessionType=Normal + */ + se_nacl = conn->sess->se_sess->se_node_acl; + if (!se_nacl) { + pr_err("Unable to 
locate struct se_node_acl for" + " CHAP auth\n"); + return -1; + } + iscsi_nacl = container_of(se_nacl, struct iscsi_node_acl, + se_node_acl); + if (!iscsi_nacl) { + pr_err("Unable to locate struct iscsi_node_acl for" + " CHAP auth\n"); + return -1; + } + + auth = ISCSI_NODE_AUTH(iscsi_nacl); + } else { + /* + * For SessionType=Discovery + */ + auth = &iscsit_global->discovery_acl.node_auth; + } + + if (strstr("CHAP", authtype)) + strcpy(conn->sess->auth_type, "CHAP"); + else + strcpy(conn->sess->auth_type, NONE); + + if (strstr("None", authtype)) + return 1; +#ifdef CANSRP + else if (strstr("SRP", authtype)) + return srp_main_loop(conn, auth, in_buf, out_buf, + &in_length, out_length); +#endif + else if (strstr("CHAP", authtype)) + return chap_main_loop(conn, auth, in_buf, out_buf, + &in_length, out_length); + else if (strstr("SPKM1", authtype)) + return 2; + else if (strstr("SPKM2", authtype)) + return 2; + else if (strstr("KRB5", authtype)) + return 2; + else + return 2; +} + +static void iscsi_remove_failed_auth_entry(struct iscsi_conn *conn) +{ + kfree(conn->auth_protocol); +} + +static int iscsi_target_check_login_request( + struct iscsi_conn *conn, + struct iscsi_login *login) +{ + int req_csg, req_nsg, rsp_csg, rsp_nsg; + u32 payload_length; + struct iscsi_login_req *login_req; + struct iscsi_login_rsp *login_rsp; + + login_req = (struct iscsi_login_req *) login->req; + login_rsp = (struct iscsi_login_rsp *) login->rsp; + payload_length = ntoh24(login_req->dlength); + + switch (login_req->opcode & ISCSI_OPCODE_MASK) { + case ISCSI_OP_LOGIN: + break; + default: + pr_err("Received unknown opcode 0x%02x.\n", + login_req->opcode & ISCSI_OPCODE_MASK); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_INIT_ERR); + return -1; + } + + if ((login_req->flags & ISCSI_FLAG_LOGIN_CONTINUE) && + (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) { + pr_err("Login request has both ISCSI_FLAG_LOGIN_CONTINUE" + " and ISCSI_FLAG_LOGIN_TRANSIT set, 
protocol error.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_INIT_ERR);
+		return -1;
+	}
+
+	req_csg = (login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2;
+	rsp_csg = (login_rsp->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2;
+	req_nsg = (login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK);
+	rsp_nsg = (login_rsp->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK);
+
+	if (req_csg != login->current_stage) {
+		pr_err("Initiator unexpectedly changed login stage"
+			" from %d to %d, login failed.\n", login->current_stage,
+			req_csg);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_INIT_ERR);
+		return -1;
+	}
+
+	if ((req_nsg == 2) || (req_csg >= 2) ||
+	   ((login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT) &&
+	    (req_nsg <= req_csg))) {
+		pr_err("Illegal login_req->flags Combination, CSG: %d,"
+			" NSG: %d, ISCSI_FLAG_LOGIN_TRANSIT: %d.\n", req_csg,
+			req_nsg, (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT));
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_INIT_ERR);
+		return -1;
+	}
+
+	if ((login_req->max_version != login->version_max) ||
+	    (login_req->min_version != login->version_min)) {
+		pr_err("Login request changed Version Max/Nin"
+			" unexpectedly to 0x%02x/0x%02x, protocol error\n",
+			login_req->max_version, login_req->min_version);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_INIT_ERR);
+		return -1;
+	}
+
+	if (memcmp(login_req->isid, login->isid, 6) != 0) {
+		pr_err("Login request changed ISID unexpectedly,"
+				" protocol error.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_INIT_ERR);
+		return -1;
+	}
+
+	if (login_req->itt != login->init_task_tag) {
+		pr_err("Login request changed ITT unexpectedly to"
+			" 0x%08x, protocol error.\n", login_req->itt);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_INIT_ERR);
+		return -1;
+	}
+
+	if (payload_length > MAX_KEY_VALUE_PAIRS) {
+		pr_err("Login request payload exceeds default"
+			" MaxRecvDataSegmentLength: %u, protocol error.\n",
+				MAX_KEY_VALUE_PAIRS);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Check the keys that must accompany the first Login Request.
+ *
+ * Clears login->first_request, then walks conn->param_list:
+ *  - SessionType must pass IS_PSTATE_ACCEPTOR(); if its value is
+ *    Discovery the check ends successfully at that point.
+ *  - InitiatorName must pass IS_PSTATE_ACCEPTOR() on a leading
+ *    connection (a non-leading connection without it is skipped); on a
+ *    non-leading connection the value must equal the initiatorname of
+ *    the existing session's se_node_acl.
+ *
+ * Returns 0 on success, -1 after a login reject has been sent.
+ */
+static int iscsi_target_check_first_request(
+	struct iscsi_conn *conn,
+	struct iscsi_login *login)
+{
+	struct iscsi_param *param = NULL;
+	struct se_node_acl *se_nacl;
+
+	login->first_request = 0;
+
+	list_for_each_entry(param, &conn->param_list->param_list, p_list) {
+		if (!strncmp(param->name, SESSIONTYPE, 11)) {
+			if (!IS_PSTATE_ACCEPTOR(param)) {
+				pr_err("SessionType key not received"
+					" in first login request.\n");
+				iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+					ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+				return -1;
+			}
+			if (!strncmp(param->value, DISCOVERY, 9))
+				return 0;
+		}
+
+		if (!strncmp(param->name, INITIATORNAME, 13)) {
+			if (!IS_PSTATE_ACCEPTOR(param)) {
+				if (!login->leading_connection)
+					continue;
+
+				pr_err("InitiatorName key not received"
+					" in first login request.\n");
+				iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+					ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+				return -1;
+			}
+
+			/*
+			 * For non-leading connections, double check that the
+			 * received InitiatorName matches the existing session's
+			 * struct iscsi_node_acl.
+			 */
+			if (!login->leading_connection) {
+				se_nacl = conn->sess->se_sess->se_node_acl;
+				if (!se_nacl) {
+					pr_err("Unable to locate"
+						" struct se_node_acl\n");
+					iscsit_tx_login_rsp(conn,
+							ISCSI_STATUS_CLS_INITIATOR_ERR,
+							ISCSI_LOGIN_STATUS_TGT_NOT_FOUND);
+					return -1;
+				}
+
+				if (strcmp(param->value,
+						se_nacl->initiatorname)) {
+					pr_err("Incorrect"
+						" InitiatorName: %s for this"
+						" iSCSI Initiator Node.\n",
+						param->value);
+					iscsit_tx_login_rsp(conn,
+							ISCSI_STATUS_CLS_INITIATOR_ERR,
+							ISCSI_LOGIN_STATUS_TGT_NOT_FOUND);
+					return -1;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Fill in the Login Response header held in login->rsp and transmit it
+ * together with login->rsp_buf.  The text length is padded up to a
+ * multiple of four bytes for transmission.
+ *
+ * On success login->rsp_length is reset to 0 and the header fields that
+ * were written in big-endian order are converted back.
+ *
+ * Returns 0 on success, -1 if iscsi_login_tx_data() fails.
+ */
+static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+	u32 padding = 0;
+	struct iscsi_session *sess = conn->sess;
+	struct iscsi_login_rsp *login_rsp;
+
+	login_rsp = (struct iscsi_login_rsp *) login->rsp;
+
+	login_rsp->opcode		= ISCSI_OP_LOGIN_RSP;
+	hton24(login_rsp->dlength, login->rsp_length);
+	memcpy(login_rsp->isid, login->isid, 6);
+	login_rsp->tsih			= cpu_to_be16(login->tsih);
+	login_rsp->itt			= cpu_to_be32(login->init_task_tag);
+	login_rsp->statsn		= cpu_to_be32(conn->stat_sn++);
+	login_rsp->exp_cmdsn		= cpu_to_be32(conn->sess->exp_cmd_sn);
+	login_rsp->max_cmdsn		= cpu_to_be32(conn->sess->max_cmd_sn);
+
+	pr_debug("Sending Login Response, Flags: 0x%02x, ITT: 0x%08x,"
+		" ExpCmdSN; 0x%08x, MaxCmdSN: 0x%08x, StatSN: 0x%08x, Length:"
+		" %u\n", login_rsp->flags, ntohl(login_rsp->itt),
+		ntohl(login_rsp->exp_cmdsn), ntohl(login_rsp->max_cmdsn),
+		ntohl(login_rsp->statsn), login->rsp_length);
+
+	/* Number of pad bytes needed to reach a 4-byte boundary. */
+	padding = ((-login->rsp_length) & 3);
+
+	if (iscsi_login_tx_data(
+			conn,
+			login->rsp,
+			login->rsp_buf,
+			login->rsp_length + padding) < 0)
+		return -1;
+
+	login->rsp_length		= 0;
+	login_rsp->tsih			= be16_to_cpu(login_rsp->tsih);
+	login_rsp->itt			= be32_to_cpu(login_rsp->itt);
+	login_rsp->statsn		= be32_to_cpu(login_rsp->statsn);
+	mutex_lock(&sess->cmdsn_mutex);
+	login_rsp->exp_cmdsn		= be32_to_cpu(sess->exp_cmd_sn);
+	login_rsp->max_cmdsn		= be32_to_cpu(sess->max_cmd_sn);
+	mutex_unlock(&sess->cmdsn_mutex);
+
+	return 0;
+}
+
+/*
+ * Receive the next Login Request: read ISCSI_HDR_LEN bytes into
+ * login->req, convert the multi-byte header fields to CPU order in
+ * place, validate the header with iscsi_target_check_login_request(),
+ * then zero login->req_buf and read the 4-byte padded payload into it.
+ *
+ * Returns 0 on success, -1 on a receive or validation failure.
+ */
+static int iscsi_target_do_rx_login_io(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+	u32 padding = 0, payload_length;
+	struct iscsi_login_req *login_req;
+
+	if (iscsi_login_rx_data(conn, login->req, ISCSI_HDR_LEN) < 0)
+		return -1;
+
+	login_req = (struct iscsi_login_req *) login->req;
+	payload_length			= ntoh24(login_req->dlength);
+	login_req->tsih			= be16_to_cpu(login_req->tsih);
+	login_req->itt			= be32_to_cpu(login_req->itt);
+	login_req->cid			= be16_to_cpu(login_req->cid);
+	login_req->cmdsn		= be32_to_cpu(login_req->cmdsn);
+	login_req->exp_statsn		= be32_to_cpu(login_req->exp_statsn);
+
+	pr_debug("Got Login Command, Flags 0x%02x, ITT: 0x%08x,"
+		" CmdSN: 0x%08x, ExpStatSN: 0x%08x, CID: %hu, Length: %u\n",
+		 login_req->flags, login_req->itt, login_req->cmdsn,
+		 login_req->exp_statsn, login_req->cid, payload_length);
+
+	if (iscsi_target_check_login_request(conn, login) < 0)
+		return -1;
+
+	padding = ((-payload_length) & 3);
+	memset(login->req_buf, 0, MAX_KEY_VALUE_PAIRS);
+
+	if (iscsi_login_rx_data(
+			conn,
+			login->req_buf,
+			payload_length + padding) < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * One login round trip: send the pending Login Response, then receive
+ * the next Login Request.  Returns 0 on success, -1 if either half fails.
+ */
+static int iscsi_target_do_login_io(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+	if (iscsi_target_do_tx_login_io(conn, login) < 0)
+		return -1;
+
+	if (iscsi_target_do_rx_login_io(conn, login) < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Validate the Login Request header already stored in login->req and
+ * read its 4-byte padded payload into login->req_buf.  Unlike
+ * iscsi_target_do_rx_login_io() this neither receives the header nor
+ * converts its fields.
+ *
+ * Returns 0 on success, -1 on a validation or receive failure.
+ */
+static int iscsi_target_get_initial_payload(
+	struct iscsi_conn *conn,
+	struct iscsi_login *login)
+{
+	u32 padding = 0, payload_length;
+	struct iscsi_login_req *login_req;
+
+	login_req = (struct iscsi_login_req *) login->req;
+	payload_length = ntoh24(login_req->dlength);
+
+	pr_debug("Got Login Command, Flags 0x%02x, ITT: 0x%08x,"
+		" CmdSN: 0x%08x, ExpStatSN: 0x%08x, Length: %u\n",
+		login_req->flags, login_req->itt, login_req->cmdsn,
+		login_req->exp_statsn, payload_length);
+
+	if (iscsi_target_check_login_request(conn, login) < 0)
+		return -1;
+
+	padding = ((-payload_length) & 3);
+
+	if (iscsi_login_rx_data(
+			conn,
+			login->req_buf,
+			payload_length + padding) < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * NOTE: We check for existing sessions or connections AFTER the initiator
+ * has been successfully authenticated in order to protect against faked
+ * ISID/TSIH combinations.
+ *
+ * The check runs at most once per login; login->checked_for_existing
+ * records that it has been done.  A zero TSIH goes through
+ * iscsi_check_for_session_reinstatement(), a non-zero TSIH through
+ * iscsi_login_post_auth_non_zero_tsih().
+ */
+static int iscsi_target_check_for_existing_instances(
+	struct iscsi_conn *conn,
+	struct iscsi_login *login)
+{
+	if (login->checked_for_existing)
+		return 0;
+
+	login->checked_for_existing = 1;
+
+	if (!login->tsih)
+		return iscsi_check_for_session_reinstatement(conn);
+	else
+		return iscsi_login_post_auth_non_zero_tsih(conn, login->cid,
+				login->initial_exp_statsn);
+}
+
+/*
+ * Hand the request payload to iscsi_handle_authentication() using the
+ * value of the AuthMethod parameter and act on its result:
+ *   0       - exchange continues, return 0;
+ *   1       - authentication complete; if the initiator asked to transit
+ *             to stage 1 the response is flagged accordingly, then
+ *             existing sessions/connections are checked;
+ *   2       - reject the login with AUTH_FAILED;
+ *   other   - reject the login with a target error.
+ *
+ * Returns 0 on success, -1 on failure (also when AuthMethod is missing).
+ */
+static int iscsi_target_do_authentication(
+	struct iscsi_conn *conn,
+	struct iscsi_login *login)
+{
+	int authret;
+	u32 payload_length;
+	struct iscsi_param *param;
+	struct iscsi_login_req *login_req;
+	struct iscsi_login_rsp *login_rsp;
+
+	login_req = (struct iscsi_login_req *) login->req;
+	login_rsp = (struct iscsi_login_rsp *) login->rsp;
+	payload_length = ntoh24(login_req->dlength);
+
+	param = iscsi_find_param_from_key(AUTHMETHOD, conn->param_list);
+	if (!param)
+		return -1;
+
+	authret = iscsi_handle_authentication(
+			conn,
+			login->req_buf,
+			login->rsp_buf,
+			payload_length,
+			&login->rsp_length,
+			param->value);
+	switch (authret) {
+	case 0:
+		pr_debug("Received OK response"
+		" from LIO Authentication, continuing.\n");
+		break;
+	case 1:
+		pr_debug("iSCSI security negotiation"
+			" completed sucessfully.\n");
+		login->auth_complete = 1;
+		if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE1) &&
+		    (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) {
+			login_rsp->flags |= (ISCSI_FLAG_LOGIN_NEXT_STAGE1 |
+					     ISCSI_FLAG_LOGIN_TRANSIT);
+			login->current_stage = 1;
+		}
+		return iscsi_target_check_for_existing_instances(
+				conn, login);
+	case 2:
+		pr_err("Security negotiation"
+			" failed.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_AUTH_FAILED);
+		return -1;
+	default:
+		pr_err("Received unknown error %d from LIO"
+				" Authentication\n", authret);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_TARGET_ERROR);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handle a Login Request whose current stage (CSG) is 0.
+ *
+ * The payload is decoded with PHASE_SECURITY|PHASE_DECLARATIVE.  A
+ * positive decode result is routed to iscsi_target_do_authentication(),
+ * unless authentication already completed, which is a protocol error.
+ * Otherwise the first-request checks run (once), the response keys are
+ * encoded, and - when iscsi_check_negotiated_keys() returns 0 and the
+ * authentication requirements are satisfied - a requested transit to
+ * stage 1 is granted in the response flags.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int iscsi_target_handle_csg_zero(
+	struct iscsi_conn *conn,
+	struct iscsi_login *login)
+{
+	int ret;
+	u32 payload_length;
+	struct iscsi_param *param;
+	struct iscsi_login_req *login_req;
+	struct iscsi_login_rsp *login_rsp;
+
+	login_req = (struct iscsi_login_req *) login->req;
+	login_rsp = (struct iscsi_login_rsp *) login->rsp;
+	payload_length = ntoh24(login_req->dlength);
+
+	param = iscsi_find_param_from_key(AUTHMETHOD, conn->param_list);
+	if (!param)
+		return -1;
+
+	ret = iscsi_decode_text_input(
+			PHASE_SECURITY|PHASE_DECLARATIVE,
+			SENDER_INITIATOR|SENDER_RECEIVER,
+			login->req_buf,
+			payload_length,
+			conn->param_list);
+	if (ret < 0)
+		return -1;
+
+	if (ret > 0) {
+		if (login->auth_complete) {
+			pr_err("Initiator has already been"
+				" successfully authenticated, but is still"
+				" sending %s keys.\n", param->value);
+			iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+					ISCSI_LOGIN_STATUS_INIT_ERR);
+			return -1;
+		}
+
+		goto do_auth;
+	}
+
+	if (login->first_request)
+		if (iscsi_target_check_first_request(conn, login) < 0)
+			return -1;
+
+	ret = iscsi_encode_text_output(
+			PHASE_SECURITY|PHASE_DECLARATIVE,
+			SENDER_TARGET,
+			login->rsp_buf,
+			&login->rsp_length,
+			conn->param_list);
+	if (ret < 0)
+		return -1;
+
+	if (!iscsi_check_negotiated_keys(conn->param_list)) {
+		if (ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication &&
+		    !strncmp(param->value, NONE, 4)) {
+			pr_err("Initiator sent AuthMethod=None but"
+				" Target is enforcing iSCSI Authentication,"
+					" login failed.\n");
+			iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+					ISCSI_LOGIN_STATUS_AUTH_FAILED);
+			return -1;
+		}
+
+		if (ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication &&
+		    !login->auth_complete)
+			return 0;
+
+		if (strncmp(param->value, NONE, 4) && !login->auth_complete)
+			return 0;
+
+		if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE1) &&
+		    (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) {
+			login_rsp->flags |= ISCSI_FLAG_LOGIN_NEXT_STAGE1 |
+					    ISCSI_FLAG_LOGIN_TRANSIT;
+			login->current_stage = 1;
+		}
+	}
+
+	return 0;
+do_auth:
+	return iscsi_target_do_authentication(conn, login);
+}
+
+/*
+ * Handle a Login Request whose current stage (CSG) is 1.
+ *
+ * The payload is decoded with PHASE_OPERATIONAL|PHASE_DECLARATIVE, the
+ * first-request checks run (once), existing sessions/connections are
+ * checked, and the response keys are encoded.  The login is rejected if
+ * the portal group enforces authentication and it has not completed.
+ * When iscsi_check_negotiated_keys() returns 0, a requested transit to
+ * stage 3 is granted in the response flags.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int iscsi_target_handle_csg_one(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+	int ret;
+	u32 payload_length;
+	struct iscsi_login_req *login_req;
+	struct iscsi_login_rsp *login_rsp;
+
+	login_req = (struct iscsi_login_req *) login->req;
+	login_rsp = (struct iscsi_login_rsp *) login->rsp;
+	payload_length = ntoh24(login_req->dlength);
+
+	ret = iscsi_decode_text_input(
+			PHASE_OPERATIONAL|PHASE_DECLARATIVE,
+			SENDER_INITIATOR|SENDER_RECEIVER,
+			login->req_buf,
+			payload_length,
+			conn->param_list);
+	if (ret < 0)
+		return -1;
+
+	if (login->first_request)
+		if (iscsi_target_check_first_request(conn, login) < 0)
+			return -1;
+
+	if (iscsi_target_check_for_existing_instances(conn, login) < 0)
+		return -1;
+
+	ret = iscsi_encode_text_output(
+			PHASE_OPERATIONAL|PHASE_DECLARATIVE,
+			SENDER_TARGET,
+			login->rsp_buf,
+			&login->rsp_length,
+			conn->param_list);
+	if (ret < 0)
+		return -1;
+
+	if (!login->auth_complete &&
+	     ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication) {
+		pr_err("Initiator is requesting CSG: 1, has not been"
+			 " successfully authenticated, and the Target is"
+			" enforcing iSCSI Authentication, login failed.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_AUTH_FAILED);
+		return -1;
+	}
+
+	if (!iscsi_check_negotiated_keys(conn->param_list))
+		if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE3) &&
+		    (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT))
+			login_rsp->flags |= ISCSI_FLAG_LOGIN_NEXT_STAGE3 |
+					    ISCSI_FLAG_LOGIN_TRANSIT;
+
+	return 0;
+}
+
+/*
+ * Main login loop.  Each iteration dispatches on the CSG bits of the
+ * current request, then sends the response and receives the next
+ * request.  The loop ends successfully when the stage 1 handler sets
+ * the TRANSIT flag in the response: the final Login Response is sent
+ * with the session's TSIH and 0 is returned.  More than MAX_LOGIN_PDUS
+ * iterations, or any handler or I/O failure, returns -1.
+ */
+static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+	int pdu_count = 0;
+	struct iscsi_login_req *login_req;
+	struct iscsi_login_rsp *login_rsp;
+
+	login_req = (struct iscsi_login_req *) login->req;
+	login_rsp = (struct iscsi_login_rsp *) login->rsp;
+
+	while (1) {
+		if (++pdu_count > MAX_LOGIN_PDUS) {
+			pr_err("MAX_LOGIN_PDUS count reached.\n");
+			iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+					ISCSI_LOGIN_STATUS_TARGET_ERROR);
+			return -1;
+		}
+
+		switch ((login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2) {
+		case 0:
+			login_rsp->flags |= (0 & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK);
+			if (iscsi_target_handle_csg_zero(conn, login) < 0)
+				return -1;
+			break;
+		case 1:
+			login_rsp->flags |= ISCSI_FLAG_LOGIN_CURRENT_STAGE1;
+			if (iscsi_target_handle_csg_one(conn, login) < 0)
+				return -1;
+			if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) {
+				login->tsih = conn->sess->tsih;
+				if (iscsi_target_do_tx_login_io(conn,
+						login) < 0)
+					return -1;
+				return 0;
+			}
+			break;
+		default:
+			pr_err("Illegal CSG: %d received from"
+				" Initiator, protocol error.\n",
+				(login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK)
+				>> 2);
+			break;
+		}
+
+		if (iscsi_target_do_login_io(conn, login) < 0)
+			return -1;
+
+		/* A granted stage transition applies to one response only. */
+		if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) {
+			login_rsp->flags &= ~ISCSI_FLAG_LOGIN_TRANSIT;
+			login_rsp->flags &= ~ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Convert every upper-case character of the NUL-terminated string
+ * @param_buf to lower case, in place.
+ */
+static void iscsi_initiatorname_tolower(
+	char *param_buf)
+{
+	char *c;
+	u32 iqn_size = strlen(param_buf), i;
+
+	for (i = 0; i < iqn_size; i++) {
+		c = (char *)&param_buf[i];
+		if (!isupper(*c))
+			continue;
+
+		*c = tolower(*c);
+	}
+}
+
+/*
+ * Processes the first Login Request..
+ */
+/*
+ * Records the header fields of the first Login Request in @login, reads
+ * its payload and scans a NUL-terminated copy of it for the
+ * InitiatorName, SessionType and TargetName keys.
+ *
+ * InitiatorName is mandatory and is lower-cased in place.  SessionType
+ * is mandatory on a leading connection (TSIH of zero).  For a Discovery
+ * session conn->tpg is set to iscsit_global->discovery_tpg; otherwise
+ * TargetName is resolved to a target IQN and then, through @np, to a
+ * portal group.  On the leading connection of a non-discovery session the
+ * initiator's node ACL is looked up and stored in
+ * sess->se_sess->se_node_acl.
+ *
+ * Returns 0 on success and -1 on failure.  The temporary payload copy is
+ * freed on every path taken after it has been allocated.
+ */
+static int iscsi_target_locate_portal(
+	struct iscsi_np *np,
+	struct iscsi_conn *conn,
+	struct iscsi_login *login)
+{
+	char *i_buf = NULL, *s_buf = NULL, *t_buf = NULL;
+	char *tmpbuf, *start = NULL, *end = NULL, *key, *value;
+	struct iscsi_session *sess = conn->sess;
+	struct iscsi_tiqn *tiqn;
+	struct iscsi_login_req *login_req;
+	struct iscsi_targ_login_rsp *login_rsp;
+	u32 payload_length;
+	int sessiontype = 0, ret = 0;
+
+	login_req = (struct iscsi_login_req *) login->req;
+	login_rsp = (struct iscsi_targ_login_rsp *) login->rsp;
+	payload_length = ntoh24(login_req->dlength);
+
+	login->first_request	= 1;
+	login->leading_connection = (!login_req->tsih) ? 1 : 0;
+	login->current_stage	=
+		(login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2;
+	login->version_min	= login_req->min_version;
+	login->version_max	= login_req->max_version;
+	memcpy(login->isid, login_req->isid, 6);
+	login->cmd_sn		= login_req->cmdsn;
+	login->init_task_tag	= login_req->itt;
+	login->initial_exp_statsn = login_req->exp_statsn;
+	login->cid		= login_req->cid;
+	login->tsih		= login_req->tsih;
+
+	if (iscsi_target_get_initial_payload(conn, login) < 0)
+		return -1;
+
+	tmpbuf = kzalloc(payload_length + 1, GFP_KERNEL);
+	if (!tmpbuf) {
+		pr_err("Unable to allocate memory for tmpbuf.\n");
+		return -1;
+	}
+
+	memcpy(tmpbuf, login->req_buf, payload_length);
+	tmpbuf[payload_length] = '\0';
+	start = tmpbuf;
+	end = (start + payload_length);
+
+	/*
+	 * Locate the initial keys expected from the Initiator node in
+	 * the first login request in order to progress with the login phase.
+	 */
+	while (start < end) {
+		if (iscsi_extract_key_value(start, &key, &value) < 0) {
+			ret = -1;
+			goto out;
+		}
+
+		if (!strncmp(key, "InitiatorName", 13))
+			i_buf = value;
+		else if (!strncmp(key, "SessionType", 11))
+			s_buf = value;
+		else if (!strncmp(key, "TargetName", 10))
+			t_buf = value;
+
+		/* Skip "key", the separator, "value" and its terminator. */
+		start += strlen(key) + strlen(value) + 2;
+	}
+
+	/*
+	 * See 5.3.  Login Phase.
+	 */
+	if (!i_buf) {
+		pr_err("InitiatorName key not received"
+			" in first login request.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+			ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+		ret = -1;
+		goto out;
+	}
+	/*
+	 * Convert the incoming InitiatorName to lowercase following
+	 * RFC-3720 3.2.6.1. section c) that says that iSCSI IQNs
+	 * are NOT case sensitive.
+	 */
+	iscsi_initiatorname_tolower(i_buf);
+
+	if (!s_buf) {
+		if (!login->leading_connection)
+			goto get_target;
+
+		pr_err("SessionType key not received"
+			" in first login request.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+			ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+		ret = -1;
+		goto out;
+	}
+
+	/*
+	 * Use default portal group for discovery sessions.
+	 */
+	sessiontype = strncmp(s_buf, DISCOVERY, 9);
+	if (!sessiontype) {
+		conn->tpg = iscsit_global->discovery_tpg;
+		if (!login->leading_connection)
+			goto get_target;
+
+		sess->sess_ops->SessionType = 1;
+		/*
+		 * Setup crc32c modules from libcrypto
+		 */
+		if (iscsi_login_setup_crypto(conn) < 0) {
+			pr_err("iscsi_login_setup_crypto() failed\n");
+			ret = -1;
+			goto out;
+		}
+		/*
+		 * Serialize access across the discovery struct iscsi_portal_group to
+		 * process login attempt.
+		 */
+		if (iscsit_access_np(np, conn->tpg) < 0) {
+			iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+			ret = -1;
+			goto out;
+		}
+		ret = 0;
+		goto out;
+	}
+
+get_target:
+	if (!t_buf) {
+		pr_err("TargetName key not received"
+			" in first login request while"
+			" SessionType=Normal.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+			ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+		ret = -1;
+		goto out;
+	}
+
+	/*
+	 * Locate Target IQN from Storage Node.
+	 */
+	tiqn = iscsit_get_tiqn_for_login(t_buf);
+	if (!tiqn) {
+		pr_err("Unable to locate Target IQN: %s in"
+			" Storage Node\n", t_buf);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+		ret = -1;
+		goto out;
+	}
+	pr_debug("Located Storage Object: %s\n", tiqn->tiqn);
+
+	/*
+	 * Locate Target Portal Group from Storage Node.
+	 */
+	conn->tpg = iscsit_get_tpg_from_np(tiqn, np);
+	if (!conn->tpg) {
+		pr_err("Unable to locate Target Portal Group"
+				" on %s\n", tiqn->tiqn);
+		iscsit_put_tiqn_for_login(tiqn);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+		ret = -1;
+		goto out;
+	}
+	pr_debug("Located Portal Group Object: %hu\n", conn->tpg->tpgt);
+	/*
+	 * Setup crc32c modules from libcrypto
+	 *
+	 * NOTE(review): unlike the failure paths directly above and below,
+	 * this one neither calls iscsit_put_tiqn_for_login() nor clears
+	 * conn->tpg - confirm against the caller's cleanup whether the
+	 * tiqn reference is released elsewhere.
+	 */
+	if (iscsi_login_setup_crypto(conn) < 0) {
+		pr_err("iscsi_login_setup_crypto() failed\n");
+		ret = -1;
+		goto out;
+	}
+	/*
+	 * Serialize access across the struct iscsi_portal_group to
+	 * process login attempt.
+	 */
+	if (iscsit_access_np(np, conn->tpg) < 0) {
+		iscsit_put_tiqn_for_login(tiqn);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+		ret = -1;
+		conn->tpg = NULL;
+		goto out;
+	}
+
+	/*
+	 * conn->sess->node_acl will be set when the referenced
+	 * struct iscsi_session is located from received ISID+TSIH in
+	 * iscsi_login_non_zero_tsih_s2().
+	 */
+	if (!login->leading_connection) {
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * This value is required in iscsi_login_zero_tsih_s2()
+	 */
+	sess->sess_ops->SessionType = 0;
+
+	/*
+	 * Locate incoming Initiator IQN reference from Storage Node.
+	 */
+	sess->se_sess->se_node_acl = core_tpg_check_initiator_node_acl(
+			&conn->tpg->tpg_se_tpg, i_buf);
+	if (!sess->se_sess->se_node_acl) {
+		pr_err("iSCSI Initiator Node: %s is not authorized to"
+			" access iSCSI target portal group: %hu.\n",
+				i_buf, conn->tpg->tpgt);
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+				ISCSI_LOGIN_STATUS_TGT_FORBIDDEN);
+		ret = -1;
+		goto out;
+	}
+
+	ret = 0;
+out:
+	kfree(tmpbuf);
+	return ret;
+}
+
+/*
+ * Allocate the login context for a new connection: a zeroed
+ * struct iscsi_login, a copy of the ISCSI_HDR_LEN byte header in
+ * @login_pdu, and a MAX_KEY_VALUE_PAIRS byte request buffer; then process
+ * the first Login Request with iscsi_target_locate_portal().
+ *
+ * Returns the new context, or NULL on failure with everything allocated
+ * here freed again.
+ */
+struct iscsi_login *iscsi_target_init_negotiation(
+	struct iscsi_np *np,
+	struct iscsi_conn *conn,
+	char *login_pdu)
+{
+	struct iscsi_login *login;
+
+	login = kzalloc(sizeof(struct iscsi_login), GFP_KERNEL);
+	if (!login) {
+		pr_err("Unable to allocate memory for struct iscsi_login.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		/*
+		 * Nothing to release yet; the out: label dereferences
+		 * login and must not be reached with a NULL pointer.
+		 */
+		return NULL;
+	}
+
+	login->req = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
+	if (!login->req) {
+		pr_err("Unable to allocate memory for Login Request.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		goto out;
+	}
+	memcpy(login->req, login_pdu, ISCSI_HDR_LEN);
+
+	login->req_buf = kzalloc(MAX_KEY_VALUE_PAIRS, GFP_KERNEL);
+	if (!login->req_buf) {
+		pr_err("Unable to allocate memory for request buffer.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		goto out;
+	}
+	/*
+	 * SessionType: Discovery
+	 *
+	 *	Locates Default Portal
+	 *
+	 * SessionType: Normal
+	 *
+	 *	Locates Target Portal from NP -> Target IQN
+	 */
+	if (iscsi_target_locate_portal(np, conn, login) < 0) {
+		pr_err("iSCSI Login negotiation failed.\n");
+		goto out;
+	}
+
+	return login;
+out:
+	kfree(login->req);
+	kfree(login->req_buf);
+	kfree(login);
+
+	return NULL;
+}
+
+/*
+ * Allocate the response header and response text buffer, then run the
+ * login state machine (iscsi_target_do_login()).
+ *
+ * @login is always released through iscsi_target_nego_release() before
+ * returning, so it must not be used afterwards.  On any failure
+ * iscsi_remove_failed_auth_entry() is called for @conn first.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int iscsi_target_start_negotiation(
+	struct iscsi_login *login,
+	struct iscsi_conn *conn)
+{
+	int ret = -1;
+
+	login->rsp = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
+	if (!login->rsp) {
+		pr_err("Unable to allocate memory for"
+				" Login Response.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		ret = -1;
+		goto out;
+	}
+
+	login->rsp_buf = kzalloc(MAX_KEY_VALUE_PAIRS, GFP_KERNEL);
+	if (!login->rsp_buf) {
+		pr_err("Unable to allocate memory for"
+			" response buffer.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+				ISCSI_LOGIN_STATUS_NO_RESOURCES);
+		ret = -1;
+		goto out;
+	}
+
+	ret = iscsi_target_do_login(conn, login);
+out:
+	if (ret != 0)
+		iscsi_remove_failed_auth_entry(conn);
+
+	iscsi_target_nego_release(login, conn);
+	return ret;
+}
+
+/*
+ * Free a login context together with its request/response headers and
+ * text buffers.  @conn is not used.
+ */
+void iscsi_target_nego_release(
+	struct iscsi_login *login,
+	struct iscsi_conn *conn)
+{
+	kfree(login->req);
+	kfree(login->rsp);
+	kfree(login->req_buf);
+	kfree(login->rsp_buf);
+	kfree(login);
+}
diff --git a/drivers/target/iscsi/iscsi_target_nego.h b/drivers/target/iscsi/iscsi_target_nego.h
new file mode 100644
index 000000000000..92e133a5158f
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_nego.h
@@ -0,0 +1,17 @@
+#ifndef ISCSI_TARGET_NEGO_H
+#define ISCSI_TARGET_NEGO_H
+
+#define DECIMAL         0
+#define HEX             1
+
+extern void convert_null_to_semi(char *, int);
+extern int extract_param(const char *, const char *, unsigned int, char *,
+		unsigned char *);
+extern struct iscsi_login *iscsi_target_init_negotiation(
+		struct iscsi_np *, struct iscsi_conn *, char *);
+extern int iscsi_target_start_negotiation(
+		struct iscsi_login *, struct iscsi_conn *);
+extern void iscsi_target_nego_release(
+		struct iscsi_login *, struct iscsi_conn *);
+
+#endif /* ISCSI_TARGET_NEGO_H */
diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.c b/drivers/target/iscsi/iscsi_target_nodeattrib.c
new file mode 100644
index 000000000000..aeafbe0cd7d1
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_nodeattrib.c
@@ -0,0 +1,263 @@
+/*******************************************************************************
+ * This file contains the main functions related to Initiator Node
Attributes.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_nodeattrib.h"
+
+/*
+ * Return the initiator name stored in the se_node_acl embedded in @nacl.
+ */
+static inline char *iscsit_na_get_initiatorname(
+	struct iscsi_node_acl *nacl)
+{
+	struct se_node_acl *se_nacl = &nacl->se_node_acl;
+
+	return &se_nacl->initiatorname[0];
+}
+
+/*
+ * Initialise every field of @acl's node attributes to its NA_* default.
+ */
+void iscsit_set_default_node_attribues(
+	struct iscsi_node_acl *acl)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	a->dataout_timeout = NA_DATAOUT_TIMEOUT;
+	a->dataout_timeout_retries = NA_DATAOUT_TIMEOUT_RETRIES;
+	a->nopin_timeout = NA_NOPIN_TIMEOUT;
+	a->nopin_response_timeout = NA_NOPIN_RESPONSE_TIMEOUT;
+	a->random_datain_pdu_offsets = NA_RANDOM_DATAIN_PDU_OFFSETS;
+	a->random_datain_seq_offsets = NA_RANDOM_DATAIN_SEQ_OFFSETS;
+	a->random_r2t_offsets = NA_RANDOM_R2T_OFFSETS;
+	a->default_erl = NA_DEFAULT_ERL;
+}
+
+/*
+ * Set the DataOut timeout of @acl.  The value must lie within
+ * [NA_DATAOUT_TIMEOUT_MIX, NA_DATAOUT_TIMEOUT_MAX].
+ *
+ * Returns 0 on success, -EINVAL if the value is out of range.
+ */
+extern int iscsit_na_dataout_timeout(
+	struct iscsi_node_acl *acl,
+	u32 dataout_timeout)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (dataout_timeout > NA_DATAOUT_TIMEOUT_MAX) {
+		pr_err("Requested DataOut Timeout %u larger than"
+			" maximum %u\n", dataout_timeout,
+			NA_DATAOUT_TIMEOUT_MAX);
+		return -EINVAL;
+	} else if (dataout_timeout < NA_DATAOUT_TIMEOUT_MIX) {
+		pr_err("Requested DataOut Timeout %u smaller than"
+			" minimum %u\n", dataout_timeout,
+			NA_DATAOUT_TIMEOUT_MIX);
+		return -EINVAL;
+	}
+
+	a->dataout_timeout = dataout_timeout;
+	pr_debug("Set DataOut Timeout to %u for Initiator Node"
+		" %s\n", a->dataout_timeout, iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
+
+/*
+ * Set the DataOut timeout retry count of @acl.  The value must lie within
+ * [NA_DATAOUT_TIMEOUT_RETRIES_MIN, NA_DATAOUT_TIMEOUT_RETRIES_MAX].
+ *
+ * Returns 0 on success, -EINVAL if the value is out of range.
+ */
+extern int iscsit_na_dataout_timeout_retries(
+	struct iscsi_node_acl *acl,
+	u32 dataout_timeout_retries)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (dataout_timeout_retries > NA_DATAOUT_TIMEOUT_RETRIES_MAX) {
+		pr_err("Requested DataOut Timeout Retries %u larger"
+			" than maximum %u\n", dataout_timeout_retries,
+				NA_DATAOUT_TIMEOUT_RETRIES_MAX);
+		return -EINVAL;
+	} else if (dataout_timeout_retries < NA_DATAOUT_TIMEOUT_RETRIES_MIN) {
+		pr_err("Requested DataOut Timeout Retries %u smaller"
+			" than minimum %u\n", dataout_timeout_retries,
+				NA_DATAOUT_TIMEOUT_RETRIES_MIN);
+		return -EINVAL;
+	}
+
+	a->dataout_timeout_retries = dataout_timeout_retries;
+	pr_debug("Set DataOut Timeout Retries to %u for"
+		" Initiator Node %s\n", a->dataout_timeout_retries,
+		iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
+
+/*
+ * Set the NopIn timeout of @acl.  Accepted values are 0 or anything
+ * within [NA_NOPIN_TIMEOUT_MIN, NA_NOPIN_TIMEOUT_MAX].
+ *
+ * If the previous value was 0, the NopIn timer is started on every
+ * connection of the ACL's current session that is in
+ * TARG_CONN_STATE_LOGGED_IN.
+ *
+ * Returns 0 on success, -EINVAL if the value is out of range.
+ */
+extern int iscsit_na_nopin_timeout(
+	struct iscsi_node_acl *acl,
+	u32 nopin_timeout)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+	struct iscsi_session *sess;
+	struct iscsi_conn *conn;
+	struct se_node_acl *se_nacl = &a->nacl->se_node_acl;
+	struct se_session *se_sess;
+	u32 orig_nopin_timeout = a->nopin_timeout;
+
+	if (nopin_timeout > NA_NOPIN_TIMEOUT_MAX) {
+		pr_err("Requested NopIn Timeout %u larger than maximum"
+			" %u\n", nopin_timeout, NA_NOPIN_TIMEOUT_MAX);
+		return -EINVAL;
+	} else if ((nopin_timeout < NA_NOPIN_TIMEOUT_MIN) &&
+		   (nopin_timeout != 0)) {
+		pr_err("Requested NopIn Timeout %u smaller than"
+			" minimum %u and not 0\n", nopin_timeout,
+			NA_NOPIN_TIMEOUT_MIN);
+		return -EINVAL;
+	}
+
+	a->nopin_timeout = nopin_timeout;
+	pr_debug("Set NopIn Timeout to %u for Initiator"
+		" Node %s\n", a->nopin_timeout,
+		iscsit_na_get_initiatorname(acl));
+	/*
+	 * Reenable disabled nopin_timeout timer for all iSCSI connections.
+	 */
+	if (!orig_nopin_timeout) {
+		spin_lock_bh(&se_nacl->nacl_sess_lock);
+		se_sess = se_nacl->nacl_sess;
+		if (se_sess) {
+			sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+
+			spin_lock(&sess->conn_lock);
+			list_for_each_entry(conn, &sess->sess_conn_list,
+					conn_list) {
+				if (conn->conn_state !=
+						TARG_CONN_STATE_LOGGED_IN)
+					continue;
+
+				spin_lock(&conn->nopin_timer_lock);
+				__iscsit_start_nopin_timer(conn);
+				spin_unlock(&conn->nopin_timer_lock);
+			}
+			spin_unlock(&sess->conn_lock);
+		}
+		spin_unlock_bh(&se_nacl->nacl_sess_lock);
+	}
+
+	return 0;
+}
+
+/*
+ * Set the NopIn response timeout of @acl.  The value must lie within
+ * [NA_NOPIN_RESPONSE_TIMEOUT_MIN, NA_NOPIN_RESPONSE_TIMEOUT_MAX].
+ *
+ * Returns 0 on success, -EINVAL if the value is out of range.
+ */
+extern int iscsit_na_nopin_response_timeout(
+	struct iscsi_node_acl *acl,
+	u32 nopin_response_timeout)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (nopin_response_timeout > NA_NOPIN_RESPONSE_TIMEOUT_MAX) {
+		pr_err("Requested NopIn Response Timeout %u larger"
+			" than maximum %u\n", nopin_response_timeout,
+				NA_NOPIN_RESPONSE_TIMEOUT_MAX);
+		return -EINVAL;
+	} else if (nopin_response_timeout < NA_NOPIN_RESPONSE_TIMEOUT_MIN) {
+		pr_err("Requested NopIn Response Timeout %u smaller"
+			" than minimum %u\n", nopin_response_timeout,
+				NA_NOPIN_RESPONSE_TIMEOUT_MIN);
+		return -EINVAL;
+	}
+
+	a->nopin_response_timeout = nopin_response_timeout;
+	/* Report the attribute that was just stored. */
+	pr_debug("Set NopIn Response Timeout to %u for"
+		" Initiator Node %s\n", a->nopin_response_timeout,
+		iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
+
+/*
+ * Set the random DataIN PDU offsets flag of @acl; only 0 and 1 are
+ * accepted.  Returns 0 on success, -EINVAL otherwise.
+ */
+extern int iscsit_na_random_datain_pdu_offsets(
+	struct iscsi_node_acl *acl,
+	u32 random_datain_pdu_offsets)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (random_datain_pdu_offsets != 0 && random_datain_pdu_offsets != 1) {
+		pr_err("Requested Random DataIN PDU Offsets: %u not"
+			" 0 or 1\n", random_datain_pdu_offsets);
+		return -EINVAL;
+	}
+
+	a->random_datain_pdu_offsets = random_datain_pdu_offsets;
+	pr_debug("Set Random DataIN PDU Offsets to %u for"
+		" Initiator Node %s\n", a->random_datain_pdu_offsets,
+		iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
+
+/*
+ * Set the random DataIN sequence offsets flag of @acl; only 0 and 1 are
+ * accepted.  Returns 0 on success, -EINVAL otherwise.
+ */
+extern int iscsit_na_random_datain_seq_offsets(
+	struct iscsi_node_acl *acl,
+	u32 random_datain_seq_offsets)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (random_datain_seq_offsets != 0 && random_datain_seq_offsets != 1) {
+		pr_err("Requested Random DataIN Sequence Offsets: %u"
+			" not 0 or 1\n", random_datain_seq_offsets);
+		return -EINVAL;
+	}
+
+	a->random_datain_seq_offsets = random_datain_seq_offsets;
+	pr_debug("Set Random DataIN Sequence Offsets to %u for"
+		" Initiator Node %s\n", a->random_datain_seq_offsets,
+		iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
+
+/*
+ * Set the random R2T offsets flag of @acl; only 0 and 1 are accepted.
+ * Returns 0 on success, -EINVAL otherwise.
+ */
+extern int iscsit_na_random_r2t_offsets(
+	struct iscsi_node_acl *acl,
+	u32 random_r2t_offsets)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (random_r2t_offsets != 0 && random_r2t_offsets != 1) {
+		pr_err("Requested Random R2T Offsets: %u not"
+			" 0 or 1\n", random_r2t_offsets);
+		return -EINVAL;
+	}
+
+	a->random_r2t_offsets = random_r2t_offsets;
+	pr_debug("Set Random R2T Offsets to %u for"
+		" Initiator Node %s\n", a->random_r2t_offsets,
+		iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
+
+/*
+ * Set the default ErrorRecoveryLevel of @acl; only 0, 1 and 2 are
+ * accepted.  Returns 0 on success, -EINVAL otherwise.
+ */
+extern int iscsit_na_default_erl(
+	struct iscsi_node_acl *acl,
+	u32 default_erl)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (default_erl != 0 && default_erl != 1 && default_erl != 2) {
+		pr_err("Requested default ERL: %u not 0, 1, or 2\n",
+				default_erl);
+		return -EINVAL;
+	}
+
+	a->default_erl = default_erl;
+	pr_debug("Set use ERL0 flag to %u for Initiator"
+		" Node %s\n", a->default_erl,
+		iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.h b/drivers/target/iscsi/iscsi_target_nodeattrib.h
new file
mode 100644
index 000000000000..c970b326ef23
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_nodeattrib.h
@@ -0,0 +1,14 @@
+#ifndef ISCSI_TARGET_NODEATTRIB_H
+#define ISCSI_TARGET_NODEATTRIB_H
+
+/*
+ * Per-initiator node attribute helpers.
+ *
+ * iscsit_set_default_node_attribues() loads the NA_* defaults.  Each
+ * iscsit_na_*() setter validates the requested value, stores it in the
+ * ACL's node_attrib and returns 0, or returns -EINVAL without storing
+ * anything when the value is rejected.
+ */
+extern void iscsit_set_default_node_attribues(struct iscsi_node_acl *);
+extern int iscsit_na_dataout_timeout(struct iscsi_node_acl *, u32);
+extern int iscsit_na_dataout_timeout_retries(struct iscsi_node_acl *, u32);
+extern int iscsit_na_nopin_timeout(struct iscsi_node_acl *, u32);
+extern int iscsit_na_nopin_response_timeout(struct iscsi_node_acl *, u32);
+extern int iscsit_na_random_datain_pdu_offsets(struct iscsi_node_acl *, u32);
+extern int iscsit_na_random_datain_seq_offsets(struct iscsi_node_acl *, u32);
+extern int iscsit_na_random_r2t_offsets(struct iscsi_node_acl *, u32);
+extern int iscsit_na_default_erl(struct iscsi_node_acl *, u32);
+
+#endif /* ISCSI_TARGET_NODEATTRIB_H */
diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
new file mode 100644
index 000000000000..252e246cf51e
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_parameters.c
@@ -0,0 +1,1905 @@
+/*******************************************************************************
+ * This file contains main functions related to iSCSI Parameter negotiation.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + ******************************************************************************/ + +#include <linux/slab.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_util.h" +#include "iscsi_target_parameters.h" + +int iscsi_login_rx_data( + struct iscsi_conn *conn, + char *buf, + int length) +{ + int rx_got; + struct kvec iov; + + memset(&iov, 0, sizeof(struct kvec)); + iov.iov_len = length; + iov.iov_base = buf; + + /* + * Initial Marker-less Interval. + * Add the values regardless of IFMarker/OFMarker, considering + * it may not be negoitated yet. + */ + conn->of_marker += length; + + rx_got = rx_data(conn, &iov, 1, length); + if (rx_got != length) { + pr_err("rx_data returned %d, expecting %d.\n", + rx_got, length); + return -1; + } + + return 0 ; +} + +int iscsi_login_tx_data( + struct iscsi_conn *conn, + char *pdu_buf, + char *text_buf, + int text_length) +{ + int length, tx_sent; + struct kvec iov[2]; + + length = (ISCSI_HDR_LEN + text_length); + + memset(&iov[0], 0, 2 * sizeof(struct kvec)); + iov[0].iov_len = ISCSI_HDR_LEN; + iov[0].iov_base = pdu_buf; + iov[1].iov_len = text_length; + iov[1].iov_base = text_buf; + + /* + * Initial Marker-less Interval. + * Add the values regardless of IFMarker/OFMarker, considering + * it may not be negoitated yet. + */ + conn->if_marker += length; + + tx_sent = tx_data(conn, &iov[0], 2, length); + if (tx_sent != length) { + pr_err("tx_data returned %d, expecting %d.\n", + tx_sent, length); + return -1; + } + + return 0; +} + +void iscsi_dump_conn_ops(struct iscsi_conn_ops *conn_ops) +{ + pr_debug("HeaderDigest: %s\n", (conn_ops->HeaderDigest) ? + "CRC32C" : "None"); + pr_debug("DataDigest: %s\n", (conn_ops->DataDigest) ? + "CRC32C" : "None"); + pr_debug("MaxRecvDataSegmentLength: %u\n", + conn_ops->MaxRecvDataSegmentLength); + pr_debug("OFMarker: %s\n", (conn_ops->OFMarker) ? "Yes" : "No"); + pr_debug("IFMarker: %s\n", (conn_ops->IFMarker) ? 
"Yes" : "No"); + if (conn_ops->OFMarker) + pr_debug("OFMarkInt: %u\n", conn_ops->OFMarkInt); + if (conn_ops->IFMarker) + pr_debug("IFMarkInt: %u\n", conn_ops->IFMarkInt); +} + +void iscsi_dump_sess_ops(struct iscsi_sess_ops *sess_ops) +{ + pr_debug("InitiatorName: %s\n", sess_ops->InitiatorName); + pr_debug("InitiatorAlias: %s\n", sess_ops->InitiatorAlias); + pr_debug("TargetName: %s\n", sess_ops->TargetName); + pr_debug("TargetAlias: %s\n", sess_ops->TargetAlias); + pr_debug("TargetPortalGroupTag: %hu\n", + sess_ops->TargetPortalGroupTag); + pr_debug("MaxConnections: %hu\n", sess_ops->MaxConnections); + pr_debug("InitialR2T: %s\n", + (sess_ops->InitialR2T) ? "Yes" : "No"); + pr_debug("ImmediateData: %s\n", (sess_ops->ImmediateData) ? + "Yes" : "No"); + pr_debug("MaxBurstLength: %u\n", sess_ops->MaxBurstLength); + pr_debug("FirstBurstLength: %u\n", sess_ops->FirstBurstLength); + pr_debug("DefaultTime2Wait: %hu\n", sess_ops->DefaultTime2Wait); + pr_debug("DefaultTime2Retain: %hu\n", + sess_ops->DefaultTime2Retain); + pr_debug("MaxOutstandingR2T: %hu\n", + sess_ops->MaxOutstandingR2T); + pr_debug("DataPDUInOrder: %s\n", + (sess_ops->DataPDUInOrder) ? "Yes" : "No"); + pr_debug("DataSequenceInOrder: %s\n", + (sess_ops->DataSequenceInOrder) ? "Yes" : "No"); + pr_debug("ErrorRecoveryLevel: %hu\n", + sess_ops->ErrorRecoveryLevel); + pr_debug("SessionType: %s\n", (sess_ops->SessionType) ? 
+ "Discovery" : "Normal"); +} + +void iscsi_print_params(struct iscsi_param_list *param_list) +{ + struct iscsi_param *param; + + list_for_each_entry(param, ¶m_list->param_list, p_list) + pr_debug("%s: %s\n", param->name, param->value); +} + +static struct iscsi_param *iscsi_set_default_param(struct iscsi_param_list *param_list, + char *name, char *value, u8 phase, u8 scope, u8 sender, + u16 type_range, u8 use) +{ + struct iscsi_param *param = NULL; + + param = kzalloc(sizeof(struct iscsi_param), GFP_KERNEL); + if (!param) { + pr_err("Unable to allocate memory for parameter.\n"); + goto out; + } + INIT_LIST_HEAD(¶m->p_list); + + param->name = kzalloc(strlen(name) + 1, GFP_KERNEL); + if (!param->name) { + pr_err("Unable to allocate memory for parameter name.\n"); + goto out; + } + + param->value = kzalloc(strlen(value) + 1, GFP_KERNEL); + if (!param->value) { + pr_err("Unable to allocate memory for parameter value.\n"); + goto out; + } + + memcpy(param->name, name, strlen(name)); + param->name[strlen(name)] = '\0'; + memcpy(param->value, value, strlen(value)); + param->value[strlen(value)] = '\0'; + param->phase = phase; + param->scope = scope; + param->sender = sender; + param->use = use; + param->type_range = type_range; + + switch (param->type_range) { + case TYPERANGE_BOOL_AND: + param->type = TYPE_BOOL_AND; + break; + case TYPERANGE_BOOL_OR: + param->type = TYPE_BOOL_OR; + break; + case TYPERANGE_0_TO_2: + case TYPERANGE_0_TO_3600: + case TYPERANGE_0_TO_32767: + case TYPERANGE_0_TO_65535: + case TYPERANGE_1_TO_65535: + case TYPERANGE_2_TO_3600: + case TYPERANGE_512_TO_16777215: + param->type = TYPE_NUMBER; + break; + case TYPERANGE_AUTH: + case TYPERANGE_DIGEST: + param->type = TYPE_VALUE_LIST | TYPE_STRING; + break; + case TYPERANGE_MARKINT: + param->type = TYPE_NUMBER_RANGE; + param->type_range |= TYPERANGE_1_TO_65535; + break; + case TYPERANGE_ISCSINAME: + case TYPERANGE_SESSIONTYPE: + case TYPERANGE_TARGETADDRESS: + case TYPERANGE_UTF8: + param->type = 
TYPE_STRING; + break; + default: + pr_err("Unknown type_range 0x%02x\n", + param->type_range); + goto out; + } + list_add_tail(¶m->p_list, ¶m_list->param_list); + + return param; +out: + if (param) { + kfree(param->value); + kfree(param->name); + kfree(param); + } + + return NULL; +} + +/* #warning Add extension keys */ +int iscsi_create_default_params(struct iscsi_param_list **param_list_ptr) +{ + struct iscsi_param *param = NULL; + struct iscsi_param_list *pl; + + pl = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL); + if (!pl) { + pr_err("Unable to allocate memory for" + " struct iscsi_param_list.\n"); + return -1 ; + } + INIT_LIST_HEAD(&pl->param_list); + INIT_LIST_HEAD(&pl->extra_response_list); + + /* + * The format for setting the initial parameter definitions are: + * + * Parameter name: + * Initial value: + * Allowable phase: + * Scope: + * Allowable senders: + * Typerange: + * Use: + */ + param = iscsi_set_default_param(pl, AUTHMETHOD, INITIAL_AUTHMETHOD, + PHASE_SECURITY, SCOPE_CONNECTION_ONLY, SENDER_BOTH, + TYPERANGE_AUTH, USE_INITIAL_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, HEADERDIGEST, INITIAL_HEADERDIGEST, + PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, + TYPERANGE_DIGEST, USE_INITIAL_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, DATADIGEST, INITIAL_DATADIGEST, + PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, + TYPERANGE_DIGEST, USE_INITIAL_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, MAXCONNECTIONS, + INITIAL_MAXCONNECTIONS, PHASE_OPERATIONAL, + SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_1_TO_65535, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, SENDTARGETS, INITIAL_SENDTARGETS, + PHASE_FFP0, SCOPE_SESSION_WIDE, SENDER_INITIATOR, + TYPERANGE_UTF8, 0); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, TARGETNAME, INITIAL_TARGETNAME, + PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_BOTH, 
+ TYPERANGE_ISCSINAME, USE_ALL); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, INITIATORNAME, + INITIAL_INITIATORNAME, PHASE_DECLARATIVE, + SCOPE_SESSION_WIDE, SENDER_INITIATOR, + TYPERANGE_ISCSINAME, USE_INITIAL_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, TARGETALIAS, INITIAL_TARGETALIAS, + PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_TARGET, + TYPERANGE_UTF8, USE_ALL); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, INITIATORALIAS, + INITIAL_INITIATORALIAS, PHASE_DECLARATIVE, + SCOPE_SESSION_WIDE, SENDER_INITIATOR, TYPERANGE_UTF8, + USE_ALL); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, TARGETADDRESS, + INITIAL_TARGETADDRESS, PHASE_DECLARATIVE, + SCOPE_SESSION_WIDE, SENDER_TARGET, + TYPERANGE_TARGETADDRESS, USE_ALL); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, TARGETPORTALGROUPTAG, + INITIAL_TARGETPORTALGROUPTAG, + PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_TARGET, + TYPERANGE_0_TO_65535, USE_INITIAL_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, INITIALR2T, INITIAL_INITIALR2T, + PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_BOOL_OR, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, IMMEDIATEDATA, + INITIAL_IMMEDIATEDATA, PHASE_OPERATIONAL, + SCOPE_SESSION_WIDE, SENDER_BOTH, TYPERANGE_BOOL_AND, + USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, MAXRECVDATASEGMENTLENGTH, + INITIAL_MAXRECVDATASEGMENTLENGTH, + PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, + TYPERANGE_512_TO_16777215, USE_ALL); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, MAXBURSTLENGTH, + INITIAL_MAXBURSTLENGTH, PHASE_OPERATIONAL, + SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_512_TO_16777215, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, FIRSTBURSTLENGTH, + INITIAL_FIRSTBURSTLENGTH, + PHASE_OPERATIONAL, 
SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_512_TO_16777215, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, DEFAULTTIME2WAIT, + INITIAL_DEFAULTTIME2WAIT, + PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_0_TO_3600, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, DEFAULTTIME2RETAIN, + INITIAL_DEFAULTTIME2RETAIN, + PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_0_TO_3600, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, MAXOUTSTANDINGR2T, + INITIAL_MAXOUTSTANDINGR2T, + PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_1_TO_65535, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, DATAPDUINORDER, + INITIAL_DATAPDUINORDER, PHASE_OPERATIONAL, + SCOPE_SESSION_WIDE, SENDER_BOTH, TYPERANGE_BOOL_OR, + USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, DATASEQUENCEINORDER, + INITIAL_DATASEQUENCEINORDER, + PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_BOOL_OR, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, ERRORRECOVERYLEVEL, + INITIAL_ERRORRECOVERYLEVEL, + PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_0_TO_2, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, SESSIONTYPE, INITIAL_SESSIONTYPE, + PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_INITIATOR, + TYPERANGE_SESSIONTYPE, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, IFMARKER, INITIAL_IFMARKER, + PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, + TYPERANGE_BOOL_AND, USE_INITIAL_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, OFMARKER, INITIAL_OFMARKER, + PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, + TYPERANGE_BOOL_AND, USE_INITIAL_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, IFMARKINT, 
INITIAL_IFMARKINT, + PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, + TYPERANGE_MARKINT, USE_INITIAL_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, OFMARKINT, INITIAL_OFMARKINT, + PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, + TYPERANGE_MARKINT, USE_INITIAL_ONLY); + if (!param) + goto out; + + *param_list_ptr = pl; + return 0; +out: + iscsi_release_param_list(pl); + return -1; +} + +int iscsi_set_keys_to_negotiate( + int sessiontype, + struct iscsi_param_list *param_list) +{ + struct iscsi_param *param; + + list_for_each_entry(param, ¶m_list->param_list, p_list) { + param->state = 0; + if (!strcmp(param->name, AUTHMETHOD)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, HEADERDIGEST)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, DATADIGEST)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, MAXCONNECTIONS)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, TARGETNAME)) { + continue; + } else if (!strcmp(param->name, INITIATORNAME)) { + continue; + } else if (!strcmp(param->name, TARGETALIAS)) { + if (param->value) + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, INITIATORALIAS)) { + continue; + } else if (!strcmp(param->name, TARGETPORTALGROUPTAG)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, INITIALR2T)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, IMMEDIATEDATA)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, MAXBURSTLENGTH)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, FIRSTBURSTLENGTH)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, DEFAULTTIME2WAIT)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, DEFAULTTIME2RETAIN)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, MAXOUTSTANDINGR2T)) { + 
SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, DATAPDUINORDER)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, DATASEQUENCEINORDER)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, ERRORRECOVERYLEVEL)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, SESSIONTYPE)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, IFMARKER)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, OFMARKER)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, IFMARKINT)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, OFMARKINT)) { + SET_PSTATE_NEGOTIATE(param); + } + } + + return 0; +} + +int iscsi_set_keys_irrelevant_for_discovery( + struct iscsi_param_list *param_list) +{ + struct iscsi_param *param; + + list_for_each_entry(param, ¶m_list->param_list, p_list) { + if (!strcmp(param->name, MAXCONNECTIONS)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, INITIALR2T)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, IMMEDIATEDATA)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, MAXBURSTLENGTH)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, FIRSTBURSTLENGTH)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, MAXOUTSTANDINGR2T)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, DATAPDUINORDER)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, DATASEQUENCEINORDER)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, ERRORRECOVERYLEVEL)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, DEFAULTTIME2WAIT)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, DEFAULTTIME2RETAIN)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, IFMARKER)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, OFMARKER)) + param->state &= 
~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, IFMARKINT)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, OFMARKINT)) + param->state &= ~PSTATE_NEGOTIATE; + } + + return 0; +} + +int iscsi_copy_param_list( + struct iscsi_param_list **dst_param_list, + struct iscsi_param_list *src_param_list, + int leading) +{ + struct iscsi_param *new_param = NULL, *param = NULL; + struct iscsi_param_list *param_list = NULL; + + param_list = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL); + if (!param_list) { + pr_err("Unable to allocate memory for" + " struct iscsi_param_list.\n"); + goto err_out; + } + INIT_LIST_HEAD(¶m_list->param_list); + INIT_LIST_HEAD(¶m_list->extra_response_list); + + list_for_each_entry(param, &src_param_list->param_list, p_list) { + if (!leading && (param->scope & SCOPE_SESSION_WIDE)) { + if ((strcmp(param->name, "TargetName") != 0) && + (strcmp(param->name, "InitiatorName") != 0) && + (strcmp(param->name, "TargetPortalGroupTag") != 0)) + continue; + } + + new_param = kzalloc(sizeof(struct iscsi_param), GFP_KERNEL); + if (!new_param) { + pr_err("Unable to allocate memory for" + " struct iscsi_param.\n"); + goto err_out; + } + + new_param->set_param = param->set_param; + new_param->phase = param->phase; + new_param->scope = param->scope; + new_param->sender = param->sender; + new_param->type = param->type; + new_param->use = param->use; + new_param->type_range = param->type_range; + + new_param->name = kzalloc(strlen(param->name) + 1, GFP_KERNEL); + if (!new_param->name) { + pr_err("Unable to allocate memory for" + " parameter name.\n"); + goto err_out; + } + + new_param->value = kzalloc(strlen(param->value) + 1, + GFP_KERNEL); + if (!new_param->value) { + pr_err("Unable to allocate memory for" + " parameter value.\n"); + goto err_out; + } + + memcpy(new_param->name, param->name, strlen(param->name)); + new_param->name[strlen(param->name)] = '\0'; + memcpy(new_param->value, param->value, strlen(param->value)); + 
new_param->value[strlen(param->value)] = '\0'; + + list_add_tail(&new_param->p_list, ¶m_list->param_list); + } + + if (!list_empty(¶m_list->param_list)) + *dst_param_list = param_list; + else { + pr_err("No parameters allocated.\n"); + goto err_out; + } + + return 0; + +err_out: + iscsi_release_param_list(param_list); + return -1; +} + +static void iscsi_release_extra_responses(struct iscsi_param_list *param_list) +{ + struct iscsi_extra_response *er, *er_tmp; + + list_for_each_entry_safe(er, er_tmp, ¶m_list->extra_response_list, + er_list) { + list_del(&er->er_list); + kfree(er); + } +} + +void iscsi_release_param_list(struct iscsi_param_list *param_list) +{ + struct iscsi_param *param, *param_tmp; + + list_for_each_entry_safe(param, param_tmp, ¶m_list->param_list, + p_list) { + list_del(¶m->p_list); + + kfree(param->name); + param->name = NULL; + kfree(param->value); + param->value = NULL; + kfree(param); + param = NULL; + } + + iscsi_release_extra_responses(param_list); + + kfree(param_list); +} + +struct iscsi_param *iscsi_find_param_from_key( + char *key, + struct iscsi_param_list *param_list) +{ + struct iscsi_param *param; + + if (!key || !param_list) { + pr_err("Key or parameter list pointer is NULL.\n"); + return NULL; + } + + list_for_each_entry(param, ¶m_list->param_list, p_list) { + if (!strcmp(key, param->name)) + return param; + } + + pr_err("Unable to locate key \"%s\".\n", key); + return NULL; +} + +int iscsi_extract_key_value(char *textbuf, char **key, char **value) +{ + *value = strchr(textbuf, '='); + if (!*value) { + pr_err("Unable to locate \"=\" seperator for key," + " ignoring request.\n"); + return -1; + } + + *key = textbuf; + **value = '\0'; + *value = *value + 1; + + return 0; +} + +int iscsi_update_param_value(struct iscsi_param *param, char *value) +{ + kfree(param->value); + + param->value = kzalloc(strlen(value) + 1, GFP_KERNEL); + if (!param->value) { + pr_err("Unable to allocate memory for value.\n"); + return -1; + } + + 
memcpy(param->value, value, strlen(value)); + param->value[strlen(value)] = '\0'; + + pr_debug("iSCSI Parameter updated to %s=%s\n", + param->name, param->value); + return 0; +} + +static int iscsi_add_notunderstood_response( + char *key, + char *value, + struct iscsi_param_list *param_list) +{ + struct iscsi_extra_response *extra_response; + + if (strlen(value) > VALUE_MAXLEN) { + pr_err("Value for notunderstood key \"%s\" exceeds %d," + " protocol error.\n", key, VALUE_MAXLEN); + return -1; + } + + extra_response = kzalloc(sizeof(struct iscsi_extra_response), GFP_KERNEL); + if (!extra_response) { + pr_err("Unable to allocate memory for" + " struct iscsi_extra_response.\n"); + return -1; + } + INIT_LIST_HEAD(&extra_response->er_list); + + strncpy(extra_response->key, key, strlen(key) + 1); + strncpy(extra_response->value, NOTUNDERSTOOD, + strlen(NOTUNDERSTOOD) + 1); + + list_add_tail(&extra_response->er_list, + ¶m_list->extra_response_list); + return 0; +} + +static int iscsi_check_for_auth_key(char *key) +{ + /* + * RFC 1994 + */ + if (!strcmp(key, "CHAP_A") || !strcmp(key, "CHAP_I") || + !strcmp(key, "CHAP_C") || !strcmp(key, "CHAP_N") || + !strcmp(key, "CHAP_R")) + return 1; + + /* + * RFC 2945 + */ + if (!strcmp(key, "SRP_U") || !strcmp(key, "SRP_N") || + !strcmp(key, "SRP_g") || !strcmp(key, "SRP_s") || + !strcmp(key, "SRP_A") || !strcmp(key, "SRP_B") || + !strcmp(key, "SRP_M") || !strcmp(key, "SRP_HM")) + return 1; + + return 0; +} + +static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param) +{ + if (IS_TYPE_BOOL_AND(param)) { + if (!strcmp(param->value, NO)) + SET_PSTATE_REPLY_OPTIONAL(param); + } else if (IS_TYPE_BOOL_OR(param)) { + if (!strcmp(param->value, YES)) + SET_PSTATE_REPLY_OPTIONAL(param); + /* + * Required for gPXE iSCSI boot client + */ + if (!strcmp(param->name, IMMEDIATEDATA)) + SET_PSTATE_REPLY_OPTIONAL(param); + } else if (IS_TYPE_NUMBER(param)) { + if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) + 
SET_PSTATE_REPLY_OPTIONAL(param); + /* + * The GlobalSAN iSCSI Initiator for MacOSX does + * not respond to MaxBurstLength, FirstBurstLength, + * DefaultTime2Wait or DefaultTime2Retain parameter keys. + * So, we set them to 'reply optional' here, and assume the + * the defaults from iscsi_parameters.h if the initiator + * is not RFC compliant and the keys are not negotiated. + */ + if (!strcmp(param->name, MAXBURSTLENGTH)) + SET_PSTATE_REPLY_OPTIONAL(param); + if (!strcmp(param->name, FIRSTBURSTLENGTH)) + SET_PSTATE_REPLY_OPTIONAL(param); + if (!strcmp(param->name, DEFAULTTIME2WAIT)) + SET_PSTATE_REPLY_OPTIONAL(param); + if (!strcmp(param->name, DEFAULTTIME2RETAIN)) + SET_PSTATE_REPLY_OPTIONAL(param); + /* + * Required for gPXE iSCSI boot client + */ + if (!strcmp(param->name, MAXCONNECTIONS)) + SET_PSTATE_REPLY_OPTIONAL(param); + } else if (IS_PHASE_DECLARATIVE(param)) + SET_PSTATE_REPLY_OPTIONAL(param); +}

/*
 * Check that @value is exactly YES or NO.
 *
 * Returns 0 when it is, -1 (after logging) otherwise.
 */
static int iscsi_check_boolean_value(struct iscsi_param *param, char *value)
{
	if (strcmp(value, YES) && strcmp(value, NO)) {
		pr_err("Illegal value for \"%s\", must be either"
			" \"%s\" or \"%s\".\n", param->name, YES, NO);
		return -1;
	}

	return 0;
}

/*
 * Check that the number at the start of @value_ptr lies inside the range
 * selected by @param's typerange.
 *
 * The number is parsed with simple_strtoul() in base 0 and kept as an
 * unsigned long, so a large input cannot wrap into the permitted range
 * through truncation to int. Text after the number is not examined.
 * The typerange tests are tried in the order below and the first match
 * decides the bounds; a parameter matching none of them is accepted.
 *
 * Returns 0 when the value is acceptable, -1 (after logging) otherwise.
 */
static int iscsi_check_numerical_value(struct iscsi_param *param, char *value_ptr)
{
	char *tmpptr;
	unsigned long value, min, max;

	value = simple_strtoul(value_ptr, &tmpptr, 0);

	if (IS_TYPERANGE_0_TO_2(param)) {
		min = 0;
		max = 2;
	} else if (IS_TYPERANGE_0_TO_3600(param)) {
		min = 0;
		max = 3600;
	} else if (IS_TYPERANGE_0_TO_32767(param)) {
		min = 0;
		max = 32767;
	} else if (IS_TYPERANGE_0_TO_65535(param)) {
		min = 0;
		max = 65535;
	} else if (IS_TYPERANGE_1_TO_65535(param)) {
		min = 1;
		max = 65535;
	} else if (IS_TYPERANGE_2_TO_3600(param)) {
		min = 2;
		max = 3600;
	} else if (IS_TYPERANGE_512_TO_16777215(param)) {
		min = 512;
		max = 16777215;
	} else {
		return 0;
	}

	if ((value < min) || (value > max)) {
		pr_err("Illegal value for \"%s\", must be"
			" between %lu and %lu.\n", param->name, min, max);
		return -1;
	}

	return 0;
}

/*
 * Validate a "left~right" range offered for IFMarkInt or OFMarkInt.
 *
 * Nothing is checked when @param is in the proposer state. Otherwise
 * both halves must pass iscsi_check_numerical_value(), right must not be
 * smaller than left, and the range is compared against the left value of
 * the locally configured range in @param->value:
 *  - when @param->set_param is set, a range with either end below the
 *    local left value is an error;
 *  - otherwise, a range with both ends below it marks @param rejected
 *    and replaces its value with REJECT.
 *
 * @value is split at '~' while it is examined and is restored before
 * returning on every path taken after the split.
 *
 * Returns 0 when the range is acceptable or was marked rejected, -1 on
 * error.
 */
static int iscsi_check_numerical_range_value(struct iscsi_param *param, char *value)
{
	char *left_val_ptr = NULL, *right_val_ptr = NULL;
	char *tilde_ptr = NULL, *tmp_ptr = NULL;
	u32 left_val, right_val, local_left_val;
	int ret;

	if (strcmp(param->name, IFMARKINT) &&
	    strcmp(param->name, OFMARKINT)) {
		pr_err("Only parameters \"%s\" or \"%s\" may contain a"
		       " numerical range value.\n", IFMARKINT, OFMARKINT);
		return -1;
	}

	if (IS_PSTATE_PROPOSER(param))
		return 0;

	tilde_ptr = strchr(value, '~');
	if (!tilde_ptr) {
		pr_err("Unable to locate numerical range indicator"
			" \"~\" for \"%s\".\n", param->name);
		return -1;
	}
	*tilde_ptr = '\0';

	left_val_ptr = value;
	right_val_ptr = tilde_ptr + 1;

	ret = iscsi_check_numerical_value(param, left_val_ptr);
	if (ret >= 0)
		ret = iscsi_check_numerical_value(param, right_val_ptr);
	if (ret < 0) {
		/* Hand the caller's buffer back intact on failure too. */
		*tilde_ptr = '~';
		return -1;
	}

	left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0);
	right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0);
	*tilde_ptr = '~';

	if (right_val < left_val) {
		pr_err("Numerical range for parameter \"%s\" contains"
			" a right value which is less than the left.\n",
				param->name);
		return -1;
	}

	/*
	 * For now, enforce reasonable defaults for [I,O]FMarkInt.
	 */
	if (!strchr(param->value, '~')) {
		pr_err("Unable to locate numerical range indicator"
			" \"~\" for \"%s\".\n", param->name);
		return -1;
	}

	/*
	 * Only the local left value is compared below. Parsing stops at the
	 * '~', so the stored string does not have to be split to read it.
	 */
	local_left_val = simple_strtoul(param->value, &tmp_ptr, 0);

	if (param->set_param) {
		if ((left_val < local_left_val) ||
		    (right_val < local_left_val)) {
			pr_err("Passed value range \"%u~%u\" is below"
				" minimum left value \"%u\" for key \"%s\","
				" rejecting.\n", left_val, right_val,
				local_left_val, param->name);
			return -1;
		}
	} else {
		if ((left_val < local_left_val) &&
		    (right_val < local_left_val)) {
			pr_err("Received value range \"%u~%u\" is"
				" below minimum left value \"%u\" for key"
				" \"%s\", rejecting.\n", left_val, right_val,
				local_left_val, param->name);
			SET_PSTATE_REJECT(param);
			if (iscsi_update_param_value(param, REJECT) < 0)
				return -1;
		}
	}

	return 0;
}

/*
 * Validate a single string/list item for the AuthMethod, digest and
 * SessionType typeranges.
 *
 * Nothing is checked when @param is in the proposer state, and
 * parameters of any other typerange are accepted unchanged.
 *
 * Returns 0 when @value is acceptable, -1 (after logging) otherwise.
 */
static int iscsi_check_string_or_list_value(struct iscsi_param *param, char *value)
{
	if (IS_PSTATE_PROPOSER(param))
		return 0;

	if (IS_TYPERANGE_AUTH_PARAM(param)) {
		if (strcmp(value, KRB5) && strcmp(value, SPKM1) &&
		    strcmp(value, SPKM2) && strcmp(value, SRP) &&
		    strcmp(value, CHAP) && strcmp(value, NONE)) {
			pr_err("Illegal value for \"%s\", must be"
				" \"%s\", \"%s\", \"%s\", \"%s\", \"%s\""
				" or \"%s\".\n", param->name, KRB5,
					SPKM1, SPKM2, SRP, CHAP, NONE);
			return -1;
		}
	}
	if (IS_TYPERANGE_DIGEST_PARAM(param)) {
		if (strcmp(value, CRC32C) && strcmp(value, NONE)) {
			pr_err("Illegal value for \"%s\", must be"
				" \"%s\" or \"%s\".\n", param->name,
					CRC32C, NONE);
			return -1;
		}
	}
	if (IS_TYPERANGE_SESSIONTYPE(param)) {
		if (strcmp(value, DISCOVERY) && strcmp(value, NORMAL)) {
			pr_err("Illegal value for \"%s\", must be"
				" \"%s\" or \"%s\".\n", param->name,
					DISCOVERY, NORMAL);
			return -1;
		}
	}

	return 0;
}

+/* + * This function is used to pick a value range number, currently just + * returns the lesser of both right values. + */ +static char *iscsi_get_value_from_number_range( + struct iscsi_param *param, + char *value) +{ + char *end_ptr, *tilde_ptr1 = NULL, *tilde_ptr2 = NULL; + u32 acceptor_right_value, proposer_right_value; + + tilde_ptr1 = strchr(value, '~'); + if (!tilde_ptr1) + return NULL; + *tilde_ptr1++ = '\0'; + proposer_right_value = simple_strtoul(tilde_ptr1, &end_ptr, 0); + + tilde_ptr2 = strchr(param->value, '~'); + if (!tilde_ptr2) + return NULL; + *tilde_ptr2++ = '\0'; + acceptor_right_value = simple_strtoul(tilde_ptr2, &end_ptr, 0); + + return (acceptor_right_value >= proposer_right_value) ?
+ tilde_ptr1 : tilde_ptr2; +} + +static char *iscsi_check_valuelist_for_support( + struct iscsi_param *param, + char *value) +{ + char *tmp1 = NULL, *tmp2 = NULL; + char *acceptor_values = NULL, *proposer_values = NULL; + + acceptor_values = param->value; + proposer_values = value; + + do { + if (!proposer_values) + return NULL; + tmp1 = strchr(proposer_values, ','); + if (tmp1) + *tmp1 = '\0'; + acceptor_values = param->value; + do { + if (!acceptor_values) { + if (tmp1) + *tmp1 = ','; + return NULL; + } + tmp2 = strchr(acceptor_values, ','); + if (tmp2) + *tmp2 = '\0'; + if (!acceptor_values || !proposer_values) { + if (tmp1) + *tmp1 = ','; + if (tmp2) + *tmp2 = ','; + return NULL; + } + if (!strcmp(acceptor_values, proposer_values)) { + if (tmp2) + *tmp2 = ','; + goto out; + } + if (tmp2) + *tmp2++ = ','; + + acceptor_values = tmp2; + if (!acceptor_values) + break; + } while (acceptor_values); + if (tmp1) + *tmp1++ = ','; + proposer_values = tmp1; + } while (proposer_values); + +out: + return proposer_values; +} + +static int iscsi_check_acceptor_state(struct iscsi_param *param, char *value) +{ + u8 acceptor_boolean_value = 0, proposer_boolean_value = 0; + char *negoitated_value = NULL; + + if (IS_PSTATE_ACCEPTOR(param)) { + pr_err("Received key \"%s\" twice, protocol error.\n", + param->name); + return -1; + } + + if (IS_PSTATE_REJECT(param)) + return 0; + + if (IS_TYPE_BOOL_AND(param)) { + if (!strcmp(value, YES)) + proposer_boolean_value = 1; + if (!strcmp(param->value, YES)) + acceptor_boolean_value = 1; + if (acceptor_boolean_value && proposer_boolean_value) + do {} while (0); + else { + if (iscsi_update_param_value(param, NO) < 0) + return -1; + if (!proposer_boolean_value) + SET_PSTATE_REPLY_OPTIONAL(param); + } + } else if (IS_TYPE_BOOL_OR(param)) { + if (!strcmp(value, YES)) + proposer_boolean_value = 1; + if (!strcmp(param->value, YES)) + acceptor_boolean_value = 1; + if (acceptor_boolean_value || proposer_boolean_value) { + if 
(iscsi_update_param_value(param, YES) < 0) + return -1; + if (proposer_boolean_value) + SET_PSTATE_REPLY_OPTIONAL(param); + } + } else if (IS_TYPE_NUMBER(param)) { + char *tmpptr, buf[10]; + u32 acceptor_value = simple_strtoul(param->value, &tmpptr, 0); + u32 proposer_value = simple_strtoul(value, &tmpptr, 0); + + memset(buf, 0, 10); + + if (!strcmp(param->name, MAXCONNECTIONS) || + !strcmp(param->name, MAXBURSTLENGTH) || + !strcmp(param->name, FIRSTBURSTLENGTH) || + !strcmp(param->name, MAXOUTSTANDINGR2T) || + !strcmp(param->name, DEFAULTTIME2RETAIN) || + !strcmp(param->name, ERRORRECOVERYLEVEL)) { + if (proposer_value > acceptor_value) { + sprintf(buf, "%u", acceptor_value); + if (iscsi_update_param_value(param, + &buf[0]) < 0) + return -1; + } else { + if (iscsi_update_param_value(param, value) < 0) + return -1; + } + } else if (!strcmp(param->name, DEFAULTTIME2WAIT)) { + if (acceptor_value > proposer_value) { + sprintf(buf, "%u", acceptor_value); + if (iscsi_update_param_value(param, + &buf[0]) < 0) + return -1; + } else { + if (iscsi_update_param_value(param, value) < 0) + return -1; + } + } else { + if (iscsi_update_param_value(param, value) < 0) + return -1; + } + + if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) + SET_PSTATE_REPLY_OPTIONAL(param); + } else if (IS_TYPE_NUMBER_RANGE(param)) { + negoitated_value = iscsi_get_value_from_number_range( + param, value); + if (!negoitated_value) + return -1; + if (iscsi_update_param_value(param, negoitated_value) < 0) + return -1; + } else if (IS_TYPE_VALUE_LIST(param)) { + negoitated_value = iscsi_check_valuelist_for_support( + param, value); + if (!negoitated_value) { + pr_err("Proposer's value list \"%s\" contains" + " no valid values from Acceptor's value list" + " \"%s\".\n", value, param->value); + return -1; + } + if (iscsi_update_param_value(param, negoitated_value) < 0) + return -1; + } else if (IS_PHASE_DECLARATIVE(param)) { + if (iscsi_update_param_value(param, value) < 0) + return -1; + 
SET_PSTATE_REPLY_OPTIONAL(param); + } + + return 0; +} + +static int iscsi_check_proposer_state(struct iscsi_param *param, char *value) +{ + if (IS_PSTATE_RESPONSE_GOT(param)) { + pr_err("Received key \"%s\" twice, protocol error.\n", + param->name); + return -1; + } + + if (IS_TYPE_NUMBER_RANGE(param)) { + u32 left_val = 0, right_val = 0, recieved_value = 0; + char *left_val_ptr = NULL, *right_val_ptr = NULL; + char *tilde_ptr = NULL, *tmp_ptr = NULL; + + if (!strcmp(value, IRRELEVANT) || !strcmp(value, REJECT)) { + if (iscsi_update_param_value(param, value) < 0) + return -1; + return 0; + } + + tilde_ptr = strchr(value, '~'); + if (tilde_ptr) { + pr_err("Illegal \"~\" in response for \"%s\".\n", + param->name); + return -1; + } + tilde_ptr = strchr(param->value, '~'); + if (!tilde_ptr) { + pr_err("Unable to locate numerical range" + " indicator \"~\" for \"%s\".\n", param->name); + return -1; + } + *tilde_ptr = '\0'; + + left_val_ptr = param->value; + right_val_ptr = param->value + strlen(left_val_ptr) + 1; + left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0); + right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0); + recieved_value = simple_strtoul(value, &tmp_ptr, 0); + + *tilde_ptr = '~'; + + if ((recieved_value < left_val) || + (recieved_value > right_val)) { + pr_err("Illegal response \"%s=%u\", value must" + " be between %u and %u.\n", param->name, + recieved_value, left_val, right_val); + return -1; + } + } else if (IS_TYPE_VALUE_LIST(param)) { + char *comma_ptr = NULL, *tmp_ptr = NULL; + + comma_ptr = strchr(value, ','); + if (comma_ptr) { + pr_err("Illegal \",\" in response for \"%s\".\n", + param->name); + return -1; + } + + tmp_ptr = iscsi_check_valuelist_for_support(param, value); + if (!tmp_ptr) + return -1; + } + + if (iscsi_update_param_value(param, value) < 0) + return -1; + + return 0; +} + +static int iscsi_check_value(struct iscsi_param *param, char *value) +{ + char *comma_ptr = NULL; + + if (!strcmp(value, REJECT)) { + if 
(!strcmp(param->name, IFMARKINT) || + !strcmp(param->name, OFMARKINT)) { + /* + * Reject is not fatal for [I,O]FMarkInt, and causes + * [I,O]FMarker to be reset to No. (See iSCSI v20 A.3.2) + */ + SET_PSTATE_REJECT(param); + return 0; + } + pr_err("Received %s=%s\n", param->name, value); + return -1; + } + if (!strcmp(value, IRRELEVANT)) { + pr_debug("Received %s=%s\n", param->name, value); + SET_PSTATE_IRRELEVANT(param); + return 0; + } + if (!strcmp(value, NOTUNDERSTOOD)) { + if (!IS_PSTATE_PROPOSER(param)) { + pr_err("Received illegal offer %s=%s\n", + param->name, value); + return -1; + } + +/* #warning FIXME: Add check for X-ExtensionKey here */ + pr_err("Standard iSCSI key \"%s\" cannot be answered" + " with \"%s\", protocol error.\n", param->name, value); + return -1; + } + + do { + comma_ptr = NULL; + comma_ptr = strchr(value, ','); + + if (comma_ptr && !IS_TYPE_VALUE_LIST(param)) { + pr_err("Detected value seperator \",\", but" + " key \"%s\" does not allow a value list," + " protocol error.\n", param->name); + return -1; + } + if (comma_ptr) + *comma_ptr = '\0'; + + if (strlen(value) > VALUE_MAXLEN) { + pr_err("Value for key \"%s\" exceeds %d," + " protocol error.\n", param->name, + VALUE_MAXLEN); + return -1; + } + + if (IS_TYPE_BOOL_AND(param) || IS_TYPE_BOOL_OR(param)) { + if (iscsi_check_boolean_value(param, value) < 0) + return -1; + } else if (IS_TYPE_NUMBER(param)) { + if (iscsi_check_numerical_value(param, value) < 0) + return -1; + } else if (IS_TYPE_NUMBER_RANGE(param)) { + if (iscsi_check_numerical_range_value(param, value) < 0) + return -1; + } else if (IS_TYPE_STRING(param) || IS_TYPE_VALUE_LIST(param)) { + if (iscsi_check_string_or_list_value(param, value) < 0) + return -1; + } else { + pr_err("Huh? 
0x%02x\n", param->type); + return -1; + } + + if (comma_ptr) + *comma_ptr++ = ','; + + value = comma_ptr; + } while (value); + + return 0; +} + +static struct iscsi_param *__iscsi_check_key( + char *key, + int sender, + struct iscsi_param_list *param_list) +{ + struct iscsi_param *param; + + if (strlen(key) > KEY_MAXLEN) { + pr_err("Length of key name \"%s\" exceeds %d.\n", + key, KEY_MAXLEN); + return NULL; + } + + param = iscsi_find_param_from_key(key, param_list); + if (!param) + return NULL; + + if ((sender & SENDER_INITIATOR) && !IS_SENDER_INITIATOR(param)) { + pr_err("Key \"%s\" may not be sent to %s," + " protocol error.\n", param->name, + (sender & SENDER_RECEIVER) ? "target" : "initiator"); + return NULL; + } + + if ((sender & SENDER_TARGET) && !IS_SENDER_TARGET(param)) { + pr_err("Key \"%s\" may not be sent to %s," + " protocol error.\n", param->name, + (sender & SENDER_RECEIVER) ? "initiator" : "target"); + return NULL; + } + + return param; +} + +static struct iscsi_param *iscsi_check_key( + char *key, + int phase, + int sender, + struct iscsi_param_list *param_list) +{ + struct iscsi_param *param; + /* + * Key name length must not exceed 63 bytes. (See iSCSI v20 5.1) + */ + if (strlen(key) > KEY_MAXLEN) { + pr_err("Length of key name \"%s\" exceeds %d.\n", + key, KEY_MAXLEN); + return NULL; + } + + param = iscsi_find_param_from_key(key, param_list); + if (!param) + return NULL; + + if ((sender & SENDER_INITIATOR) && !IS_SENDER_INITIATOR(param)) { + pr_err("Key \"%s\" may not be sent to %s," + " protocol error.\n", param->name, + (sender & SENDER_RECEIVER) ? "target" : "initiator"); + return NULL; + } + if ((sender & SENDER_TARGET) && !IS_SENDER_TARGET(param)) { + pr_err("Key \"%s\" may not be sent to %s," + " protocol error.\n", param->name, + (sender & SENDER_RECEIVER) ? 
"initiator" : "target"); + return NULL; + } + + if (IS_PSTATE_ACCEPTOR(param)) { + pr_err("Key \"%s\" received twice, protocol error.\n", + key); + return NULL; + } + + if (!phase) + return param; + + if (!(param->phase & phase)) { + pr_err("Key \"%s\" may not be negotiated during ", + param->name); + switch (phase) { + case PHASE_SECURITY: + pr_debug("Security phase.\n"); + break; + case PHASE_OPERATIONAL: + pr_debug("Operational phase.\n"); + default: + pr_debug("Unknown phase.\n"); + } + return NULL; + } + + return param; +} + +static int iscsi_enforce_integrity_rules( + u8 phase, + struct iscsi_param_list *param_list) +{ + char *tmpptr; + u8 DataSequenceInOrder = 0; + u8 ErrorRecoveryLevel = 0, SessionType = 0; + u8 IFMarker = 0, OFMarker = 0; + u8 IFMarkInt_Reject = 0, OFMarkInt_Reject = 0; + u32 FirstBurstLength = 0, MaxBurstLength = 0; + struct iscsi_param *param = NULL; + + list_for_each_entry(param, ¶m_list->param_list, p_list) { + if (!(param->phase & phase)) + continue; + if (!strcmp(param->name, SESSIONTYPE)) + if (!strcmp(param->value, NORMAL)) + SessionType = 1; + if (!strcmp(param->name, ERRORRECOVERYLEVEL)) + ErrorRecoveryLevel = simple_strtoul(param->value, + &tmpptr, 0); + if (!strcmp(param->name, DATASEQUENCEINORDER)) + if (!strcmp(param->value, YES)) + DataSequenceInOrder = 1; + if (!strcmp(param->name, MAXBURSTLENGTH)) + MaxBurstLength = simple_strtoul(param->value, + &tmpptr, 0); + if (!strcmp(param->name, IFMARKER)) + if (!strcmp(param->value, YES)) + IFMarker = 1; + if (!strcmp(param->name, OFMARKER)) + if (!strcmp(param->value, YES)) + OFMarker = 1; + if (!strcmp(param->name, IFMARKINT)) + if (!strcmp(param->value, REJECT)) + IFMarkInt_Reject = 1; + if (!strcmp(param->name, OFMARKINT)) + if (!strcmp(param->value, REJECT)) + OFMarkInt_Reject = 1; + } + + list_for_each_entry(param, ¶m_list->param_list, p_list) { + if (!(param->phase & phase)) + continue; + if (!SessionType && (!IS_PSTATE_ACCEPTOR(param) && + (strcmp(param->name, IFMARKER) && 
+ strcmp(param->name, OFMARKER) && + strcmp(param->name, IFMARKINT) && + strcmp(param->name, OFMARKINT)))) + continue; + if (!strcmp(param->name, MAXOUTSTANDINGR2T) && + DataSequenceInOrder && (ErrorRecoveryLevel > 0)) { + if (strcmp(param->value, "1")) { + if (iscsi_update_param_value(param, "1") < 0) + return -1; + pr_debug("Reset \"%s\" to \"%s\".\n", + param->name, param->value); + } + } + if (!strcmp(param->name, MAXCONNECTIONS) && !SessionType) { + if (strcmp(param->value, "1")) { + if (iscsi_update_param_value(param, "1") < 0) + return -1; + pr_debug("Reset \"%s\" to \"%s\".\n", + param->name, param->value); + } + } + if (!strcmp(param->name, FIRSTBURSTLENGTH)) { + FirstBurstLength = simple_strtoul(param->value, + &tmpptr, 0); + if (FirstBurstLength > MaxBurstLength) { + char tmpbuf[10]; + memset(tmpbuf, 0, 10); + sprintf(tmpbuf, "%u", MaxBurstLength); + if (iscsi_update_param_value(param, tmpbuf)) + return -1; + pr_debug("Reset \"%s\" to \"%s\".\n", + param->name, param->value); + } + } + if (!strcmp(param->name, IFMARKER) && IFMarkInt_Reject) { + if (iscsi_update_param_value(param, NO) < 0) + return -1; + IFMarker = 0; + pr_debug("Reset \"%s\" to \"%s\".\n", + param->name, param->value); + } + if (!strcmp(param->name, OFMARKER) && OFMarkInt_Reject) { + if (iscsi_update_param_value(param, NO) < 0) + return -1; + OFMarker = 0; + pr_debug("Reset \"%s\" to \"%s\".\n", + param->name, param->value); + } + if (!strcmp(param->name, IFMARKINT) && !IFMarker) { + if (!strcmp(param->value, REJECT)) + continue; + param->state &= ~PSTATE_NEGOTIATE; + if (iscsi_update_param_value(param, IRRELEVANT) < 0) + return -1; + pr_debug("Reset \"%s\" to \"%s\".\n", + param->name, param->value); + } + if (!strcmp(param->name, OFMARKINT) && !OFMarker) { + if (!strcmp(param->value, REJECT)) + continue; + param->state &= ~PSTATE_NEGOTIATE; + if (iscsi_update_param_value(param, IRRELEVANT) < 0) + return -1; + pr_debug("Reset \"%s\" to \"%s\".\n", + param->name, param->value); + } + } + 
+ return 0; +} + +int iscsi_decode_text_input( + u8 phase, + u8 sender, + char *textbuf, + u32 length, + struct iscsi_param_list *param_list) +{ + char *tmpbuf, *start = NULL, *end = NULL; + + tmpbuf = kzalloc(length + 1, GFP_KERNEL); + if (!tmpbuf) { + pr_err("Unable to allocate memory for tmpbuf.\n"); + return -1; + } + + memcpy(tmpbuf, textbuf, length); + tmpbuf[length] = '\0'; + start = tmpbuf; + end = (start + length); + + while (start < end) { + char *key, *value; + struct iscsi_param *param; + + if (iscsi_extract_key_value(start, &key, &value) < 0) { + kfree(tmpbuf); + return -1; + } + + pr_debug("Got key: %s=%s\n", key, value); + + if (phase & PHASE_SECURITY) { + if (iscsi_check_for_auth_key(key) > 0) { + char *tmpptr = key + strlen(key); + *tmpptr = '='; + kfree(tmpbuf); + return 1; + } + } + + param = iscsi_check_key(key, phase, sender, param_list); + if (!param) { + if (iscsi_add_notunderstood_response(key, + value, param_list) < 0) { + kfree(tmpbuf); + return -1; + } + start += strlen(key) + strlen(value) + 2; + continue; + } + if (iscsi_check_value(param, value) < 0) { + kfree(tmpbuf); + return -1; + } + + start += strlen(key) + strlen(value) + 2; + + if (IS_PSTATE_PROPOSER(param)) { + if (iscsi_check_proposer_state(param, value) < 0) { + kfree(tmpbuf); + return -1; + } + SET_PSTATE_RESPONSE_GOT(param); + } else { + if (iscsi_check_acceptor_state(param, value) < 0) { + kfree(tmpbuf); + return -1; + } + SET_PSTATE_ACCEPTOR(param); + } + } + + kfree(tmpbuf); + return 0; +} + +int iscsi_encode_text_output( + u8 phase, + u8 sender, + char *textbuf, + u32 *length, + struct iscsi_param_list *param_list) +{ + char *output_buf = NULL; + struct iscsi_extra_response *er; + struct iscsi_param *param; + + output_buf = textbuf + *length; + + if (iscsi_enforce_integrity_rules(phase, param_list) < 0) + return -1; + + list_for_each_entry(param, ¶m_list->param_list, p_list) { + if (!(param->sender & sender)) + continue; + if (IS_PSTATE_ACCEPTOR(param) && + 
!IS_PSTATE_RESPONSE_SENT(param) && + !IS_PSTATE_REPLY_OPTIONAL(param) && + (param->phase & phase)) { + *length += sprintf(output_buf, "%s=%s", + param->name, param->value); + *length += 1; + output_buf = textbuf + *length; + SET_PSTATE_RESPONSE_SENT(param); + pr_debug("Sending key: %s=%s\n", + param->name, param->value); + continue; + } + if (IS_PSTATE_NEGOTIATE(param) && + !IS_PSTATE_ACCEPTOR(param) && + !IS_PSTATE_PROPOSER(param) && + (param->phase & phase)) { + *length += sprintf(output_buf, "%s=%s", + param->name, param->value); + *length += 1; + output_buf = textbuf + *length; + SET_PSTATE_PROPOSER(param); + iscsi_check_proposer_for_optional_reply(param); + pr_debug("Sending key: %s=%s\n", + param->name, param->value); + } + } + + list_for_each_entry(er, ¶m_list->extra_response_list, er_list) { + *length += sprintf(output_buf, "%s=%s", er->key, er->value); + *length += 1; + output_buf = textbuf + *length; + pr_debug("Sending key: %s=%s\n", er->key, er->value); + } + iscsi_release_extra_responses(param_list); + + return 0; +} + +int iscsi_check_negotiated_keys(struct iscsi_param_list *param_list) +{ + int ret = 0; + struct iscsi_param *param; + + list_for_each_entry(param, ¶m_list->param_list, p_list) { + if (IS_PSTATE_NEGOTIATE(param) && + IS_PSTATE_PROPOSER(param) && + !IS_PSTATE_RESPONSE_GOT(param) && + !IS_PSTATE_REPLY_OPTIONAL(param) && + !IS_PHASE_DECLARATIVE(param)) { + pr_err("No response for proposed key \"%s\".\n", + param->name); + ret = -1; + } + } + + return ret; +} + +int iscsi_change_param_value( + char *keyvalue, + struct iscsi_param_list *param_list, + int check_key) +{ + char *key = NULL, *value = NULL; + struct iscsi_param *param; + int sender = 0; + + if (iscsi_extract_key_value(keyvalue, &key, &value) < 0) + return -1; + + if (!check_key) { + param = __iscsi_check_key(keyvalue, sender, param_list); + if (!param) + return -1; + } else { + param = iscsi_check_key(keyvalue, 0, sender, param_list); + if (!param) + return -1; + + 
param->set_param = 1; + if (iscsi_check_value(param, value) < 0) { + param->set_param = 0; + return -1; + } + param->set_param = 0; + } + + if (iscsi_update_param_value(param, value) < 0) + return -1; + + return 0; +} + +void iscsi_set_connection_parameters( + struct iscsi_conn_ops *ops, + struct iscsi_param_list *param_list) +{ + char *tmpptr; + struct iscsi_param *param; + + pr_debug("---------------------------------------------------" + "---------------\n"); + list_for_each_entry(param, ¶m_list->param_list, p_list) { + if (!IS_PSTATE_ACCEPTOR(param) && !IS_PSTATE_PROPOSER(param)) + continue; + if (!strcmp(param->name, AUTHMETHOD)) { + pr_debug("AuthMethod: %s\n", + param->value); + } else if (!strcmp(param->name, HEADERDIGEST)) { + ops->HeaderDigest = !strcmp(param->value, CRC32C); + pr_debug("HeaderDigest: %s\n", + param->value); + } else if (!strcmp(param->name, DATADIGEST)) { + ops->DataDigest = !strcmp(param->value, CRC32C); + pr_debug("DataDigest: %s\n", + param->value); + } else if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) { + ops->MaxRecvDataSegmentLength = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("MaxRecvDataSegmentLength: %s\n", + param->value); + } else if (!strcmp(param->name, OFMARKER)) { + ops->OFMarker = !strcmp(param->value, YES); + pr_debug("OFMarker: %s\n", + param->value); + } else if (!strcmp(param->name, IFMARKER)) { + ops->IFMarker = !strcmp(param->value, YES); + pr_debug("IFMarker: %s\n", + param->value); + } else if (!strcmp(param->name, OFMARKINT)) { + ops->OFMarkInt = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("OFMarkInt: %s\n", + param->value); + } else if (!strcmp(param->name, IFMARKINT)) { + ops->IFMarkInt = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("IFMarkInt: %s\n", + param->value); + } + } + pr_debug("----------------------------------------------------" + "--------------\n"); +} + +void iscsi_set_session_parameters( + struct iscsi_sess_ops *ops, + struct iscsi_param_list *param_list, 
+ int leading) +{ + char *tmpptr; + struct iscsi_param *param; + + pr_debug("----------------------------------------------------" + "--------------\n"); + list_for_each_entry(param, ¶m_list->param_list, p_list) { + if (!IS_PSTATE_ACCEPTOR(param) && !IS_PSTATE_PROPOSER(param)) + continue; + if (!strcmp(param->name, INITIATORNAME)) { + if (!param->value) + continue; + if (leading) + snprintf(ops->InitiatorName, + sizeof(ops->InitiatorName), + "%s", param->value); + pr_debug("InitiatorName: %s\n", + param->value); + } else if (!strcmp(param->name, INITIATORALIAS)) { + if (!param->value) + continue; + snprintf(ops->InitiatorAlias, + sizeof(ops->InitiatorAlias), + "%s", param->value); + pr_debug("InitiatorAlias: %s\n", + param->value); + } else if (!strcmp(param->name, TARGETNAME)) { + if (!param->value) + continue; + if (leading) + snprintf(ops->TargetName, + sizeof(ops->TargetName), + "%s", param->value); + pr_debug("TargetName: %s\n", + param->value); + } else if (!strcmp(param->name, TARGETALIAS)) { + if (!param->value) + continue; + snprintf(ops->TargetAlias, sizeof(ops->TargetAlias), + "%s", param->value); + pr_debug("TargetAlias: %s\n", + param->value); + } else if (!strcmp(param->name, TARGETPORTALGROUPTAG)) { + ops->TargetPortalGroupTag = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("TargetPortalGroupTag: %s\n", + param->value); + } else if (!strcmp(param->name, MAXCONNECTIONS)) { + ops->MaxConnections = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("MaxConnections: %s\n", + param->value); + } else if (!strcmp(param->name, INITIALR2T)) { + ops->InitialR2T = !strcmp(param->value, YES); + pr_debug("InitialR2T: %s\n", + param->value); + } else if (!strcmp(param->name, IMMEDIATEDATA)) { + ops->ImmediateData = !strcmp(param->value, YES); + pr_debug("ImmediateData: %s\n", + param->value); + } else if (!strcmp(param->name, MAXBURSTLENGTH)) { + ops->MaxBurstLength = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("MaxBurstLength: %s\n", + 
param->value); + } else if (!strcmp(param->name, FIRSTBURSTLENGTH)) { + ops->FirstBurstLength = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("FirstBurstLength: %s\n", + param->value); + } else if (!strcmp(param->name, DEFAULTTIME2WAIT)) { + ops->DefaultTime2Wait = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("DefaultTime2Wait: %s\n", + param->value); + } else if (!strcmp(param->name, DEFAULTTIME2RETAIN)) { + ops->DefaultTime2Retain = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("DefaultTime2Retain: %s\n", + param->value); + } else if (!strcmp(param->name, MAXOUTSTANDINGR2T)) { + ops->MaxOutstandingR2T = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("MaxOutstandingR2T: %s\n", + param->value); + } else if (!strcmp(param->name, DATAPDUINORDER)) { + ops->DataPDUInOrder = !strcmp(param->value, YES); + pr_debug("DataPDUInOrder: %s\n", + param->value); + } else if (!strcmp(param->name, DATASEQUENCEINORDER)) { + ops->DataSequenceInOrder = !strcmp(param->value, YES); + pr_debug("DataSequenceInOrder: %s\n", + param->value); + } else if (!strcmp(param->name, ERRORRECOVERYLEVEL)) { + ops->ErrorRecoveryLevel = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("ErrorRecoveryLevel: %s\n", + param->value); + } else if (!strcmp(param->name, SESSIONTYPE)) { + ops->SessionType = !strcmp(param->value, DISCOVERY); + pr_debug("SessionType: %s\n", + param->value); + } + } + pr_debug("----------------------------------------------------" + "--------------\n"); + +} diff --git a/drivers/target/iscsi/iscsi_target_parameters.h b/drivers/target/iscsi/iscsi_target_parameters.h new file mode 100644 index 000000000000..6a37fd6f1285 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_parameters.h @@ -0,0 +1,269 @@ +#ifndef ISCSI_PARAMETERS_H +#define ISCSI_PARAMETERS_H + +struct iscsi_extra_response { + char key[64]; + char value[32]; + struct list_head er_list; +} ____cacheline_aligned; + +struct iscsi_param { + char *name; + char *value; + u8 
set_param; + u8 phase; + u8 scope; + u8 sender; + u8 type; + u8 use; + u16 type_range; + u32 state; + struct list_head p_list; +} ____cacheline_aligned; + +extern int iscsi_login_rx_data(struct iscsi_conn *, char *, int); +extern int iscsi_login_tx_data(struct iscsi_conn *, char *, char *, int); +extern void iscsi_dump_conn_ops(struct iscsi_conn_ops *); +extern void iscsi_dump_sess_ops(struct iscsi_sess_ops *); +extern void iscsi_print_params(struct iscsi_param_list *); +extern int iscsi_create_default_params(struct iscsi_param_list **); +extern int iscsi_set_keys_to_negotiate(int, struct iscsi_param_list *); +extern int iscsi_set_keys_irrelevant_for_discovery(struct iscsi_param_list *); +extern int iscsi_copy_param_list(struct iscsi_param_list **, + struct iscsi_param_list *, int); +extern int iscsi_change_param_value(char *, struct iscsi_param_list *, int); +extern void iscsi_release_param_list(struct iscsi_param_list *); +extern struct iscsi_param *iscsi_find_param_from_key(char *, struct iscsi_param_list *); +extern int iscsi_extract_key_value(char *, char **, char **); +extern int iscsi_update_param_value(struct iscsi_param *, char *); +extern int iscsi_decode_text_input(u8, u8, char *, u32, struct iscsi_param_list *); +extern int iscsi_encode_text_output(u8, u8, char *, u32 *, + struct iscsi_param_list *); +extern int iscsi_check_negotiated_keys(struct iscsi_param_list *); +extern void iscsi_set_connection_parameters(struct iscsi_conn_ops *, + struct iscsi_param_list *); +extern void iscsi_set_session_parameters(struct iscsi_sess_ops *, + struct iscsi_param_list *, int); + +#define YES "Yes" +#define NO "No" +#define ALL "All" +#define IRRELEVANT "Irrelevant" +#define NONE "None" +#define NOTUNDERSTOOD "NotUnderstood" +#define REJECT "Reject" + +/* + * The Parameter Names. 
+ */ +#define AUTHMETHOD "AuthMethod" +#define HEADERDIGEST "HeaderDigest" +#define DATADIGEST "DataDigest" +#define MAXCONNECTIONS "MaxConnections" +#define SENDTARGETS "SendTargets" +#define TARGETNAME "TargetName" +#define INITIATORNAME "InitiatorName" +#define TARGETALIAS "TargetAlias" +#define INITIATORALIAS "InitiatorAlias" +#define TARGETADDRESS "TargetAddress" +#define TARGETPORTALGROUPTAG "TargetPortalGroupTag" +#define INITIALR2T "InitialR2T" +#define IMMEDIATEDATA "ImmediateData" +#define MAXRECVDATASEGMENTLENGTH "MaxRecvDataSegmentLength" +#define MAXBURSTLENGTH "MaxBurstLength" +#define FIRSTBURSTLENGTH "FirstBurstLength" +#define DEFAULTTIME2WAIT "DefaultTime2Wait" +#define DEFAULTTIME2RETAIN "DefaultTime2Retain" +#define MAXOUTSTANDINGR2T "MaxOutstandingR2T" +#define DATAPDUINORDER "DataPDUInOrder" +#define DATASEQUENCEINORDER "DataSequenceInOrder" +#define ERRORRECOVERYLEVEL "ErrorRecoveryLevel" +#define SESSIONTYPE "SessionType" +#define IFMARKER "IFMarker" +#define OFMARKER "OFMarker" +#define IFMARKINT "IFMarkInt" +#define OFMARKINT "OFMarkInt" +#define X_EXTENSIONKEY "X-com.sbei.version" +#define X_EXTENSIONKEY_CISCO_NEW "X-com.cisco.protocol" +#define X_EXTENSIONKEY_CISCO_OLD "X-com.cisco.iscsi.draft" + +/* + * For AuthMethod. + */ +#define KRB5 "KRB5" +#define SPKM1 "SPKM1" +#define SPKM2 "SPKM2" +#define SRP "SRP" +#define CHAP "CHAP" + +/* + * Initial values for Parameter Negotiation. 
+ */ +#define INITIAL_AUTHMETHOD CHAP +#define INITIAL_HEADERDIGEST "CRC32C,None" +#define INITIAL_DATADIGEST "CRC32C,None" +#define INITIAL_MAXCONNECTIONS "1" +#define INITIAL_SENDTARGETS ALL +#define INITIAL_TARGETNAME "LIO.Target" +#define INITIAL_INITIATORNAME "LIO.Initiator" +#define INITIAL_TARGETALIAS "LIO Target" +#define INITIAL_INITIATORALIAS "LIO Initiator" +#define INITIAL_TARGETADDRESS "0.0.0.0:0000,0" +#define INITIAL_TARGETPORTALGROUPTAG "1" +#define INITIAL_INITIALR2T YES +#define INITIAL_IMMEDIATEDATA YES +#define INITIAL_MAXRECVDATASEGMENTLENGTH "8192" +#define INITIAL_MAXBURSTLENGTH "262144" +#define INITIAL_FIRSTBURSTLENGTH "65536" +#define INITIAL_DEFAULTTIME2WAIT "2" +#define INITIAL_DEFAULTTIME2RETAIN "20" +#define INITIAL_MAXOUTSTANDINGR2T "1" +#define INITIAL_DATAPDUINORDER YES +#define INITIAL_DATASEQUENCEINORDER YES +#define INITIAL_ERRORRECOVERYLEVEL "0" +#define INITIAL_SESSIONTYPE NORMAL +#define INITIAL_IFMARKER NO +#define INITIAL_OFMARKER NO +#define INITIAL_IFMARKINT "2048~65535" +#define INITIAL_OFMARKINT "2048~65535" + +/* + * For [Header,Data]Digests. + */ +#define CRC32C "CRC32C" + +/* + * For SessionType. 
+ */ +#define DISCOVERY "Discovery" +#define NORMAL "Normal" + +/* + * struct iscsi_param->use + */ +#define USE_LEADING_ONLY 0x01 +#define USE_INITIAL_ONLY 0x02 +#define USE_ALL 0x04 + +#define IS_USE_LEADING_ONLY(p) ((p)->use & USE_LEADING_ONLY) +#define IS_USE_INITIAL_ONLY(p) ((p)->use & USE_INITIAL_ONLY) +#define IS_USE_ALL(p) ((p)->use & USE_ALL) + +#define SET_USE_INITIAL_ONLY(p) ((p)->use |= USE_INITIAL_ONLY) + +/* + * struct iscsi_param->sender + */ +#define SENDER_INITIATOR 0x01 +#define SENDER_TARGET 0x02 +#define SENDER_BOTH 0x03 +/* Used in iscsi_check_key() */ +#define SENDER_RECEIVER 0x04 + +#define IS_SENDER_INITIATOR(p) ((p)->sender & SENDER_INITIATOR) +#define IS_SENDER_TARGET(p) ((p)->sender & SENDER_TARGET) +#define IS_SENDER_BOTH(p) ((p)->sender & SENDER_BOTH) + +/* + * struct iscsi_param->scope + */ +#define SCOPE_CONNECTION_ONLY 0x01 +#define SCOPE_SESSION_WIDE 0x02 + +#define IS_SCOPE_CONNECTION_ONLY(p) ((p)->scope & SCOPE_CONNECTION_ONLY) +#define IS_SCOPE_SESSION_WIDE(p) ((p)->scope & SCOPE_SESSION_WIDE) + +/* + * struct iscsi_param->phase + */ +#define PHASE_SECURITY 0x01 +#define PHASE_OPERATIONAL 0x02 +#define PHASE_DECLARATIVE 0x04 +#define PHASE_FFP0 0x08 + +#define IS_PHASE_SECURITY(p) ((p)->phase & PHASE_SECURITY) +#define IS_PHASE_OPERATIONAL(p) ((p)->phase & PHASE_OPERATIONAL) +#define IS_PHASE_DECLARATIVE(p) ((p)->phase & PHASE_DECLARATIVE) +#define IS_PHASE_FFP0(p) ((p)->phase & PHASE_FFP0) + +/* + * struct iscsi_param->type + */ +#define TYPE_BOOL_AND 0x01 +#define TYPE_BOOL_OR 0x02 +#define TYPE_NUMBER 0x04 +#define TYPE_NUMBER_RANGE 0x08 +#define TYPE_STRING 0x10 +#define TYPE_VALUE_LIST 0x20 + +#define IS_TYPE_BOOL_AND(p) ((p)->type & TYPE_BOOL_AND) +#define IS_TYPE_BOOL_OR(p) ((p)->type & TYPE_BOOL_OR) +#define IS_TYPE_NUMBER(p) ((p)->type & TYPE_NUMBER) +#define IS_TYPE_NUMBER_RANGE(p) ((p)->type & TYPE_NUMBER_RANGE) +#define IS_TYPE_STRING(p) ((p)->type & TYPE_STRING) +#define IS_TYPE_VALUE_LIST(p) ((p)->type & 
TYPE_VALUE_LIST) + +/* + * struct iscsi_param->type_range + */ +#define TYPERANGE_BOOL_AND 0x0001 +#define TYPERANGE_BOOL_OR 0x0002 +#define TYPERANGE_0_TO_2 0x0004 +#define TYPERANGE_0_TO_3600 0x0008 +#define TYPERANGE_0_TO_32767 0x0010 +#define TYPERANGE_0_TO_65535 0x0020 +#define TYPERANGE_1_TO_65535 0x0040 +#define TYPERANGE_2_TO_3600 0x0080 +#define TYPERANGE_512_TO_16777215 0x0100 +#define TYPERANGE_AUTH 0x0200 +#define TYPERANGE_DIGEST 0x0400 +#define TYPERANGE_ISCSINAME 0x0800 +#define TYPERANGE_MARKINT 0x1000 +#define TYPERANGE_SESSIONTYPE 0x2000 +#define TYPERANGE_TARGETADDRESS 0x4000 +#define TYPERANGE_UTF8 0x8000 + +#define IS_TYPERANGE_0_TO_2(p) ((p)->type_range & TYPERANGE_0_TO_2) +#define IS_TYPERANGE_0_TO_3600(p) ((p)->type_range & TYPERANGE_0_TO_3600) +#define IS_TYPERANGE_0_TO_32767(p) ((p)->type_range & TYPERANGE_0_TO_32767) +#define IS_TYPERANGE_0_TO_65535(p) ((p)->type_range & TYPERANGE_0_TO_65535) +#define IS_TYPERANGE_1_TO_65535(p) ((p)->type_range & TYPERANGE_1_TO_65535) +#define IS_TYPERANGE_2_TO_3600(p) ((p)->type_range & TYPERANGE_2_TO_3600) +#define IS_TYPERANGE_512_TO_16777215(p) ((p)->type_range & \ + TYPERANGE_512_TO_16777215) +#define IS_TYPERANGE_AUTH_PARAM(p) ((p)->type_range & TYPERANGE_AUTH) +#define IS_TYPERANGE_DIGEST_PARAM(p) ((p)->type_range & TYPERANGE_DIGEST) +#define IS_TYPERANGE_SESSIONTYPE(p) ((p)->type_range & \ + TYPERANGE_SESSIONTYPE) + +/* + * struct iscsi_param->state + */ +#define PSTATE_ACCEPTOR 0x01 +#define PSTATE_NEGOTIATE 0x02 +#define PSTATE_PROPOSER 0x04 +#define PSTATE_IRRELEVANT 0x08 +#define PSTATE_REJECT 0x10 +#define PSTATE_REPLY_OPTIONAL 0x20 +#define PSTATE_RESPONSE_GOT 0x40 +#define PSTATE_RESPONSE_SENT 0x80 + +#define IS_PSTATE_ACCEPTOR(p) ((p)->state & PSTATE_ACCEPTOR) +#define IS_PSTATE_NEGOTIATE(p) ((p)->state & PSTATE_NEGOTIATE) +#define IS_PSTATE_PROPOSER(p) ((p)->state & PSTATE_PROPOSER) +#define IS_PSTATE_IRRELEVANT(p) ((p)->state & PSTATE_IRRELEVANT) +#define IS_PSTATE_REJECT(p) ((p)->state 
& PSTATE_REJECT) +#define IS_PSTATE_REPLY_OPTIONAL(p) ((p)->state & PSTATE_REPLY_OPTIONAL) +#define IS_PSTATE_RESPONSE_GOT(p) ((p)->state & PSTATE_RESPONSE_GOT) +#define IS_PSTATE_RESPONSE_SENT(p) ((p)->state & PSTATE_RESPONSE_SENT) + +#define SET_PSTATE_ACCEPTOR(p) ((p)->state |= PSTATE_ACCEPTOR) +#define SET_PSTATE_NEGOTIATE(p) ((p)->state |= PSTATE_NEGOTIATE) +#define SET_PSTATE_PROPOSER(p) ((p)->state |= PSTATE_PROPOSER) +#define SET_PSTATE_IRRELEVANT(p) ((p)->state |= PSTATE_IRRELEVANT) +#define SET_PSTATE_REJECT(p) ((p)->state |= PSTATE_REJECT) +#define SET_PSTATE_REPLY_OPTIONAL(p) ((p)->state |= PSTATE_REPLY_OPTIONAL) +#define SET_PSTATE_RESPONSE_GOT(p) ((p)->state |= PSTATE_RESPONSE_GOT) +#define SET_PSTATE_RESPONSE_SENT(p) ((p)->state |= PSTATE_RESPONSE_SENT) + +#endif /* ISCSI_PARAMETERS_H */ diff --git a/drivers/target/iscsi/iscsi_target_seq_pdu_list.c b/drivers/target/iscsi/iscsi_target_seq_pdu_list.c new file mode 100644 index 000000000000..fc694082bfc0 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_seq_pdu_list.c @@ -0,0 +1,664 @@ +/******************************************************************************* + * This file contains main functions related to iSCSI DataSequenceInOrder=No + * and DataPDUInOrder=No. + * + \u00a9 Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ ******************************************************************************/ + +#include <linux/slab.h> +#include <linux/random.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_util.h" +#include "iscsi_target_seq_pdu_list.h" + +#define OFFLOAD_BUF_SIZE 32768 + +void iscsit_dump_seq_list(struct iscsi_cmd *cmd) +{ + int i; + struct iscsi_seq *seq; + + pr_debug("Dumping Sequence List for ITT: 0x%08x:\n", + cmd->init_task_tag); + + for (i = 0; i < cmd->seq_count; i++) { + seq = &cmd->seq_list[i]; + pr_debug("i: %d, pdu_start: %d, pdu_count: %d," + " offset: %d, xfer_len: %d, seq_send_order: %d," + " seq_no: %d\n", i, seq->pdu_start, seq->pdu_count, + seq->offset, seq->xfer_len, seq->seq_send_order, + seq->seq_no); + } +} + +void iscsit_dump_pdu_list(struct iscsi_cmd *cmd) +{ + int i; + struct iscsi_pdu *pdu; + + pr_debug("Dumping PDU List for ITT: 0x%08x:\n", + cmd->init_task_tag); + + for (i = 0; i < cmd->pdu_count; i++) { + pdu = &cmd->pdu_list[i]; + pr_debug("i: %d, offset: %d, length: %d," + " pdu_send_order: %d, seq_no: %d\n", i, pdu->offset, + pdu->length, pdu->pdu_send_order, pdu->seq_no); + } +} + +static void iscsit_ordered_seq_lists( + struct iscsi_cmd *cmd, + u8 type) +{ + u32 i, seq_count = 0; + + for (i = 0; i < cmd->seq_count; i++) { + if (cmd->seq_list[i].type != SEQTYPE_NORMAL) + continue; + cmd->seq_list[i].seq_send_order = seq_count++; + } +} + +static void iscsit_ordered_pdu_lists( + struct iscsi_cmd *cmd, + u8 type) +{ + u32 i, pdu_send_order = 0, seq_no = 0; + + for (i = 0; i < cmd->pdu_count; i++) { +redo: + if (cmd->pdu_list[i].seq_no == seq_no) { + cmd->pdu_list[i].pdu_send_order = pdu_send_order++; + continue; + } + seq_no++; + pdu_send_order = 0; + goto redo; + } +} + +/* + * Generate count random values into array. + * Use 0x80000000 to mark generates valued in array[]. 
+ */ +static void iscsit_create_random_array(u32 *array, u32 count) +{ + int i, j, k; + + if (count == 1) { + array[0] = 0; + return; + } + + for (i = 0; i < count; i++) { +redo: + get_random_bytes(&j, sizeof(u32)); + j = (1 + (int) (9999 + 1) - j) % count; + for (k = 0; k < i + 1; k++) { + j |= 0x80000000; + if ((array[k] & 0x80000000) && (array[k] == j)) + goto redo; + } + array[i] = j; + } + + for (i = 0; i < count; i++) + array[i] &= ~0x80000000; +} + +static int iscsit_randomize_pdu_lists( + struct iscsi_cmd *cmd, + u8 type) +{ + int i = 0; + u32 *array, pdu_count, seq_count = 0, seq_no = 0, seq_offset = 0; + + for (pdu_count = 0; pdu_count < cmd->pdu_count; pdu_count++) { +redo: + if (cmd->pdu_list[pdu_count].seq_no == seq_no) { + seq_count++; + continue; + } + array = kzalloc(seq_count * sizeof(u32), GFP_KERNEL); + if (!array) { + pr_err("Unable to allocate memory" + " for random array.\n"); + return -1; + } + iscsit_create_random_array(array, seq_count); + + for (i = 0; i < seq_count; i++) + cmd->pdu_list[seq_offset+i].pdu_send_order = array[i]; + + kfree(array); + + seq_offset += seq_count; + seq_count = 0; + seq_no++; + goto redo; + } + + if (seq_count) { + array = kzalloc(seq_count * sizeof(u32), GFP_KERNEL); + if (!array) { + pr_err("Unable to allocate memory for" + " random array.\n"); + return -1; + } + iscsit_create_random_array(array, seq_count); + + for (i = 0; i < seq_count; i++) + cmd->pdu_list[seq_offset+i].pdu_send_order = array[i]; + + kfree(array); + } + + return 0; +} + +static int iscsit_randomize_seq_lists( + struct iscsi_cmd *cmd, + u8 type) +{ + int i, j = 0; + u32 *array, seq_count = cmd->seq_count; + + if ((type == PDULIST_IMMEDIATE) || (type == PDULIST_UNSOLICITED)) + seq_count--; + else if (type == PDULIST_IMMEDIATE_AND_UNSOLICITED) + seq_count -= 2; + + if (!seq_count) + return 0; + + array = kzalloc(seq_count * sizeof(u32), GFP_KERNEL); + if (!array) { + pr_err("Unable to allocate memory for random array.\n"); + return -1; + } + 
iscsit_create_random_array(array, seq_count); + + for (i = 0; i < cmd->seq_count; i++) { + if (cmd->seq_list[i].type != SEQTYPE_NORMAL) + continue; + cmd->seq_list[i].seq_send_order = array[j++]; + } + + kfree(array); + return 0; +} + +static void iscsit_determine_counts_for_list( + struct iscsi_cmd *cmd, + struct iscsi_build_list *bl, + u32 *seq_count, + u32 *pdu_count) +{ + int check_immediate = 0; + u32 burstlength = 0, offset = 0; + u32 unsolicited_data_length = 0; + struct iscsi_conn *conn = cmd->conn; + + if ((bl->type == PDULIST_IMMEDIATE) || + (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED)) + check_immediate = 1; + + if ((bl->type == PDULIST_UNSOLICITED) || + (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED)) + unsolicited_data_length = (cmd->data_length > + conn->sess->sess_ops->FirstBurstLength) ? + conn->sess->sess_ops->FirstBurstLength : cmd->data_length; + + while (offset < cmd->data_length) { + *pdu_count += 1; + + if (check_immediate) { + check_immediate = 0; + offset += bl->immediate_data_length; + *seq_count += 1; + if (unsolicited_data_length) + unsolicited_data_length -= + bl->immediate_data_length; + continue; + } + if (unsolicited_data_length > 0) { + if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) + >= cmd->data_length) { + unsolicited_data_length -= + (cmd->data_length - offset); + offset += (cmd->data_length - offset); + continue; + } + if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) + >= conn->sess->sess_ops->FirstBurstLength) { + unsolicited_data_length -= + (conn->sess->sess_ops->FirstBurstLength - + offset); + offset += (conn->sess->sess_ops->FirstBurstLength - + offset); + burstlength = 0; + *seq_count += 1; + continue; + } + + offset += conn->conn_ops->MaxRecvDataSegmentLength; + unsolicited_data_length -= + conn->conn_ops->MaxRecvDataSegmentLength; + continue; + } + if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) >= + cmd->data_length) { + offset += (cmd->data_length - offset); + continue; + } + if ((burstlength 
+ conn->conn_ops->MaxRecvDataSegmentLength) >= + conn->sess->sess_ops->MaxBurstLength) { + offset += (conn->sess->sess_ops->MaxBurstLength - + burstlength); + burstlength = 0; + *seq_count += 1; + continue; + } + + burstlength += conn->conn_ops->MaxRecvDataSegmentLength; + offset += conn->conn_ops->MaxRecvDataSegmentLength; + } +} + + +/* + * Builds PDU and/or Sequence list, called while DataSequenceInOrder=No + * and DataPDUInOrder=No. + */ +static int iscsit_build_pdu_and_seq_list( + struct iscsi_cmd *cmd, + struct iscsi_build_list *bl) +{ + int check_immediate = 0, datapduinorder, datasequenceinorder; + u32 burstlength = 0, offset = 0, i = 0; + u32 pdu_count = 0, seq_no = 0, unsolicited_data_length = 0; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_pdu *pdu = cmd->pdu_list; + struct iscsi_seq *seq = cmd->seq_list; + + datapduinorder = conn->sess->sess_ops->DataPDUInOrder; + datasequenceinorder = conn->sess->sess_ops->DataSequenceInOrder; + + if ((bl->type == PDULIST_IMMEDIATE) || + (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED)) + check_immediate = 1; + + if ((bl->type == PDULIST_UNSOLICITED) || + (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED)) + unsolicited_data_length = (cmd->data_length > + conn->sess->sess_ops->FirstBurstLength) ? 
+ conn->sess->sess_ops->FirstBurstLength : cmd->data_length; + + while (offset < cmd->data_length) { + pdu_count++; + if (!datapduinorder) { + pdu[i].offset = offset; + pdu[i].seq_no = seq_no; + } + if (!datasequenceinorder && (pdu_count == 1)) { + seq[seq_no].pdu_start = i; + seq[seq_no].seq_no = seq_no; + seq[seq_no].offset = offset; + seq[seq_no].orig_offset = offset; + } + + if (check_immediate) { + check_immediate = 0; + if (!datapduinorder) { + pdu[i].type = PDUTYPE_IMMEDIATE; + pdu[i++].length = bl->immediate_data_length; + } + if (!datasequenceinorder) { + seq[seq_no].type = SEQTYPE_IMMEDIATE; + seq[seq_no].pdu_count = 1; + seq[seq_no].xfer_len = + bl->immediate_data_length; + } + offset += bl->immediate_data_length; + pdu_count = 0; + seq_no++; + if (unsolicited_data_length) + unsolicited_data_length -= + bl->immediate_data_length; + continue; + } + if (unsolicited_data_length > 0) { + if ((offset + + conn->conn_ops->MaxRecvDataSegmentLength) >= + cmd->data_length) { + if (!datapduinorder) { + pdu[i].type = PDUTYPE_UNSOLICITED; + pdu[i].length = + (cmd->data_length - offset); + } + if (!datasequenceinorder) { + seq[seq_no].type = SEQTYPE_UNSOLICITED; + seq[seq_no].pdu_count = pdu_count; + seq[seq_no].xfer_len = (burstlength + + (cmd->data_length - offset)); + } + unsolicited_data_length -= + (cmd->data_length - offset); + offset += (cmd->data_length - offset); + continue; + } + if ((offset + + conn->conn_ops->MaxRecvDataSegmentLength) >= + conn->sess->sess_ops->FirstBurstLength) { + if (!datapduinorder) { + pdu[i].type = PDUTYPE_UNSOLICITED; + pdu[i++].length = + (conn->sess->sess_ops->FirstBurstLength - + offset); + } + if (!datasequenceinorder) { + seq[seq_no].type = SEQTYPE_UNSOLICITED; + seq[seq_no].pdu_count = pdu_count; + seq[seq_no].xfer_len = (burstlength + + (conn->sess->sess_ops->FirstBurstLength - + offset)); + } + unsolicited_data_length -= + (conn->sess->sess_ops->FirstBurstLength - + offset); + offset += 
(conn->sess->sess_ops->FirstBurstLength - + offset); + burstlength = 0; + pdu_count = 0; + seq_no++; + continue; + } + + if (!datapduinorder) { + pdu[i].type = PDUTYPE_UNSOLICITED; + pdu[i++].length = + conn->conn_ops->MaxRecvDataSegmentLength; + } + burstlength += conn->conn_ops->MaxRecvDataSegmentLength; + offset += conn->conn_ops->MaxRecvDataSegmentLength; + unsolicited_data_length -= + conn->conn_ops->MaxRecvDataSegmentLength; + continue; + } + if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) >= + cmd->data_length) { + if (!datapduinorder) { + pdu[i].type = PDUTYPE_NORMAL; + pdu[i].length = (cmd->data_length - offset); + } + if (!datasequenceinorder) { + seq[seq_no].type = SEQTYPE_NORMAL; + seq[seq_no].pdu_count = pdu_count; + seq[seq_no].xfer_len = (burstlength + + (cmd->data_length - offset)); + } + offset += (cmd->data_length - offset); + continue; + } + if ((burstlength + conn->conn_ops->MaxRecvDataSegmentLength) >= + conn->sess->sess_ops->MaxBurstLength) { + if (!datapduinorder) { + pdu[i].type = PDUTYPE_NORMAL; + pdu[i++].length = + (conn->sess->sess_ops->MaxBurstLength - + burstlength); + } + if (!datasequenceinorder) { + seq[seq_no].type = SEQTYPE_NORMAL; + seq[seq_no].pdu_count = pdu_count; + seq[seq_no].xfer_len = (burstlength + + (conn->sess->sess_ops->MaxBurstLength - + burstlength)); + } + offset += (conn->sess->sess_ops->MaxBurstLength - + burstlength); + burstlength = 0; + pdu_count = 0; + seq_no++; + continue; + } + + if (!datapduinorder) { + pdu[i].type = PDUTYPE_NORMAL; + pdu[i++].length = + conn->conn_ops->MaxRecvDataSegmentLength; + } + burstlength += conn->conn_ops->MaxRecvDataSegmentLength; + offset += conn->conn_ops->MaxRecvDataSegmentLength; + } + + if (!datasequenceinorder) { + if (bl->data_direction & ISCSI_PDU_WRITE) { + if (bl->randomize & RANDOM_R2T_OFFSETS) { + if (iscsit_randomize_seq_lists(cmd, bl->type) + < 0) + return -1; + } else + iscsit_ordered_seq_lists(cmd, bl->type); + } else if (bl->data_direction & 
ISCSI_PDU_READ) { + if (bl->randomize & RANDOM_DATAIN_SEQ_OFFSETS) { + if (iscsit_randomize_seq_lists(cmd, bl->type) + < 0) + return -1; + } else + iscsit_ordered_seq_lists(cmd, bl->type); + } +#if 0 + iscsit_dump_seq_list(cmd); +#endif + } + if (!datapduinorder) { + if (bl->data_direction & ISCSI_PDU_WRITE) { + if (bl->randomize & RANDOM_DATAOUT_PDU_OFFSETS) { + if (iscsit_randomize_pdu_lists(cmd, bl->type) + < 0) + return -1; + } else + iscsit_ordered_pdu_lists(cmd, bl->type); + } else if (bl->data_direction & ISCSI_PDU_READ) { + if (bl->randomize & RANDOM_DATAIN_PDU_OFFSETS) { + if (iscsit_randomize_pdu_lists(cmd, bl->type) + < 0) + return -1; + } else + iscsit_ordered_pdu_lists(cmd, bl->type); + } +#if 0 + iscsit_dump_pdu_list(cmd); +#endif + } + + return 0; +} + +/* + * Only called while DataSequenceInOrder=No or DataPDUInOrder=No. + */ +int iscsit_do_build_list( + struct iscsi_cmd *cmd, + struct iscsi_build_list *bl) +{ + u32 pdu_count = 0, seq_count = 1; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_pdu *pdu = NULL; + struct iscsi_seq *seq = NULL; + + iscsit_determine_counts_for_list(cmd, bl, &seq_count, &pdu_count); + + if (!conn->sess->sess_ops->DataSequenceInOrder) { + seq = kzalloc(seq_count * sizeof(struct iscsi_seq), GFP_ATOMIC); + if (!seq) { + pr_err("Unable to allocate struct iscsi_seq list\n"); + return -1; + } + cmd->seq_list = seq; + cmd->seq_count = seq_count; + } + + if (!conn->sess->sess_ops->DataPDUInOrder) { + pdu = kzalloc(pdu_count * sizeof(struct iscsi_pdu), GFP_ATOMIC); + if (!pdu) { + pr_err("Unable to allocate struct iscsi_pdu list.\n"); + kfree(seq); + return -1; + } + cmd->pdu_list = pdu; + cmd->pdu_count = pdu_count; + } + + return iscsit_build_pdu_and_seq_list(cmd, bl); +} + +struct iscsi_pdu *iscsit_get_pdu_holder( + struct iscsi_cmd *cmd, + u32 offset, + u32 length) +{ + u32 i; + struct iscsi_pdu *pdu = NULL; + + if (!cmd->pdu_list) { + pr_err("struct iscsi_cmd->pdu_list is NULL!\n"); + return NULL; + } + + pdu = 
&cmd->pdu_list[0]; + + for (i = 0; i < cmd->pdu_count; i++) + if ((pdu[i].offset == offset) && (pdu[i].length == length)) + return &pdu[i]; + + pr_err("Unable to locate PDU holder for ITT: 0x%08x, Offset:" + " %u, Length: %u\n", cmd->init_task_tag, offset, length); + return NULL; +} + +struct iscsi_pdu *iscsit_get_pdu_holder_for_seq( + struct iscsi_cmd *cmd, + struct iscsi_seq *seq) +{ + u32 i; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_pdu *pdu = NULL; + + if (!cmd->pdu_list) { + pr_err("struct iscsi_cmd->pdu_list is NULL!\n"); + return NULL; + } + + if (conn->sess->sess_ops->DataSequenceInOrder) { +redo: + pdu = &cmd->pdu_list[cmd->pdu_start]; + + for (i = 0; pdu[i].seq_no != cmd->seq_no; i++) { +#if 0 + pr_debug("pdu[i].seq_no: %d, pdu[i].pdu" + "_send_order: %d, pdu[i].offset: %d," + " pdu[i].length: %d\n", pdu[i].seq_no, + pdu[i].pdu_send_order, pdu[i].offset, + pdu[i].length); +#endif + if (pdu[i].pdu_send_order == cmd->pdu_send_order) { + cmd->pdu_send_order++; + return &pdu[i]; + } + } + + cmd->pdu_start += cmd->pdu_send_order; + cmd->pdu_send_order = 0; + cmd->seq_no++; + + if (cmd->pdu_start < cmd->pdu_count) + goto redo; + + pr_err("Command ITT: 0x%08x unable to locate" + " struct iscsi_pdu for cmd->pdu_send_order: %u.\n", + cmd->init_task_tag, cmd->pdu_send_order); + return NULL; + } else { + if (!seq) { + pr_err("struct iscsi_seq is NULL!\n"); + return NULL; + } +#if 0 + pr_debug("seq->pdu_start: %d, seq->pdu_count: %d," + " seq->seq_no: %d\n", seq->pdu_start, seq->pdu_count, + seq->seq_no); +#endif + pdu = &cmd->pdu_list[seq->pdu_start]; + + if (seq->pdu_send_order == seq->pdu_count) { + pr_err("Command ITT: 0x%08x seq->pdu_send" + "_order: %u equals seq->pdu_count: %u\n", + cmd->init_task_tag, seq->pdu_send_order, + seq->pdu_count); + return NULL; + } + + for (i = 0; i < seq->pdu_count; i++) { + if (pdu[i].pdu_send_order == seq->pdu_send_order) { + seq->pdu_send_order++; + return &pdu[i]; + } + } + + pr_err("Command ITT: 0x%08x unable to 
locate iscsi" + "_pdu_t for seq->pdu_send_order: %u.\n", + cmd->init_task_tag, seq->pdu_send_order); + return NULL; + } + + return NULL; +} + +struct iscsi_seq *iscsit_get_seq_holder( + struct iscsi_cmd *cmd, + u32 offset, + u32 length) +{ + u32 i; + + if (!cmd->seq_list) { + pr_err("struct iscsi_cmd->seq_list is NULL!\n"); + return NULL; + } + + for (i = 0; i < cmd->seq_count; i++) { +#if 0 + pr_debug("seq_list[i].orig_offset: %d, seq_list[i]." + "xfer_len: %d, seq_list[i].seq_no %u\n", + cmd->seq_list[i].orig_offset, cmd->seq_list[i].xfer_len, + cmd->seq_list[i].seq_no); +#endif + if ((cmd->seq_list[i].orig_offset + + cmd->seq_list[i].xfer_len) >= + (offset + length)) + return &cmd->seq_list[i]; + } + + pr_err("Unable to locate Sequence holder for ITT: 0x%08x," + " Offset: %u, Length: %u\n", cmd->init_task_tag, offset, + length); + return NULL; +} diff --git a/drivers/target/iscsi/iscsi_target_seq_pdu_list.h b/drivers/target/iscsi/iscsi_target_seq_pdu_list.h new file mode 100644 index 000000000000..0d52a10e3069 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_seq_pdu_list.h @@ -0,0 +1,86 @@ +#ifndef ISCSI_SEQ_AND_PDU_LIST_H +#define ISCSI_SEQ_AND_PDU_LIST_H + +/* struct iscsi_pdu->status */ +#define DATAOUT_PDU_SENT 1 + +/* struct iscsi_seq->type */ +#define SEQTYPE_IMMEDIATE 1 +#define SEQTYPE_UNSOLICITED 2 +#define SEQTYPE_NORMAL 3 + +/* struct iscsi_seq->status */ +#define DATAOUT_SEQUENCE_GOT_R2T 1 +#define DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY 2 +#define DATAOUT_SEQUENCE_COMPLETE 3 + +/* iscsi_determine_counts_for_list() type */ +#define PDULIST_NORMAL 1 +#define PDULIST_IMMEDIATE 2 +#define PDULIST_UNSOLICITED 3 +#define PDULIST_IMMEDIATE_AND_UNSOLICITED 4 + +/* struct iscsi_pdu->type */ +#define PDUTYPE_IMMEDIATE 1 +#define PDUTYPE_UNSOLICITED 2 +#define PDUTYPE_NORMAL 3 + +/* struct iscsi_pdu->status */ +#define ISCSI_PDU_NOT_RECEIVED 0 +#define ISCSI_PDU_RECEIVED_OK 1 +#define ISCSI_PDU_CRC_FAILED 2 +#define ISCSI_PDU_TIMED_OUT 3 + +/* struct 
iscsi_build_list->randomize */ +#define RANDOM_DATAIN_PDU_OFFSETS 0x01 +#define RANDOM_DATAIN_SEQ_OFFSETS 0x02 +#define RANDOM_DATAOUT_PDU_OFFSETS 0x04 +#define RANDOM_R2T_OFFSETS 0x08 + +/* struct iscsi_build_list->data_direction */ +#define ISCSI_PDU_READ 0x01 +#define ISCSI_PDU_WRITE 0x02 + +struct iscsi_build_list { + int data_direction; + int randomize; + int type; + int immediate_data_length; +}; + +struct iscsi_pdu { + int status; + int type; + u8 flags; + u32 data_sn; + u32 length; + u32 offset; + u32 pdu_send_order; + u32 seq_no; +} ____cacheline_aligned; + +struct iscsi_seq { + int sent; + int status; + int type; + u32 data_sn; + u32 first_datasn; + u32 last_datasn; + u32 next_burst_len; + u32 pdu_start; + u32 pdu_count; + u32 offset; + u32 orig_offset; + u32 pdu_send_order; + u32 r2t_sn; + u32 seq_send_order; + u32 seq_no; + u32 xfer_len; +} ____cacheline_aligned; + +extern int iscsit_do_build_list(struct iscsi_cmd *, struct iscsi_build_list *); +extern struct iscsi_pdu *iscsit_get_pdu_holder(struct iscsi_cmd *, u32, u32); +extern struct iscsi_pdu *iscsit_get_pdu_holder_for_seq(struct iscsi_cmd *, struct iscsi_seq *); +extern struct iscsi_seq *iscsit_get_seq_holder(struct iscsi_cmd *, u32, u32); + +#endif /* ISCSI_SEQ_AND_PDU_LIST_H */ diff --git a/drivers/target/iscsi/iscsi_target_stat.c b/drivers/target/iscsi/iscsi_target_stat.c new file mode 100644 index 000000000000..bbdbe9301b27 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_stat.c @@ -0,0 +1,950 @@ +/******************************************************************************* + * Modern ConfigFS group context specific iSCSI statistics based on original + * iscsi_target_mib.c code + * + * Copyright (c) 2011 Rising Tide Systems + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. 
Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + ******************************************************************************/ + +#include <linux/configfs.h> +#include <scsi/iscsi_proto.h> +#include <target/target_core_base.h> +#include <target/target_core_transport.h> +#include <target/configfs_macros.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_parameters.h" +#include "iscsi_target_device.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_util.h" +#include "iscsi_target_stat.h" + +#ifndef INITIAL_JIFFIES +#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) +#endif + +/* Instance Attributes Table */ +#define ISCSI_INST_NUM_NODES 1 +#define ISCSI_INST_DESCR "Storage Engine Target" +#define ISCSI_INST_LAST_FAILURE_TYPE 0 +#define ISCSI_DISCONTINUITY_TIME 0 + +#define ISCSI_NODE_INDEX 1 + +#define ISPRINT(a) ((a >= ' ') && (a <= '~')) + +/**************************************************************************** + * iSCSI MIB Tables + ****************************************************************************/ +/* + * Instance Attributes Table + */ +CONFIGFS_EATTR_STRUCT(iscsi_stat_instance, iscsi_wwn_stat_grps); +#define ISCSI_STAT_INSTANCE_ATTR(_name, _mode) \ +static struct iscsi_stat_instance_attribute \ + iscsi_stat_instance_##_name = \ + __CONFIGFS_EATTR(_name, _mode, \ + iscsi_stat_instance_show_attr_##_name, \ + iscsi_stat_instance_store_attr_##_name); + +#define ISCSI_STAT_INSTANCE_ATTR_RO(_name) \ +static struct 
iscsi_stat_instance_attribute \ + iscsi_stat_instance_##_name = \ + __CONFIGFS_EATTR_RO(_name, \ + iscsi_stat_instance_show_attr_##_name); + +static ssize_t iscsi_stat_instance_show_attr_inst( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + + return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index); +} +ISCSI_STAT_INSTANCE_ATTR_RO(inst); + +static ssize_t iscsi_stat_instance_show_attr_min_ver( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_DRAFT20_VERSION); +} +ISCSI_STAT_INSTANCE_ATTR_RO(min_ver); + +static ssize_t iscsi_stat_instance_show_attr_max_ver( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_DRAFT20_VERSION); +} +ISCSI_STAT_INSTANCE_ATTR_RO(max_ver); + +static ssize_t iscsi_stat_instance_show_attr_portals( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + + return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_num_tpg_nps); +} +ISCSI_STAT_INSTANCE_ATTR_RO(portals); + +static ssize_t iscsi_stat_instance_show_attr_nodes( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_INST_NUM_NODES); +} +ISCSI_STAT_INSTANCE_ATTR_RO(nodes); + +static ssize_t iscsi_stat_instance_show_attr_sessions( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + + return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_nsessions); +} +ISCSI_STAT_INSTANCE_ATTR_RO(sessions); + +static ssize_t iscsi_stat_instance_show_attr_fail_sess( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats; + u32 sess_err_count; + + 
spin_lock_bh(&sess_err->lock); + sess_err_count = (sess_err->digest_errors + + sess_err->cxn_timeout_errors + + sess_err->pdu_format_errors); + spin_unlock_bh(&sess_err->lock); + + return snprintf(page, PAGE_SIZE, "%u\n", sess_err_count); +} +ISCSI_STAT_INSTANCE_ATTR_RO(fail_sess); + +static ssize_t iscsi_stat_instance_show_attr_fail_type( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats; + + return snprintf(page, PAGE_SIZE, "%u\n", + sess_err->last_sess_failure_type); +} +ISCSI_STAT_INSTANCE_ATTR_RO(fail_type); + +static ssize_t iscsi_stat_instance_show_attr_fail_rem_name( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats; + + return snprintf(page, PAGE_SIZE, "%s\n", + sess_err->last_sess_fail_rem_name[0] ? 
+ sess_err->last_sess_fail_rem_name : NONE); +} +ISCSI_STAT_INSTANCE_ATTR_RO(fail_rem_name); + +static ssize_t iscsi_stat_instance_show_attr_disc_time( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_DISCONTINUITY_TIME); +} +ISCSI_STAT_INSTANCE_ATTR_RO(disc_time); + +static ssize_t iscsi_stat_instance_show_attr_description( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%s\n", ISCSI_INST_DESCR); +} +ISCSI_STAT_INSTANCE_ATTR_RO(description); + +static ssize_t iscsi_stat_instance_show_attr_vendor( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "RisingTide Systems iSCSI-Target\n"); +} +ISCSI_STAT_INSTANCE_ATTR_RO(vendor); + +static ssize_t iscsi_stat_instance_show_attr_version( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%s\n", ISCSIT_VERSION); +} +ISCSI_STAT_INSTANCE_ATTR_RO(version); + +CONFIGFS_EATTR_OPS(iscsi_stat_instance, iscsi_wwn_stat_grps, + iscsi_instance_group); + +static struct configfs_attribute *iscsi_stat_instance_attrs[] = { + &iscsi_stat_instance_inst.attr, + &iscsi_stat_instance_min_ver.attr, + &iscsi_stat_instance_max_ver.attr, + &iscsi_stat_instance_portals.attr, + &iscsi_stat_instance_nodes.attr, + &iscsi_stat_instance_sessions.attr, + &iscsi_stat_instance_fail_sess.attr, + &iscsi_stat_instance_fail_type.attr, + &iscsi_stat_instance_fail_rem_name.attr, + &iscsi_stat_instance_disc_time.attr, + &iscsi_stat_instance_description.attr, + &iscsi_stat_instance_vendor.attr, + &iscsi_stat_instance_version.attr, + NULL, +}; + +static struct configfs_item_operations iscsi_stat_instance_item_ops = { + .show_attribute = iscsi_stat_instance_attr_show, + .store_attribute = iscsi_stat_instance_attr_store, +}; + +struct config_item_type iscsi_stat_instance_cit = { + .ct_item_ops = &iscsi_stat_instance_item_ops, + .ct_attrs = iscsi_stat_instance_attrs, + .ct_owner = THIS_MODULE, +}; + +/* + 
* Instance Session Failure Stats Table + */ +CONFIGFS_EATTR_STRUCT(iscsi_stat_sess_err, iscsi_wwn_stat_grps); +#define ISCSI_STAT_SESS_ERR_ATTR(_name, _mode) \ +static struct iscsi_stat_sess_err_attribute \ + iscsi_stat_sess_err_##_name = \ + __CONFIGFS_EATTR(_name, _mode, \ + iscsi_stat_sess_err_show_attr_##_name, \ + iscsi_stat_sess_err_store_attr_##_name); + +#define ISCSI_STAT_SESS_ERR_ATTR_RO(_name) \ +static struct iscsi_stat_sess_err_attribute \ + iscsi_stat_sess_err_##_name = \ + __CONFIGFS_EATTR_RO(_name, \ + iscsi_stat_sess_err_show_attr_##_name); + +static ssize_t iscsi_stat_sess_err_show_attr_inst( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + + return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index); +} +ISCSI_STAT_SESS_ERR_ATTR_RO(inst); + +static ssize_t iscsi_stat_sess_err_show_attr_digest_errors( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats; + + return snprintf(page, PAGE_SIZE, "%u\n", sess_err->digest_errors); +} +ISCSI_STAT_SESS_ERR_ATTR_RO(digest_errors); + +static ssize_t iscsi_stat_sess_err_show_attr_cxn_errors( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats; + + return snprintf(page, PAGE_SIZE, "%u\n", sess_err->cxn_timeout_errors); +} +ISCSI_STAT_SESS_ERR_ATTR_RO(cxn_errors); + +static ssize_t iscsi_stat_sess_err_show_attr_format_errors( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats; + + return snprintf(page, PAGE_SIZE, "%u\n", sess_err->pdu_format_errors); +} 
+ISCSI_STAT_SESS_ERR_ATTR_RO(format_errors); + +CONFIGFS_EATTR_OPS(iscsi_stat_sess_err, iscsi_wwn_stat_grps, + iscsi_sess_err_group); + +static struct configfs_attribute *iscsi_stat_sess_err_attrs[] = { + &iscsi_stat_sess_err_inst.attr, + &iscsi_stat_sess_err_digest_errors.attr, + &iscsi_stat_sess_err_cxn_errors.attr, + &iscsi_stat_sess_err_format_errors.attr, + NULL, +}; + +static struct configfs_item_operations iscsi_stat_sess_err_item_ops = { + .show_attribute = iscsi_stat_sess_err_attr_show, + .store_attribute = iscsi_stat_sess_err_attr_store, +}; + +struct config_item_type iscsi_stat_sess_err_cit = { + .ct_item_ops = &iscsi_stat_sess_err_item_ops, + .ct_attrs = iscsi_stat_sess_err_attrs, + .ct_owner = THIS_MODULE, +}; + +/* + * Target Attributes Table + */ +CONFIGFS_EATTR_STRUCT(iscsi_stat_tgt_attr, iscsi_wwn_stat_grps); +#define ISCSI_STAT_TGT_ATTR(_name, _mode) \ +static struct iscsi_stat_tgt_attr_attribute \ + iscsi_stat_tgt_attr_##_name = \ + __CONFIGFS_EATTR(_name, _mode, \ + iscsi_stat_tgt-attr_show_attr_##_name, \ + iscsi_stat_tgt_attr_store_attr_##_name); + +#define ISCSI_STAT_TGT_ATTR_RO(_name) \ +static struct iscsi_stat_tgt_attr_attribute \ + iscsi_stat_tgt_attr_##_name = \ + __CONFIGFS_EATTR_RO(_name, \ + iscsi_stat_tgt_attr_show_attr_##_name); + +static ssize_t iscsi_stat_tgt_attr_show_attr_inst( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + + return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index); +} +ISCSI_STAT_TGT_ATTR_RO(inst); + +static ssize_t iscsi_stat_tgt_attr_show_attr_indx( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_NODE_INDEX); +} +ISCSI_STAT_TGT_ATTR_RO(indx); + +static ssize_t iscsi_stat_tgt_attr_show_attr_login_fails( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats 
*lstat = &tiqn->login_stats; + u32 fail_count; + + spin_lock(&lstat->lock); + fail_count = (lstat->redirects + lstat->authorize_fails + + lstat->authenticate_fails + lstat->negotiate_fails + + lstat->other_fails); + spin_unlock(&lstat->lock); + + return snprintf(page, PAGE_SIZE, "%u\n", fail_count); +} +ISCSI_STAT_TGT_ATTR_RO(login_fails); + +static ssize_t iscsi_stat_tgt_attr_show_attr_last_fail_time( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + u32 last_fail_time; + + spin_lock(&lstat->lock); + last_fail_time = lstat->last_fail_time ? + (u32)(((u32)lstat->last_fail_time - + INITIAL_JIFFIES) * 100 / HZ) : 0; + spin_unlock(&lstat->lock); + + return snprintf(page, PAGE_SIZE, "%u\n", last_fail_time); +} +ISCSI_STAT_TGT_ATTR_RO(last_fail_time); + +static ssize_t iscsi_stat_tgt_attr_show_attr_last_fail_type( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + u32 last_fail_type; + + spin_lock(&lstat->lock); + last_fail_type = lstat->last_fail_type; + spin_unlock(&lstat->lock); + + return snprintf(page, PAGE_SIZE, "%u\n", last_fail_type); +} +ISCSI_STAT_TGT_ATTR_RO(last_fail_type); + +static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_name( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + unsigned char buf[224]; + + spin_lock(&lstat->lock); + snprintf(buf, 224, "%s", lstat->last_intr_fail_name[0] ? 
+ lstat->last_intr_fail_name : NONE); + spin_unlock(&lstat->lock); + + return snprintf(page, PAGE_SIZE, "%s\n", buf); +} +ISCSI_STAT_TGT_ATTR_RO(fail_intr_name); + +static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_addr_type( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + unsigned char buf[8]; + + spin_lock(&lstat->lock); + snprintf(buf, 8, "%s", (lstat->last_intr_fail_ip_addr != NULL) ? + "ipv6" : "ipv4"); + spin_unlock(&lstat->lock); + + return snprintf(page, PAGE_SIZE, "%s\n", buf); +} +ISCSI_STAT_TGT_ATTR_RO(fail_intr_addr_type); + +static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_addr( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + unsigned char buf[32]; + + spin_lock(&lstat->lock); + if (lstat->last_intr_fail_ip_family == AF_INET6) + snprintf(buf, 32, "[%s]", lstat->last_intr_fail_ip_addr); + else + snprintf(buf, 32, "%s", lstat->last_intr_fail_ip_addr); + spin_unlock(&lstat->lock); + + return snprintf(page, PAGE_SIZE, "%s\n", buf); +} +ISCSI_STAT_TGT_ATTR_RO(fail_intr_addr); + +CONFIGFS_EATTR_OPS(iscsi_stat_tgt_attr, iscsi_wwn_stat_grps, + iscsi_tgt_attr_group); + +static struct configfs_attribute *iscsi_stat_tgt_attr_attrs[] = { + &iscsi_stat_tgt_attr_inst.attr, + &iscsi_stat_tgt_attr_indx.attr, + &iscsi_stat_tgt_attr_login_fails.attr, + &iscsi_stat_tgt_attr_last_fail_time.attr, + &iscsi_stat_tgt_attr_last_fail_type.attr, + &iscsi_stat_tgt_attr_fail_intr_name.attr, + &iscsi_stat_tgt_attr_fail_intr_addr_type.attr, + &iscsi_stat_tgt_attr_fail_intr_addr.attr, + NULL, +}; + +static struct configfs_item_operations iscsi_stat_tgt_attr_item_ops = { + .show_attribute = iscsi_stat_tgt_attr_attr_show, + .store_attribute = iscsi_stat_tgt_attr_attr_store, +}; + 
+struct config_item_type iscsi_stat_tgt_attr_cit = { + .ct_item_ops = &iscsi_stat_tgt_attr_item_ops, + .ct_attrs = iscsi_stat_tgt_attr_attrs, + .ct_owner = THIS_MODULE, +}; + +/* + * Target Login Stats Table + */ +CONFIGFS_EATTR_STRUCT(iscsi_stat_login, iscsi_wwn_stat_grps); +#define ISCSI_STAT_LOGIN(_name, _mode) \ +static struct iscsi_stat_login_attribute \ + iscsi_stat_login_##_name = \ + __CONFIGFS_EATTR(_name, _mode, \ + iscsi_stat_login_show_attr_##_name, \ + iscsi_stat_login_store_attr_##_name); + +#define ISCSI_STAT_LOGIN_RO(_name) \ +static struct iscsi_stat_login_attribute \ + iscsi_stat_login_##_name = \ + __CONFIGFS_EATTR_RO(_name, \ + iscsi_stat_login_show_attr_##_name); + +static ssize_t iscsi_stat_login_show_attr_inst( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + + return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index); +} +ISCSI_STAT_LOGIN_RO(inst); + +static ssize_t iscsi_stat_login_show_attr_indx( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_NODE_INDEX); +} +ISCSI_STAT_LOGIN_RO(indx); + +static ssize_t iscsi_stat_login_show_attr_accepts( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + ssize_t ret; + + spin_lock(&lstat->lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->accepts); + spin_unlock(&lstat->lock); + + return ret; +} +ISCSI_STAT_LOGIN_RO(accepts); + +static ssize_t iscsi_stat_login_show_attr_other_fails( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + ssize_t ret; + + spin_lock(&lstat->lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->other_fails); + spin_unlock(&lstat->lock); + + 
return ret; +} +ISCSI_STAT_LOGIN_RO(other_fails); + +static ssize_t iscsi_stat_login_show_attr_redirects( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + ssize_t ret; + + spin_lock(&lstat->lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->redirects); + spin_unlock(&lstat->lock); + + return ret; +} +ISCSI_STAT_LOGIN_RO(redirects); + +static ssize_t iscsi_stat_login_show_attr_authorize_fails( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + ssize_t ret; + + spin_lock(&lstat->lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->authorize_fails); + spin_unlock(&lstat->lock); + + return ret; +} +ISCSI_STAT_LOGIN_RO(authorize_fails); + +static ssize_t iscsi_stat_login_show_attr_authenticate_fails( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + ssize_t ret; + + spin_lock(&lstat->lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->authenticate_fails); + spin_unlock(&lstat->lock); + + return ret; +} +ISCSI_STAT_LOGIN_RO(authenticate_fails); + +static ssize_t iscsi_stat_login_show_attr_negotiate_fails( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + ssize_t ret; + + spin_lock(&lstat->lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->negotiate_fails); + spin_unlock(&lstat->lock); + + return ret; +} +ISCSI_STAT_LOGIN_RO(negotiate_fails); + +CONFIGFS_EATTR_OPS(iscsi_stat_login, iscsi_wwn_stat_grps, + iscsi_login_stats_group); + +static struct configfs_attribute 
*iscsi_stat_login_stats_attrs[] = { + &iscsi_stat_login_inst.attr, + &iscsi_stat_login_indx.attr, + &iscsi_stat_login_accepts.attr, + &iscsi_stat_login_other_fails.attr, + &iscsi_stat_login_redirects.attr, + &iscsi_stat_login_authorize_fails.attr, + &iscsi_stat_login_authenticate_fails.attr, + &iscsi_stat_login_negotiate_fails.attr, + NULL, +}; + +static struct configfs_item_operations iscsi_stat_login_stats_item_ops = { + .show_attribute = iscsi_stat_login_attr_show, + .store_attribute = iscsi_stat_login_attr_store, +}; + +struct config_item_type iscsi_stat_login_cit = { + .ct_item_ops = &iscsi_stat_login_stats_item_ops, + .ct_attrs = iscsi_stat_login_stats_attrs, + .ct_owner = THIS_MODULE, +}; + +/* + * Target Logout Stats Table + */ + +CONFIGFS_EATTR_STRUCT(iscsi_stat_logout, iscsi_wwn_stat_grps); +#define ISCSI_STAT_LOGOUT(_name, _mode) \ +static struct iscsi_stat_logout_attribute \ + iscsi_stat_logout_##_name = \ + __CONFIGFS_EATTR(_name, _mode, \ + iscsi_stat_logout_show_attr_##_name, \ + iscsi_stat_logout_store_attr_##_name); + +#define ISCSI_STAT_LOGOUT_RO(_name) \ +static struct iscsi_stat_logout_attribute \ + iscsi_stat_logout_##_name = \ + __CONFIGFS_EATTR_RO(_name, \ + iscsi_stat_logout_show_attr_##_name); + +static ssize_t iscsi_stat_logout_show_attr_inst( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + + return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index); +} +ISCSI_STAT_LOGOUT_RO(inst); + +static ssize_t iscsi_stat_logout_show_attr_indx( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_NODE_INDEX); +} +ISCSI_STAT_LOGOUT_RO(indx); + +static ssize_t iscsi_stat_logout_show_attr_normal_logouts( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_logout_stats *lstats = &tiqn->logout_stats; + + return 
snprintf(page, PAGE_SIZE, "%u\n", lstats->normal_logouts); +} +ISCSI_STAT_LOGOUT_RO(normal_logouts); + +static ssize_t iscsi_stat_logout_show_attr_abnormal_logouts( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_logout_stats *lstats = &tiqn->logout_stats; + + return snprintf(page, PAGE_SIZE, "%u\n", lstats->abnormal_logouts); +} +ISCSI_STAT_LOGOUT_RO(abnormal_logouts); + +CONFIGFS_EATTR_OPS(iscsi_stat_logout, iscsi_wwn_stat_grps, + iscsi_logout_stats_group); + +static struct configfs_attribute *iscsi_stat_logout_stats_attrs[] = { + &iscsi_stat_logout_inst.attr, + &iscsi_stat_logout_indx.attr, + &iscsi_stat_logout_normal_logouts.attr, + &iscsi_stat_logout_abnormal_logouts.attr, + NULL, +}; + +static struct configfs_item_operations iscsi_stat_logout_stats_item_ops = { + .show_attribute = iscsi_stat_logout_attr_show, + .store_attribute = iscsi_stat_logout_attr_store, +}; + +struct config_item_type iscsi_stat_logout_cit = { + .ct_item_ops = &iscsi_stat_logout_stats_item_ops, + .ct_attrs = iscsi_stat_logout_stats_attrs, + .ct_owner = THIS_MODULE, +}; + +/* + * Session Stats Table + */ + +CONFIGFS_EATTR_STRUCT(iscsi_stat_sess, iscsi_node_stat_grps); +#define ISCSI_STAT_SESS(_name, _mode) \ +static struct iscsi_stat_sess_attribute \ + iscsi_stat_sess_##_name = \ + __CONFIGFS_EATTR(_name, _mode, \ + iscsi_stat_sess_show_attr_##_name, \ + iscsi_stat_sess_store_attr_##_name); + +#define ISCSI_STAT_SESS_RO(_name) \ +static struct iscsi_stat_sess_attribute \ + iscsi_stat_sess_##_name = \ + __CONFIGFS_EATTR_RO(_name, \ + iscsi_stat_sess_show_attr_##_name); + +static ssize_t iscsi_stat_sess_show_attr_inst( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_wwn *wwn = acl->se_node_acl.se_tpg->se_tpg_wwn; + struct iscsi_tiqn *tiqn = container_of(wwn, + struct iscsi_tiqn, 
tiqn_wwn); + + return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index); +} +ISCSI_STAT_SESS_RO(inst); + +static ssize_t iscsi_stat_sess_show_attr_node( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_node_acl *se_nacl = &acl->se_node_acl; + struct iscsi_session *sess; + struct se_session *se_sess; + ssize_t ret = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (sess) + ret = snprintf(page, PAGE_SIZE, "%u\n", + sess->sess_ops->SessionType ? 0 : ISCSI_NODE_INDEX); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return ret; +} +ISCSI_STAT_SESS_RO(node); + +static ssize_t iscsi_stat_sess_show_attr_indx( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_node_acl *se_nacl = &acl->se_node_acl; + struct iscsi_session *sess; + struct se_session *se_sess; + ssize_t ret = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (sess) + ret = snprintf(page, PAGE_SIZE, "%u\n", + sess->session_index); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return ret; +} +ISCSI_STAT_SESS_RO(indx); + +static ssize_t iscsi_stat_sess_show_attr_cmd_pdus( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_node_acl *se_nacl = &acl->se_node_acl; + struct iscsi_session *sess; + struct se_session *se_sess; + ssize_t ret = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (sess) + ret = snprintf(page, PAGE_SIZE, "%u\n", sess->cmd_pdus); + } + 
spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return ret; +} +ISCSI_STAT_SESS_RO(cmd_pdus); + +static ssize_t iscsi_stat_sess_show_attr_rsp_pdus( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_node_acl *se_nacl = &acl->se_node_acl; + struct iscsi_session *sess; + struct se_session *se_sess; + ssize_t ret = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (sess) + ret = snprintf(page, PAGE_SIZE, "%u\n", sess->rsp_pdus); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return ret; +} +ISCSI_STAT_SESS_RO(rsp_pdus); + +static ssize_t iscsi_stat_sess_show_attr_txdata_octs( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_node_acl *se_nacl = &acl->se_node_acl; + struct iscsi_session *sess; + struct se_session *se_sess; + ssize_t ret = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (sess) + ret = snprintf(page, PAGE_SIZE, "%llu\n", + (unsigned long long)sess->tx_data_octets); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return ret; +} +ISCSI_STAT_SESS_RO(txdata_octs); + +static ssize_t iscsi_stat_sess_show_attr_rxdata_octs( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_node_acl *se_nacl = &acl->se_node_acl; + struct iscsi_session *sess; + struct se_session *se_sess; + ssize_t ret = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (sess) + ret = snprintf(page, PAGE_SIZE, "%llu\n", + (unsigned long 
long)sess->rx_data_octets); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return ret; +} +ISCSI_STAT_SESS_RO(rxdata_octs); + +static ssize_t iscsi_stat_sess_show_attr_conn_digest_errors( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_node_acl *se_nacl = &acl->se_node_acl; + struct iscsi_session *sess; + struct se_session *se_sess; + ssize_t ret = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (sess) + ret = snprintf(page, PAGE_SIZE, "%u\n", + sess->conn_digest_errors); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return ret; +} +ISCSI_STAT_SESS_RO(conn_digest_errors); + +static ssize_t iscsi_stat_sess_show_attr_conn_timeout_errors( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_node_acl *se_nacl = &acl->se_node_acl; + struct iscsi_session *sess; + struct se_session *se_sess; + ssize_t ret = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (sess) + ret = snprintf(page, PAGE_SIZE, "%u\n", + sess->conn_timeout_errors); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return ret; +} +ISCSI_STAT_SESS_RO(conn_timeout_errors); + +CONFIGFS_EATTR_OPS(iscsi_stat_sess, iscsi_node_stat_grps, + iscsi_sess_stats_group); + +static struct configfs_attribute *iscsi_stat_sess_stats_attrs[] = { + &iscsi_stat_sess_inst.attr, + &iscsi_stat_sess_node.attr, + &iscsi_stat_sess_indx.attr, + &iscsi_stat_sess_cmd_pdus.attr, + &iscsi_stat_sess_rsp_pdus.attr, + &iscsi_stat_sess_txdata_octs.attr, + &iscsi_stat_sess_rxdata_octs.attr, + &iscsi_stat_sess_conn_digest_errors.attr, + &iscsi_stat_sess_conn_timeout_errors.attr, + NULL, +}; + +static 
struct configfs_item_operations iscsi_stat_sess_stats_item_ops = { + .show_attribute = iscsi_stat_sess_attr_show, + .store_attribute = iscsi_stat_sess_attr_store, +}; + +struct config_item_type iscsi_stat_sess_cit = { + .ct_item_ops = &iscsi_stat_sess_stats_item_ops, + .ct_attrs = iscsi_stat_sess_stats_attrs, + .ct_owner = THIS_MODULE, +}; diff --git a/drivers/target/iscsi/iscsi_target_stat.h b/drivers/target/iscsi/iscsi_target_stat.h new file mode 100644 index 000000000000..3ff76b4faad3 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_stat.h @@ -0,0 +1,64 @@ +#ifndef ISCSI_TARGET_STAT_H +#define ISCSI_TARGET_STAT_H + +/* + * For struct iscsi_tiqn->tiqn_wwn default groups + */ +extern struct config_item_type iscsi_stat_instance_cit; +extern struct config_item_type iscsi_stat_sess_err_cit; +extern struct config_item_type iscsi_stat_tgt_attr_cit; +extern struct config_item_type iscsi_stat_login_cit; +extern struct config_item_type iscsi_stat_logout_cit; + +/* + * For struct iscsi_session->se_sess default groups + */ +extern struct config_item_type iscsi_stat_sess_cit; + +/* iSCSI session error types */ +#define ISCSI_SESS_ERR_UNKNOWN 0 +#define ISCSI_SESS_ERR_DIGEST 1 +#define ISCSI_SESS_ERR_CXN_TIMEOUT 2 +#define ISCSI_SESS_ERR_PDU_FORMAT 3 + +/* iSCSI session error stats */ +struct iscsi_sess_err_stats { + spinlock_t lock; + u32 digest_errors; + u32 cxn_timeout_errors; + u32 pdu_format_errors; + u32 last_sess_failure_type; + char last_sess_fail_rem_name[224]; +} ____cacheline_aligned; + +/* iSCSI login failure types (sub oids) */ +#define ISCSI_LOGIN_FAIL_OTHER 2 +#define ISCSI_LOGIN_FAIL_REDIRECT 3 +#define ISCSI_LOGIN_FAIL_AUTHORIZE 4 +#define ISCSI_LOGIN_FAIL_AUTHENTICATE 5 +#define ISCSI_LOGIN_FAIL_NEGOTIATE 6 + +/* iSCSI login stats */ +struct iscsi_login_stats { + spinlock_t lock; + u32 accepts; + u32 other_fails; + u32 redirects; + u32 authorize_fails; + u32 authenticate_fails; + u32 negotiate_fails; /* used for notifications */ + u64 last_fail_time; /* 
time stamp (jiffies) */ + u32 last_fail_type; + int last_intr_fail_ip_family; + unsigned char last_intr_fail_ip_addr[IPV6_ADDRESS_SPACE]; + char last_intr_fail_name[224]; +} ____cacheline_aligned; + +/* iSCSI logout stats */ +struct iscsi_logout_stats { + spinlock_t lock; + u32 normal_logouts; + u32 abnormal_logouts; +} ____cacheline_aligned; + +#endif /*** ISCSI_TARGET_STAT_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c new file mode 100644 index 000000000000..db1fe1ec84df --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_tmr.c @@ -0,0 +1,849 @@ +/******************************************************************************* + * This file contains the iSCSI Target specific Task Management functions. + * + * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ ******************************************************************************/ + +#include <asm/unaligned.h> +#include <scsi/iscsi_proto.h> +#include <target/target_core_base.h> +#include <target/target_core_transport.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_seq_pdu_list.h" +#include "iscsi_target_datain_values.h" +#include "iscsi_target_device.h" +#include "iscsi_target_erl0.h" +#include "iscsi_target_erl1.h" +#include "iscsi_target_erl2.h" +#include "iscsi_target_tmr.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_util.h" +#include "iscsi_target.h" + +u8 iscsit_tmr_abort_task( + struct iscsi_cmd *cmd, + unsigned char *buf) +{ + struct iscsi_cmd *ref_cmd; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_tmr_req *tmr_req = cmd->tmr_req; + struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req; + struct iscsi_tm *hdr = (struct iscsi_tm *) buf; + + ref_cmd = iscsit_find_cmd_from_itt(conn, hdr->rtt); + if (!ref_cmd) { + pr_err("Unable to locate RefTaskTag: 0x%08x on CID:" + " %hu.\n", hdr->rtt, conn->cid); + return ((hdr->refcmdsn >= conn->sess->exp_cmd_sn) && + (hdr->refcmdsn <= conn->sess->max_cmd_sn)) ? + ISCSI_TMF_RSP_COMPLETE : ISCSI_TMF_RSP_NO_TASK; + } + if (ref_cmd->cmd_sn != hdr->refcmdsn) { + pr_err("RefCmdSN 0x%08x does not equal" + " task's CmdSN 0x%08x. Rejecting ABORT_TASK.\n", + hdr->refcmdsn, ref_cmd->cmd_sn); + return ISCSI_TMF_RSP_REJECTED; + } + + se_tmr->ref_task_tag = hdr->rtt; + se_tmr->ref_cmd = &ref_cmd->se_cmd; + tmr_req->ref_cmd_sn = hdr->refcmdsn; + tmr_req->exp_data_sn = hdr->exp_datasn; + + return ISCSI_TMF_RSP_COMPLETE; +} + +/* + * Called from iscsit_handle_task_mgt_cmd(). 
+ */ +int iscsit_tmr_task_warm_reset( + struct iscsi_conn *conn, + struct iscsi_tmr_req *tmr_req, + unsigned char *buf) +{ + struct iscsi_session *sess = conn->sess; + struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess); +#if 0 + struct iscsi_init_task_mgt_cmnd *hdr = + (struct iscsi_init_task_mgt_cmnd *) buf; +#endif + if (!na->tmr_warm_reset) { + pr_err("TMR Opcode TARGET_WARM_RESET authorization" + " failed for Initiator Node: %s\n", + sess->se_sess->se_node_acl->initiatorname); + return -1; + } + /* + * Do the real work in transport_generic_do_tmr(). + */ + return 0; +} + +int iscsit_tmr_task_cold_reset( + struct iscsi_conn *conn, + struct iscsi_tmr_req *tmr_req, + unsigned char *buf) +{ + struct iscsi_session *sess = conn->sess; + struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess); + + if (!na->tmr_cold_reset) { + pr_err("TMR Opcode TARGET_COLD_RESET authorization" + " failed for Initiator Node: %s\n", + sess->se_sess->se_node_acl->initiatorname); + return -1; + } + /* + * Do the real work in transport_generic_do_tmr(). 
+ */ + return 0; +} + +u8 iscsit_tmr_task_reassign( + struct iscsi_cmd *cmd, + unsigned char *buf) +{ + struct iscsi_cmd *ref_cmd = NULL; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_conn_recovery *cr = NULL; + struct iscsi_tmr_req *tmr_req = cmd->tmr_req; + struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req; + struct iscsi_tm *hdr = (struct iscsi_tm *) buf; + int ret; + + pr_debug("Got TASK_REASSIGN TMR ITT: 0x%08x," + " RefTaskTag: 0x%08x, ExpDataSN: 0x%08x, CID: %hu\n", + hdr->itt, hdr->rtt, hdr->exp_datasn, conn->cid); + + if (conn->sess->sess_ops->ErrorRecoveryLevel != 2) { + pr_err("TMR TASK_REASSIGN not supported in ERL<2," + " ignoring request.\n"); + return ISCSI_TMF_RSP_NOT_SUPPORTED; + } + + ret = iscsit_find_cmd_for_recovery(conn->sess, &ref_cmd, &cr, hdr->rtt); + if (ret == -2) { + pr_err("Command ITT: 0x%08x is still alligent to CID:" + " %hu\n", ref_cmd->init_task_tag, cr->cid); + return ISCSI_TMF_RSP_TASK_ALLEGIANT; + } else if (ret == -1) { + pr_err("Unable to locate RefTaskTag: 0x%08x in" + " connection recovery command list.\n", hdr->rtt); + return ISCSI_TMF_RSP_NO_TASK; + } + /* + * Temporary check to prevent connection recovery for + * connections with a differing MaxRecvDataSegmentLength. + */ + if (cr->maxrecvdatasegmentlength != + conn->conn_ops->MaxRecvDataSegmentLength) { + pr_err("Unable to perform connection recovery for" + " differing MaxRecvDataSegmentLength, rejecting" + " TMR TASK_REASSIGN.\n"); + return ISCSI_TMF_RSP_REJECTED; + } + + se_tmr->ref_task_tag = hdr->rtt; + se_tmr->ref_cmd = &ref_cmd->se_cmd; + se_tmr->ref_task_lun = get_unaligned_le64(&hdr->lun); + tmr_req->ref_cmd_sn = hdr->refcmdsn; + tmr_req->exp_data_sn = hdr->exp_datasn; + tmr_req->conn_recovery = cr; + tmr_req->task_reassign = 1; + /* + * Command can now be reassigned to a new connection. + * The task management response must be sent before the + * reassignment actually happens. See iscsi_tmr_post_handler(). 
+ */ + return ISCSI_TMF_RSP_COMPLETE; +} + +static void iscsit_task_reassign_remove_cmd( + struct iscsi_cmd *cmd, + struct iscsi_conn_recovery *cr, + struct iscsi_session *sess) +{ + int ret; + + spin_lock(&cr->conn_recovery_cmd_lock); + ret = iscsit_remove_cmd_from_connection_recovery(cmd, sess); + spin_unlock(&cr->conn_recovery_cmd_lock); + if (!ret) { + pr_debug("iSCSI connection recovery successful for CID:" + " %hu on SID: %u\n", cr->cid, sess->sid); + iscsit_remove_active_connection_recovery_entry(cr, sess); + } +} + +static int iscsit_task_reassign_complete_nop_out( + struct iscsi_tmr_req *tmr_req, + struct iscsi_conn *conn) +{ + struct se_tmr_req *se_tmr = tmr_req->se_tmr_req; + struct se_cmd *se_cmd = se_tmr->ref_cmd; + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + struct iscsi_conn_recovery *cr; + + if (!cmd->cr) { + pr_err("struct iscsi_conn_recovery pointer for ITT: 0x%08x" + " is NULL!\n", cmd->init_task_tag); + return -1; + } + cr = cmd->cr; + + /* + * Reset the StatSN so a new one for this commands new connection + * will be assigned. + * Reset the ExpStatSN as well so we may receive Status SNACKs. + */ + cmd->stat_sn = cmd->exp_stat_sn = 0; + + iscsit_task_reassign_remove_cmd(cmd, cr, conn->sess); + + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + + cmd->i_state = ISTATE_SEND_NOPIN; + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + return 0; +} + +static int iscsit_task_reassign_complete_write( + struct iscsi_cmd *cmd, + struct iscsi_tmr_req *tmr_req) +{ + int no_build_r2ts = 0; + u32 length = 0, offset = 0; + struct iscsi_conn *conn = cmd->conn; + struct se_cmd *se_cmd = &cmd->se_cmd; + /* + * The Initiator must not send a R2T SNACK with a Begrun less than + * the TMR TASK_REASSIGN's ExpDataSN. 
+ */ + if (!tmr_req->exp_data_sn) { + cmd->cmd_flags &= ~ICF_GOT_DATACK_SNACK; + cmd->acked_data_sn = 0; + } else { + cmd->cmd_flags |= ICF_GOT_DATACK_SNACK; + cmd->acked_data_sn = (tmr_req->exp_data_sn - 1); + } + + /* + * The TMR TASK_REASSIGN's ExpDataSN contains the next R2TSN the + * Initiator is expecting. The Target controls all WRITE operations + * so if we have received all DataOUT we can safety ignore Initiator. + */ + if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) { + if (!atomic_read(&cmd->transport_sent)) { + pr_debug("WRITE ITT: 0x%08x: t_state: %d" + " never sent to transport\n", + cmd->init_task_tag, cmd->se_cmd.t_state); + return transport_generic_handle_data(se_cmd); + } + + cmd->i_state = ISTATE_SEND_STATUS; + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + return 0; + } + + /* + * Special case to deal with DataSequenceInOrder=No and Non-Immeidate + * Unsolicited DataOut. + */ + if (cmd->unsolicited_data) { + cmd->unsolicited_data = 0; + + offset = cmd->next_burst_len = cmd->write_data_done; + + if ((conn->sess->sess_ops->FirstBurstLength - offset) >= + cmd->data_length) { + no_build_r2ts = 1; + length = (cmd->data_length - offset); + } else + length = (conn->sess->sess_ops->FirstBurstLength - offset); + + spin_lock_bh(&cmd->r2t_lock); + if (iscsit_add_r2t_to_list(cmd, offset, length, 0, 0) < 0) { + spin_unlock_bh(&cmd->r2t_lock); + return -1; + } + cmd->outstanding_r2ts++; + spin_unlock_bh(&cmd->r2t_lock); + + if (no_build_r2ts) + return 0; + } + /* + * iscsit_build_r2ts_for_cmd() can handle the rest from here. + */ + return iscsit_build_r2ts_for_cmd(cmd, conn, 2); +} + +static int iscsit_task_reassign_complete_read( + struct iscsi_cmd *cmd, + struct iscsi_tmr_req *tmr_req) +{ + struct iscsi_conn *conn = cmd->conn; + struct iscsi_datain_req *dr; + struct se_cmd *se_cmd = &cmd->se_cmd; + /* + * The Initiator must not send a Data SNACK with a BegRun less than + * the TMR TASK_REASSIGN's ExpDataSN. 
+ */ + if (!tmr_req->exp_data_sn) { + cmd->cmd_flags &= ~ICF_GOT_DATACK_SNACK; + cmd->acked_data_sn = 0; + } else { + cmd->cmd_flags |= ICF_GOT_DATACK_SNACK; + cmd->acked_data_sn = (tmr_req->exp_data_sn - 1); + } + + if (!atomic_read(&cmd->transport_sent)) { + pr_debug("READ ITT: 0x%08x: t_state: %d never sent to" + " transport\n", cmd->init_task_tag, + cmd->se_cmd.t_state); + transport_generic_handle_cdb(se_cmd); + return 0; + } + + if (!atomic_read(&se_cmd->t_transport_complete)) { + pr_err("READ ITT: 0x%08x: t_state: %d, never returned" + " from transport\n", cmd->init_task_tag, + cmd->se_cmd.t_state); + return -1; + } + + dr = iscsit_allocate_datain_req(); + if (!dr) + return -1; + /* + * The TMR TASK_REASSIGN's ExpDataSN contains the next DataSN the + * Initiator is expecting. + */ + dr->data_sn = dr->begrun = tmr_req->exp_data_sn; + dr->runlength = 0; + dr->generate_recovery_values = 1; + dr->recovery = DATAIN_CONNECTION_RECOVERY; + + iscsit_attach_datain_req(cmd, dr); + + cmd->i_state = ISTATE_SEND_DATAIN; + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + return 0; +} + +static int iscsit_task_reassign_complete_none( + struct iscsi_cmd *cmd, + struct iscsi_tmr_req *tmr_req) +{ + struct iscsi_conn *conn = cmd->conn; + + cmd->i_state = ISTATE_SEND_STATUS; + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + return 0; +} + +static int iscsit_task_reassign_complete_scsi_cmnd( + struct iscsi_tmr_req *tmr_req, + struct iscsi_conn *conn) +{ + struct se_tmr_req *se_tmr = tmr_req->se_tmr_req; + struct se_cmd *se_cmd = se_tmr->ref_cmd; + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + struct iscsi_conn_recovery *cr; + + if (!cmd->cr) { + pr_err("struct iscsi_conn_recovery pointer for ITT: 0x%08x" + " is NULL!\n", cmd->init_task_tag); + return -1; + } + cr = cmd->cr; + + /* + * Reset the StatSN so a new one for this commands new connection + * will be assigned. 
+ * Reset the ExpStatSN as well so we may receive Status SNACKs. + */ + cmd->stat_sn = cmd->exp_stat_sn = 0; + + iscsit_task_reassign_remove_cmd(cmd, cr, conn->sess); + + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + + if (se_cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION) { + cmd->i_state = ISTATE_SEND_STATUS; + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + return 0; + } + + switch (cmd->data_direction) { + case DMA_TO_DEVICE: + return iscsit_task_reassign_complete_write(cmd, tmr_req); + case DMA_FROM_DEVICE: + return iscsit_task_reassign_complete_read(cmd, tmr_req); + case DMA_NONE: + return iscsit_task_reassign_complete_none(cmd, tmr_req); + default: + pr_err("Unknown cmd->data_direction: 0x%02x\n", + cmd->data_direction); + return -1; + } + + return 0; +} + +static int iscsit_task_reassign_complete( + struct iscsi_tmr_req *tmr_req, + struct iscsi_conn *conn) +{ + struct se_tmr_req *se_tmr = tmr_req->se_tmr_req; + struct se_cmd *se_cmd; + struct iscsi_cmd *cmd; + int ret = 0; + + if (!se_tmr->ref_cmd) { + pr_err("TMR Request is missing a RefCmd struct iscsi_cmd.\n"); + return -1; + } + se_cmd = se_tmr->ref_cmd; + cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + cmd->conn = conn; + + switch (cmd->iscsi_opcode) { + case ISCSI_OP_NOOP_OUT: + ret = iscsit_task_reassign_complete_nop_out(tmr_req, conn); + break; + case ISCSI_OP_SCSI_CMD: + ret = iscsit_task_reassign_complete_scsi_cmnd(tmr_req, conn); + break; + default: + pr_err("Illegal iSCSI Opcode 0x%02x during" + " command realligence\n", cmd->iscsi_opcode); + return -1; + } + + if (ret != 0) + return ret; + + pr_debug("Completed connection realligence for Opcode: 0x%02x," + " ITT: 0x%08x to CID: %hu.\n", cmd->iscsi_opcode, + cmd->init_task_tag, conn->cid); + + return 0; +} + +/* + * Handles special after-the-fact actions related to TMRs. 
+ * Right now the only one that its really needed for is + * connection recovery releated TASK_REASSIGN. + */ +extern int iscsit_tmr_post_handler(struct iscsi_cmd *cmd, struct iscsi_conn *conn) +{ + struct iscsi_tmr_req *tmr_req = cmd->tmr_req; + struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req; + + if (tmr_req->task_reassign && + (se_tmr->response == ISCSI_TMF_RSP_COMPLETE)) + return iscsit_task_reassign_complete(tmr_req, conn); + + return 0; +} + +/* + * Nothing to do here, but leave it for good measure. :-) + */ +int iscsit_task_reassign_prepare_read( + struct iscsi_tmr_req *tmr_req, + struct iscsi_conn *conn) +{ + return 0; +} + +static void iscsit_task_reassign_prepare_unsolicited_dataout( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + int i, j; + struct iscsi_pdu *pdu = NULL; + struct iscsi_seq *seq = NULL; + + if (conn->sess->sess_ops->DataSequenceInOrder) { + cmd->data_sn = 0; + + if (cmd->immediate_data) + cmd->r2t_offset += (cmd->first_burst_len - + cmd->seq_start_offset); + + if (conn->sess->sess_ops->DataPDUInOrder) { + cmd->write_data_done -= (cmd->immediate_data) ? 
+ (cmd->first_burst_len - + cmd->seq_start_offset) : + cmd->first_burst_len; + cmd->first_burst_len = 0; + return; + } + + for (i = 0; i < cmd->pdu_count; i++) { + pdu = &cmd->pdu_list[i]; + + if (pdu->status != ISCSI_PDU_RECEIVED_OK) + continue; + + if ((pdu->offset >= cmd->seq_start_offset) && + ((pdu->offset + pdu->length) <= + cmd->seq_end_offset)) { + cmd->first_burst_len -= pdu->length; + cmd->write_data_done -= pdu->length; + pdu->status = ISCSI_PDU_NOT_RECEIVED; + } + } + } else { + for (i = 0; i < cmd->seq_count; i++) { + seq = &cmd->seq_list[i]; + + if (seq->type != SEQTYPE_UNSOLICITED) + continue; + + cmd->write_data_done -= + (seq->offset - seq->orig_offset); + cmd->first_burst_len = 0; + seq->data_sn = 0; + seq->offset = seq->orig_offset; + seq->next_burst_len = 0; + seq->status = DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY; + + if (conn->sess->sess_ops->DataPDUInOrder) + continue; + + for (j = 0; j < seq->pdu_count; j++) { + pdu = &cmd->pdu_list[j+seq->pdu_start]; + + if (pdu->status != ISCSI_PDU_RECEIVED_OK) + continue; + + pdu->status = ISCSI_PDU_NOT_RECEIVED; + } + } + } +} + +int iscsit_task_reassign_prepare_write( + struct iscsi_tmr_req *tmr_req, + struct iscsi_conn *conn) +{ + struct se_tmr_req *se_tmr = tmr_req->se_tmr_req; + struct se_cmd *se_cmd = se_tmr->ref_cmd; + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + struct iscsi_pdu *pdu = NULL; + struct iscsi_r2t *r2t = NULL, *r2t_tmp; + int first_incomplete_r2t = 1, i = 0; + + /* + * The command was in the process of receiving Unsolicited DataOUT when + * the connection failed. + */ + if (cmd->unsolicited_data) + iscsit_task_reassign_prepare_unsolicited_dataout(cmd, conn); + + /* + * The Initiator is requesting R2Ts starting from zero, skip + * checking acknowledged R2Ts and start checking struct iscsi_r2ts + * greater than zero. 
+ */ + if (!tmr_req->exp_data_sn) + goto drop_unacknowledged_r2ts; + + /* + * We now check that the PDUs in DataOUT sequences below + * the TMR TASK_REASSIGN ExpDataSN (R2TSN the Initiator is + * expecting next) have all the DataOUT they require to complete + * the DataOUT sequence. First scan from R2TSN 0 to TMR + * TASK_REASSIGN ExpDataSN-1. + * + * If we have not received all DataOUT in question, we must + * make sure to make the appropriate changes to values in + * struct iscsi_cmd (and elsewhere depending on session parameters) + * so iscsit_build_r2ts_for_cmd() in iscsit_task_reassign_complete_write() + * will resend a new R2T for the DataOUT sequences in question. + */ + spin_lock_bh(&cmd->r2t_lock); + if (list_empty(&cmd->cmd_r2t_list)) { + spin_unlock_bh(&cmd->r2t_lock); + return -1; + } + + list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) { + + if (r2t->r2t_sn >= tmr_req->exp_data_sn) + continue; + /* + * Safely ignore Recovery R2Ts and R2Ts that have completed + * DataOUT sequences. + */ + if (r2t->seq_complete) + continue; + + if (r2t->recovery_r2t) + continue; + + /* + * DataSequenceInOrder=Yes: + * + * Taking into account the iSCSI implementation requirement of + * MaxOutstandingR2T=1 while ErrorRecoveryLevel>0 and + * DataSequenceInOrder=Yes, we must take into consideration + * the following: + * + * DataSequenceInOrder=No: + * + * Taking into account that the Initiator controls the (possibly + * random) PDU Order in (possibly random) Sequence Order of + * DataOUT the target requests with R2Ts, we must take into + * consideration the following: + * + * DataPDUInOrder=Yes for DataSequenceInOrder=[Yes,No]: + * + * While processing non-complete R2T DataOUT sequence requests + * the Target will re-request only the total sequence length + * minus current received offset. This is because we must + * assume the initiator will continue sending DataOUT from the + * last PDU before the connection failed. 
+ * + * DataPDUInOrder=No for DataSequenceInOrder=[Yes,No]: + * + * While processing non-complete R2T DataOUT sequence requests + * the Target will re-request the entire DataOUT sequence if + * any single PDU is missing from the sequence. This is because + * we have no logical method to determine the next PDU offset, + * and we must assume the Initiator will be sending any random + * PDU offset in the current sequence after TASK_REASSIGN + * has completed. + */ + if (conn->sess->sess_ops->DataSequenceInOrder) { + if (!first_incomplete_r2t) { + cmd->r2t_offset -= r2t->xfer_len; + goto next; + } + + if (conn->sess->sess_ops->DataPDUInOrder) { + cmd->data_sn = 0; + cmd->r2t_offset -= (r2t->xfer_len - + cmd->next_burst_len); + first_incomplete_r2t = 0; + goto next; + } + + cmd->data_sn = 0; + cmd->r2t_offset -= r2t->xfer_len; + + for (i = 0; i < cmd->pdu_count; i++) { + pdu = &cmd->pdu_list[i]; + + if (pdu->status != ISCSI_PDU_RECEIVED_OK) + continue; + + if ((pdu->offset >= r2t->offset) && + (pdu->offset < (r2t->offset + + r2t->xfer_len))) { + cmd->next_burst_len -= pdu->length; + cmd->write_data_done -= pdu->length; + pdu->status = ISCSI_PDU_NOT_RECEIVED; + } + } + + first_incomplete_r2t = 0; + } else { + struct iscsi_seq *seq; + + seq = iscsit_get_seq_holder(cmd, r2t->offset, + r2t->xfer_len); + if (!seq) { + spin_unlock_bh(&cmd->r2t_lock); + return -1; + } + + cmd->write_data_done -= + (seq->offset - seq->orig_offset); + seq->data_sn = 0; + seq->offset = seq->orig_offset; + seq->next_burst_len = 0; + seq->status = DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY; + + cmd->seq_send_order--; + + if (conn->sess->sess_ops->DataPDUInOrder) + goto next; + + for (i = 0; i < seq->pdu_count; i++) { + pdu = &cmd->pdu_list[i+seq->pdu_start]; + + if (pdu->status != ISCSI_PDU_RECEIVED_OK) + continue; + + pdu->status = ISCSI_PDU_NOT_RECEIVED; + } + } + +next: + cmd->outstanding_r2ts--; + } + spin_unlock_bh(&cmd->r2t_lock); + + /* + * We now drop all unacknowledged R2Ts, ie: ExpDataSN 
from TMR + * TASK_REASSIGN to the last R2T in the list. We are also careful + * to check that the Initiator is not requesting R2Ts for DataOUT + * sequences it has already completed. + * + * Free each R2T in question and adjust values in struct iscsi_cmd + * accordingly so iscsit_build_r2ts_for_cmd() does the rest of + * the work after the TMR TASK_REASSIGN Response is sent. + */ +drop_unacknowledged_r2ts: + + cmd->cmd_flags &= ~ICF_SENT_LAST_R2T; + cmd->r2t_sn = tmr_req->exp_data_sn; + + spin_lock_bh(&cmd->r2t_lock); + list_for_each_entry_safe(r2t, r2t_tmp, &cmd->cmd_r2t_list, r2t_list) { + /* + * Skip up to the R2T Sequence number provided by the + * iSCSI TASK_REASSIGN TMR + */ + if (r2t->r2t_sn < tmr_req->exp_data_sn) + continue; + + if (r2t->seq_complete) { + pr_err("Initiator is requesting R2Ts from" + " R2TSN: 0x%08x, but R2TSN: 0x%08x, Offset: %u," + " Length: %u is already complete." + " BAD INITIATOR ERL=2 IMPLEMENTATION!\n", + tmr_req->exp_data_sn, r2t->r2t_sn, + r2t->offset, r2t->xfer_len); + spin_unlock_bh(&cmd->r2t_lock); + return -1; + } + + if (r2t->recovery_r2t) { + iscsit_free_r2t(r2t, cmd); + continue; + } + + /* DataSequenceInOrder=Yes: + * + * Taking into account the iSCSI implementation requirement of + * MaxOutstandingR2T=1 while ErrorRecoveryLevel>0 and + * DataSequenceInOrder=Yes, it's safe to subtract the R2T's + * entire transfer length from the command's R2T offset marker. + * + * DataSequenceInOrder=No: + * + * We subtract the difference from struct iscsi_seq between the + * current offset and original offset from cmd->write_data_done + * to account for DataOUT PDUs already received. Then reset + * the current offset to the original and zero out the current + * burst length, to make sure we re-request the entire DataOUT + * sequence.
+ */ + if (conn->sess->sess_ops->DataSequenceInOrder) + cmd->r2t_offset -= r2t->xfer_len; + else + cmd->seq_send_order--; + + cmd->outstanding_r2ts--; + iscsit_free_r2t(r2t, cmd); + } + spin_unlock_bh(&cmd->r2t_lock); + + return 0; +} + +/* + * Performs sanity checks TMR TASK_REASSIGN's ExpDataSN for + * a given struct iscsi_cmd. + */ +int iscsit_check_task_reassign_expdatasn( + struct iscsi_tmr_req *tmr_req, + struct iscsi_conn *conn) +{ + struct se_tmr_req *se_tmr = tmr_req->se_tmr_req; + struct se_cmd *se_cmd = se_tmr->ref_cmd; + struct iscsi_cmd *ref_cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + if (ref_cmd->iscsi_opcode != ISCSI_OP_SCSI_CMD) + return 0; + + if (se_cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION) + return 0; + + if (ref_cmd->data_direction == DMA_NONE) + return 0; + + /* + * For READs the TMR TASK_REASSIGNs ExpDataSN contains the next DataSN + * of DataIN the Initiator is expecting. + * + * Also check that the Initiator is not re-requesting DataIN that has + * already been acknowledged with a DataAck SNACK. + */ + if (ref_cmd->data_direction == DMA_FROM_DEVICE) { + if (tmr_req->exp_data_sn > ref_cmd->data_sn) { + pr_err("Received ExpDataSN: 0x%08x for READ" + " in TMR TASK_REASSIGN greater than command's" + " DataSN: 0x%08x.\n", tmr_req->exp_data_sn, + ref_cmd->data_sn); + return -1; + } + if ((ref_cmd->cmd_flags & ICF_GOT_DATACK_SNACK) && + (tmr_req->exp_data_sn <= ref_cmd->acked_data_sn)) { + pr_err("Received ExpDataSN: 0x%08x for READ" + " in TMR TASK_REASSIGN for previously" + " acknowledged DataIN: 0x%08x," + " protocol error\n", tmr_req->exp_data_sn, + ref_cmd->acked_data_sn); + return -1; + } + return iscsit_task_reassign_prepare_read(tmr_req, conn); + } + + /* + * For WRITEs the TMR TASK_REASSIGNs ExpDataSN contains the next R2TSN + * for R2Ts the Initiator is expecting. + * + * Do the magic in iscsit_task_reassign_prepare_write(). 
+ */ + if (ref_cmd->data_direction == DMA_TO_DEVICE) { + if (tmr_req->exp_data_sn > ref_cmd->r2t_sn) { + pr_err("Received ExpDataSN: 0x%08x for WRITE" + " in TMR TASK_REASSIGN greater than command's" + " R2TSN: 0x%08x.\n", tmr_req->exp_data_sn, + ref_cmd->r2t_sn); + return -1; + } + return iscsit_task_reassign_prepare_write(tmr_req, conn); + } + + pr_err("Unknown iSCSI data_direction: 0x%02x\n", + ref_cmd->data_direction); + + return -1; +} diff --git a/drivers/target/iscsi/iscsi_target_tmr.h b/drivers/target/iscsi/iscsi_target_tmr.h new file mode 100644 index 000000000000..142e992cb097 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_tmr.h @@ -0,0 +1,14 @@ +#ifndef ISCSI_TARGET_TMR_H +#define ISCSI_TARGET_TMR_H + +extern u8 iscsit_tmr_abort_task(struct iscsi_cmd *, unsigned char *); +extern int iscsit_tmr_task_warm_reset(struct iscsi_conn *, struct iscsi_tmr_req *, + unsigned char *); +extern int iscsit_tmr_task_cold_reset(struct iscsi_conn *, struct iscsi_tmr_req *, + unsigned char *); +extern u8 iscsit_tmr_task_reassign(struct iscsi_cmd *, unsigned char *); +extern int iscsit_tmr_post_handler(struct iscsi_cmd *, struct iscsi_conn *); +extern int iscsit_check_task_reassign_expdatasn(struct iscsi_tmr_req *, + struct iscsi_conn *); + +#endif /* ISCSI_TARGET_TMR_H */ diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c new file mode 100644 index 000000000000..d4cf2cd25c44 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -0,0 +1,759 @@ +/******************************************************************************* + * This file contains iSCSI Target Portal Group related functions. + * + * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. 
Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + ******************************************************************************/ + +#include <target/target_core_base.h> +#include <target/target_core_transport.h> +#include <target/target_core_fabric_ops.h> +#include <target/target_core_configfs.h> +#include <target/target_core_tpg.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_erl0.h" +#include "iscsi_target_login.h" +#include "iscsi_target_nodeattrib.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_util.h" +#include "iscsi_target.h" +#include "iscsi_target_parameters.h" + +struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *tiqn, u16 tpgt) +{ + struct iscsi_portal_group *tpg; + + tpg = kzalloc(sizeof(struct iscsi_portal_group), GFP_KERNEL); + if (!tpg) { + pr_err("Unable to allocate struct iscsi_portal_group\n"); + return NULL; + } + + tpg->tpgt = tpgt; + tpg->tpg_state = TPG_STATE_FREE; + tpg->tpg_tiqn = tiqn; + INIT_LIST_HEAD(&tpg->tpg_gnp_list); + INIT_LIST_HEAD(&tpg->tpg_list); + mutex_init(&tpg->tpg_access_lock); + mutex_init(&tpg->np_login_lock); + spin_lock_init(&tpg->tpg_state_lock); + spin_lock_init(&tpg->tpg_np_lock); + + return tpg; +} + +static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *); + +int iscsit_load_discovery_tpg(void) +{ + struct iscsi_param *param; + struct iscsi_portal_group *tpg; + int ret; + + tpg = iscsit_alloc_portal_group(NULL, 1); + if (!tpg) { + pr_err("Unable to allocate struct 
iscsi_portal_group\n"); + return -1; + } + + ret = core_tpg_register( + &lio_target_fabric_configfs->tf_ops, + NULL, &tpg->tpg_se_tpg, (void *)tpg, + TRANSPORT_TPG_TYPE_DISCOVERY); + if (ret < 0) { + kfree(tpg); + return -1; + } + + tpg->sid = 1; /* First Assigned LIO Session ID */ + iscsit_set_default_tpg_attribs(tpg); + + if (iscsi_create_default_params(&tpg->param_list) < 0) + goto out; + /* + * By default we disable authentication for discovery sessions, + * this can be changed with: + * + * /sys/kernel/config/target/iscsi/discovery_auth/enforce_discovery_auth + */ + param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list); + if (!param) + goto out; + + if (iscsi_update_param_value(param, "CHAP,None") < 0) + goto out; + + tpg->tpg_attrib.authentication = 0; + + spin_lock(&tpg->tpg_state_lock); + tpg->tpg_state = TPG_STATE_ACTIVE; + spin_unlock(&tpg->tpg_state_lock); + + iscsit_global->discovery_tpg = tpg; + pr_debug("CORE[0] - Allocated Discovery TPG\n"); + + return 0; +out: + if (tpg->sid == 1) + core_tpg_deregister(&tpg->tpg_se_tpg); + kfree(tpg); + return -1; +} + +void iscsit_release_discovery_tpg(void) +{ + struct iscsi_portal_group *tpg = iscsit_global->discovery_tpg; + + if (!tpg) + return; + + core_tpg_deregister(&tpg->tpg_se_tpg); + + kfree(tpg); + iscsit_global->discovery_tpg = NULL; +} + +struct iscsi_portal_group *iscsit_get_tpg_from_np( + struct iscsi_tiqn *tiqn, + struct iscsi_np *np) +{ + struct iscsi_portal_group *tpg = NULL; + struct iscsi_tpg_np *tpg_np; + + spin_lock(&tiqn->tiqn_tpg_lock); + list_for_each_entry(tpg, &tiqn->tiqn_tpg_list, tpg_list) { + + spin_lock(&tpg->tpg_state_lock); + if (tpg->tpg_state == TPG_STATE_FREE) { + spin_unlock(&tpg->tpg_state_lock); + continue; + } + spin_unlock(&tpg->tpg_state_lock); + + spin_lock(&tpg->tpg_np_lock); + list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) { + if (tpg_np->tpg_np == np) { + spin_unlock(&tpg->tpg_np_lock); + spin_unlock(&tiqn->tiqn_tpg_lock); + return tpg; + } + } + 
spin_unlock(&tpg->tpg_np_lock); + } + spin_unlock(&tiqn->tiqn_tpg_lock); + + return NULL; +} + +int iscsit_get_tpg( + struct iscsi_portal_group *tpg) +{ + int ret; + + ret = mutex_lock_interruptible(&tpg->tpg_access_lock); + return ((ret != 0) || signal_pending(current)) ? -1 : 0; +} + +void iscsit_put_tpg(struct iscsi_portal_group *tpg) +{ + mutex_unlock(&tpg->tpg_access_lock); +} + +static void iscsit_clear_tpg_np_login_thread( + struct iscsi_tpg_np *tpg_np, + struct iscsi_portal_group *tpg) +{ + if (!tpg_np->tpg_np) { + pr_err("struct iscsi_tpg_np->tpg_np is NULL!\n"); + return; + } + + iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg); +} + +void iscsit_clear_tpg_np_login_threads( + struct iscsi_portal_group *tpg) +{ + struct iscsi_tpg_np *tpg_np; + + spin_lock(&tpg->tpg_np_lock); + list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) { + if (!tpg_np->tpg_np) { + pr_err("struct iscsi_tpg_np->tpg_np is NULL!\n"); + continue; + } + spin_unlock(&tpg->tpg_np_lock); + iscsit_clear_tpg_np_login_thread(tpg_np, tpg); + spin_lock(&tpg->tpg_np_lock); + } + spin_unlock(&tpg->tpg_np_lock); +} + +void iscsit_tpg_dump_params(struct iscsi_portal_group *tpg) +{ + iscsi_print_params(tpg->param_list); +} + +static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *tpg) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + a->authentication = TA_AUTHENTICATION; + a->login_timeout = TA_LOGIN_TIMEOUT; + a->netif_timeout = TA_NETIF_TIMEOUT; + a->default_cmdsn_depth = TA_DEFAULT_CMDSN_DEPTH; + a->generate_node_acls = TA_GENERATE_NODE_ACLS; + a->cache_dynamic_acls = TA_CACHE_DYNAMIC_ACLS; + a->demo_mode_write_protect = TA_DEMO_MODE_WRITE_PROTECT; + a->prod_mode_write_protect = TA_PROD_MODE_WRITE_PROTECT; +} + +int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg) +{ + if (tpg->tpg_state != TPG_STATE_FREE) { + pr_err("Unable to add iSCSI Target Portal Group: %d" + " while not in TPG_STATE_FREE state.\n", tpg->tpgt); + return 
-EEXIST; + } + iscsit_set_default_tpg_attribs(tpg); + + if (iscsi_create_default_params(&tpg->param_list) < 0) + goto err_out; + + ISCSI_TPG_ATTRIB(tpg)->tpg = tpg; + + spin_lock(&tpg->tpg_state_lock); + tpg->tpg_state = TPG_STATE_INACTIVE; + spin_unlock(&tpg->tpg_state_lock); + + spin_lock(&tiqn->tiqn_tpg_lock); + list_add_tail(&tpg->tpg_list, &tiqn->tiqn_tpg_list); + tiqn->tiqn_ntpgs++; + pr_debug("CORE[%s]_TPG[%hu] - Added iSCSI Target Portal Group\n", + tiqn->tiqn, tpg->tpgt); + spin_unlock(&tiqn->tiqn_tpg_lock); + + return 0; +err_out: + if (tpg->param_list) { + iscsi_release_param_list(tpg->param_list); + tpg->param_list = NULL; + } + kfree(tpg); + return -ENOMEM; +} + +int iscsit_tpg_del_portal_group( + struct iscsi_tiqn *tiqn, + struct iscsi_portal_group *tpg, + int force) +{ + u8 old_state = tpg->tpg_state; + + spin_lock(&tpg->tpg_state_lock); + tpg->tpg_state = TPG_STATE_INACTIVE; + spin_unlock(&tpg->tpg_state_lock); + + if (iscsit_release_sessions_for_tpg(tpg, force) < 0) { + pr_err("Unable to delete iSCSI Target Portal Group:" + " %hu while active sessions exist, and force=0\n", + tpg->tpgt); + tpg->tpg_state = old_state; + return -EPERM; + } + + core_tpg_clear_object_luns(&tpg->tpg_se_tpg); + + if (tpg->param_list) { + iscsi_release_param_list(tpg->param_list); + tpg->param_list = NULL; + } + + core_tpg_deregister(&tpg->tpg_se_tpg); + + spin_lock(&tpg->tpg_state_lock); + tpg->tpg_state = TPG_STATE_FREE; + spin_unlock(&tpg->tpg_state_lock); + + spin_lock(&tiqn->tiqn_tpg_lock); + tiqn->tiqn_ntpgs--; + list_del(&tpg->tpg_list); + spin_unlock(&tiqn->tiqn_tpg_lock); + + pr_debug("CORE[%s]_TPG[%hu] - Deleted iSCSI Target Portal Group\n", + tiqn->tiqn, tpg->tpgt); + + kfree(tpg); + return 0; +} + +int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *tpg) +{ + struct iscsi_param *param; + struct iscsi_tiqn *tiqn = tpg->tpg_tiqn; + + spin_lock(&tpg->tpg_state_lock); + if (tpg->tpg_state == TPG_STATE_ACTIVE) { + pr_err("iSCSI target portal group: %hu is 
already" + " active, ignoring request.\n", tpg->tpgt); + spin_unlock(&tpg->tpg_state_lock); + return -EINVAL; + } + /* + * Make sure that AuthMethod does not contain None as an option + * unless explicitly disabled. Set the default to CHAP if authentication + * is enforced (as per default), and remove the NONE option. + */ + param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list); + if (!param) { + spin_unlock(&tpg->tpg_state_lock); + return -ENOMEM; + } + + if (ISCSI_TPG_ATTRIB(tpg)->authentication) { + if (!strcmp(param->value, NONE)) + if (iscsi_update_param_value(param, CHAP) < 0) { + spin_unlock(&tpg->tpg_state_lock); + return -ENOMEM; + } + if (iscsit_ta_authentication(tpg, 1) < 0) { + spin_unlock(&tpg->tpg_state_lock); + return -ENOMEM; + } + } + + tpg->tpg_state = TPG_STATE_ACTIVE; + spin_unlock(&tpg->tpg_state_lock); + + spin_lock(&tiqn->tiqn_tpg_lock); + tiqn->tiqn_active_tpgs++; + pr_debug("iSCSI_TPG[%hu] - Enabled iSCSI Target Portal Group\n", + tpg->tpgt); + spin_unlock(&tiqn->tiqn_tpg_lock); + + return 0; +} + +int iscsit_tpg_disable_portal_group(struct iscsi_portal_group *tpg, int force) +{ + struct iscsi_tiqn *tiqn; + u8 old_state = tpg->tpg_state; + + spin_lock(&tpg->tpg_state_lock); + if (tpg->tpg_state == TPG_STATE_INACTIVE) { + pr_err("iSCSI Target Portal Group: %hu is already" + " inactive, ignoring request.\n", tpg->tpgt); + spin_unlock(&tpg->tpg_state_lock); + return -EINVAL; + } + tpg->tpg_state = TPG_STATE_INACTIVE; + spin_unlock(&tpg->tpg_state_lock); + + iscsit_clear_tpg_np_login_threads(tpg); + + if (iscsit_release_sessions_for_tpg(tpg, force) < 0) { + spin_lock(&tpg->tpg_state_lock); + tpg->tpg_state = old_state; + spin_unlock(&tpg->tpg_state_lock); + pr_err("Unable to disable iSCSI Target Portal Group:" + " %hu while active sessions exist, and force=0\n", + tpg->tpgt); + return -EPERM; + } + + tiqn = tpg->tpg_tiqn; + if (!tiqn || (tpg == iscsit_global->discovery_tpg)) + return 0; + + spin_lock(&tiqn->tiqn_tpg_lock); +
tiqn->tiqn_active_tpgs--; + pr_debug("iSCSI_TPG[%hu] - Disabled iSCSI Target Portal Group\n", + tpg->tpgt); + spin_unlock(&tiqn->tiqn_tpg_lock); + + return 0; +} + +struct iscsi_node_attrib *iscsit_tpg_get_node_attrib( + struct iscsi_session *sess) +{ + struct se_session *se_sess = sess->se_sess; + struct se_node_acl *se_nacl = se_sess->se_node_acl; + struct iscsi_node_acl *acl = container_of(se_nacl, struct iscsi_node_acl, + se_node_acl); + + return &acl->node_attrib; +} + +struct iscsi_tpg_np *iscsit_tpg_locate_child_np( + struct iscsi_tpg_np *tpg_np, + int network_transport) +{ + struct iscsi_tpg_np *tpg_np_child, *tpg_np_child_tmp; + + spin_lock(&tpg_np->tpg_np_parent_lock); + list_for_each_entry_safe(tpg_np_child, tpg_np_child_tmp, + &tpg_np->tpg_np_parent_list, tpg_np_child_list) { + if (tpg_np_child->tpg_np->np_network_transport == + network_transport) { + spin_unlock(&tpg_np->tpg_np_parent_lock); + return tpg_np_child; + } + } + spin_unlock(&tpg_np->tpg_np_parent_lock); + + return NULL; +} + +struct iscsi_tpg_np *iscsit_tpg_add_network_portal( + struct iscsi_portal_group *tpg, + struct __kernel_sockaddr_storage *sockaddr, + char *ip_str, + struct iscsi_tpg_np *tpg_np_parent, + int network_transport) +{ + struct iscsi_np *np; + struct iscsi_tpg_np *tpg_np; + + tpg_np = kzalloc(sizeof(struct iscsi_tpg_np), GFP_KERNEL); + if (!tpg_np) { + pr_err("Unable to allocate memory for" + " struct iscsi_tpg_np.\n"); + return ERR_PTR(-ENOMEM); + } + + np = iscsit_add_np(sockaddr, ip_str, network_transport); + if (IS_ERR(np)) { + kfree(tpg_np); + return ERR_CAST(np); + } + + INIT_LIST_HEAD(&tpg_np->tpg_np_list); + INIT_LIST_HEAD(&tpg_np->tpg_np_child_list); + INIT_LIST_HEAD(&tpg_np->tpg_np_parent_list); + spin_lock_init(&tpg_np->tpg_np_parent_lock); + tpg_np->tpg_np = np; + tpg_np->tpg = tpg; + + spin_lock(&tpg->tpg_np_lock); + list_add_tail(&tpg_np->tpg_np_list, &tpg->tpg_gnp_list); + tpg->num_tpg_nps++; + if (tpg->tpg_tiqn) + tpg->tpg_tiqn->tiqn_num_tpg_nps++; + 
spin_unlock(&tpg->tpg_np_lock); + + if (tpg_np_parent) { + tpg_np->tpg_np_parent = tpg_np_parent; + spin_lock(&tpg_np_parent->tpg_np_parent_lock); + list_add_tail(&tpg_np->tpg_np_child_list, + &tpg_np_parent->tpg_np_parent_list); + spin_unlock(&tpg_np_parent->tpg_np_parent_lock); + } + + pr_debug("CORE[%s] - Added Network Portal: %s:%hu,%hu on %s\n", + tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt, + (np->np_network_transport == ISCSI_TCP) ? "TCP" : "SCTP"); + + return tpg_np; +} + +static int iscsit_tpg_release_np( + struct iscsi_tpg_np *tpg_np, + struct iscsi_portal_group *tpg, + struct iscsi_np *np) +{ + iscsit_clear_tpg_np_login_thread(tpg_np, tpg); + + pr_debug("CORE[%s] - Removed Network Portal: %s:%hu,%hu on %s\n", + tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt, + (np->np_network_transport == ISCSI_TCP) ? "TCP" : "SCTP"); + + tpg_np->tpg_np = NULL; + tpg_np->tpg = NULL; + kfree(tpg_np); + /* + * iscsit_del_np() will shutdown struct iscsi_np when last TPG reference is released. + */ + return iscsit_del_np(np); +} + +int iscsit_tpg_del_network_portal( + struct iscsi_portal_group *tpg, + struct iscsi_tpg_np *tpg_np) +{ + struct iscsi_np *np; + struct iscsi_tpg_np *tpg_np_child, *tpg_np_child_tmp; + int ret = 0; + + np = tpg_np->tpg_np; + if (!np) { + pr_err("Unable to locate struct iscsi_np from" + " struct iscsi_tpg_np\n"); + return -EINVAL; + } + + if (!tpg_np->tpg_np_parent) { + /* + * We are the parent tpg network portal. Release all of the + * child tpg_np's (eg: the non ISCSI_TCP ones) on our parent + * list first. + */ + list_for_each_entry_safe(tpg_np_child, tpg_np_child_tmp, + &tpg_np->tpg_np_parent_list, + tpg_np_child_list) { + ret = iscsit_tpg_del_network_portal(tpg, tpg_np_child); + if (ret < 0) + pr_err("iscsit_tpg_del_network_portal()" + " failed: %d\n", ret); + } + } else { + /* + * We are not the parent ISCSI_TCP tpg network portal. Release + * our own network portals from the child list. 
+ */ + spin_lock(&tpg_np->tpg_np_parent->tpg_np_parent_lock); + list_del(&tpg_np->tpg_np_child_list); + spin_unlock(&tpg_np->tpg_np_parent->tpg_np_parent_lock); + } + + spin_lock(&tpg->tpg_np_lock); + list_del(&tpg_np->tpg_np_list); + tpg->num_tpg_nps--; + if (tpg->tpg_tiqn) + tpg->tpg_tiqn->tiqn_num_tpg_nps--; + spin_unlock(&tpg->tpg_np_lock); + + return iscsit_tpg_release_np(tpg_np, tpg, np); +} + +int iscsit_tpg_set_initiator_node_queue_depth( + struct iscsi_portal_group *tpg, + unsigned char *initiatorname, + u32 queue_depth, + int force) +{ + return core_tpg_set_initiator_node_queue_depth(&tpg->tpg_se_tpg, + initiatorname, queue_depth, force); +} + +int iscsit_ta_authentication(struct iscsi_portal_group *tpg, u32 authentication) +{ + unsigned char buf1[256], buf2[256], *none = NULL; + int len; + struct iscsi_param *param; + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((authentication != 1) && (authentication != 0)) { + pr_err("Illegal value for authentication parameter:" + " %u, ignoring request.\n", authentication); + return -1; + } + + memset(buf1, 0, sizeof(buf1)); + memset(buf2, 0, sizeof(buf2)); + + param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list); + if (!param) + return -EINVAL; + + if (authentication) { + snprintf(buf1, sizeof(buf1), "%s", param->value); + none = strstr(buf1, NONE); + if (!none) + goto out; + if (!strncmp(none + 4, ",", 1)) { + if (!strcmp(buf1, none)) + sprintf(buf2, "%s", none+5); + else { + none--; + *none = '\0'; + len = sprintf(buf2, "%s", buf1); + none += 5; + sprintf(buf2 + len, "%s", none); + } + } else { + none--; + *none = '\0'; + sprintf(buf2, "%s", buf1); + } + if (iscsi_update_param_value(param, buf2) < 0) + return -EINVAL; + } else { + snprintf(buf1, sizeof(buf1), "%s", param->value); + none = strstr(buf1, NONE); + if ((none)) + goto out; + strncat(buf1, ",", strlen(",")); + strncat(buf1, NONE, strlen(NONE)); + if (iscsi_update_param_value(param, buf1) < 0) + return -EINVAL; + } + +out: + 
a->authentication = authentication; + pr_debug("%s iSCSI Authentication Methods for TPG: %hu.\n", + a->authentication ? "Enforcing" : "Disabling", tpg->tpgt); + + return 0; +} + +/* + * Validate and store the login timeout attribute of a Target Portal Group. + * + * Returns 0 on success, or -EINVAL when login_timeout lies outside the + * TA_LOGIN_TIMEOUT_MIN..TA_LOGIN_TIMEOUT_MAX range. + */ +int iscsit_ta_login_timeout( + struct iscsi_portal_group *tpg, + u32 login_timeout) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if (login_timeout > TA_LOGIN_TIMEOUT_MAX) { + pr_err("Requested Login Timeout %u larger than maximum" + " %u\n", login_timeout, TA_LOGIN_TIMEOUT_MAX); + return -EINVAL; + } else if (login_timeout < TA_LOGIN_TIMEOUT_MIN) { + pr_err("Requested Login Timeout %u smaller than" + " minimum %u\n", login_timeout, TA_LOGIN_TIMEOUT_MIN); + return -EINVAL; + } + + a->login_timeout = login_timeout; + pr_debug("Set Login Timeout to %u for Target Portal Group" + " %hu\n", a->login_timeout, tpg->tpgt); + + return 0; +} + +int iscsit_ta_netif_timeout( + struct iscsi_portal_group *tpg, + u32 netif_timeout) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if (netif_timeout > TA_NETIF_TIMEOUT_MAX) { + pr_err("Requested Network Interface Timeout %u larger" + " than maximum %u\n", netif_timeout, + TA_NETIF_TIMEOUT_MAX); + return -EINVAL; + } else if (netif_timeout < TA_NETIF_TIMEOUT_MIN) { + pr_err("Requested Network Interface Timeout %u smaller" + " than minimum %u\n", netif_timeout, + TA_NETIF_TIMEOUT_MIN); + return -EINVAL; + } + + a->netif_timeout = netif_timeout; + pr_debug("Set Network Interface Timeout to %u for" + " Target Portal Group %hu\n", a->netif_timeout, tpg->tpgt); + + return 0; +} + +int iscsit_ta_generate_node_acls( + struct iscsi_portal_group *tpg, + u32 flag) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((flag != 0) && (flag != 1)) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + + a->generate_node_acls = flag; + pr_debug("iSCSI_TPG[%hu] - Generate Initiator Portal Group ACLs: %s\n", + tpg->tpgt, (a->generate_node_acls) ?
"Enabled" : "Disabled"); + + return 0; +} + +int iscsit_ta_default_cmdsn_depth( + struct iscsi_portal_group *tpg, + u32 tcq_depth) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if (tcq_depth > TA_DEFAULT_CMDSN_DEPTH_MAX) { + pr_err("Requested Default Queue Depth: %u larger" + " than maximum %u\n", tcq_depth, + TA_DEFAULT_CMDSN_DEPTH_MAX); + return -EINVAL; + } else if (tcq_depth < TA_DEFAULT_CMDSN_DEPTH_MIN) { + pr_err("Requested Default Queue Depth: %u smaller" + " than minimum %u\n", tcq_depth, + TA_DEFAULT_CMDSN_DEPTH_MIN); + return -EINVAL; + } + + a->default_cmdsn_depth = tcq_depth; + pr_debug("iSCSI_TPG[%hu] - Set Default CmdSN TCQ Depth to %u\n", + tpg->tpgt, a->default_cmdsn_depth); + + return 0; +} + +int iscsit_ta_cache_dynamic_acls( + struct iscsi_portal_group *tpg, + u32 flag) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((flag != 0) && (flag != 1)) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + + a->cache_dynamic_acls = flag; + pr_debug("iSCSI_TPG[%hu] - Cache Dynamic Initiator Portal Group" + " ACLs %s\n", tpg->tpgt, (a->cache_dynamic_acls) ? + "Enabled" : "Disabled"); + + return 0; +} + +int iscsit_ta_demo_mode_write_protect( + struct iscsi_portal_group *tpg, + u32 flag) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((flag != 0) && (flag != 1)) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + + a->demo_mode_write_protect = flag; + pr_debug("iSCSI_TPG[%hu] - Demo Mode Write Protect bit: %s\n", + tpg->tpgt, (a->demo_mode_write_protect) ? "ON" : "OFF"); + + return 0; +} + +int iscsit_ta_prod_mode_write_protect( + struct iscsi_portal_group *tpg, + u32 flag) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((flag != 0) && (flag != 1)) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + + a->prod_mode_write_protect = flag; + pr_debug("iSCSI_TPG[%hu] - Production Mode Write Protect bit:" + " %s\n", tpg->tpgt, (a->prod_mode_write_protect) ? 
+ "ON" : "OFF"); + + return 0; +} diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h new file mode 100644 index 000000000000..dda48c141a8c --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -0,0 +1,41 @@ +#ifndef ISCSI_TARGET_TPG_H +#define ISCSI_TARGET_TPG_H + +extern struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *, u16); +extern int iscsit_load_discovery_tpg(void); +extern void iscsit_release_discovery_tpg(void); +extern struct iscsi_portal_group *iscsit_get_tpg_from_np(struct iscsi_tiqn *, + struct iscsi_np *); +extern int iscsit_get_tpg(struct iscsi_portal_group *); +extern void iscsit_put_tpg(struct iscsi_portal_group *); +extern void iscsit_clear_tpg_np_login_threads(struct iscsi_portal_group *); +extern void iscsit_tpg_dump_params(struct iscsi_portal_group *); +extern int iscsit_tpg_add_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *); +extern int iscsit_tpg_del_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *, + int); +extern int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *); +extern int iscsit_tpg_disable_portal_group(struct iscsi_portal_group *, int); +extern struct iscsi_node_acl *iscsit_tpg_add_initiator_node_acl( + struct iscsi_portal_group *, const char *, u32); +extern void iscsit_tpg_del_initiator_node_acl(struct iscsi_portal_group *, + struct se_node_acl *); +extern struct iscsi_node_attrib *iscsit_tpg_get_node_attrib(struct iscsi_session *); +extern void iscsit_tpg_del_external_nps(struct iscsi_tpg_np *); +extern struct iscsi_tpg_np *iscsit_tpg_locate_child_np(struct iscsi_tpg_np *, int); +extern struct iscsi_tpg_np *iscsit_tpg_add_network_portal(struct iscsi_portal_group *, + struct __kernel_sockaddr_storage *, char *, struct iscsi_tpg_np *, + int); +extern int iscsit_tpg_del_network_portal(struct iscsi_portal_group *, + struct iscsi_tpg_np *); +extern int iscsit_tpg_set_initiator_node_queue_depth(struct iscsi_portal_group *, 
+ unsigned char *, u32, int); +extern int iscsit_ta_authentication(struct iscsi_portal_group *, u32); +extern int iscsit_ta_login_timeout(struct iscsi_portal_group *, u32); +extern int iscsit_ta_netif_timeout(struct iscsi_portal_group *, u32); +extern int iscsit_ta_generate_node_acls(struct iscsi_portal_group *, u32); +extern int iscsit_ta_default_cmdsn_depth(struct iscsi_portal_group *, u32); +extern int iscsit_ta_cache_dynamic_acls(struct iscsi_portal_group *, u32); +extern int iscsit_ta_demo_mode_write_protect(struct iscsi_portal_group *, u32); +extern int iscsit_ta_prod_mode_write_protect(struct iscsi_portal_group *, u32); + +#endif /* ISCSI_TARGET_TPG_H */ diff --git a/drivers/target/iscsi/iscsi_target_tq.c b/drivers/target/iscsi/iscsi_target_tq.c new file mode 100644 index 000000000000..0baac5bcebd4 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_tq.c @@ -0,0 +1,551 @@ +/******************************************************************************* + * This file contains the iSCSI Login Thread and Thread Queue functions. + * + * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ ******************************************************************************/ + +#include <linux/kthread.h> +#include <linux/list.h> +#include <linux/bitmap.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_tq.h" +#include "iscsi_target.h" + +static LIST_HEAD(active_ts_list); +static LIST_HEAD(inactive_ts_list); +static DEFINE_SPINLOCK(active_ts_lock); +static DEFINE_SPINLOCK(inactive_ts_lock); +static DEFINE_SPINLOCK(ts_bitmap_lock); + +static void iscsi_add_ts_to_active_list(struct iscsi_thread_set *ts) +{ + spin_lock(&active_ts_lock); + list_add_tail(&ts->ts_list, &active_ts_list); + iscsit_global->active_ts++; + spin_unlock(&active_ts_lock); +} + +extern void iscsi_add_ts_to_inactive_list(struct iscsi_thread_set *ts) +{ + spin_lock(&inactive_ts_lock); + list_add_tail(&ts->ts_list, &inactive_ts_list); + iscsit_global->inactive_ts++; + spin_unlock(&inactive_ts_lock); +} + +static void iscsi_del_ts_from_active_list(struct iscsi_thread_set *ts) +{ + spin_lock(&active_ts_lock); + list_del(&ts->ts_list); + iscsit_global->active_ts--; + spin_unlock(&active_ts_lock); +} + +static struct iscsi_thread_set *iscsi_get_ts_from_inactive_list(void) +{ + struct iscsi_thread_set *ts; + + spin_lock(&inactive_ts_lock); + if (list_empty(&inactive_ts_list)) { + spin_unlock(&inactive_ts_lock); + return NULL; + } + + list_for_each_entry(ts, &inactive_ts_list, ts_list) + break; + + list_del(&ts->ts_list); + iscsit_global->inactive_ts--; + spin_unlock(&inactive_ts_lock); + + return ts; +} + +extern int iscsi_allocate_thread_sets(u32 thread_pair_count) +{ + int allocated_thread_pair_count = 0, i, thread_id; + struct iscsi_thread_set *ts = NULL; + + for (i = 0; i < thread_pair_count; i++) { + ts = kzalloc(sizeof(struct iscsi_thread_set), GFP_KERNEL); + if (!ts) { + pr_err("Unable to allocate memory for" + " thread set.\n"); + return allocated_thread_pair_count; + } + /* + * Locate the next available region in the thread_set_bitmap + */ + spin_lock(&ts_bitmap_lock); +
thread_id = bitmap_find_free_region(iscsit_global->ts_bitmap, + iscsit_global->ts_bitmap_count, get_order(1)); + spin_unlock(&ts_bitmap_lock); + if (thread_id < 0) { + pr_err("bitmap_find_free_region() failed for" + " thread_set_bitmap\n"); + kfree(ts); + return allocated_thread_pair_count; + } + + ts->thread_id = thread_id; + ts->status = ISCSI_THREAD_SET_FREE; + INIT_LIST_HEAD(&ts->ts_list); + spin_lock_init(&ts->ts_state_lock); + init_completion(&ts->rx_post_start_comp); + init_completion(&ts->tx_post_start_comp); + init_completion(&ts->rx_restart_comp); + init_completion(&ts->tx_restart_comp); + init_completion(&ts->rx_start_comp); + init_completion(&ts->tx_start_comp); + + ts->create_threads = 1; + ts->tx_thread = kthread_run(iscsi_target_tx_thread, ts, "%s", + ISCSI_TX_THREAD_NAME); + if (IS_ERR(ts->tx_thread)) { + dump_stack(); + pr_err("Unable to start iscsi_target_tx_thread\n"); + break; + } + + ts->rx_thread = kthread_run(iscsi_target_rx_thread, ts, "%s", + ISCSI_RX_THREAD_NAME); + if (IS_ERR(ts->rx_thread)) { + kthread_stop(ts->tx_thread); + pr_err("Unable to start iscsi_target_rx_thread\n"); + break; + } + ts->create_threads = 0; + + iscsi_add_ts_to_inactive_list(ts); + allocated_thread_pair_count++; + } + + pr_debug("Spawned %d thread set(s) (%d total threads).\n", + allocated_thread_pair_count, allocated_thread_pair_count * 2); + return allocated_thread_pair_count; +} + +extern void iscsi_deallocate_thread_sets(void) +{ + u32 released_count = 0; + struct iscsi_thread_set *ts = NULL; + + while ((ts = iscsi_get_ts_from_inactive_list())) { + + spin_lock_bh(&ts->ts_state_lock); + ts->status = ISCSI_THREAD_SET_DIE; + spin_unlock_bh(&ts->ts_state_lock); + + if (ts->rx_thread) { + send_sig(SIGINT, ts->rx_thread, 1); + kthread_stop(ts->rx_thread); + } + if (ts->tx_thread) { + send_sig(SIGINT, ts->tx_thread, 1); + kthread_stop(ts->tx_thread); + } + /* + * Release this thread_id in the thread_set_bitmap + */ + spin_lock(&ts_bitmap_lock); + 
bitmap_release_region(iscsit_global->ts_bitmap, + ts->thread_id, get_order(1)); + spin_unlock(&ts_bitmap_lock); + + released_count++; + kfree(ts); + } + + if (released_count) + pr_debug("Stopped %d thread set(s) (%d total threads)." + "\n", released_count, released_count * 2); +} + +static void iscsi_deallocate_extra_thread_sets(void) +{ + u32 orig_count, released_count = 0; + struct iscsi_thread_set *ts = NULL; + + orig_count = TARGET_THREAD_SET_COUNT; + + while ((iscsit_global->inactive_ts + 1) > orig_count) { + ts = iscsi_get_ts_from_inactive_list(); + if (!ts) + break; + + spin_lock_bh(&ts->ts_state_lock); + ts->status = ISCSI_THREAD_SET_DIE; + spin_unlock_bh(&ts->ts_state_lock); + + if (ts->rx_thread) { + send_sig(SIGINT, ts->rx_thread, 1); + kthread_stop(ts->rx_thread); + } + if (ts->tx_thread) { + send_sig(SIGINT, ts->tx_thread, 1); + kthread_stop(ts->tx_thread); + } + /* + * Release this thread_id in the thread_set_bitmap + */ + spin_lock(&ts_bitmap_lock); + bitmap_release_region(iscsit_global->ts_bitmap, + ts->thread_id, get_order(1)); + spin_unlock(&ts_bitmap_lock); + + released_count++; + kfree(ts); + } + + if (released_count) { + pr_debug("Stopped %d thread set(s) (%d total threads)." + "\n", released_count, released_count * 2); + } +} + +void iscsi_activate_thread_set(struct iscsi_conn *conn, struct iscsi_thread_set *ts) +{ + iscsi_add_ts_to_active_list(ts); + + spin_lock_bh(&ts->ts_state_lock); + conn->thread_set = ts; + ts->conn = conn; + spin_unlock_bh(&ts->ts_state_lock); + /* + * Start up the RX thread and wait on rx_post_start_comp. The RX + * Thread will then do the same for the TX Thread in + * iscsi_rx_thread_pre_handler(). 
+ */ + complete(&ts->rx_start_comp); + wait_for_completion(&ts->rx_post_start_comp); +} + +struct iscsi_thread_set *iscsi_get_thread_set(void) +{ + int allocate_ts = 0; + struct completion comp; + struct iscsi_thread_set *ts = NULL; + /* + * If no inactive thread set is available on the first call to + * iscsi_get_ts_from_inactive_list(), sleep for a second and + * try again. If still none are available after two attempts, + * allocate a set ourselves. + */ +get_set: + ts = iscsi_get_ts_from_inactive_list(); + if (!ts) { + if (allocate_ts == 2) + iscsi_allocate_thread_sets(1); + + init_completion(&comp); + wait_for_completion_timeout(&comp, 1 * HZ); + + allocate_ts++; + goto get_set; + } + + ts->delay_inactive = 1; + ts->signal_sent = 0; + ts->thread_count = 2; + init_completion(&ts->rx_restart_comp); + init_completion(&ts->tx_restart_comp); + + return ts; +} + +void iscsi_set_thread_clear(struct iscsi_conn *conn, u8 thread_clear) +{ + struct iscsi_thread_set *ts = NULL; + + if (!conn->thread_set) { + pr_err("struct iscsi_conn->thread_set is NULL\n"); + return; + } + ts = conn->thread_set; + + spin_lock_bh(&ts->ts_state_lock); + ts->thread_clear &= ~thread_clear; + + if ((thread_clear & ISCSI_CLEAR_RX_THREAD) && + (ts->blocked_threads & ISCSI_BLOCK_RX_THREAD)) + complete(&ts->rx_restart_comp); + else if ((thread_clear & ISCSI_CLEAR_TX_THREAD) && + (ts->blocked_threads & ISCSI_BLOCK_TX_THREAD)) + complete(&ts->tx_restart_comp); + spin_unlock_bh(&ts->ts_state_lock); +} + +void iscsi_set_thread_set_signal(struct iscsi_conn *conn, u8 signal_sent) +{ + struct iscsi_thread_set *ts = NULL; + + if (!conn->thread_set) { + pr_err("struct iscsi_conn->thread_set is NULL\n"); + return; + } + ts = conn->thread_set; + + spin_lock_bh(&ts->ts_state_lock); + ts->signal_sent |= signal_sent; + spin_unlock_bh(&ts->ts_state_lock); +} + +int iscsi_release_thread_set(struct iscsi_conn *conn) +{ + int thread_called = 0; + struct iscsi_thread_set *ts = NULL; + + if (!conn || 
!conn->thread_set) { + pr_err("connection or thread set pointer is NULL\n"); + BUG(); + } + ts = conn->thread_set; + + spin_lock_bh(&ts->ts_state_lock); + ts->status = ISCSI_THREAD_SET_RESET; + + if (!strncmp(current->comm, ISCSI_RX_THREAD_NAME, + strlen(ISCSI_RX_THREAD_NAME))) + thread_called = ISCSI_RX_THREAD; + else if (!strncmp(current->comm, ISCSI_TX_THREAD_NAME, + strlen(ISCSI_TX_THREAD_NAME))) + thread_called = ISCSI_TX_THREAD; + + if (ts->rx_thread && (thread_called == ISCSI_TX_THREAD) && + (ts->thread_clear & ISCSI_CLEAR_RX_THREAD)) { + + if (!(ts->signal_sent & ISCSI_SIGNAL_RX_THREAD)) { + send_sig(SIGINT, ts->rx_thread, 1); + ts->signal_sent |= ISCSI_SIGNAL_RX_THREAD; + } + ts->blocked_threads |= ISCSI_BLOCK_RX_THREAD; + spin_unlock_bh(&ts->ts_state_lock); + wait_for_completion(&ts->rx_restart_comp); + spin_lock_bh(&ts->ts_state_lock); + ts->blocked_threads &= ~ISCSI_BLOCK_RX_THREAD; + } + if (ts->tx_thread && (thread_called == ISCSI_RX_THREAD) && + (ts->thread_clear & ISCSI_CLEAR_TX_THREAD)) { + + if (!(ts->signal_sent & ISCSI_SIGNAL_TX_THREAD)) { + send_sig(SIGINT, ts->tx_thread, 1); + ts->signal_sent |= ISCSI_SIGNAL_TX_THREAD; + } + ts->blocked_threads |= ISCSI_BLOCK_TX_THREAD; + spin_unlock_bh(&ts->ts_state_lock); + wait_for_completion(&ts->tx_restart_comp); + spin_lock_bh(&ts->ts_state_lock); + ts->blocked_threads &= ~ISCSI_BLOCK_TX_THREAD; + } + + ts->conn = NULL; + ts->status = ISCSI_THREAD_SET_FREE; + spin_unlock_bh(&ts->ts_state_lock); + + return 0; +} + +int iscsi_thread_set_force_reinstatement(struct iscsi_conn *conn) +{ + struct iscsi_thread_set *ts; + + if (!conn->thread_set) + return -1; + ts = conn->thread_set; + + spin_lock_bh(&ts->ts_state_lock); + if (ts->status != ISCSI_THREAD_SET_ACTIVE) { + spin_unlock_bh(&ts->ts_state_lock); + return -1; + } + + if (ts->tx_thread && (!(ts->signal_sent & ISCSI_SIGNAL_TX_THREAD))) { + send_sig(SIGINT, ts->tx_thread, 1); + ts->signal_sent |= ISCSI_SIGNAL_TX_THREAD; + } + if (ts->rx_thread && 
(!(ts->signal_sent & ISCSI_SIGNAL_RX_THREAD))) { + send_sig(SIGINT, ts->rx_thread, 1); + ts->signal_sent |= ISCSI_SIGNAL_RX_THREAD; + } + spin_unlock_bh(&ts->ts_state_lock); + + return 0; +} + +static void iscsi_check_to_add_additional_sets(void) +{ + int thread_sets_add; + + spin_lock(&inactive_ts_lock); + thread_sets_add = iscsit_global->inactive_ts; + spin_unlock(&inactive_ts_lock); + if (thread_sets_add == 1) + iscsi_allocate_thread_sets(1); +} + +static int iscsi_signal_thread_pre_handler(struct iscsi_thread_set *ts) +{ + spin_lock_bh(&ts->ts_state_lock); + if ((ts->status == ISCSI_THREAD_SET_DIE) || signal_pending(current)) { + spin_unlock_bh(&ts->ts_state_lock); + return -1; + } + spin_unlock_bh(&ts->ts_state_lock); + + return 0; +} + +struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *ts) +{ + int ret; + + spin_lock_bh(&ts->ts_state_lock); + if (ts->create_threads) { + spin_unlock_bh(&ts->ts_state_lock); + goto sleep; + } + + flush_signals(current); + + if (ts->delay_inactive && (--ts->thread_count == 0)) { + spin_unlock_bh(&ts->ts_state_lock); + iscsi_del_ts_from_active_list(ts); + + if (!iscsit_global->in_shutdown) + iscsi_deallocate_extra_thread_sets(); + + iscsi_add_ts_to_inactive_list(ts); + spin_lock_bh(&ts->ts_state_lock); + } + + if ((ts->status == ISCSI_THREAD_SET_RESET) && + (ts->thread_clear & ISCSI_CLEAR_RX_THREAD)) + complete(&ts->rx_restart_comp); + + ts->thread_clear &= ~ISCSI_CLEAR_RX_THREAD; + spin_unlock_bh(&ts->ts_state_lock); +sleep: + ret = wait_for_completion_interruptible(&ts->rx_start_comp); + if (ret != 0) + return NULL; + + if (iscsi_signal_thread_pre_handler(ts) < 0) + return NULL; + + if (!ts->conn) { + pr_err("struct iscsi_thread_set->conn is NULL for" + " thread_id: %d, going back to sleep\n", ts->thread_id); + goto sleep; + } + iscsi_check_to_add_additional_sets(); + /* + * The RX Thread starts up the TX Thread and sleeps. 
+ */ + ts->thread_clear |= ISCSI_CLEAR_RX_THREAD; + complete(&ts->tx_start_comp); + wait_for_completion(&ts->tx_post_start_comp); + + return ts->conn; +} + +struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *ts) +{ + int ret; + + spin_lock_bh(&ts->ts_state_lock); + if (ts->create_threads) { + spin_unlock_bh(&ts->ts_state_lock); + goto sleep; + } + + flush_signals(current); + + if (ts->delay_inactive && (--ts->thread_count == 0)) { + spin_unlock_bh(&ts->ts_state_lock); + iscsi_del_ts_from_active_list(ts); + + if (!iscsit_global->in_shutdown) + iscsi_deallocate_extra_thread_sets(); + + iscsi_add_ts_to_inactive_list(ts); + spin_lock_bh(&ts->ts_state_lock); + } + if ((ts->status == ISCSI_THREAD_SET_RESET) && + (ts->thread_clear & ISCSI_CLEAR_TX_THREAD)) + complete(&ts->tx_restart_comp); + + ts->thread_clear &= ~ISCSI_CLEAR_TX_THREAD; + spin_unlock_bh(&ts->ts_state_lock); +sleep: + ret = wait_for_completion_interruptible(&ts->tx_start_comp); + if (ret != 0) + return NULL; + + if (iscsi_signal_thread_pre_handler(ts) < 0) + return NULL; + + if (!ts->conn) { + pr_err("struct iscsi_thread_set->conn is NULL for " + " thread_id: %d, going back to sleep\n", + ts->thread_id); + goto sleep; + } + + iscsi_check_to_add_additional_sets(); + /* + * From the TX thread, up the tx_post_start_comp that the RX Thread is + * sleeping on in iscsi_rx_thread_pre_handler(), then up the + * rx_post_start_comp that iscsi_activate_thread_set() is sleeping on. 
+ */ + ts->thread_clear |= ISCSI_CLEAR_TX_THREAD; + complete(&ts->tx_post_start_comp); + complete(&ts->rx_post_start_comp); + + spin_lock_bh(&ts->ts_state_lock); + ts->status = ISCSI_THREAD_SET_ACTIVE; + spin_unlock_bh(&ts->ts_state_lock); + + return ts->conn; +} + +int iscsi_thread_set_init(void) +{ + int size; + + iscsit_global->ts_bitmap_count = ISCSI_TS_BITMAP_BITS; + + size = BITS_TO_LONGS(iscsit_global->ts_bitmap_count) * sizeof(long); + iscsit_global->ts_bitmap = kzalloc(size, GFP_KERNEL); + if (!iscsit_global->ts_bitmap) { + pr_err("Unable to allocate iscsit_global->ts_bitmap\n"); + return -ENOMEM; + } + + spin_lock_init(&active_ts_lock); + spin_lock_init(&inactive_ts_lock); + spin_lock_init(&ts_bitmap_lock); + INIT_LIST_HEAD(&active_ts_list); + INIT_LIST_HEAD(&inactive_ts_list); + + return 0; +} + +void iscsi_thread_set_free(void) +{ + kfree(iscsit_global->ts_bitmap); +} diff --git a/drivers/target/iscsi/iscsi_target_tq.h b/drivers/target/iscsi/iscsi_target_tq.h new file mode 100644 index 000000000000..26e6a95ec203 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_tq.h @@ -0,0 +1,88 @@ +#ifndef ISCSI_THREAD_QUEUE_H +#define ISCSI_THREAD_QUEUE_H + +/* + * Defines for thread sets. 
+ */ +extern int iscsi_thread_set_force_reinstatement(struct iscsi_conn *); +extern void iscsi_add_ts_to_inactive_list(struct iscsi_thread_set *); +extern int iscsi_allocate_thread_sets(u32); +extern void iscsi_deallocate_thread_sets(void); +extern void iscsi_activate_thread_set(struct iscsi_conn *, struct iscsi_thread_set *); +extern struct iscsi_thread_set *iscsi_get_thread_set(void); +extern void iscsi_set_thread_clear(struct iscsi_conn *, u8); +extern void iscsi_set_thread_set_signal(struct iscsi_conn *, u8); +extern int iscsi_release_thread_set(struct iscsi_conn *); +extern struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *); +extern struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *); +extern int iscsi_thread_set_init(void); +extern void iscsi_thread_set_free(void); + +extern int iscsi_target_tx_thread(void *); +extern int iscsi_target_rx_thread(void *); + +#define TARGET_THREAD_SET_COUNT 4 + +#define ISCSI_RX_THREAD 1 +#define ISCSI_TX_THREAD 2 +#define ISCSI_RX_THREAD_NAME "iscsi_trx" +#define ISCSI_TX_THREAD_NAME "iscsi_ttx" +#define ISCSI_BLOCK_RX_THREAD 0x1 +#define ISCSI_BLOCK_TX_THREAD 0x2 +#define ISCSI_CLEAR_RX_THREAD 0x1 +#define ISCSI_CLEAR_TX_THREAD 0x2 +#define ISCSI_SIGNAL_RX_THREAD 0x1 +#define ISCSI_SIGNAL_TX_THREAD 0x2 + +/* struct iscsi_thread_set->status */ +#define ISCSI_THREAD_SET_FREE 1 +#define ISCSI_THREAD_SET_ACTIVE 2 +#define ISCSI_THREAD_SET_DIE 3 +#define ISCSI_THREAD_SET_RESET 4 +#define ISCSI_THREAD_SET_DEALLOCATE_THREADS 5 + +/* By default allow a maximum of 32K iSCSI connections */ +#define ISCSI_TS_BITMAP_BITS 32768 + +struct iscsi_thread_set { + /* flags used for blocking and restarting sets */ + int blocked_threads; + /* flag for creating threads */ + int create_threads; + /* flag for delaying readding to inactive list */ + int delay_inactive; + /* status for thread set */ + int status; + /* which threads have had signals sent */ + int signal_sent; + /* flag for which threads 
exited first */ + int thread_clear; + /* Active threads in the thread set */ + int thread_count; + /* Unique thread ID */ + u32 thread_id; + /* pointer to connection if set is active */ + struct iscsi_conn *conn; + /* used for controlling ts state accesses */ + spinlock_t ts_state_lock; + /* Used for rx side post startup */ + struct completion rx_post_start_comp; + /* Used for tx side post startup */ + struct completion tx_post_start_comp; + /* used for restarting thread queue */ + struct completion rx_restart_comp; + /* used for restarting thread queue */ + struct completion tx_restart_comp; + /* used for normal unused blocking */ + struct completion rx_start_comp; + /* used for normal unused blocking */ + struct completion tx_start_comp; + /* OS descriptor for rx thread */ + struct task_struct *rx_thread; + /* OS descriptor for tx thread */ + struct task_struct *tx_thread; + /* struct iscsi_thread_set in list list head*/ + struct list_head ts_list; +}; + +#endif /*** ISCSI_THREAD_QUEUE_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c new file mode 100644 index 000000000000..a1acb0167902 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -0,0 +1,1819 @@ +/******************************************************************************* + * This file contains the iSCSI Target specific utility functions. + * + * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + ******************************************************************************/ + +#include <linux/list.h> +#include <scsi/scsi_tcq.h> +#include <scsi/iscsi_proto.h> +#include <target/target_core_base.h> +#include <target/target_core_transport.h> +#include <target/target_core_tmr.h> +#include <target/target_core_fabric_ops.h> +#include <target/target_core_configfs.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_parameters.h" +#include "iscsi_target_seq_pdu_list.h" +#include "iscsi_target_datain_values.h" +#include "iscsi_target_erl0.h" +#include "iscsi_target_erl1.h" +#include "iscsi_target_erl2.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_tq.h" +#include "iscsi_target_util.h" +#include "iscsi_target.h" + +#define PRINT_BUFF(buff, len) \ +{ \ + int zzz; \ + \ + pr_debug("%d:\n", __LINE__); \ + for (zzz = 0; zzz < len; zzz++) { \ + if (zzz % 16 == 0) { \ + if (zzz) \ + pr_debug("\n"); \ + pr_debug("%4i: ", zzz); \ + } \ + pr_debug("%02x ", (unsigned char) (buff)[zzz]); \ + } \ + if ((len + 1) % 16) \ + pr_debug("\n"); \ +} + +extern struct list_head g_tiqn_list; +extern spinlock_t tiqn_lock; + +/* + * Called with cmd->r2t_lock held. + */ +int iscsit_add_r2t_to_list( + struct iscsi_cmd *cmd, + u32 offset, + u32 xfer_len, + int recovery, + u32 r2t_sn) +{ + struct iscsi_r2t *r2t; + + r2t = kmem_cache_zalloc(lio_r2t_cache, GFP_ATOMIC); + if (!r2t) { + pr_err("Unable to allocate memory for struct iscsi_r2t.\n"); + return -1; + } + INIT_LIST_HEAD(&r2t->r2t_list); + + r2t->recovery_r2t = recovery; + r2t->r2t_sn = (!r2t_sn) ? 
cmd->r2t_sn++ : r2t_sn; + r2t->offset = offset; + r2t->xfer_len = xfer_len; + list_add_tail(&r2t->r2t_list, &cmd->cmd_r2t_list); + spin_unlock_bh(&cmd->r2t_lock); + + iscsit_add_cmd_to_immediate_queue(cmd, cmd->conn, ISTATE_SEND_R2T); + + spin_lock_bh(&cmd->r2t_lock); + return 0; +} + +struct iscsi_r2t *iscsit_get_r2t_for_eos( + struct iscsi_cmd *cmd, + u32 offset, + u32 length) +{ + struct iscsi_r2t *r2t; + + spin_lock_bh(&cmd->r2t_lock); + list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) { + if ((r2t->offset <= offset) && + (r2t->offset + r2t->xfer_len) >= (offset + length)) { + spin_unlock_bh(&cmd->r2t_lock); + return r2t; + } + } + spin_unlock_bh(&cmd->r2t_lock); + + pr_err("Unable to locate R2T for Offset: %u, Length:" + " %u\n", offset, length); + return NULL; +} + +struct iscsi_r2t *iscsit_get_r2t_from_list(struct iscsi_cmd *cmd) +{ + struct iscsi_r2t *r2t; + + spin_lock_bh(&cmd->r2t_lock); + list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) { + if (!r2t->sent_r2t) { + spin_unlock_bh(&cmd->r2t_lock); + return r2t; + } + } + spin_unlock_bh(&cmd->r2t_lock); + + pr_err("Unable to locate next R2T to send for ITT:" + " 0x%08x.\n", cmd->init_task_tag); + return NULL; +} + +/* + * Called with cmd->r2t_lock held. + */ +void iscsit_free_r2t(struct iscsi_r2t *r2t, struct iscsi_cmd *cmd) +{ + list_del(&r2t->r2t_list); + kmem_cache_free(lio_r2t_cache, r2t); +} + +void iscsit_free_r2ts_from_list(struct iscsi_cmd *cmd) +{ + struct iscsi_r2t *r2t, *r2t_tmp; + + spin_lock_bh(&cmd->r2t_lock); + list_for_each_entry_safe(r2t, r2t_tmp, &cmd->cmd_r2t_list, r2t_list) + iscsit_free_r2t(r2t, cmd); + spin_unlock_bh(&cmd->r2t_lock); +} + +/* + * May be called from software interrupt (timer) context for allocating + * iSCSI NopINs. 
+ */ +struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp_mask) +{ + struct iscsi_cmd *cmd; + + cmd = kmem_cache_zalloc(lio_cmd_cache, gfp_mask); + if (!cmd) { + pr_err("Unable to allocate memory for struct iscsi_cmd.\n"); + return NULL; + } + + cmd->conn = conn; + INIT_LIST_HEAD(&cmd->i_list); + INIT_LIST_HEAD(&cmd->datain_list); + INIT_LIST_HEAD(&cmd->cmd_r2t_list); + init_completion(&cmd->reject_comp); + spin_lock_init(&cmd->datain_lock); + spin_lock_init(&cmd->dataout_timeout_lock); + spin_lock_init(&cmd->istate_lock); + spin_lock_init(&cmd->error_lock); + spin_lock_init(&cmd->r2t_lock); + + return cmd; +} + +/* + * Called from iscsi_handle_scsi_cmd() + */ +struct iscsi_cmd *iscsit_allocate_se_cmd( + struct iscsi_conn *conn, + u32 data_length, + int data_direction, + int iscsi_task_attr) +{ + struct iscsi_cmd *cmd; + struct se_cmd *se_cmd; + int sam_task_attr; + + cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + if (!cmd) + return NULL; + + cmd->data_direction = data_direction; + cmd->data_length = data_length; + /* + * Figure out the SAM Task Attribute for the incoming SCSI CDB + */ + if ((iscsi_task_attr == ISCSI_ATTR_UNTAGGED) || + (iscsi_task_attr == ISCSI_ATTR_SIMPLE)) + sam_task_attr = MSG_SIMPLE_TAG; + else if (iscsi_task_attr == ISCSI_ATTR_ORDERED) + sam_task_attr = MSG_ORDERED_TAG; + else if (iscsi_task_attr == ISCSI_ATTR_HEAD_OF_QUEUE) + sam_task_attr = MSG_HEAD_TAG; + else if (iscsi_task_attr == ISCSI_ATTR_ACA) + sam_task_attr = MSG_ACA_TAG; + else { + pr_debug("Unknown iSCSI Task Attribute: 0x%02x, using" + " MSG_SIMPLE_TAG\n", iscsi_task_attr); + sam_task_attr = MSG_SIMPLE_TAG; + } + + se_cmd = &cmd->se_cmd; + /* + * Initialize struct se_cmd descriptor from target_core_mod infrastructure + */ + transport_init_se_cmd(se_cmd, &lio_target_fabric_configfs->tf_ops, + conn->sess->se_sess, data_length, data_direction, + sam_task_attr, &cmd->sense_buffer[0]); + return cmd; +} + +/* + * Allocate a struct iscsi_cmd for an iSCSI Task Management Function request. + * For every function except TASK_REASSIGN the embedded se_cmd is initialized + * and a TCM se_tmr_req is attached. Returns the new command, or NULL on + * allocation failure or an unknown function code; on those failures the + * partially built command is released before returning. + */ +struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr( + struct 
iscsi_conn *conn, + u8 function) +{ + struct iscsi_cmd *cmd; + struct se_cmd *se_cmd; + u8 tcm_function; + + cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + if (!cmd) + return NULL; + + cmd->data_direction = DMA_NONE; + + cmd->tmr_req = kzalloc(sizeof(struct iscsi_tmr_req), GFP_KERNEL); + if (!cmd->tmr_req) { + pr_err("Unable to allocate memory for" + " Task Management command!\n"); + /* + * Nothing else owns cmd yet, so release it here instead of + * leaking it. se_cmd is not set up at this point, which is + * why the out: label cannot be used. + */ + iscsit_release_cmd(cmd); + return NULL; + } + /* + * TASK_REASSIGN for ERL=2 / connection stays inside of + * LIO-Target $FABRIC_MOD + */ + if (function == ISCSI_TM_FUNC_TASK_REASSIGN) + return cmd; + + se_cmd = &cmd->se_cmd; + /* + * Initialize struct se_cmd descriptor from target_core_mod infrastructure + */ + transport_init_se_cmd(se_cmd, &lio_target_fabric_configfs->tf_ops, + conn->sess->se_sess, 0, DMA_NONE, + MSG_SIMPLE_TAG, &cmd->sense_buffer[0]); + + switch (function) { + case ISCSI_TM_FUNC_ABORT_TASK: + tcm_function = TMR_ABORT_TASK; + break; + case ISCSI_TM_FUNC_ABORT_TASK_SET: + tcm_function = TMR_ABORT_TASK_SET; + break; + case ISCSI_TM_FUNC_CLEAR_ACA: + tcm_function = TMR_CLEAR_ACA; + break; + case ISCSI_TM_FUNC_CLEAR_TASK_SET: + tcm_function = TMR_CLEAR_TASK_SET; + break; + case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET: + tcm_function = TMR_LUN_RESET; + break; + case ISCSI_TM_FUNC_TARGET_WARM_RESET: + tcm_function = TMR_TARGET_WARM_RESET; + break; + case ISCSI_TM_FUNC_TARGET_COLD_RESET: + tcm_function = TMR_TARGET_COLD_RESET; + break; + default: + pr_err("Unknown iSCSI TMR Function:" + " 0x%02x\n", function); + goto out; + } + + se_cmd->se_tmr_req = core_tmr_alloc_req(se_cmd, + (void *)cmd->tmr_req, tcm_function); + if (!se_cmd->se_tmr_req) + goto out; + + cmd->tmr_req->se_tmr_req = se_cmd->se_tmr_req; + + return cmd; +out: + /* + * se_cmd is embedded in cmd (&cmd->se_cmd), so it has to be torn down + * before iscsit_release_cmd() hands cmd back to lio_cmd_cache. + */ + if (se_cmd) + transport_free_se_cmd(se_cmd); + iscsit_release_cmd(cmd); + return NULL; +} + +int iscsit_decide_list_to_build( + struct iscsi_cmd *cmd, + u32 immediate_data_length) +{ + struct iscsi_build_list bl; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_session *sess = 
conn->sess; + struct iscsi_node_attrib *na; + + if (sess->sess_ops->DataSequenceInOrder && + sess->sess_ops->DataPDUInOrder) + return 0; + + if (cmd->data_direction == DMA_NONE) + return 0; + + na = iscsit_tpg_get_node_attrib(sess); + memset(&bl, 0, sizeof(struct iscsi_build_list)); + + if (cmd->data_direction == DMA_FROM_DEVICE) { + bl.data_direction = ISCSI_PDU_READ; + bl.type = PDULIST_NORMAL; + if (na->random_datain_pdu_offsets) + bl.randomize |= RANDOM_DATAIN_PDU_OFFSETS; + if (na->random_datain_seq_offsets) + bl.randomize |= RANDOM_DATAIN_SEQ_OFFSETS; + } else { + bl.data_direction = ISCSI_PDU_WRITE; + bl.immediate_data_length = immediate_data_length; + if (na->random_r2t_offsets) + bl.randomize |= RANDOM_R2T_OFFSETS; + + if (!cmd->immediate_data && !cmd->unsolicited_data) + bl.type = PDULIST_NORMAL; + else if (cmd->immediate_data && !cmd->unsolicited_data) + bl.type = PDULIST_IMMEDIATE; + else if (!cmd->immediate_data && cmd->unsolicited_data) + bl.type = PDULIST_UNSOLICITED; + else if (cmd->immediate_data && cmd->unsolicited_data) + bl.type = PDULIST_IMMEDIATE_AND_UNSOLICITED; + } + + return iscsit_do_build_list(cmd, &bl); +} + +struct iscsi_seq *iscsit_get_seq_holder_for_datain( + struct iscsi_cmd *cmd, + u32 seq_send_order) +{ + u32 i; + + for (i = 0; i < cmd->seq_count; i++) + if (cmd->seq_list[i].seq_send_order == seq_send_order) + return &cmd->seq_list[i]; + + return NULL; +} + +struct iscsi_seq *iscsit_get_seq_holder_for_r2t(struct iscsi_cmd *cmd) +{ + u32 i; + + if (!cmd->seq_list) { + pr_err("struct iscsi_cmd->seq_list is NULL!\n"); + return NULL; + } + + for (i = 0; i < cmd->seq_count; i++) { + if (cmd->seq_list[i].type != SEQTYPE_NORMAL) + continue; + if (cmd->seq_list[i].seq_send_order == cmd->seq_send_order) { + cmd->seq_send_order++; + return &cmd->seq_list[i]; + } + } + + return NULL; +} + +struct iscsi_r2t *iscsit_get_holder_for_r2tsn( + struct iscsi_cmd *cmd, + u32 r2t_sn) +{ + struct iscsi_r2t *r2t; + + spin_lock_bh(&cmd->r2t_lock); + 
list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) { + if (r2t->r2t_sn == r2t_sn) { + spin_unlock_bh(&cmd->r2t_lock); + return r2t; + } + } + spin_unlock_bh(&cmd->r2t_lock); + + return NULL; +} + +static inline int iscsit_check_received_cmdsn(struct iscsi_session *sess, u32 cmdsn) +{ + int ret; + + /* + * This is the proper method of checking received CmdSN against + * ExpCmdSN and MaxCmdSN values, as well as accounting for out + * of order CmdSNs due to multiple connection sessions and/or + * CRC failures. + * + * ExpCmdSN is advanced only when the received CmdSN matches it + * exactly; every other outcome leaves the session counters untouched. + */ + if (iscsi_sna_gt(cmdsn, sess->max_cmd_sn)) { + pr_err("Received CmdSN: 0x%08x is greater than" + " MaxCmdSN: 0x%08x, protocol error.\n", cmdsn, + sess->max_cmd_sn); + ret = CMDSN_ERROR_CANNOT_RECOVER; + + } else if (cmdsn == sess->exp_cmd_sn) { + sess->exp_cmd_sn++; + pr_debug("Received CmdSN matches ExpCmdSN," + " incremented ExpCmdSN to: 0x%08x\n", + sess->exp_cmd_sn); + ret = CMDSN_NORMAL_OPERATION; + + } else if (iscsi_sna_gt(cmdsn, sess->exp_cmd_sn)) { + pr_debug("Received CmdSN: 0x%08x is greater" + " than ExpCmdSN: 0x%08x, not acknowledging.\n", + cmdsn, sess->exp_cmd_sn); + ret = CMDSN_HIGHER_THAN_EXP; + + } else { + pr_err("Received CmdSN: 0x%08x is less than" + " ExpCmdSN: 0x%08x, ignoring.\n", cmdsn, + sess->exp_cmd_sn); + ret = CMDSN_LOWER_THAN_EXP; + } + + return ret; +} + +/* + * Commands may be received out of order if MC/S is in use. + * Ensure they are executed in CmdSN order. 
+ */ +int iscsit_sequence_cmd( + struct iscsi_conn *conn, + struct iscsi_cmd *cmd, + u32 cmdsn) +{ + int ret; + int cmdsn_ret; + + mutex_lock(&conn->sess->cmdsn_mutex); + + cmdsn_ret = iscsit_check_received_cmdsn(conn->sess, cmdsn); + switch (cmdsn_ret) { + case CMDSN_NORMAL_OPERATION: + ret = iscsit_execute_cmd(cmd, 0); + if ((ret >= 0) && !list_empty(&conn->sess->sess_ooo_cmdsn_list)) + iscsit_execute_ooo_cmdsns(conn->sess); + break; + case CMDSN_HIGHER_THAN_EXP: + ret = iscsit_handle_ooo_cmdsn(conn->sess, cmd, cmdsn); + break; + case CMDSN_LOWER_THAN_EXP: + cmd->i_state = ISTATE_REMOVE; + iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state); + ret = cmdsn_ret; + break; + default: + ret = cmdsn_ret; + break; + } + mutex_unlock(&conn->sess->cmdsn_mutex); + + return ret; +} + +int iscsit_check_unsolicited_dataout(struct iscsi_cmd *cmd, unsigned char *buf) +{ + struct iscsi_conn *conn = cmd->conn; + struct se_cmd *se_cmd = &cmd->se_cmd; + struct iscsi_data *hdr = (struct iscsi_data *) buf; + u32 payload_length = ntoh24(hdr->dlength); + + if (conn->sess->sess_ops->InitialR2T) { + pr_err("Received unexpected unsolicited data" + " while InitialR2T=Yes, protocol error.\n"); + transport_send_check_condition_and_sense(se_cmd, + TCM_UNEXPECTED_UNSOLICITED_DATA, 0); + return -1; + } + + if ((cmd->first_burst_len + payload_length) > + conn->sess->sess_ops->FirstBurstLength) { + pr_err("Total %u bytes exceeds FirstBurstLength: %u" + " for this Unsolicited DataOut Burst.\n", + (cmd->first_burst_len + payload_length), + conn->sess->sess_ops->FirstBurstLength); + transport_send_check_condition_and_sense(se_cmd, + TCM_INCORRECT_AMOUNT_OF_DATA, 0); + return -1; + } + + if (!(hdr->flags & ISCSI_FLAG_CMD_FINAL)) + return 0; + + if (((cmd->first_burst_len + payload_length) != cmd->data_length) && + ((cmd->first_burst_len + payload_length) != + conn->sess->sess_ops->FirstBurstLength)) { + pr_err("Unsolicited non-immediate data received %u" + " does not equal FirstBurstLength: 
%u, and does" + " not equal ExpXferLen %u.\n", + (cmd->first_burst_len + payload_length), + conn->sess->sess_ops->FirstBurstLength, cmd->data_length); + transport_send_check_condition_and_sense(se_cmd, + TCM_INCORRECT_AMOUNT_OF_DATA, 0); + return -1; + } + return 0; +} + +/* + * Look up the command on conn->conn_cmd_list whose Initiator Task Tag equals + * init_task_tag. Returns the matching command, or NULL after logging an + * error when no command matches. conn->cmd_lock is held only for the list + * walk and is dropped before returning. + */ +struct iscsi_cmd *iscsit_find_cmd_from_itt( + struct iscsi_conn *conn, + u32 init_task_tag) +{ + struct iscsi_cmd *cmd; + + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) { + if (cmd->init_task_tag == init_task_tag) { + spin_unlock_bh(&conn->cmd_lock); + return cmd; + } + } + spin_unlock_bh(&conn->cmd_lock); + + pr_err("Unable to locate ITT: 0x%08x on CID: %hu\n", + init_task_tag, conn->cid); + return NULL; +} + +struct iscsi_cmd *iscsit_find_cmd_from_itt_or_dump( + struct iscsi_conn *conn, + u32 init_task_tag, + u32 length) +{ + struct iscsi_cmd *cmd; + + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) { + if (cmd->init_task_tag == init_task_tag) { + spin_unlock_bh(&conn->cmd_lock); + return cmd; + } + } + spin_unlock_bh(&conn->cmd_lock); + + pr_err("Unable to locate ITT: 0x%08x on CID: %hu," + " dumping payload\n", init_task_tag, conn->cid); + if (length) + iscsit_dump_data_payload(conn, length, 1); + + return NULL; +} + +struct iscsi_cmd *iscsit_find_cmd_from_ttt( + struct iscsi_conn *conn, + u32 targ_xfer_tag) +{ + struct iscsi_cmd *cmd = NULL; + + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) { + if (cmd->targ_xfer_tag == targ_xfer_tag) { + spin_unlock_bh(&conn->cmd_lock); + return cmd; + } + } + spin_unlock_bh(&conn->cmd_lock); + + pr_err("Unable to locate TTT: 0x%08x on CID: %hu\n", + targ_xfer_tag, conn->cid); + return NULL; +} + +int iscsit_find_cmd_for_recovery( + struct iscsi_session *sess, + struct iscsi_cmd **cmd_ptr, + struct iscsi_conn_recovery **cr_ptr, + u32 init_task_tag) +{ + struct iscsi_cmd *cmd = NULL; + struct iscsi_conn_recovery *cr; + /* + * Scan through 
the inactive connection recovery list's command list. + * If init_task_tag matches the command is still allegiant; + * -2 is returned with *cr_ptr and *cmd_ptr set. + */ + spin_lock(&sess->cr_i_lock); + list_for_each_entry(cr, &sess->cr_inactive_list, cr_list) { + spin_lock(&cr->conn_recovery_cmd_lock); + list_for_each_entry(cmd, &cr->conn_recovery_cmd_list, i_list) { + if (cmd->init_task_tag == init_task_tag) { + spin_unlock(&cr->conn_recovery_cmd_lock); + spin_unlock(&sess->cr_i_lock); + + *cr_ptr = cr; + *cmd_ptr = cmd; + return -2; + } + } + spin_unlock(&cr->conn_recovery_cmd_lock); + } + spin_unlock(&sess->cr_i_lock); + /* + * Scan through the active connection recovery list's command list. + * If init_task_tag matches the command is ready to be reassigned; + * 0 is returned with *cr_ptr and *cmd_ptr set. If neither list + * holds the tag, -1 is returned and the out-pointers are untouched. + */ + spin_lock(&sess->cr_a_lock); + list_for_each_entry(cr, &sess->cr_active_list, cr_list) { + spin_lock(&cr->conn_recovery_cmd_lock); + list_for_each_entry(cmd, &cr->conn_recovery_cmd_list, i_list) { + if (cmd->init_task_tag == init_task_tag) { + spin_unlock(&cr->conn_recovery_cmd_lock); + spin_unlock(&sess->cr_a_lock); + + *cr_ptr = cr; + *cmd_ptr = cmd; + return 0; + } + } + spin_unlock(&cr->conn_recovery_cmd_lock); + } + spin_unlock(&sess->cr_a_lock); + + return -1; +} + +void iscsit_add_cmd_to_immediate_queue( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn, + u8 state) +{ + struct iscsi_queue_req *qr; + + qr = kmem_cache_zalloc(lio_qr_cache, GFP_ATOMIC); + if (!qr) { + pr_err("Unable to allocate memory for" + " struct iscsi_queue_req\n"); + return; + } + INIT_LIST_HEAD(&qr->qr_list); + qr->cmd = cmd; + qr->state = state; + + spin_lock_bh(&conn->immed_queue_lock); + list_add_tail(&qr->qr_list, &conn->immed_queue_list); + atomic_inc(&cmd->immed_queue_count); + atomic_set(&conn->check_immediate_queue, 1); + spin_unlock_bh(&conn->immed_queue_lock); + + wake_up_process(conn->thread_set->tx_thread); +} + +struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_conn *conn) +{ + struct iscsi_queue_req *qr; + + 
spin_lock_bh(&conn->immed_queue_lock); + if (list_empty(&conn->immed_queue_list)) { + spin_unlock_bh(&conn->immed_queue_lock); + return NULL; + } + list_for_each_entry(qr, &conn->immed_queue_list, qr_list) + break; + + list_del(&qr->qr_list); + if (qr->cmd) + atomic_dec(&qr->cmd->immed_queue_count); + spin_unlock_bh(&conn->immed_queue_lock); + + return qr; +} + +static void iscsit_remove_cmd_from_immediate_queue( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + struct iscsi_queue_req *qr, *qr_tmp; + + spin_lock_bh(&conn->immed_queue_lock); + if (!atomic_read(&cmd->immed_queue_count)) { + spin_unlock_bh(&conn->immed_queue_lock); + return; + } + + list_for_each_entry_safe(qr, qr_tmp, &conn->immed_queue_list, qr_list) { + if (qr->cmd != cmd) + continue; + + atomic_dec(&qr->cmd->immed_queue_count); + list_del(&qr->qr_list); + kmem_cache_free(lio_qr_cache, qr); + } + spin_unlock_bh(&conn->immed_queue_lock); + + if (atomic_read(&cmd->immed_queue_count)) { + pr_err("ITT: 0x%08x immed_queue_count: %d\n", + cmd->init_task_tag, + atomic_read(&cmd->immed_queue_count)); + } +} + +void iscsit_add_cmd_to_response_queue( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn, + u8 state) +{ + struct iscsi_queue_req *qr; + + qr = kmem_cache_zalloc(lio_qr_cache, GFP_ATOMIC); + if (!qr) { + pr_err("Unable to allocate memory for" + " struct iscsi_queue_req\n"); + return; + } + INIT_LIST_HEAD(&qr->qr_list); + qr->cmd = cmd; + qr->state = state; + + spin_lock_bh(&conn->response_queue_lock); + list_add_tail(&qr->qr_list, &conn->response_queue_list); + atomic_inc(&cmd->response_queue_count); + spin_unlock_bh(&conn->response_queue_lock); + + wake_up_process(conn->thread_set->tx_thread); +} + +struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *conn) +{ + struct iscsi_queue_req *qr; + + spin_lock_bh(&conn->response_queue_lock); + if (list_empty(&conn->response_queue_list)) { + spin_unlock_bh(&conn->response_queue_lock); + return NULL; + } + + 
list_for_each_entry(qr, &conn->response_queue_list, qr_list) + break; /* stop at once: qr is now the first entry of a non-empty list */ + + list_del(&qr->qr_list); + if (qr->cmd) + atomic_dec(&qr->cmd->response_queue_count); + spin_unlock_bh(&conn->response_queue_lock); + + return qr; +} +/* + * Unlink and free every request on conn->response_queue_list that refers + * to @cmd, decrementing cmd->response_queue_count once per request. + * Returns early when the count is already zero, and logs the ITT if the + * count is still non-zero after the scan. + */ +static void iscsit_remove_cmd_from_response_queue( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + struct iscsi_queue_req *qr, *qr_tmp; + + spin_lock_bh(&conn->response_queue_lock); + if (!atomic_read(&cmd->response_queue_count)) { + spin_unlock_bh(&conn->response_queue_lock); + return; + } + + list_for_each_entry_safe(qr, qr_tmp, &conn->response_queue_list, + qr_list) { + if (qr->cmd != cmd) + continue; + + atomic_dec(&qr->cmd->response_queue_count); + list_del(&qr->qr_list); + kmem_cache_free(lio_qr_cache, qr); + } + spin_unlock_bh(&conn->response_queue_lock); + + if (atomic_read(&cmd->response_queue_count)) { + pr_err("ITT: 0x%08x response_queue_count: %d\n", + cmd->init_task_tag, + atomic_read(&cmd->response_queue_count)); + } +} +/* + * Drain both TX queues of @conn: every request on the immediate queue and + * then on the response queue is unlinked and freed, and the matching + * per-command queue counter is decremented when the request still points + * at a command. + */ +void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *conn) +{ + struct iscsi_queue_req *qr, *qr_tmp; + + spin_lock_bh(&conn->immed_queue_lock); + list_for_each_entry_safe(qr, qr_tmp, &conn->immed_queue_list, qr_list) { + list_del(&qr->qr_list); + if (qr->cmd) + atomic_dec(&qr->cmd->immed_queue_count); + + kmem_cache_free(lio_qr_cache, qr); + } + spin_unlock_bh(&conn->immed_queue_lock); + + spin_lock_bh(&conn->response_queue_lock); + list_for_each_entry_safe(qr, qr_tmp, &conn->response_queue_list, + qr_list) { + list_del(&qr->qr_list); + if (qr->cmd) + atomic_dec(&qr->cmd->response_queue_count); + + kmem_cache_free(lio_qr_cache, qr); + } + spin_unlock_bh(&conn->response_queue_lock); +} +/* + * Release everything owned by @cmd: its R2T and DataIN request lists, the + * kmalloc-ed per-command buffers, each page referenced by cmd->t_mem_sg + * and the scatterlist array itself. Any queue requests that still refer + * to the command are purged from the TX queues of cmd->conn (when set) + * before the command goes back to lio_cmd_cache. + */ +void iscsit_release_cmd(struct iscsi_cmd *cmd) +{ + struct iscsi_conn *conn = cmd->conn; + int i; + + iscsit_free_r2ts_from_list(cmd); + iscsit_free_all_datain_reqs(cmd); + + kfree(cmd->buf_ptr); + kfree(cmd->pdu_list); + kfree(cmd->seq_list); + kfree(cmd->tmr_req); + kfree(cmd->iov_data); + + for (i = 0; i < 
cmd->t_mem_sg_nents; i++) + __free_page(sg_page(&cmd->t_mem_sg[i])); + + kfree(cmd->t_mem_sg); + + if (conn) { + iscsit_remove_cmd_from_immediate_queue(cmd, conn); + iscsit_remove_cmd_from_response_queue(cmd, conn); + } + + kmem_cache_free(lio_cmd_cache, cmd); +} +/* + * Returns 0 when the session usage count is zero. Otherwise the session + * is flagged as having a waiter and the function either returns 2 at once + * when in_interrupt() is true, or blocks on session_waiting_on_uc_comp + * and returns 1 after being woken. + */ +int iscsit_check_session_usage_count(struct iscsi_session *sess) +{ + spin_lock_bh(&sess->session_usage_lock); + if (sess->session_usage_count != 0) { + sess->session_waiting_on_uc = 1; + spin_unlock_bh(&sess->session_usage_lock); + if (in_interrupt()) + return 2; + + wait_for_completion(&sess->session_waiting_on_uc_comp); + return 1; + } + spin_unlock_bh(&sess->session_usage_lock); + + return 0; +} +/* + * Drop one session usage reference. When the count reaches zero and a + * waiter has been flagged, session_waiting_on_uc_comp is completed. + */ +void iscsit_dec_session_usage_count(struct iscsi_session *sess) +{ + spin_lock_bh(&sess->session_usage_lock); + sess->session_usage_count--; + + if (!sess->session_usage_count && sess->session_waiting_on_uc) + complete(&sess->session_waiting_on_uc_comp); + + spin_unlock_bh(&sess->session_usage_lock); +} +/* Take one session usage reference under session_usage_lock. */ +void iscsit_inc_session_usage_count(struct iscsi_session *sess) +{ + spin_lock_bh(&sess->session_usage_lock); + sess->session_usage_count++; + spin_unlock_bh(&sess->session_usage_lock); +} + +/* + * Used before iscsit_do_[rx,tx]_data() to determine the iov and + * [rx,tx]_marker array counts needed for sync and steering. + * + * Sets count->sync_and_steering, seeds count->ss_iov_count from + * count->iov_count, then adds 3 iovecs and 2 marker slots for every + * marker boundary that count->data_length reaches. Always returns 0. + */ +static int iscsit_determine_sync_and_steering_counts( + struct iscsi_conn *conn, + struct iscsi_data_count *count) +{ + u32 length = count->data_length; + u32 marker, markint; + + count->sync_and_steering = 1; + + marker = (count->type == ISCSI_RX_DATA) ? + conn->of_marker : conn->if_marker; + markint = (count->type == ISCSI_RX_DATA) ? 
+ (conn->conn_ops->OFMarkInt * 4) : + (conn->conn_ops->IFMarkInt * 4); + count->ss_iov_count = count->iov_count; + + while (length > 0) { + if (length >= marker) { /* a marker boundary lies within the remaining data */ + count->ss_iov_count += 3; + count->ss_marker_count += 2; + + length -= marker; + marker = markint; + } else + length = 0; + } + + return 0; +} + +/* + * Setup conn->if_marker and conn->of_marker values based upon + * the initial marker-less interval. (see iSCSI v19 A.2) + * + * On entry the two fields are compared against the marker interval in + * bytes; on exit they hold the distance to the next marker. Always + * returns 0. + * + * NOTE(review): the else branches divide by OFMarkInt / IFMarkInt, so an + * interval of zero with the marker enabled would divide by zero. + * Presumably parameter negotiation rules that out - confirm. + */ +int iscsit_set_sync_and_steering_values(struct iscsi_conn *conn) +{ + int login_ifmarker_count = 0, login_ofmarker_count = 0, next_marker = 0; + /* + * IFMarkInt and OFMarkInt are negotiated as 32-bit words. + */ + u32 IFMarkInt = (conn->conn_ops->IFMarkInt * 4); + u32 OFMarkInt = (conn->conn_ops->OFMarkInt * 4); + + if (conn->conn_ops->OFMarker) { + /* + * Account for the first Login Command received not + * via iscsi_recv_msg(). + */ + conn->of_marker += ISCSI_HDR_LEN; + if (conn->of_marker <= OFMarkInt) { + conn->of_marker = (OFMarkInt - conn->of_marker); + } else { + login_ofmarker_count = (conn->of_marker / OFMarkInt); + next_marker = (OFMarkInt * (login_ofmarker_count + 1)) + + (login_ofmarker_count * MARKER_SIZE); + conn->of_marker = (next_marker - conn->of_marker); + } + conn->of_marker_offset = 0; + pr_debug("Setting OFMarker value to %u based on Initial" + " Markerless Interval.\n", conn->of_marker); + } + + if (conn->conn_ops->IFMarker) { + if (conn->if_marker <= IFMarkInt) { + conn->if_marker = (IFMarkInt - conn->if_marker); + } else { + login_ifmarker_count = (conn->if_marker / IFMarkInt); + next_marker = (IFMarkInt * (login_ifmarker_count + 1)) + + (login_ifmarker_count * MARKER_SIZE); + conn->if_marker = (next_marker - conn->if_marker); + } + pr_debug("Setting IFMarker value to %u based on Initial" + " Markerless Interval.\n", conn->if_marker); + } + + return 0; +} +/* + * Look up the connection of @sess whose CID equals @cid and whose state + * is TARG_CONN_STATE_LOGGED_IN. On a match a connection usage reference + * is taken before the connection is returned; NULL when nothing matches. + */ +struct iscsi_conn *iscsit_get_conn_from_cid(struct iscsi_session *sess, u16 cid) +{ + struct iscsi_conn *conn; + + spin_lock_bh(&sess->conn_lock); + 
list_for_each_entry(conn, &sess->sess_conn_list, conn_list) { + if ((conn->cid == cid) && + (conn->conn_state == TARG_CONN_STATE_LOGGED_IN)) { + iscsit_inc_conn_usage_count(conn); + spin_unlock_bh(&sess->conn_lock); + return conn; + } + } + spin_unlock_bh(&sess->conn_lock); + + return NULL; +} +/* + * Like iscsit_get_conn_from_cid() but matches on CID alone, whatever the + * connection state. On a match a usage reference is taken and + * conn->connection_wait_rcfr is set to 1 under conn->state_lock before the + * connection is returned; NULL when nothing matches. + */ +struct iscsi_conn *iscsit_get_conn_from_cid_rcfr(struct iscsi_session *sess, u16 cid) +{ + struct iscsi_conn *conn; + + spin_lock_bh(&sess->conn_lock); + list_for_each_entry(conn, &sess->sess_conn_list, conn_list) { + if (conn->cid == cid) { + iscsit_inc_conn_usage_count(conn); + spin_lock(&conn->state_lock); + atomic_set(&conn->connection_wait_rcfr, 1); + spin_unlock(&conn->state_lock); + spin_unlock_bh(&sess->conn_lock); + return conn; + } + } + spin_unlock_bh(&sess->conn_lock); + + return NULL; +} +/* + * If the connection usage count is non-zero, flag a waiter and block on + * conn_waiting_on_uc_comp until it is completed; otherwise return at + * once. + */ +void iscsit_check_conn_usage_count(struct iscsi_conn *conn) +{ + spin_lock_bh(&conn->conn_usage_lock); + if (conn->conn_usage_count != 0) { + conn->conn_waiting_on_uc = 1; + spin_unlock_bh(&conn->conn_usage_lock); + + wait_for_completion(&conn->conn_waiting_on_uc_comp); + return; + } + spin_unlock_bh(&conn->conn_usage_lock); +} +/* + * Drop one connection usage reference. When the count reaches zero and a + * waiter has been flagged, conn_waiting_on_uc_comp is completed. + */ +void iscsit_dec_conn_usage_count(struct iscsi_conn *conn) +{ + spin_lock_bh(&conn->conn_usage_lock); + conn->conn_usage_count--; + + if (!conn->conn_usage_count && conn->conn_waiting_on_uc) + complete(&conn->conn_waiting_on_uc_comp); + + spin_unlock_bh(&conn->conn_usage_lock); +} +/* Take one connection usage reference under conn_usage_lock. */ +void iscsit_inc_conn_usage_count(struct iscsi_conn *conn) +{ + spin_lock_bh(&conn->conn_usage_lock); + conn->conn_usage_count++; + spin_unlock_bh(&conn->conn_usage_lock); +} +/* + * Build a NOOP-IN command with ITT 0xFFFFFFFF, add it to + * conn->conn_cmd_list and queue it on the immediate queue. With + * @want_response set, a target transfer tag is taken from the session + * counter and the NOPIN response timer is started; otherwise the tag is + * 0xFFFFFFFF. Returns 0, or -1 when the command cannot be allocated. + */ +static int iscsit_add_nopin(struct iscsi_conn *conn, int want_response) +{ + u8 state; + struct iscsi_cmd *cmd; + + cmd = iscsit_allocate_cmd(conn, GFP_ATOMIC); + if (!cmd) + return -1; + + cmd->iscsi_opcode = ISCSI_OP_NOOP_IN; + state = (want_response) ? 
ISTATE_SEND_NOPIN_WANT_RESPONSE : + ISTATE_SEND_NOPIN_NO_RESPONSE; + cmd->init_task_tag = 0xFFFFFFFF; + spin_lock_bh(&conn->sess->ttt_lock); + cmd->targ_xfer_tag = (want_response) ? conn->sess->targ_xfer_tag++ : + 0xFFFFFFFF; + if (want_response && (cmd->targ_xfer_tag == 0xFFFFFFFF)) + cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++; /* 0xFFFFFFFF is the no-response value, so draw the next tag */ + spin_unlock_bh(&conn->sess->ttt_lock); + + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + + if (want_response) + iscsit_start_nopin_response_timer(conn); + iscsit_add_cmd_to_immediate_queue(cmd, conn, state); + + return 0; +} +/* + * Timer callback for conn->nopin_response_timer; @data is the connection + * pointer. Unless the timer has been flagged ISCSI_TF_STOP, it clears + * ISCSI_TF_RUNNING, records a connection timeout in the session error + * statistics of the tiqn (when one is set) and calls + * iscsit_cause_connection_reinstatement(). A connection usage reference + * is held for the duration of the callback. + */ +static void iscsit_handle_nopin_response_timeout(unsigned long data) +{ + struct iscsi_conn *conn = (struct iscsi_conn *) data; + + iscsit_inc_conn_usage_count(conn); + + spin_lock_bh(&conn->nopin_timer_lock); + if (conn->nopin_response_timer_flags & ISCSI_TF_STOP) { + spin_unlock_bh(&conn->nopin_timer_lock); + iscsit_dec_conn_usage_count(conn); + return; + } + + pr_debug("Did not receive response to NOPIN on CID: %hu on" + " SID: %u, failing connection.\n", conn->cid, + conn->sess->sid); + conn->nopin_response_timer_flags &= ~ISCSI_TF_RUNNING; + spin_unlock_bh(&conn->nopin_timer_lock); + + { + struct iscsi_portal_group *tpg = conn->sess->tpg; + struct iscsi_tiqn *tiqn = tpg->tpg_tiqn; + + if (tiqn) { + spin_lock_bh(&tiqn->sess_err_stats.lock); + strcpy(tiqn->sess_err_stats.last_sess_fail_rem_name, + (void *)conn->sess->sess_ops->InitiatorName); /* NOTE(review): unbounded strcpy - safe only if the destination is at least as large as InitiatorName; confirm */ + tiqn->sess_err_stats.last_sess_failure_type = + ISCSI_SESS_ERR_CXN_TIMEOUT; + tiqn->sess_err_stats.cxn_timeout_errors++; + conn->sess->conn_timeout_errors++; + spin_unlock_bh(&tiqn->sess_err_stats.lock); + } + } + + iscsit_cause_connection_reinstatement(conn, 0); + iscsit_dec_conn_usage_count(conn); +} +/* + * Push the expiry of the NOPIN response timer out to + * nopin_response_timeout seconds from now, but only while the timer is + * flagged ISCSI_TF_RUNNING. Takes conn->nopin_timer_lock itself. + */ +void iscsit_mod_nopin_response_timer(struct iscsi_conn *conn) +{ + struct iscsi_session *sess = conn->sess; + struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess); + + 
spin_lock_bh(&conn->nopin_timer_lock); + if (!(conn->nopin_response_timer_flags & ISCSI_TF_RUNNING)) { + spin_unlock_bh(&conn->nopin_timer_lock); + return; + } + + mod_timer(&conn->nopin_response_timer, + (get_jiffies_64() + na->nopin_response_timeout * HZ)); + spin_unlock_bh(&conn->nopin_timer_lock); +} + +/* + * Takes conn->nopin_timer_lock itself, so it must be called WITHOUT that + * lock held. + * + * Arms the NOPIN response timer to fire nopin_response_timeout seconds + * from now with iscsit_handle_nopin_response_timeout() as the handler, + * clearing ISCSI_TF_STOP and setting ISCSI_TF_RUNNING. Does nothing when + * the timer is already flagged ISCSI_TF_RUNNING. + */ +void iscsit_start_nopin_response_timer(struct iscsi_conn *conn) +{ + struct iscsi_session *sess = conn->sess; + struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess); + + spin_lock_bh(&conn->nopin_timer_lock); + if (conn->nopin_response_timer_flags & ISCSI_TF_RUNNING) { + spin_unlock_bh(&conn->nopin_timer_lock); + return; + } + + init_timer(&conn->nopin_response_timer); + conn->nopin_response_timer.expires = + (get_jiffies_64() + na->nopin_response_timeout * HZ); + conn->nopin_response_timer.data = (unsigned long)conn; + conn->nopin_response_timer.function = iscsit_handle_nopin_response_timeout; + conn->nopin_response_timer_flags &= ~ISCSI_TF_STOP; + conn->nopin_response_timer_flags |= ISCSI_TF_RUNNING; + add_timer(&conn->nopin_response_timer); + + pr_debug("Started NOPIN Response Timer on CID: %d to %u" + " seconds\n", conn->cid, na->nopin_response_timeout); + spin_unlock_bh(&conn->nopin_timer_lock); +} +/* + * Stop the NOPIN response timer. ISCSI_TF_STOP is set under the lock so + * that a handler already running bails out, del_timer_sync() is called + * with the lock dropped, and ISCSI_TF_RUNNING is cleared afterwards. + * Does nothing when the timer is not flagged ISCSI_TF_RUNNING. + */ +void iscsit_stop_nopin_response_timer(struct iscsi_conn *conn) +{ + spin_lock_bh(&conn->nopin_timer_lock); + if (!(conn->nopin_response_timer_flags & ISCSI_TF_RUNNING)) { + spin_unlock_bh(&conn->nopin_timer_lock); + return; + } + conn->nopin_response_timer_flags |= ISCSI_TF_STOP; + spin_unlock_bh(&conn->nopin_timer_lock); + + del_timer_sync(&conn->nopin_response_timer); + + spin_lock_bh(&conn->nopin_timer_lock); + conn->nopin_response_timer_flags &= ~ISCSI_TF_RUNNING; + spin_unlock_bh(&conn->nopin_timer_lock); +} +/* + * Timer callback for conn->nopin_timer; @data is the connection pointer. + * Unless the timer has been flagged ISCSI_TF_STOP, it clears + * ISCSI_TF_RUNNING and queues a NOPIN that asks for a response. A + * connection usage reference is held for the duration of the callback. + */ +static void iscsit_handle_nopin_timeout(unsigned long data) +{ + struct iscsi_conn *conn = (struct iscsi_conn *) data; + + iscsit_inc_conn_usage_count(conn); + + 
spin_lock_bh(&conn->nopin_timer_lock); + if (conn->nopin_timer_flags & ISCSI_TF_STOP) { + spin_unlock_bh(&conn->nopin_timer_lock); + iscsit_dec_conn_usage_count(conn); + return; + } + conn->nopin_timer_flags &= ~ISCSI_TF_RUNNING; + spin_unlock_bh(&conn->nopin_timer_lock); + + iscsit_add_nopin(conn, 1); /* return value ignored: a failed allocation just skips this NOPIN */ + iscsit_dec_conn_usage_count(conn); +} + +/* + * Called with conn->nopin_timer_lock held. + * + * Same timer setup as iscsit_start_nopin_timer() minus the locking: arms + * conn->nopin_timer to fire nopin_timeout seconds from now unless the + * timeout is zero or the timer is already flagged ISCSI_TF_RUNNING. + */ +void __iscsit_start_nopin_timer(struct iscsi_conn *conn) +{ + struct iscsi_session *sess = conn->sess; + struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess); + /* + * NOPIN timeout is disabled. + */ + if (!na->nopin_timeout) + return; + + if (conn->nopin_timer_flags & ISCSI_TF_RUNNING) + return; + + init_timer(&conn->nopin_timer); + conn->nopin_timer.expires = (get_jiffies_64() + na->nopin_timeout * HZ); + conn->nopin_timer.data = (unsigned long)conn; + conn->nopin_timer.function = iscsit_handle_nopin_timeout; + conn->nopin_timer_flags &= ~ISCSI_TF_STOP; + conn->nopin_timer_flags |= ISCSI_TF_RUNNING; + add_timer(&conn->nopin_timer); + + pr_debug("Started NOPIN Timer on CID: %d at %u second" + " interval\n", conn->cid, na->nopin_timeout); +} +/* + * Arm conn->nopin_timer to fire nopin_timeout seconds from now. Takes + * conn->nopin_timer_lock itself around the running check and the timer + * setup; does nothing when the timeout is zero or the timer is already + * flagged ISCSI_TF_RUNNING. + */ +void iscsit_start_nopin_timer(struct iscsi_conn *conn) +{ + struct iscsi_session *sess = conn->sess; + struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess); + /* + * NOPIN timeout is disabled. 
+ */ + if (!na->nopin_timeout) + return; + + spin_lock_bh(&conn->nopin_timer_lock); + if (conn->nopin_timer_flags & ISCSI_TF_RUNNING) { + spin_unlock_bh(&conn->nopin_timer_lock); + return; + } + + init_timer(&conn->nopin_timer); + conn->nopin_timer.expires = (get_jiffies_64() + na->nopin_timeout * HZ); + conn->nopin_timer.data = (unsigned long)conn; + conn->nopin_timer.function = iscsit_handle_nopin_timeout; + conn->nopin_timer_flags &= ~ISCSI_TF_STOP; + conn->nopin_timer_flags |= ISCSI_TF_RUNNING; + add_timer(&conn->nopin_timer); + + pr_debug("Started NOPIN Timer on CID: %d at %u second" + " interval\n", conn->cid, na->nopin_timeout); + spin_unlock_bh(&conn->nopin_timer_lock); +} +/* + * Stop the NOPIN timer. ISCSI_TF_STOP is set under the lock so that a + * handler already running bails out, del_timer_sync() is called with the + * lock dropped, and ISCSI_TF_RUNNING is cleared afterwards. Does nothing + * when the timer is not flagged ISCSI_TF_RUNNING. + */ +void iscsit_stop_nopin_timer(struct iscsi_conn *conn) +{ + spin_lock_bh(&conn->nopin_timer_lock); + if (!(conn->nopin_timer_flags & ISCSI_TF_RUNNING)) { + spin_unlock_bh(&conn->nopin_timer_lock); + return; + } + conn->nopin_timer_flags |= ISCSI_TF_STOP; + spin_unlock_bh(&conn->nopin_timer_lock); + + del_timer_sync(&conn->nopin_timer); + + spin_lock_bh(&conn->nopin_timer_lock); + conn->nopin_timer_flags &= ~ISCSI_TF_RUNNING; + spin_unlock_bh(&conn->nopin_timer_lock); +} +/* + * Transmit cmd->tx_size bytes through tx_data(), using cmd->iov_data when + * @use_misc is zero and cmd->iov_misc otherwise. The send is retried + * immediately, with no delay or retry limit, for as long as tx_data() + * returns -EAGAIN. Returns 0 and clears cmd->tx_size after a complete + * send, -1 on a short send or any other error. + */ +int iscsit_send_tx_data( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn, + int use_misc) +{ + int tx_sent, tx_size; + u32 iov_count; + struct kvec *iov; + +send_data: + tx_size = cmd->tx_size; + + if (!use_misc) { + iov = &cmd->iov_data[0]; + iov_count = cmd->iov_data_count; + } else { + iov = &cmd->iov_misc[0]; + iov_count = cmd->iov_misc_count; + } + + tx_sent = tx_data(conn, &iov[0], iov_count, tx_size); + if (tx_size != tx_sent) { + if (tx_sent == -EAGAIN) { + pr_err("tx_data() returned -EAGAIN\n"); + goto send_data; + } else + return -1; + } + cmd->tx_size = 0; + + return 0; +} +/* + * Send one PDU in four steps: the header (plus header digest when + * negotiated) through tx_data(), the payload through the sendpage() op of + * the socket walking the scatterlist from cmd->first_data_sg, then the + * padding and the data digest through tx_data(). Every step is retried + * immediately while it reports -EAGAIN. Returns 0 on success, -1 on any + * other failure. + */ +int iscsit_fe_sendpage_sg( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + struct scatterlist *sg = cmd->first_data_sg; + struct kvec iov; + u32 tx_hdr_size, data_len; + u32 offset = cmd->first_data_sg_off; + int tx_sent; + 
+send_hdr: + tx_hdr_size = ISCSI_HDR_LEN; + if (conn->conn_ops->HeaderDigest) + tx_hdr_size += ISCSI_CRC_LEN; + + iov.iov_base = cmd->pdu; + iov.iov_len = tx_hdr_size; + + tx_sent = tx_data(conn, &iov, 1, tx_hdr_size); + if (tx_hdr_size != tx_sent) { + if (tx_sent == -EAGAIN) { + pr_err("tx_data() returned -EAGAIN\n"); + goto send_hdr; + } + return -1; + } + + data_len = cmd->tx_size - tx_hdr_size - cmd->padding; /* payload only: header, padding and digest are sent separately */ + if (conn->conn_ops->DataDigest) + data_len -= ISCSI_CRC_LEN; + + /* + * Perform sendpage() for each page in the scatterlist, starting + * cmd->first_data_sg_off bytes into the first entry. Anything + * other than a complete send or -EAGAIN is treated as a failure. + */ + while (data_len) { + u32 space = (sg->length - offset); + u32 sub_len = min_t(u32, data_len, space); +send_pg: + tx_sent = conn->sock->ops->sendpage(conn->sock, + sg_page(sg), sg->offset + offset, sub_len, 0); + if (tx_sent != sub_len) { + if (tx_sent == -EAGAIN) { + pr_err("tcp_sendpage() returned" + " -EAGAIN\n"); + goto send_pg; + } + + pr_err("tcp_sendpage() failure: %d\n", + tx_sent); + return -1; + } + + data_len -= sub_len; + offset = 0; + sg = sg_next(sg); + } + +send_padding: + if (cmd->padding) { + struct kvec *iov_p = + &cmd->iov_data[cmd->iov_data_count-1]; /* presumably the caller stored the padding iovec in the last counted slot - confirm */ + + tx_sent = tx_data(conn, iov_p, 1, cmd->padding); + if (cmd->padding != tx_sent) { + if (tx_sent == -EAGAIN) { + pr_err("tx_data() returned -EAGAIN\n"); + goto send_padding; + } + return -1; + } + } + +send_datacrc: + if (conn->conn_ops->DataDigest) { + struct kvec *iov_d = + &cmd->iov_data[cmd->iov_data_count]; /* NOTE(review): one slot past iov_data_count - presumably the caller stored the digest iovec there; confirm */ + + tx_sent = tx_data(conn, iov_d, 1, ISCSI_CRC_LEN); + if (ISCSI_CRC_LEN != tx_sent) { + if (tx_sent == -EAGAIN) { + pr_err("tx_data() returned -EAGAIN\n"); + goto send_datacrc; + } + return -1; + } + } + + return 0; +} + +/* + * This function is mainly used for sending an ISCSI_TARG_LOGIN_RSP PDU + * back to the Initiator when an exception condition occurs with the + * errors set in status_class and status_detail. + * + * Parameters: iSCSI Connection, Status Class, Status Detail. + * Returns: 0 on success, -1 on error. 
+ */ +int iscsit_tx_login_rsp(struct iscsi_conn *conn, u8 status_class, u8 status_detail) +{ + u8 iscsi_hdr[ISCSI_HDR_LEN]; + int err; + struct kvec iov; + struct iscsi_login_rsp *hdr; + + iscsit_collect_login_stats(conn, status_class, status_detail); /* update the login statistics before replying */ + + memset(&iov, 0, sizeof(struct kvec)); + memset(&iscsi_hdr, 0x0, ISCSI_HDR_LEN); + + hdr = (struct iscsi_login_rsp *)&iscsi_hdr; + hdr->opcode = ISCSI_OP_LOGIN_RSP; + hdr->status_class = status_class; + hdr->status_detail = status_detail; + hdr->itt = cpu_to_be32(conn->login_itt); + + iov.iov_base = &iscsi_hdr; + iov.iov_len = ISCSI_HDR_LEN; + + PRINT_BUFF(iscsi_hdr, ISCSI_HDR_LEN); + + err = tx_data(conn, &iov, 1, ISCSI_HDR_LEN); + if (err != ISCSI_HDR_LEN) { + pr_err("tx_data returned less than expected\n"); + return -1; + } + + return 0; +} +/* + * Dump, through the iscsi_dump_*_ops() helpers, the connection parameters + * of every connection on the session list followed by the session + * parameters of @sess. + */ +void iscsit_print_session_params(struct iscsi_session *sess) +{ + struct iscsi_conn *conn; + + pr_debug("-----------------------------[Session Params for" + " SID: %u]-----------------------------\n", sess->sid); + spin_lock_bh(&sess->conn_lock); + list_for_each_entry(conn, &sess->sess_conn_list, conn_list) + iscsi_dump_conn_ops(conn->conn_ops); + spin_unlock_bh(&sess->conn_lock); + + iscsi_dump_sess_ops(sess->sess_ops); +} +/* + * Receive count->data_length bytes into count->iov from the socket of the + * connection. With count->sync_and_steering set, a local iovec array is + * built that splits the caller buffers at each marker position and + * inserts two 4-byte marker slots there, so that markers are read into + * rx_marker_val[] instead of the payload buffers. + * + * Returns the number of payload bytes received (markers excluded), the + * kernel_recvmsg() result when that is <= 0, or -1 on bad arguments or a + * count overrun. + * + * NOTE(review): rx_marker_val[] and iov[] are on-stack variable-length + * arrays sized from @count, which is dereferenced before the argument + * checks run. + */ +static int iscsit_do_rx_data( + struct iscsi_conn *conn, + struct iscsi_data_count *count) +{ + int data = count->data_length, rx_loop = 0, total_rx = 0, iov_len; + u32 rx_marker_val[count->ss_marker_count], rx_marker_iov = 0; + struct kvec iov[count->ss_iov_count], *iov_p; + struct msghdr msg; + + if (!conn || !conn->sock || !conn->conn_ops) + return -1; + + memset(&msg, 0, sizeof(struct msghdr)); + + if (count->sync_and_steering) { + int size = 0; + u32 i, orig_iov_count = 0; + u32 orig_iov_len = 0, orig_iov_loc = 0; + u32 iov_count = 0, per_iov_bytes = 0; + u32 *rx_marker, old_rx_marker = 0; + struct kvec *iov_record; + + memset(&rx_marker_val, 0, + count->ss_marker_count * sizeof(u32)); + memset(&iov, 0, count->ss_iov_count * sizeof(struct 
kvec)); + + iov_record = count->iov; + orig_iov_count = count->iov_count; + rx_marker = &conn->of_marker; + + i = 0; + size = data; + orig_iov_len = iov_record[orig_iov_loc].iov_len; + while (size > 0) { + pr_debug("rx_data: #1 orig_iov_len %u," + " orig_iov_loc %u\n", orig_iov_len, orig_iov_loc); + pr_debug("rx_data: #2 rx_marker %u, size" + " %u\n", *rx_marker, size); + + if (orig_iov_len >= *rx_marker) { /* marker falls inside the current caller iovec: data up to it, then two marker slots */ + iov[iov_count].iov_len = *rx_marker; + iov[iov_count++].iov_base = + (iov_record[orig_iov_loc].iov_base + + per_iov_bytes); + + iov[iov_count].iov_len = (MARKER_SIZE / 2); + iov[iov_count++].iov_base = + &rx_marker_val[rx_marker_iov++]; + iov[iov_count].iov_len = (MARKER_SIZE / 2); + iov[iov_count++].iov_base = + &rx_marker_val[rx_marker_iov++]; + old_rx_marker = *rx_marker; + + /* + * OFMarkInt is in 32-bit words. + */ + *rx_marker = (conn->conn_ops->OFMarkInt * 4); + size -= old_rx_marker; + orig_iov_len -= old_rx_marker; + per_iov_bytes += old_rx_marker; + + pr_debug("rx_data: #3 new_rx_marker" + " %u, size %u\n", *rx_marker, size); + } else { /* no marker in the rest of this caller iovec: pass it through and move on */ + iov[iov_count].iov_len = orig_iov_len; + iov[iov_count++].iov_base = + (iov_record[orig_iov_loc].iov_base + + per_iov_bytes); + + per_iov_bytes = 0; + *rx_marker -= orig_iov_len; + size -= orig_iov_len; + + if (size) + orig_iov_len = + iov_record[++orig_iov_loc].iov_len; + + pr_debug("rx_data: #4 new_rx_marker" + " %u, size %u\n", *rx_marker, size); + } + } + data += (rx_marker_iov * (MARKER_SIZE / 2)); /* marker bytes are read from the socket as well */ + + iov_p = &iov[0]; + iov_len = iov_count; + + if (iov_count > count->ss_iov_count) { /* NOTE(review): checked only after iov[] has already been filled */ + pr_err("iov_count: %d, count->ss_iov_count:" + " %d\n", iov_count, count->ss_iov_count); + return -1; + } + if (rx_marker_iov > count->ss_marker_count) { /* NOTE(review): checked only after rx_marker_val[] slots were handed out */ + pr_err("rx_marker_iov: %d, count->ss_marker" + "_count: %d\n", rx_marker_iov, + count->ss_marker_count); + return -1; + } + } else { + iov_p = count->iov; + iov_len = count->iov_count; + } + + while (total_rx < data) { + rx_loop = kernel_recvmsg(conn->sock, &msg, iov_p, 
iov_len, + (data - total_rx), MSG_WAITALL); + if (rx_loop <= 0) { + pr_debug("rx_loop: %d total_rx: %d\n", + rx_loop, total_rx); + return rx_loop; + } + total_rx += rx_loop; + pr_debug("rx_loop: %d, total_rx: %d, data: %d\n", + rx_loop, total_rx, data); + } + + if (count->sync_and_steering) { + int j; + for (j = 0; j < rx_marker_iov; j++) { + pr_debug("rx_data: #5 j: %d, offset: %d\n", + j, rx_marker_val[j]); + conn->of_marker_offset = rx_marker_val[j]; /* overwritten each pass, so the last marker word wins */ + } + total_rx -= (rx_marker_iov * (MARKER_SIZE / 2)); /* report payload bytes only */ + } + + return total_rx; +} +/* + * Send count->data_length bytes from count->iov on the socket of the + * connection. With count->sync_and_steering set, a local iovec array is + * built that splits the caller buffers at each marker position and + * inserts two 4-byte marker slots taken from tx_marker_val[]. + * + * Returns the number of payload bytes sent (markers excluded), the + * kernel_sendmsg() result when that is <= 0, or -1 on bad arguments, a + * non-positive length or a count overrun. + * + * NOTE(review): tx_marker_val[] and iov[] are on-stack variable-length + * arrays sized from @count, which is dereferenced before the argument + * checks run. + */ +static int iscsit_do_tx_data( + struct iscsi_conn *conn, + struct iscsi_data_count *count) +{ + int data = count->data_length, total_tx = 0, tx_loop = 0, iov_len; + u32 tx_marker_val[count->ss_marker_count], tx_marker_iov = 0; + struct kvec iov[count->ss_iov_count], *iov_p; + struct msghdr msg; + + if (!conn || !conn->sock || !conn->conn_ops) + return -1; + + if (data <= 0) { + pr_err("Data length is: %d\n", data); + return -1; + } + + memset(&msg, 0, sizeof(struct msghdr)); + + if (count->sync_and_steering) { + int size = 0; + u32 i, orig_iov_count = 0; + u32 orig_iov_len = 0, orig_iov_loc = 0; + u32 iov_count = 0, per_iov_bytes = 0; + u32 *tx_marker, old_tx_marker = 0; + struct kvec *iov_record; + + memset(&tx_marker_val, 0, + count->ss_marker_count * sizeof(u32)); + memset(&iov, 0, count->ss_iov_count * sizeof(struct kvec)); + + iov_record = count->iov; + orig_iov_count = count->iov_count; + tx_marker = &conn->if_marker; + + i = 0; + size = data; + orig_iov_len = iov_record[orig_iov_loc].iov_len; + while (size > 0) { + pr_debug("tx_data: #1 orig_iov_len %u," + " orig_iov_loc %u\n", orig_iov_len, orig_iov_loc); + pr_debug("tx_data: #2 tx_marker %u, size" + " %u\n", *tx_marker, size); + + if (orig_iov_len >= *tx_marker) { /* marker falls inside the current caller iovec: data up to it, then two marker slots */ + iov[iov_count].iov_len = *tx_marker; + iov[iov_count++].iov_base = + (iov_record[orig_iov_loc].iov_base + + per_iov_bytes); + + tx_marker_val[tx_marker_iov] = + (size - *tx_marker); + iov[iov_count].iov_len = 
(MARKER_SIZE / 2); + iov[iov_count++].iov_base = + &tx_marker_val[tx_marker_iov++]; + iov[iov_count].iov_len = (MARKER_SIZE / 2); + iov[iov_count++].iov_base = + &tx_marker_val[tx_marker_iov++]; /* second marker word is never assigned, so it keeps the zero from the memset */ + old_tx_marker = *tx_marker; + + /* + * IFMarkInt is in 32-bit words. + */ + *tx_marker = (conn->conn_ops->IFMarkInt * 4); + size -= old_tx_marker; + orig_iov_len -= old_tx_marker; + per_iov_bytes += old_tx_marker; + + pr_debug("tx_data: #3 new_tx_marker" + " %u, size %u\n", *tx_marker, size); + pr_debug("tx_data: #4 offset %u\n", + tx_marker_val[tx_marker_iov-1]); + } else { /* no marker in the rest of this caller iovec: pass it through and move on */ + iov[iov_count].iov_len = orig_iov_len; + iov[iov_count++].iov_base + = (iov_record[orig_iov_loc].iov_base + + per_iov_bytes); + + per_iov_bytes = 0; + *tx_marker -= orig_iov_len; + size -= orig_iov_len; + + if (size) + orig_iov_len = + iov_record[++orig_iov_loc].iov_len; + + pr_debug("tx_data: #5 new_tx_marker" + " %u, size %u\n", *tx_marker, size); + } + } + + data += (tx_marker_iov * (MARKER_SIZE / 2)); /* marker bytes are written to the socket as well */ + + iov_p = &iov[0]; + iov_len = iov_count; + + if (iov_count > count->ss_iov_count) { /* NOTE(review): checked only after iov[] has already been filled */ + pr_err("iov_count: %d, count->ss_iov_count:" + " %d\n", iov_count, count->ss_iov_count); + return -1; + } + if (tx_marker_iov > count->ss_marker_count) { /* NOTE(review): checked only after tx_marker_val[] slots were handed out */ + pr_err("tx_marker_iov: %d, count->ss_marker" + "_count: %d\n", tx_marker_iov, + count->ss_marker_count); + return -1; + } + } else { + iov_p = count->iov; + iov_len = count->iov_count; + } + + while (total_tx < data) { + tx_loop = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len, + (data - total_tx)); /* NOTE(review): after a short send the same iov_p is passed again with only the length reduced - confirm this does not resend from the start */ + if (tx_loop <= 0) { + pr_debug("tx_loop: %d total_tx %d\n", + tx_loop, total_tx); + return tx_loop; + } + total_tx += tx_loop; + pr_debug("tx_loop: %d, total_tx: %d, data: %d\n", + tx_loop, total_tx, data); + } + + if (count->sync_and_steering) + total_tx -= (tx_marker_iov * (MARKER_SIZE / 2)); /* report payload bytes only */ + + return total_tx; +} +/* + * Receive @data bytes into the @iov_count entries of @iov. When OFMarker + * is enabled and the connection state is at least + * TARG_CONN_STATE_LOGGED_IN, the marker iovec counts are computed first. + * Returns the result of iscsit_do_rx_data(), or -1 on bad arguments. + */ +int rx_data( + struct iscsi_conn *conn, + struct kvec *iov, + int iov_count, + int data) +{ + struct iscsi_data_count c; + + if (!conn || 
!conn->sock || !conn->conn_ops) + return -1; + + memset(&c, 0, sizeof(struct iscsi_data_count)); + c.iov = iov; + c.iov_count = iov_count; + c.data_length = data; + c.type = ISCSI_RX_DATA; + + if (conn->conn_ops->OFMarker && + (conn->conn_state >= TARG_CONN_STATE_LOGGED_IN)) { + if (iscsit_determine_sync_and_steering_counts(conn, &c) < 0) + return -1; + } + + return iscsit_do_rx_data(conn, &c); +} +/* + * Send @data bytes from the @iov_count entries of @iov. When IFMarker is + * enabled and the connection state is at least + * TARG_CONN_STATE_LOGGED_IN, the marker iovec counts are computed first. + * Returns the result of iscsit_do_tx_data(), or -1 on bad arguments. + */ +int tx_data( + struct iscsi_conn *conn, + struct kvec *iov, + int iov_count, + int data) +{ + struct iscsi_data_count c; + + if (!conn || !conn->sock || !conn->conn_ops) + return -1; + + memset(&c, 0, sizeof(struct iscsi_data_count)); + c.iov = iov; + c.iov_count = iov_count; + c.data_length = data; + c.type = ISCSI_TX_DATA; + + if (conn->conn_ops->IFMarker && + (conn->conn_state >= TARG_CONN_STATE_LOGGED_IN)) { + if (iscsit_determine_sync_and_steering_counts(conn, &c) < 0) + return -1; + } + + return iscsit_do_tx_data(conn, &c); +} +/* + * Account one login attempt in the login statistics of the tiqn that + * iscsit_snmp_get_tiqn() finds for @conn (nothing is recorded when there + * is none). The matching counter is chosen from @status_class and + * @status_detail, and for every class other than + * ISCSI_STATUS_CLS_SUCCESS the initiator name, address family, IP address + * and time of the attempt are saved. + */ +void iscsit_collect_login_stats( + struct iscsi_conn *conn, + u8 status_class, + u8 status_detail) +{ + struct iscsi_param *intrname = NULL; + struct iscsi_tiqn *tiqn; + struct iscsi_login_stats *ls; + + tiqn = iscsit_snmp_get_tiqn(conn); + if (!tiqn) + return; + + ls = &tiqn->login_stats; + + spin_lock(&ls->lock); + if (!strcmp(conn->login_ip, ls->last_intr_fail_ip_addr) && + ((get_jiffies_64() - ls->last_fail_time) < 10)) { + /* + * We already have the failure info for this login. + * NOTE(review): the window is 10 jiffies (no HZ factor), and + * this early return also skips counting a successful login + * from the same address - confirm both are intended. + */ + spin_unlock(&ls->lock); + return; + } + + if (status_class == ISCSI_STATUS_CLS_SUCCESS) + ls->accepts++; + else if (status_class == ISCSI_STATUS_CLS_REDIRECT) { + ls->redirects++; + ls->last_fail_type = ISCSI_LOGIN_FAIL_REDIRECT; + } else if ((status_class == ISCSI_STATUS_CLS_INITIATOR_ERR) && + (status_detail == ISCSI_LOGIN_STATUS_AUTH_FAILED)) { + ls->authenticate_fails++; + ls->last_fail_type = ISCSI_LOGIN_FAIL_AUTHENTICATE; + } else if ((status_class == ISCSI_STATUS_CLS_INITIATOR_ERR) && + (status_detail == ISCSI_LOGIN_STATUS_TGT_FORBIDDEN)) { + 
ls->authorize_fails++; + ls->last_fail_type = ISCSI_LOGIN_FAIL_AUTHORIZE; + } else if ((status_class == ISCSI_STATUS_CLS_INITIATOR_ERR) && + (status_detail == ISCSI_LOGIN_STATUS_INIT_ERR)) { + ls->negotiate_fails++; + ls->last_fail_type = ISCSI_LOGIN_FAIL_NEGOTIATE; + } else { + ls->other_fails++; + ls->last_fail_type = ISCSI_LOGIN_FAIL_OTHER; + } + + /* Save initiator name, IP address and time if the login did not succeed (redirects included) */ + if (status_class != ISCSI_STATUS_CLS_SUCCESS) { + if (conn->param_list) + intrname = iscsi_find_param_from_key(INITIATORNAME, + conn->param_list); + strcpy(ls->last_intr_fail_name, + (intrname ? intrname->value : "Unknown")); /* NOTE(review): unbounded strcpy of a negotiated value - confirm the destination is large enough */ + + ls->last_intr_fail_ip_family = conn->sock->sk->sk_family; + snprintf(ls->last_intr_fail_ip_addr, IPV6_ADDRESS_SPACE, + "%s", conn->login_ip); + ls->last_fail_time = get_jiffies_64(); + } + + spin_unlock(&ls->lock); +} +/* + * Return the tiqn reached through conn->sess->tpg->tpg_tiqn, or NULL when + * @conn, its session, the portal group or the tiqn pointer is NULL. + */ +struct iscsi_tiqn *iscsit_snmp_get_tiqn(struct iscsi_conn *conn) +{ + struct iscsi_portal_group *tpg; + + if (!conn || !conn->sess) + return NULL; + + tpg = conn->sess->tpg; + if (!tpg) + return NULL; + + if (!tpg->tpg_tiqn) + return NULL; + + return tpg->tpg_tiqn; +} diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h new file mode 100644 index 000000000000..2cd49d607bda --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_util.h @@ -0,0 +1,60 @@ +#ifndef ISCSI_TARGET_UTIL_H +#define ISCSI_TARGET_UTIL_H + +#define MARKER_SIZE 8 + +extern int iscsit_add_r2t_to_list(struct iscsi_cmd *, u32, u32, int, u32); +extern struct iscsi_r2t *iscsit_get_r2t_for_eos(struct iscsi_cmd *, u32, u32); +extern struct iscsi_r2t *iscsit_get_r2t_from_list(struct iscsi_cmd *); +extern void iscsit_free_r2t(struct iscsi_r2t *, struct iscsi_cmd *); +extern void iscsit_free_r2ts_from_list(struct iscsi_cmd *); +extern struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *, gfp_t); +extern struct iscsi_cmd *iscsit_allocate_se_cmd(struct iscsi_conn *, u32, int, int); +extern struct 
iscsi_cmd *iscsit_allocate_se_cmd_for_tmr(struct iscsi_conn *, u8); +extern int iscsit_decide_list_to_build(struct iscsi_cmd *, u32); +extern struct iscsi_seq *iscsit_get_seq_holder_for_datain(struct iscsi_cmd *, u32); +extern struct iscsi_seq *iscsit_get_seq_holder_for_r2t(struct iscsi_cmd *); +extern struct iscsi_r2t *iscsit_get_holder_for_r2tsn(struct iscsi_cmd *, u32); +int iscsit_sequence_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, u32 cmdsn); +extern int iscsit_check_unsolicited_dataout(struct iscsi_cmd *, unsigned char *); +extern struct iscsi_cmd *iscsit_find_cmd_from_itt(struct iscsi_conn *, u32); +extern struct iscsi_cmd *iscsit_find_cmd_from_itt_or_dump(struct iscsi_conn *, + u32, u32); +extern struct iscsi_cmd *iscsit_find_cmd_from_ttt(struct iscsi_conn *, u32); +extern int iscsit_find_cmd_for_recovery(struct iscsi_session *, struct iscsi_cmd **, + struct iscsi_conn_recovery **, u32); +extern void iscsit_add_cmd_to_immediate_queue(struct iscsi_cmd *, struct iscsi_conn *, u8); +extern struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_conn *); +extern void iscsit_add_cmd_to_response_queue(struct iscsi_cmd *, struct iscsi_conn *, u8); +extern struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *); +extern void iscsit_remove_cmd_from_tx_queues(struct iscsi_cmd *, struct iscsi_conn *); +extern void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *); +extern void iscsit_release_cmd(struct iscsi_cmd *); +extern int iscsit_check_session_usage_count(struct iscsi_session *); +extern void iscsit_dec_session_usage_count(struct iscsi_session *); +extern void iscsit_inc_session_usage_count(struct iscsi_session *); +extern int iscsit_set_sync_and_steering_values(struct iscsi_conn *); +extern struct iscsi_conn *iscsit_get_conn_from_cid(struct iscsi_session *, u16); +extern struct iscsi_conn *iscsit_get_conn_from_cid_rcfr(struct iscsi_session *, u16); +extern void iscsit_check_conn_usage_count(struct iscsi_conn 
*); +extern void iscsit_dec_conn_usage_count(struct iscsi_conn *); +extern void iscsit_inc_conn_usage_count(struct iscsi_conn *); +extern void iscsit_mod_nopin_response_timer(struct iscsi_conn *); +extern void iscsit_start_nopin_response_timer(struct iscsi_conn *); +extern void iscsit_stop_nopin_response_timer(struct iscsi_conn *); +extern void __iscsit_start_nopin_timer(struct iscsi_conn *); +extern void iscsit_start_nopin_timer(struct iscsi_conn *); +extern void iscsit_stop_nopin_timer(struct iscsi_conn *); +extern int iscsit_send_tx_data(struct iscsi_cmd *, struct iscsi_conn *, int); +extern int iscsit_fe_sendpage_sg(struct iscsi_cmd *, struct iscsi_conn *); +extern int iscsit_tx_login_rsp(struct iscsi_conn *, u8, u8); +extern void iscsit_print_session_params(struct iscsi_session *); +extern int iscsit_print_dev_to_proc(char *, char **, off_t, int); +extern int iscsit_print_sessions_to_proc(char *, char **, off_t, int); +extern int iscsit_print_tpg_to_proc(char *, char **, off_t, int); +extern int rx_data(struct iscsi_conn *, struct kvec *, int, int); +extern int tx_data(struct iscsi_conn *, struct kvec *, int, int); +extern void iscsit_collect_login_stats(struct iscsi_conn *, u8, u8); +extern struct iscsi_tiqn *iscsit_snmp_get_tiqn(struct iscsi_conn *); + +#endif /*** ISCSI_TARGET_UTIL_H ***/ diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 46352d658e35..c75a01a1c475 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -4052,17 +4052,16 @@ static int transport_allocate_data_tasks( struct se_task *task; struct se_device *dev = cmd->se_dev; unsigned long flags; - sector_t sectors; int task_count, i, ret; - sector_t dev_max_sectors = dev->se_sub_dev->se_dev_attrib.max_sectors; + sector_t sectors, dev_max_sectors = dev->se_sub_dev->se_dev_attrib.max_sectors; u32 sector_size = dev->se_sub_dev->se_dev_attrib.block_size; struct scatterlist *sg; struct scatterlist *cmd_sg; 
WARN_ON(cmd->data_length % sector_size); sectors = DIV_ROUND_UP(cmd->data_length, sector_size); - task_count = DIV_ROUND_UP(sectors, dev_max_sectors); - + task_count = DIV_ROUND_UP_SECTOR_T(sectors, dev_max_sectors); + cmd_sg = sgl; for (i = 0; i < task_count; i++) { unsigned int task_size; diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 21d816e9dfa5..f441726ddf2b 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -28,6 +28,17 @@ menuconfig WATCHDOG if WATCHDOG +config WATCHDOG_CORE + bool "WatchDog Timer Driver Core" + ---help--- + Say Y here if you want to use the new watchdog timer driver core. + This driver provides a framework for all watchdog timer drivers + and gives them the /dev/watchdog interface (and later also the + sysfs interface). + + To compile this driver as a module, choose M here: the module will + be called watchdog. + config WATCHDOG_NOWAYOUT bool "Disable watchdog shutdown on close" help @@ -186,6 +197,15 @@ config SA1100_WATCHDOG To compile this driver as a module, choose M here: the module will be called sa1100_wdt. +config DW_WATCHDOG + tristate "Synopsys DesignWare watchdog" + depends on ARM && HAVE_CLK + help + Say Y here if to include support for the Synopsys DesignWare + watchdog timer found in many ARM chips. + To compile this driver as a module, choose M here: the + module will be called dw_wdt. + config MPCORE_WATCHDOG tristate "MPcore watchdog" depends on HAVE_ARM_TWD @@ -321,7 +341,7 @@ config MAX63XX_WATCHDOG config IMX2_WDT tristate "IMX2+ Watchdog" - depends on ARCH_MX2 || ARCH_MX25 || ARCH_MX3 || ARCH_MX5 + depends on IMX_HAVE_PLATFORM_IMX2_WDT help This is the driver for the hardware watchdog on the Freescale IMX2 and later processors. @@ -879,6 +899,20 @@ config M54xx_WATCHDOG To compile this driver as a module, choose M here: the module will be called m54xx_wdt. 
+# MicroBlaze Architecture + +config XILINX_WATCHDOG + tristate "Xilinx Watchdog timer" + depends on MICROBLAZE + ---help--- + Watchdog driver for the xps_timebase_wdt ip core. + + IMPORTANT: The xps_timebase_wdt parent must have the property + "clock-frequency" at device tree. + + To compile this driver as a module, choose M here: the + module will be called of_xilinx_wdt. + # MIPS Architecture config ATH79_WDT diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index ed26f7094e47..55bd5740e910 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -2,6 +2,10 @@ # Makefile for the WatchDog device drivers. # +# The WatchDog Timer Driver Core. +watchdog-objs += watchdog_core.o watchdog_dev.o +obj-$(CONFIG_WATCHDOG_CORE) += watchdog.o + # Only one watchdog can succeed. We probe the ISA/PCI/USB based # watchdog-cards first, then the architecture specific watchdog # drivers and then the architecture independent "softdog" driver. @@ -37,6 +41,7 @@ obj-$(CONFIG_IXP4XX_WATCHDOG) += ixp4xx_wdt.o obj-$(CONFIG_KS8695_WATCHDOG) += ks8695_wdt.o obj-$(CONFIG_S3C2410_WATCHDOG) += s3c2410_wdt.o obj-$(CONFIG_SA1100_WATCHDOG) += sa1100_wdt.o +obj-$(CONFIG_DW_WATCHDOG) += dw_wdt.o obj-$(CONFIG_MPCORE_WATCHDOG) += mpcore_wdt.o obj-$(CONFIG_EP93XX_WATCHDOG) += ep93xx_wdt.o obj-$(CONFIG_PNX4008_WATCHDOG) += pnx4008_wdt.o @@ -109,6 +114,9 @@ obj-$(CONFIG_INTEL_SCU_WATCHDOG) += intel_scu_watchdog.o # M68K Architecture obj-$(CONFIG_M54xx_WATCHDOG) += m54xx_wdt.o +# MicroBlaze Architecture +obj-$(CONFIG_XILINX_WATCHDOG) += of_xilinx_wdt.o + # MIPS Architecture obj-$(CONFIG_ATH79_WDT) += ath79_wdt.o obj-$(CONFIG_BCM47XX_WDT) += bcm47xx_wdt.o diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c index eac26021e8da..87445b2d72a7 100644 --- a/drivers/watchdog/at91sam9_wdt.c +++ b/drivers/watchdog/at91sam9_wdt.c @@ -31,7 +31,7 @@ #include <linux/bitops.h> #include <linux/uaccess.h> -#include <mach/at91_wdt.h> +#include "at91sam9_wdt.h" 
#define DRV_NAME "AT91SAM9 Watchdog" @@ -284,27 +284,8 @@ static int __exit at91wdt_remove(struct platform_device *pdev) return res; } -#ifdef CONFIG_PM - -static int at91wdt_suspend(struct platform_device *pdev, pm_message_t message) -{ - return 0; -} - -static int at91wdt_resume(struct platform_device *pdev) -{ - return 0; -} - -#else -#define at91wdt_suspend NULL -#define at91wdt_resume NULL -#endif - static struct platform_driver at91wdt_driver = { .remove = __exit_p(at91wdt_remove), - .suspend = at91wdt_suspend, - .resume = at91wdt_resume, .driver = { .name = "at91_wdt", .owner = THIS_MODULE, diff --git a/drivers/watchdog/at91sam9_wdt.h b/drivers/watchdog/at91sam9_wdt.h new file mode 100644 index 000000000000..757f9cab5c82 --- /dev/null +++ b/drivers/watchdog/at91sam9_wdt.h @@ -0,0 +1,37 @@ +/* + * drivers/watchdog/at91sam9_wdt.h + * + * Copyright (C) 2007 Andrew Victor + * Copyright (C) 2007 Atmel Corporation. + * + * Watchdog Timer (WDT) - System peripherals regsters. + * Based on AT91SAM9261 datasheet revision D. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef AT91_WDT_H +#define AT91_WDT_H + +#define AT91_WDT_CR (AT91_WDT + 0x00) /* Watchdog Control Register */ +#define AT91_WDT_WDRSTT (1 << 0) /* Restart */ +#define AT91_WDT_KEY (0xa5 << 24) /* KEY Password */ + +#define AT91_WDT_MR (AT91_WDT + 0x04) /* Watchdog Mode Register */ +#define AT91_WDT_WDV (0xfff << 0) /* Counter Value */ +#define AT91_WDT_WDFIEN (1 << 12) /* Fault Interrupt Enable */ +#define AT91_WDT_WDRSTEN (1 << 13) /* Reset Processor */ +#define AT91_WDT_WDRPROC (1 << 14) /* Timer Restart */ +#define AT91_WDT_WDDIS (1 << 15) /* Watchdog Disable */ +#define AT91_WDT_WDD (0xfff << 16) /* Delta Value */ +#define AT91_WDT_WDDBGHLT (1 << 28) /* Debug Halt */ +#define AT91_WDT_WDIDLEHLT (1 << 29) /* Idle Halt */ + +#define AT91_WDT_SR (AT91_WDT + 0x08) /* Watchdog Status Register */ +#define AT91_WDT_WDUNF (1 << 0) /* Watchdog Underflow */ +#define AT91_WDT_WDERR (1 << 1) /* Watchdog Error */ + +#endif diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c new file mode 100644 index 000000000000..f10f8c0abba4 --- /dev/null +++ b/drivers/watchdog/dw_wdt.c @@ -0,0 +1,376 @@ +/* + * Copyright 2010-2011 Picochip Ltd., Jamie Iles + * http://www.picochip.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This file implements a driver for the Synopsys DesignWare watchdog device + * in the many ARM subsystems. The watchdog has 16 different timeout periods + * and these are a function of the input clock frequency. + * + * The DesignWare watchdog cannot be stopped once it has been started so we + * use a software timer to implement a ping that will keep the watchdog alive. + * If we receive an expected close for the watchdog then we keep the timer + * running, otherwise the timer is stopped and the watchdog will expire. 
+ */ +#define pr_fmt(fmt) "dw_wdt: " fmt + +#include <linux/bitops.h> +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/fs.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/pm.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> +#include <linux/timer.h> +#include <linux/uaccess.h> +#include <linux/watchdog.h> + +#define WDOG_CONTROL_REG_OFFSET 0x00 +#define WDOG_CONTROL_REG_WDT_EN_MASK 0x01 +#define WDOG_TIMEOUT_RANGE_REG_OFFSET 0x04 +#define WDOG_CURRENT_COUNT_REG_OFFSET 0x08 +#define WDOG_COUNTER_RESTART_REG_OFFSET 0x0c +#define WDOG_COUNTER_RESTART_KICK_VALUE 0x76 + +/* The maximum TOP (timeout period) value that can be set in the watchdog. */ +#define DW_WDT_MAX_TOP 15 + +static int nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, int, 0); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " + "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +#define WDT_TIMEOUT (HZ / 2) + +static struct { + spinlock_t lock; + void __iomem *regs; + struct clk *clk; + unsigned long in_use; + unsigned long next_heartbeat; + struct timer_list timer; + int expect_close; +} dw_wdt; + +static inline int dw_wdt_is_enabled(void) +{ + return readl(dw_wdt.regs + WDOG_CONTROL_REG_OFFSET) & + WDOG_CONTROL_REG_WDT_EN_MASK; +} + +static inline int dw_wdt_top_in_seconds(unsigned top) +{ + /* + * There are 16 possible timeout values in 0..15 where the number of + * cycles is 2 ^ (16 + i) and the watchdog counts down. 
+ */ + return (1 << (16 + top)) / clk_get_rate(dw_wdt.clk); +} + +static int dw_wdt_get_top(void) +{ + int top = readl(dw_wdt.regs + WDOG_TIMEOUT_RANGE_REG_OFFSET) & 0xF; + + return dw_wdt_top_in_seconds(top); +} + +static inline void dw_wdt_set_next_heartbeat(void) +{ + dw_wdt.next_heartbeat = jiffies + dw_wdt_get_top() * HZ; +} + +static int dw_wdt_set_top(unsigned top_s) +{ + int i, top_val = DW_WDT_MAX_TOP; + + /* + * Iterate over the timeout values until we find the closest match. We + * always look for >=. + */ + for (i = 0; i <= DW_WDT_MAX_TOP; ++i) + if (dw_wdt_top_in_seconds(i) >= top_s) { + top_val = i; + break; + } + + /* Set the new value in the watchdog. */ + writel(top_val, dw_wdt.regs + WDOG_TIMEOUT_RANGE_REG_OFFSET); + + dw_wdt_set_next_heartbeat(); + + return dw_wdt_top_in_seconds(top_val); +} + +static void dw_wdt_keepalive(void) +{ + writel(WDOG_COUNTER_RESTART_KICK_VALUE, dw_wdt.regs + + WDOG_COUNTER_RESTART_REG_OFFSET); +} + +static void dw_wdt_ping(unsigned long data) +{ + if (time_before(jiffies, dw_wdt.next_heartbeat) || + (!nowayout && !dw_wdt.in_use)) { + dw_wdt_keepalive(); + mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT); + } else + pr_crit("keepalive missed, machine will reset\n"); +} + +static int dw_wdt_open(struct inode *inode, struct file *filp) +{ + if (test_and_set_bit(0, &dw_wdt.in_use)) + return -EBUSY; + + /* Make sure we don't get unloaded. */ + __module_get(THIS_MODULE); + + spin_lock(&dw_wdt.lock); + if (!dw_wdt_is_enabled()) { + /* + * The watchdog is not currently enabled. Set the timeout to + * the maximum and then start it. 
+ */ + dw_wdt_set_top(DW_WDT_MAX_TOP); + writel(WDOG_CONTROL_REG_WDT_EN_MASK, + dw_wdt.regs + WDOG_CONTROL_REG_OFFSET); + } + + dw_wdt_set_next_heartbeat(); + + spin_unlock(&dw_wdt.lock); + + return nonseekable_open(inode, filp); +} + +ssize_t dw_wdt_write(struct file *filp, const char __user *buf, size_t len, + loff_t *offset) +{ + if (!len) + return 0; + + if (!nowayout) { + size_t i; + + dw_wdt.expect_close = 0; + + for (i = 0; i < len; ++i) { + char c; + + if (get_user(c, buf + i)) + return -EFAULT; + + if (c == 'V') { + dw_wdt.expect_close = 1; + break; + } + } + } + + dw_wdt_set_next_heartbeat(); + mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT); + + return len; +} + +static u32 dw_wdt_time_left(void) +{ + return readl(dw_wdt.regs + WDOG_CURRENT_COUNT_REG_OFFSET) / + clk_get_rate(dw_wdt.clk); +} + +static const struct watchdog_info dw_wdt_ident = { + .options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT | + WDIOF_MAGICCLOSE, + .identity = "Synopsys DesignWare Watchdog", +}; + +static long dw_wdt_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + unsigned long val; + int timeout; + + switch (cmd) { + case WDIOC_GETSUPPORT: + return copy_to_user((struct watchdog_info *)arg, &dw_wdt_ident, + sizeof(dw_wdt_ident)) ? -EFAULT : 0; + + case WDIOC_GETSTATUS: + case WDIOC_GETBOOTSTATUS: + return put_user(0, (int *)arg); + + case WDIOC_KEEPALIVE: + dw_wdt_set_next_heartbeat(); + return 0; + + case WDIOC_SETTIMEOUT: + if (get_user(val, (int __user *)arg)) + return -EFAULT; + timeout = dw_wdt_set_top(val); + return put_user(timeout , (int __user *)arg); + + case WDIOC_GETTIMEOUT: + return put_user(dw_wdt_get_top(), (int __user *)arg); + + case WDIOC_GETTIMELEFT: + /* Get the time left until expiry. 
*/ + if (get_user(val, (int __user *)arg)) + return -EFAULT; + return put_user(dw_wdt_time_left(), (int __user *)arg); + + default: + return -ENOTTY; + } +} + +static int dw_wdt_release(struct inode *inode, struct file *filp) +{ + clear_bit(0, &dw_wdt.in_use); + + if (!dw_wdt.expect_close) { + del_timer(&dw_wdt.timer); + + if (!nowayout) + pr_crit("unexpected close, system will reboot soon\n"); + else + pr_crit("watchdog cannot be disabled, system will reboot soon\n"); + } + + dw_wdt.expect_close = 0; + + return 0; +} + +#ifdef CONFIG_PM +static int dw_wdt_suspend(struct device *dev) +{ + clk_disable(dw_wdt.clk); + + return 0; +} + +static int dw_wdt_resume(struct device *dev) +{ + int err = clk_enable(dw_wdt.clk); + + if (err) + return err; + + dw_wdt_keepalive(); + + return 0; +} + +static const struct dev_pm_ops dw_wdt_pm_ops = { + .suspend = dw_wdt_suspend, + .resume = dw_wdt_resume, +}; +#endif /* CONFIG_PM */ + +static const struct file_operations wdt_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .open = dw_wdt_open, + .write = dw_wdt_write, + .unlocked_ioctl = dw_wdt_ioctl, + .release = dw_wdt_release +}; + +static struct miscdevice dw_wdt_miscdev = { + .fops = &wdt_fops, + .name = "watchdog", + .minor = WATCHDOG_MINOR, +}; + +static int __devinit dw_wdt_drv_probe(struct platform_device *pdev) +{ + int ret; + struct resource *mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + if (!mem) + return -EINVAL; + + if (!devm_request_mem_region(&pdev->dev, mem->start, resource_size(mem), + "dw_wdt")) + return -ENOMEM; + + dw_wdt.regs = devm_ioremap(&pdev->dev, mem->start, resource_size(mem)); + if (!dw_wdt.regs) + return -ENOMEM; + + dw_wdt.clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(dw_wdt.clk)) + return PTR_ERR(dw_wdt.clk); + + ret = clk_enable(dw_wdt.clk); + if (ret) + goto out_put_clk; + + spin_lock_init(&dw_wdt.lock); + + ret = misc_register(&dw_wdt_miscdev); + if (ret) + goto out_disable_clk; + + dw_wdt_set_next_heartbeat(); + 
setup_timer(&dw_wdt.timer, dw_wdt_ping, 0); + mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT); + + return 0; + +out_disable_clk: + clk_disable(dw_wdt.clk); +out_put_clk: + clk_put(dw_wdt.clk); + + return ret; +} + +static int __devexit dw_wdt_drv_remove(struct platform_device *pdev) +{ + misc_deregister(&dw_wdt_miscdev); + + clk_disable(dw_wdt.clk); + clk_put(dw_wdt.clk); + + return 0; +} + +static struct platform_driver dw_wdt_driver = { + .probe = dw_wdt_drv_probe, + .remove = __devexit_p(dw_wdt_drv_remove), + .driver = { + .name = "dw_wdt", + .owner = THIS_MODULE, +#ifdef CONFIG_PM + .pm = &dw_wdt_pm_ops, +#endif /* CONFIG_PM */ + }, +}; + +static int __init dw_wdt_watchdog_init(void) +{ + return platform_driver_register(&dw_wdt_driver); +} +module_init(dw_wdt_watchdog_init); + +static void __exit dw_wdt_watchdog_exit(void) +{ + platform_driver_unregister(&dw_wdt_driver); +} +module_exit(dw_wdt_watchdog_exit); + +MODULE_AUTHOR("Jamie Iles"); +MODULE_DESCRIPTION("Synopsys DesignWare Watchdog Driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index 8cb26855bfed..410fba45378d 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -36,7 +36,7 @@ #include <asm/cacheflush.h> #endif /* CONFIG_HPWDT_NMI_DECODING */ -#define HPWDT_VERSION "1.2.0" +#define HPWDT_VERSION "1.3.0" #define SECS_TO_TICKS(secs) ((secs) * 1000 / 128) #define TICKS_TO_SECS(ticks) ((ticks) * 128 / 1000) #define HPWDT_MAX_TIMER TICKS_TO_SECS(65535) @@ -87,6 +87,19 @@ struct smbios_cru64_info { }; #define SMBIOS_CRU64_INFORMATION 212 +/* type 219 */ +struct smbios_proliant_info { + u8 type; + u8 byte_length; + u16 handle; + u32 power_features; + u32 omega_features; + u32 reserved; + u32 misc_features; +}; +#define SMBIOS_ICRU_INFORMATION 219 + + struct cmn_registers { union { struct { @@ -132,6 +145,7 @@ struct cmn_registers { static unsigned int hpwdt_nmi_decoding; static unsigned int 
allow_kdump; static unsigned int priority; /* hpwdt at end of die_notify list */ +static unsigned int is_icru; static DEFINE_SPINLOCK(rom_lock); static void *cru_rom_addr; static struct cmn_registers cmn_regs; @@ -476,19 +490,22 @@ static int hpwdt_pretimeout(struct notifier_block *nb, unsigned long ulReason, goto out; spin_lock_irqsave(&rom_lock, rom_pl); - if (!die_nmi_called) + if (!die_nmi_called && !is_icru) asminline_call(&cmn_regs, cru_rom_addr); die_nmi_called = 1; spin_unlock_irqrestore(&rom_lock, rom_pl); - if (cmn_regs.u1.ral == 0) { - printk(KERN_WARNING "hpwdt: An NMI occurred, " - "but unable to determine source.\n"); - } else { - if (allow_kdump) - hpwdt_stop(); - panic("An NMI occurred, please see the Integrated " - "Management Log for details.\n"); + if (!is_icru) { + if (cmn_regs.u1.ral == 0) { + printk(KERN_WARNING "hpwdt: An NMI occurred, " + "but unable to determine source.\n"); + } } + + if (allow_kdump) + hpwdt_stop(); + panic("An NMI occurred, please see the Integrated " + "Management Log for details.\n"); + out: return NOTIFY_OK; } @@ -659,30 +676,63 @@ static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev) } #endif /* CONFIG_X86_LOCAL_APIC */ +/* + * dmi_find_icru + * + * Routine Description: + * This function checks whether or not we are on an iCRU-based server. + * This check is independent of architecture and needs to be made for + * any ProLiant system. + */ +static void __devinit dmi_find_icru(const struct dmi_header *dm, void *dummy) +{ + struct smbios_proliant_info *smbios_proliant_ptr; + + if (dm->type == SMBIOS_ICRU_INFORMATION) { + smbios_proliant_ptr = (struct smbios_proliant_info *) dm; + if (smbios_proliant_ptr->misc_features & 0x01) + is_icru = 1; + } +} + static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev) { int retval; /* - * We need to map the ROM to get the CRU service. 
- * For 32 bit Operating Systems we need to go through the 32 Bit - * BIOS Service Directory - * For 64 bit Operating Systems we get that service through SMBIOS. + * On typical CRU-based systems we need to map that service in + * the BIOS. For 32 bit Operating Systems we need to go through + * the 32 Bit BIOS Service Directory. For 64 bit Operating + * Systems we get that service through SMBIOS. + * + * On systems that support the new iCRU service all we need to + * do is call dmi_walk to get the supported flag value and skip + * the old cru detect code. */ - retval = detect_cru_service(); - if (retval < 0) { - dev_warn(&dev->dev, - "Unable to detect the %d Bit CRU Service.\n", - HPWDT_ARCH); - return retval; - } + dmi_walk(dmi_find_icru, NULL); + if (!is_icru) { + + /* + * We need to map the ROM to get the CRU service. + * For 32 bit Operating Systems we need to go through the 32 Bit + * BIOS Service Directory + * For 64 bit Operating Systems we get that service through SMBIOS. + */ + retval = detect_cru_service(); + if (retval < 0) { + dev_warn(&dev->dev, + "Unable to detect the %d Bit CRU Service.\n", + HPWDT_ARCH); + return retval; + } - /* - * We know this is the only CRU call we need to make so lets keep as - * few instructions as possible once the NMI comes in. - */ - cmn_regs.u1.rah = 0x0D; - cmn_regs.u1.ral = 0x02; + /* + * We know this is the only CRU call we need to make so lets keep as + * few instructions as possible once the NMI comes in. 
+ */ + cmn_regs.u1.rah = 0x0D; + cmn_regs.u1.ral = 0x02; + } /* * If the priority is set to 1, then we will be put first on the diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index 5fd020da7c55..751a591684da 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -120,72 +120,12 @@ enum iTCO_chipsets { TCO_3420, /* 3420 */ TCO_3450, /* 3450 */ TCO_EP80579, /* EP80579 */ - TCO_CPT1, /* Cougar Point */ - TCO_CPT2, /* Cougar Point Desktop */ - TCO_CPT3, /* Cougar Point Mobile */ - TCO_CPT4, /* Cougar Point */ - TCO_CPT5, /* Cougar Point */ - TCO_CPT6, /* Cougar Point */ - TCO_CPT7, /* Cougar Point */ - TCO_CPT8, /* Cougar Point */ - TCO_CPT9, /* Cougar Point */ - TCO_CPT10, /* Cougar Point */ - TCO_CPT11, /* Cougar Point */ - TCO_CPT12, /* Cougar Point */ - TCO_CPT13, /* Cougar Point */ - TCO_CPT14, /* Cougar Point */ - TCO_CPT15, /* Cougar Point */ - TCO_CPT16, /* Cougar Point */ - TCO_CPT17, /* Cougar Point */ - TCO_CPT18, /* Cougar Point */ - TCO_CPT19, /* Cougar Point */ - TCO_CPT20, /* Cougar Point */ - TCO_CPT21, /* Cougar Point */ - TCO_CPT22, /* Cougar Point */ - TCO_CPT23, /* Cougar Point */ - TCO_CPT24, /* Cougar Point */ - TCO_CPT25, /* Cougar Point */ - TCO_CPT26, /* Cougar Point */ - TCO_CPT27, /* Cougar Point */ - TCO_CPT28, /* Cougar Point */ - TCO_CPT29, /* Cougar Point */ - TCO_CPT30, /* Cougar Point */ - TCO_CPT31, /* Cougar Point */ - TCO_PBG1, /* Patsburg */ - TCO_PBG2, /* Patsburg */ + TCO_CPT, /* Cougar Point */ + TCO_CPTD, /* Cougar Point Desktop */ + TCO_CPTM, /* Cougar Point Mobile */ + TCO_PBG, /* Patsburg */ TCO_DH89XXCC, /* DH89xxCC */ - TCO_PPT0, /* Panther Point */ - TCO_PPT1, /* Panther Point */ - TCO_PPT2, /* Panther Point */ - TCO_PPT3, /* Panther Point */ - TCO_PPT4, /* Panther Point */ - TCO_PPT5, /* Panther Point */ - TCO_PPT6, /* Panther Point */ - TCO_PPT7, /* Panther Point */ - TCO_PPT8, /* Panther Point */ - TCO_PPT9, /* Panther Point */ - TCO_PPT10, /* Panther Point */ - TCO_PPT11, 
/* Panther Point */ - TCO_PPT12, /* Panther Point */ - TCO_PPT13, /* Panther Point */ - TCO_PPT14, /* Panther Point */ - TCO_PPT15, /* Panther Point */ - TCO_PPT16, /* Panther Point */ - TCO_PPT17, /* Panther Point */ - TCO_PPT18, /* Panther Point */ - TCO_PPT19, /* Panther Point */ - TCO_PPT20, /* Panther Point */ - TCO_PPT21, /* Panther Point */ - TCO_PPT22, /* Panther Point */ - TCO_PPT23, /* Panther Point */ - TCO_PPT24, /* Panther Point */ - TCO_PPT25, /* Panther Point */ - TCO_PPT26, /* Panther Point */ - TCO_PPT27, /* Panther Point */ - TCO_PPT28, /* Panther Point */ - TCO_PPT29, /* Panther Point */ - TCO_PPT30, /* Panther Point */ - TCO_PPT31, /* Panther Point */ + TCO_PPT, /* Panther Point */ }; static struct { @@ -244,83 +184,14 @@ static struct { {"3450", 2}, {"EP80579", 2}, {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Patsburg", 2}, + {"Cougar Point Desktop", 2}, + {"Cougar Point Mobile", 2}, {"Patsburg", 2}, {"DH89xxCC", 2}, {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 
2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, {NULL, 0} }; -#define ITCO_PCI_DEVICE(dev, data) \ - .vendor = PCI_VENDOR_ID_INTEL, \ - .device = dev, \ - .subvendor = PCI_ANY_ID, \ - .subdevice = PCI_ANY_ID, \ - .class = 0, \ - .class_mask = 0, \ - .driver_data = data - /* * This data only exists for exporting the supported PCI ids * via MODULE_DEVICE_TABLE. We do not actually register a @@ -328,138 +199,138 @@ static struct { * functions that probably will be registered by other drivers. */ static DEFINE_PCI_DEVICE_TABLE(iTCO_wdt_pci_tbl) = { - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801AA_0, TCO_ICH)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801AB_0, TCO_ICH0)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801BA_0, TCO_ICH2)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801BA_10, TCO_ICH2M)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801CA_0, TCO_ICH3)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801CA_12, TCO_ICH3M)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801DB_0, TCO_ICH4)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801DB_12, TCO_ICH4M)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801E_0, TCO_CICH)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801EB_0, TCO_ICH5)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ESB_1, TCO_6300ESB)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH6_0, TCO_ICH6)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH6_1, TCO_ICH6M)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH6_2, TCO_ICH6W)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ESB2_0, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2671, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2672, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2673, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2674, 
TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2675, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2676, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2677, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2678, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2679, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x267a, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x267b, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x267c, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x267d, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x267e, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x267f, TCO_631XESB)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_0, TCO_ICH7)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_30, TCO_ICH7DH)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_1, TCO_ICH7M)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_31, TCO_ICH7MDH)}, - { ITCO_PCI_DEVICE(0x27bc, TCO_NM10)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_0, TCO_ICH8)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_2, TCO_ICH8DH)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_3, TCO_ICH8DO)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_4, TCO_ICH8M)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_1, TCO_ICH8ME)}, - { ITCO_PCI_DEVICE(0x2918, TCO_ICH9)}, - { ITCO_PCI_DEVICE(0x2916, TCO_ICH9R)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH9_2, TCO_ICH9DH)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH9_4, TCO_ICH9DO)}, - { ITCO_PCI_DEVICE(0x2919, TCO_ICH9M)}, - { ITCO_PCI_DEVICE(0x2917, TCO_ICH9ME)}, - { ITCO_PCI_DEVICE(0x3a18, TCO_ICH10)}, - { ITCO_PCI_DEVICE(0x3a16, TCO_ICH10R)}, - { ITCO_PCI_DEVICE(0x3a1a, TCO_ICH10D)}, - { ITCO_PCI_DEVICE(0x3a14, TCO_ICH10DO)}, - { ITCO_PCI_DEVICE(0x3b00, TCO_PCH)}, - { ITCO_PCI_DEVICE(0x3b01, TCO_PCHM)}, - { ITCO_PCI_DEVICE(0x3b02, TCO_P55)}, - { ITCO_PCI_DEVICE(0x3b03, TCO_PM55)}, - { ITCO_PCI_DEVICE(0x3b06, TCO_H55)}, - { ITCO_PCI_DEVICE(0x3b07, TCO_QM57)}, - { ITCO_PCI_DEVICE(0x3b08, TCO_H57)}, - { ITCO_PCI_DEVICE(0x3b09, TCO_HM55)}, - { ITCO_PCI_DEVICE(0x3b0a, TCO_Q57)}, - { ITCO_PCI_DEVICE(0x3b0b, TCO_HM57)}, - { ITCO_PCI_DEVICE(0x3b0d, TCO_PCHMSFF)}, - { 
ITCO_PCI_DEVICE(0x3b0f, TCO_QS57)}, - { ITCO_PCI_DEVICE(0x3b12, TCO_3400)}, - { ITCO_PCI_DEVICE(0x3b14, TCO_3420)}, - { ITCO_PCI_DEVICE(0x3b16, TCO_3450)}, - { ITCO_PCI_DEVICE(0x5031, TCO_EP80579)}, - { ITCO_PCI_DEVICE(0x1c41, TCO_CPT1)}, - { ITCO_PCI_DEVICE(0x1c42, TCO_CPT2)}, - { ITCO_PCI_DEVICE(0x1c43, TCO_CPT3)}, - { ITCO_PCI_DEVICE(0x1c44, TCO_CPT4)}, - { ITCO_PCI_DEVICE(0x1c45, TCO_CPT5)}, - { ITCO_PCI_DEVICE(0x1c46, TCO_CPT6)}, - { ITCO_PCI_DEVICE(0x1c47, TCO_CPT7)}, - { ITCO_PCI_DEVICE(0x1c48, TCO_CPT8)}, - { ITCO_PCI_DEVICE(0x1c49, TCO_CPT9)}, - { ITCO_PCI_DEVICE(0x1c4a, TCO_CPT10)}, - { ITCO_PCI_DEVICE(0x1c4b, TCO_CPT11)}, - { ITCO_PCI_DEVICE(0x1c4c, TCO_CPT12)}, - { ITCO_PCI_DEVICE(0x1c4d, TCO_CPT13)}, - { ITCO_PCI_DEVICE(0x1c4e, TCO_CPT14)}, - { ITCO_PCI_DEVICE(0x1c4f, TCO_CPT15)}, - { ITCO_PCI_DEVICE(0x1c50, TCO_CPT16)}, - { ITCO_PCI_DEVICE(0x1c51, TCO_CPT17)}, - { ITCO_PCI_DEVICE(0x1c52, TCO_CPT18)}, - { ITCO_PCI_DEVICE(0x1c53, TCO_CPT19)}, - { ITCO_PCI_DEVICE(0x1c54, TCO_CPT20)}, - { ITCO_PCI_DEVICE(0x1c55, TCO_CPT21)}, - { ITCO_PCI_DEVICE(0x1c56, TCO_CPT22)}, - { ITCO_PCI_DEVICE(0x1c57, TCO_CPT23)}, - { ITCO_PCI_DEVICE(0x1c58, TCO_CPT24)}, - { ITCO_PCI_DEVICE(0x1c59, TCO_CPT25)}, - { ITCO_PCI_DEVICE(0x1c5a, TCO_CPT26)}, - { ITCO_PCI_DEVICE(0x1c5b, TCO_CPT27)}, - { ITCO_PCI_DEVICE(0x1c5c, TCO_CPT28)}, - { ITCO_PCI_DEVICE(0x1c5d, TCO_CPT29)}, - { ITCO_PCI_DEVICE(0x1c5e, TCO_CPT30)}, - { ITCO_PCI_DEVICE(0x1c5f, TCO_CPT31)}, - { ITCO_PCI_DEVICE(0x1d40, TCO_PBG1)}, - { ITCO_PCI_DEVICE(0x1d41, TCO_PBG2)}, - { ITCO_PCI_DEVICE(0x2310, TCO_DH89XXCC)}, - { ITCO_PCI_DEVICE(0x1e40, TCO_PPT0)}, - { ITCO_PCI_DEVICE(0x1e41, TCO_PPT1)}, - { ITCO_PCI_DEVICE(0x1e42, TCO_PPT2)}, - { ITCO_PCI_DEVICE(0x1e43, TCO_PPT3)}, - { ITCO_PCI_DEVICE(0x1e44, TCO_PPT4)}, - { ITCO_PCI_DEVICE(0x1e45, TCO_PPT5)}, - { ITCO_PCI_DEVICE(0x1e46, TCO_PPT6)}, - { ITCO_PCI_DEVICE(0x1e47, TCO_PPT7)}, - { ITCO_PCI_DEVICE(0x1e48, TCO_PPT8)}, - { ITCO_PCI_DEVICE(0x1e49, TCO_PPT9)}, - { 
ITCO_PCI_DEVICE(0x1e4a, TCO_PPT10)}, - { ITCO_PCI_DEVICE(0x1e4b, TCO_PPT11)}, - { ITCO_PCI_DEVICE(0x1e4c, TCO_PPT12)}, - { ITCO_PCI_DEVICE(0x1e4d, TCO_PPT13)}, - { ITCO_PCI_DEVICE(0x1e4e, TCO_PPT14)}, - { ITCO_PCI_DEVICE(0x1e4f, TCO_PPT15)}, - { ITCO_PCI_DEVICE(0x1e50, TCO_PPT16)}, - { ITCO_PCI_DEVICE(0x1e51, TCO_PPT17)}, - { ITCO_PCI_DEVICE(0x1e52, TCO_PPT18)}, - { ITCO_PCI_DEVICE(0x1e53, TCO_PPT19)}, - { ITCO_PCI_DEVICE(0x1e54, TCO_PPT20)}, - { ITCO_PCI_DEVICE(0x1e55, TCO_PPT21)}, - { ITCO_PCI_DEVICE(0x1e56, TCO_PPT22)}, - { ITCO_PCI_DEVICE(0x1e57, TCO_PPT23)}, - { ITCO_PCI_DEVICE(0x1e58, TCO_PPT24)}, - { ITCO_PCI_DEVICE(0x1e59, TCO_PPT25)}, - { ITCO_PCI_DEVICE(0x1e5a, TCO_PPT26)}, - { ITCO_PCI_DEVICE(0x1e5b, TCO_PPT27)}, - { ITCO_PCI_DEVICE(0x1e5c, TCO_PPT28)}, - { ITCO_PCI_DEVICE(0x1e5d, TCO_PPT29)}, - { ITCO_PCI_DEVICE(0x1e5e, TCO_PPT30)}, - { ITCO_PCI_DEVICE(0x1e5f, TCO_PPT31)}, + { PCI_VDEVICE(INTEL, 0x2410), TCO_ICH}, + { PCI_VDEVICE(INTEL, 0x2420), TCO_ICH0}, + { PCI_VDEVICE(INTEL, 0x2440), TCO_ICH2}, + { PCI_VDEVICE(INTEL, 0x244c), TCO_ICH2M}, + { PCI_VDEVICE(INTEL, 0x2480), TCO_ICH3}, + { PCI_VDEVICE(INTEL, 0x248c), TCO_ICH3M}, + { PCI_VDEVICE(INTEL, 0x24c0), TCO_ICH4}, + { PCI_VDEVICE(INTEL, 0x24cc), TCO_ICH4M}, + { PCI_VDEVICE(INTEL, 0x2450), TCO_CICH}, + { PCI_VDEVICE(INTEL, 0x24d0), TCO_ICH5}, + { PCI_VDEVICE(INTEL, 0x25a1), TCO_6300ESB}, + { PCI_VDEVICE(INTEL, 0x2640), TCO_ICH6}, + { PCI_VDEVICE(INTEL, 0x2641), TCO_ICH6M}, + { PCI_VDEVICE(INTEL, 0x2642), TCO_ICH6W}, + { PCI_VDEVICE(INTEL, 0x2670), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2671), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2672), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2673), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2674), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2675), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2676), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2677), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2678), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2679), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x267a), 
TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x267b), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x267c), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x267d), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x267e), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x267f), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x27b8), TCO_ICH7}, + { PCI_VDEVICE(INTEL, 0x27b0), TCO_ICH7DH}, + { PCI_VDEVICE(INTEL, 0x27b9), TCO_ICH7M}, + { PCI_VDEVICE(INTEL, 0x27bd), TCO_ICH7MDH}, + { PCI_VDEVICE(INTEL, 0x27bc), TCO_NM10}, + { PCI_VDEVICE(INTEL, 0x2810), TCO_ICH8}, + { PCI_VDEVICE(INTEL, 0x2812), TCO_ICH8DH}, + { PCI_VDEVICE(INTEL, 0x2814), TCO_ICH8DO}, + { PCI_VDEVICE(INTEL, 0x2815), TCO_ICH8M}, + { PCI_VDEVICE(INTEL, 0x2811), TCO_ICH8ME}, + { PCI_VDEVICE(INTEL, 0x2918), TCO_ICH9}, + { PCI_VDEVICE(INTEL, 0x2916), TCO_ICH9R}, + { PCI_VDEVICE(INTEL, 0x2912), TCO_ICH9DH}, + { PCI_VDEVICE(INTEL, 0x2914), TCO_ICH9DO}, + { PCI_VDEVICE(INTEL, 0x2919), TCO_ICH9M}, + { PCI_VDEVICE(INTEL, 0x2917), TCO_ICH9ME}, + { PCI_VDEVICE(INTEL, 0x3a18), TCO_ICH10}, + { PCI_VDEVICE(INTEL, 0x3a16), TCO_ICH10R}, + { PCI_VDEVICE(INTEL, 0x3a1a), TCO_ICH10D}, + { PCI_VDEVICE(INTEL, 0x3a14), TCO_ICH10DO}, + { PCI_VDEVICE(INTEL, 0x3b00), TCO_PCH}, + { PCI_VDEVICE(INTEL, 0x3b01), TCO_PCHM}, + { PCI_VDEVICE(INTEL, 0x3b02), TCO_P55}, + { PCI_VDEVICE(INTEL, 0x3b03), TCO_PM55}, + { PCI_VDEVICE(INTEL, 0x3b06), TCO_H55}, + { PCI_VDEVICE(INTEL, 0x3b07), TCO_QM57}, + { PCI_VDEVICE(INTEL, 0x3b08), TCO_H57}, + { PCI_VDEVICE(INTEL, 0x3b09), TCO_HM55}, + { PCI_VDEVICE(INTEL, 0x3b0a), TCO_Q57}, + { PCI_VDEVICE(INTEL, 0x3b0b), TCO_HM57}, + { PCI_VDEVICE(INTEL, 0x3b0d), TCO_PCHMSFF}, + { PCI_VDEVICE(INTEL, 0x3b0f), TCO_QS57}, + { PCI_VDEVICE(INTEL, 0x3b12), TCO_3400}, + { PCI_VDEVICE(INTEL, 0x3b14), TCO_3420}, + { PCI_VDEVICE(INTEL, 0x3b16), TCO_3450}, + { PCI_VDEVICE(INTEL, 0x5031), TCO_EP80579}, + { PCI_VDEVICE(INTEL, 0x1c41), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c42), TCO_CPTD}, + { PCI_VDEVICE(INTEL, 0x1c43), TCO_CPTM}, + { PCI_VDEVICE(INTEL, 0x1c44), TCO_CPT}, + { 
PCI_VDEVICE(INTEL, 0x1c45), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c46), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c47), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c48), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c49), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4a), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4b), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4c), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4d), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4e), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4f), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c50), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c51), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c52), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c53), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c54), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c55), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c56), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c57), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c58), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c59), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5a), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5b), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5c), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5d), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5e), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5f), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1d40), TCO_PBG}, + { PCI_VDEVICE(INTEL, 0x1d41), TCO_PBG}, + { PCI_VDEVICE(INTEL, 0x2310), TCO_DH89XXCC}, + { PCI_VDEVICE(INTEL, 0x1e40), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e41), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e42), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e43), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e44), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e45), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e46), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e47), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e48), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e49), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4a), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4b), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4c), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4d), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4e), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4f), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e50), TCO_PPT}, + { PCI_VDEVICE(INTEL, 
0x1e51), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e52), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e53), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e54), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e55), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e56), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e57), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e58), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e59), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5a), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5b), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5c), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5d), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5e), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5f), TCO_PPT}, { 0, }, /* End of list */ }; MODULE_DEVICE_TABLE(pci, iTCO_wdt_pci_tbl); @@ -1052,15 +923,10 @@ static void iTCO_wdt_shutdown(struct platform_device *dev) iTCO_wdt_stop(); } -#define iTCO_wdt_suspend NULL -#define iTCO_wdt_resume NULL - static struct platform_driver iTCO_wdt_driver = { .probe = iTCO_wdt_probe, .remove = __devexit_p(iTCO_wdt_remove), .shutdown = iTCO_wdt_shutdown, - .suspend = iTCO_wdt_suspend, - .resume = iTCO_wdt_resume, .driver = { .owner = THIS_MODULE, .name = DRV_NAME, diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c index 86f7cac1026c..b8ef2c6dca7c 100644 --- a/drivers/watchdog/imx2_wdt.c +++ b/drivers/watchdog/imx2_wdt.c @@ -329,12 +329,18 @@ static void imx2_wdt_shutdown(struct platform_device *pdev) } } +static const struct of_device_id imx2_wdt_dt_ids[] = { + { .compatible = "fsl,imx21-wdt", }, + { /* sentinel */ } +}; + static struct platform_driver imx2_wdt_driver = { .remove = __exit_p(imx2_wdt_remove), .shutdown = imx2_wdt_shutdown, .driver = { .name = DRIVER_NAME, .owner = THIS_MODULE, + .of_match_table = imx2_wdt_dt_ids, }, }; diff --git a/drivers/watchdog/it8712f_wdt.c b/drivers/watchdog/it8712f_wdt.c index 6143f52ba6b8..8d2d8502d3e8 100644 --- a/drivers/watchdog/it8712f_wdt.c +++ b/drivers/watchdog/it8712f_wdt.c @@ -28,10 +28,10 @@ #include <linux/notifier.h> #include <linux/reboot.h> #include <linux/fs.h> 
-#include <linux/pci.h> #include <linux/spinlock.h> #include <linux/uaccess.h> #include <linux/io.h> +#include <linux/ioport.h> #define NAME "it8712f_wdt" @@ -51,7 +51,6 @@ MODULE_PARM_DESC(nowayout, "Disable watchdog shutdown on close"); static unsigned long wdt_open; static unsigned expect_close; -static spinlock_t io_lock; static unsigned char revision; /* Dog Food address - We use the game port address */ @@ -121,20 +120,26 @@ static inline void superio_select(int ldn) outb(ldn, VAL); } -static inline void superio_enter(void) +static inline int superio_enter(void) { - spin_lock(&io_lock); + /* + * Try to reserve REG and REG + 1 for exclusive access. + */ + if (!request_muxed_region(REG, 2, NAME)) + return -EBUSY; + outb(0x87, REG); outb(0x01, REG); outb(0x55, REG); outb(0x55, REG); + return 0; } static inline void superio_exit(void) { outb(0x02, REG); outb(0x02, VAL); - spin_unlock(&io_lock); + release_region(REG, 2); } static inline void it8712f_wdt_ping(void) @@ -173,10 +178,13 @@ static int it8712f_wdt_get_status(void) return 0; } -static void it8712f_wdt_enable(void) +static int it8712f_wdt_enable(void) { + int ret = superio_enter(); + if (ret) + return ret; + printk(KERN_DEBUG NAME ": enabling watchdog timer\n"); - superio_enter(); superio_select(LDN_GPIO); superio_outb(wdt_control_reg, WDT_CONTROL); @@ -186,13 +194,17 @@ static void it8712f_wdt_enable(void) superio_exit(); it8712f_wdt_ping(); + + return 0; } -static void it8712f_wdt_disable(void) +static int it8712f_wdt_disable(void) { - printk(KERN_DEBUG NAME ": disabling watchdog timer\n"); + int ret = superio_enter(); + if (ret) + return ret; - superio_enter(); + printk(KERN_DEBUG NAME ": disabling watchdog timer\n"); superio_select(LDN_GPIO); superio_outb(0, WDT_CONFIG); @@ -202,6 +214,7 @@ static void it8712f_wdt_disable(void) superio_outb(0, WDT_TIMEOUT); superio_exit(); + return 0; } static int it8712f_wdt_notify(struct notifier_block *this, @@ -252,6 +265,7 @@ static long it8712f_wdt_ioctl(struct 
file *file, unsigned int cmd, WDIOF_MAGICCLOSE, }; int value; + int ret; switch (cmd) { case WDIOC_GETSUPPORT: @@ -259,7 +273,9 @@ static long it8712f_wdt_ioctl(struct file *file, unsigned int cmd, return -EFAULT; return 0; case WDIOC_GETSTATUS: - superio_enter(); + ret = superio_enter(); + if (ret) + return ret; superio_select(LDN_GPIO); value = it8712f_wdt_get_status(); @@ -280,7 +296,9 @@ static long it8712f_wdt_ioctl(struct file *file, unsigned int cmd, if (value > (max_units * 60)) return -EINVAL; margin = value; - superio_enter(); + ret = superio_enter(); + if (ret) + return ret; superio_select(LDN_GPIO); it8712f_wdt_update_margin(); @@ -299,10 +317,14 @@ static long it8712f_wdt_ioctl(struct file *file, unsigned int cmd, static int it8712f_wdt_open(struct inode *inode, struct file *file) { + int ret; /* only allow one at a time */ if (test_and_set_bit(0, &wdt_open)) return -EBUSY; - it8712f_wdt_enable(); + + ret = it8712f_wdt_enable(); + if (ret) + return ret; return nonseekable_open(inode, file); } @@ -313,7 +335,8 @@ static int it8712f_wdt_release(struct inode *inode, struct file *file) ": watchdog device closed unexpectedly, will not" " disable the watchdog timer\n"); } else if (!nowayout) { - it8712f_wdt_disable(); + if (it8712f_wdt_disable()) + printk(KERN_WARNING NAME "Watchdog disable failed\n"); } expect_close = 0; clear_bit(0, &wdt_open); @@ -340,8 +363,10 @@ static int __init it8712f_wdt_find(unsigned short *address) { int err = -ENODEV; int chip_type; + int ret = superio_enter(); + if (ret) + return ret; - superio_enter(); chip_type = superio_inw(DEVID); if (chip_type != IT8712F_DEVID) goto exit; @@ -382,8 +407,6 @@ static int __init it8712f_wdt_init(void) { int err = 0; - spin_lock_init(&io_lock); - if (it8712f_wdt_find(&address)) return -ENODEV; @@ -392,7 +415,11 @@ static int __init it8712f_wdt_init(void) return -EBUSY; } - it8712f_wdt_disable(); + err = it8712f_wdt_disable(); + if (err) { + printk(KERN_ERR NAME ": unable to disable watchdog 
timer.\n"); + goto out; + } err = register_reboot_notifier(&it8712f_wdt_notifier); if (err) { diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c index b1bc72f9a209..a2d9a1266a23 100644 --- a/drivers/watchdog/it87_wdt.c +++ b/drivers/watchdog/it87_wdt.c @@ -137,7 +137,6 @@ static unsigned int base, gpact, ciract, max_units, chip_type; static unsigned long wdt_status; -static DEFINE_SPINLOCK(spinlock); static int nogameport = DEFAULT_NOGAMEPORT; static int exclusive = DEFAULT_EXCLUSIVE; @@ -163,18 +162,26 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started, default=" /* Superio Chip */ -static inline void superio_enter(void) +static inline int superio_enter(void) { + /* + * Try to reserve REG and REG + 1 for exclusive access. + */ + if (!request_muxed_region(REG, 2, WATCHDOG_NAME)) + return -EBUSY; + outb(0x87, REG); outb(0x01, REG); outb(0x55, REG); outb(0x55, REG); + return 0; } static inline void superio_exit(void) { outb(0x02, REG); outb(0x02, VAL); + release_region(REG, 2); } static inline void superio_select(int ldn) @@ -255,12 +262,11 @@ static void wdt_keepalive(void) set_bit(WDTS_KEEPALIVE, &wdt_status); } -static void wdt_start(void) +static int wdt_start(void) { - unsigned long flags; - - spin_lock_irqsave(&spinlock, flags); - superio_enter(); + int ret = superio_enter(); + if (ret) + return ret; superio_select(GPIO); if (test_bit(WDTS_USE_GP, &wdt_status)) @@ -270,15 +276,15 @@ static void wdt_start(void) wdt_update_timeout(); superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); + + return 0; } -static void wdt_stop(void) +static int wdt_stop(void) { - unsigned long flags; - - spin_lock_irqsave(&spinlock, flags); - superio_enter(); + int ret = superio_enter(); + if (ret) + return ret; superio_select(GPIO); superio_outb(0x00, WDTCTRL); @@ -288,7 +294,7 @@ static void wdt_stop(void) superio_outb(0x00, WDTVALMSB); superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); + return 0; } /** @@ -303,8 +309,6 @@ 
static void wdt_stop(void) static int wdt_set_timeout(int t) { - unsigned long flags; - if (t < 1 || t > max_units * 60) return -EINVAL; @@ -313,14 +317,15 @@ static int wdt_set_timeout(int t) else timeout = t; - spin_lock_irqsave(&spinlock, flags); if (test_bit(WDTS_TIMER_RUN, &wdt_status)) { - superio_enter(); + int ret = superio_enter(); + if (ret) + return ret; + superio_select(GPIO); wdt_update_timeout(); superio_exit(); } - spin_unlock_irqrestore(&spinlock, flags); return 0; } @@ -339,12 +344,12 @@ static int wdt_set_timeout(int t) static int wdt_get_status(int *status) { - unsigned long flags; - *status = 0; if (testmode) { - spin_lock_irqsave(&spinlock, flags); - superio_enter(); + int ret = superio_enter(); + if (ret) + return ret; + superio_select(GPIO); if (superio_inb(WDTCTRL) & WDT_ZERO) { superio_outb(0x00, WDTCTRL); @@ -353,7 +358,6 @@ static int wdt_get_status(int *status) } superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); } if (test_and_clear_bit(WDTS_KEEPALIVE, &wdt_status)) *status |= WDIOF_KEEPALIVEPING; @@ -379,9 +383,17 @@ static int wdt_open(struct inode *inode, struct file *file) if (exclusive && test_and_set_bit(WDTS_DEV_OPEN, &wdt_status)) return -EBUSY; if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status)) { + int ret; if (nowayout && !test_and_set_bit(WDTS_LOCKED, &wdt_status)) __module_get(THIS_MODULE); - wdt_start(); + + ret = wdt_start(); + if (ret) { + clear_bit(WDTS_LOCKED, &wdt_status); + clear_bit(WDTS_TIMER_RUN, &wdt_status); + clear_bit(WDTS_DEV_OPEN, &wdt_status); + return ret; + } } return nonseekable_open(inode, file); } @@ -403,7 +415,16 @@ static int wdt_release(struct inode *inode, struct file *file) { if (test_bit(WDTS_TIMER_RUN, &wdt_status)) { if (test_and_clear_bit(WDTS_EXPECTED, &wdt_status)) { - wdt_stop(); + int ret = wdt_stop(); + if (ret) { + /* + * Stop failed. Just keep the watchdog alive + * and hope nothing bad happens. 
+ */ + set_bit(WDTS_EXPECTED, &wdt_status); + wdt_keepalive(); + return ret; + } clear_bit(WDTS_TIMER_RUN, &wdt_status); } else { wdt_keepalive(); @@ -484,7 +505,9 @@ static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) &ident, sizeof(ident)) ? -EFAULT : 0; case WDIOC_GETSTATUS: - wdt_get_status(&status); + rc = wdt_get_status(&status); + if (rc) + return rc; return put_user(status, uarg.i); case WDIOC_GETBOOTSTATUS: @@ -500,14 +523,22 @@ static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) switch (new_options) { case WDIOS_DISABLECARD: - if (test_bit(WDTS_TIMER_RUN, &wdt_status)) - wdt_stop(); + if (test_bit(WDTS_TIMER_RUN, &wdt_status)) { + rc = wdt_stop(); + if (rc) + return rc; + } clear_bit(WDTS_TIMER_RUN, &wdt_status); return 0; case WDIOS_ENABLECARD: - if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status)) - wdt_start(); + if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status)) { + rc = wdt_start(); + if (rc) { + clear_bit(WDTS_TIMER_RUN, &wdt_status); + return rc; + } + } return 0; default: @@ -560,16 +591,17 @@ static int __init it87_wdt_init(void) int rc = 0; int try_gameport = !nogameport; u8 chip_rev; - unsigned long flags; + int gp_rreq_fail = 0; wdt_status = 0; - spin_lock_irqsave(&spinlock, flags); - superio_enter(); + rc = superio_enter(); + if (rc) + return rc; + chip_type = superio_inw(CHIPID); chip_rev = superio_inb(CHIPREV) & 0x0f; superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); switch (chip_type) { case IT8702_ID: @@ -603,8 +635,9 @@ static int __init it87_wdt_init(void) return -ENODEV; } - spin_lock_irqsave(&spinlock, flags); - superio_enter(); + rc = superio_enter(); + if (rc) + return rc; superio_select(GPIO); superio_outb(WDT_TOV1, WDTCFG); @@ -620,21 +653,16 @@ static int __init it87_wdt_init(void) } gpact = superio_inb(ACTREG); superio_outb(0x01, ACTREG); - superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); if (request_region(base, 1, WATCHDOG_NAME)) set_bit(WDTS_USE_GP, 
&wdt_status); else - rc = -EIO; - } else { - superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); + gp_rreq_fail = 1; } /* If we haven't Gameport support, try to get CIR support */ if (!test_bit(WDTS_USE_GP, &wdt_status)) { if (!request_region(CIR_BASE, 8, WATCHDOG_NAME)) { - if (rc == -EIO) + if (gp_rreq_fail) printk(KERN_ERR PFX "I/O Address 0x%04x and 0x%04x" " already in use\n", base, CIR_BASE); @@ -646,21 +674,16 @@ static int __init it87_wdt_init(void) goto err_out; } base = CIR_BASE; - spin_lock_irqsave(&spinlock, flags); - superio_enter(); superio_select(CIR); superio_outw(base, BASEREG); superio_outb(0x00, CIR_ILS); ciract = superio_inb(ACTREG); superio_outb(0x01, ACTREG); - if (rc == -EIO) { + if (gp_rreq_fail) { superio_select(GAMEPORT); superio_outb(gpact, ACTREG); } - - superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); } if (timeout < 1 || timeout > max_units * 60) { @@ -704,6 +727,7 @@ static int __init it87_wdt_init(void) "nogameport=%d)\n", chip_type, chip_rev, timeout, nowayout, testmode, exclusive, nogameport); + superio_exit(); return 0; err_out_reboot: @@ -711,49 +735,37 @@ err_out_reboot: err_out_region: release_region(base, test_bit(WDTS_USE_GP, &wdt_status) ? 
1 : 8); if (!test_bit(WDTS_USE_GP, &wdt_status)) { - spin_lock_irqsave(&spinlock, flags); - superio_enter(); superio_select(CIR); superio_outb(ciract, ACTREG); - superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); } err_out: if (try_gameport) { - spin_lock_irqsave(&spinlock, flags); - superio_enter(); superio_select(GAMEPORT); superio_outb(gpact, ACTREG); - superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); } + superio_exit(); return rc; } static void __exit it87_wdt_exit(void) { - unsigned long flags; - int nolock; - - nolock = !spin_trylock_irqsave(&spinlock, flags); - superio_enter(); - superio_select(GPIO); - superio_outb(0x00, WDTCTRL); - superio_outb(0x00, WDTCFG); - superio_outb(0x00, WDTVALLSB); - if (max_units > 255) - superio_outb(0x00, WDTVALMSB); - if (test_bit(WDTS_USE_GP, &wdt_status)) { - superio_select(GAMEPORT); - superio_outb(gpact, ACTREG); - } else { - superio_select(CIR); - superio_outb(ciract, ACTREG); + if (superio_enter() == 0) { + superio_select(GPIO); + superio_outb(0x00, WDTCTRL); + superio_outb(0x00, WDTCFG); + superio_outb(0x00, WDTVALLSB); + if (max_units > 255) + superio_outb(0x00, WDTVALMSB); + if (test_bit(WDTS_USE_GP, &wdt_status)) { + superio_select(GAMEPORT); + superio_outb(gpact, ACTREG); + } else { + superio_select(CIR); + superio_outb(ciract, ACTREG); + } + superio_exit(); } - superio_exit(); - if (!nolock) - spin_unlock_irqrestore(&spinlock, flags); misc_deregister(&wdt_miscdev); unregister_reboot_notifier(&wdt_notifier); diff --git a/drivers/watchdog/mpcore_wdt.c b/drivers/watchdog/mpcore_wdt.c index 2b4af222b5f2..4dc31024d26c 100644 --- a/drivers/watchdog/mpcore_wdt.c +++ b/drivers/watchdog/mpcore_wdt.c @@ -407,12 +407,35 @@ static int __devexit mpcore_wdt_remove(struct platform_device *dev) return 0; } +#ifdef CONFIG_PM +static int mpcore_wdt_suspend(struct platform_device *dev, pm_message_t msg) +{ + struct mpcore_wdt *wdt = platform_get_drvdata(dev); + mpcore_wdt_stop(wdt); /* Turn the WDT off */ + 
return 0; +} + +static int mpcore_wdt_resume(struct platform_device *dev) +{ + struct mpcore_wdt *wdt = platform_get_drvdata(dev); + /* re-activate timer */ + if (test_bit(0, &wdt->timer_alive)) + mpcore_wdt_start(wdt); + return 0; +} +#else +#define mpcore_wdt_suspend NULL +#define mpcore_wdt_resume NULL +#endif + /* work with hotplug and coldplug */ MODULE_ALIAS("platform:mpcore_wdt"); static struct platform_driver mpcore_wdt_driver = { .probe = mpcore_wdt_probe, .remove = __devexit_p(mpcore_wdt_remove), + .suspend = mpcore_wdt_suspend, + .resume = mpcore_wdt_resume, .shutdown = mpcore_wdt_shutdown, .driver = { .owner = THIS_MODULE, diff --git a/drivers/watchdog/mtx-1_wdt.c b/drivers/watchdog/mtx-1_wdt.c index 0430e093b1a0..ac37bb82392c 100644 --- a/drivers/watchdog/mtx-1_wdt.c +++ b/drivers/watchdog/mtx-1_wdt.c @@ -225,11 +225,11 @@ static int __devinit mtx1_wdt_probe(struct platform_device *pdev) ret = misc_register(&mtx1_wdt_misc); if (ret < 0) { - printk(KERN_ERR " mtx-1_wdt : failed to register\n"); + dev_err(&pdev->dev, "failed to register\n"); return ret; } mtx1_wdt_start(); - printk(KERN_INFO "MTX-1 Watchdog driver\n"); + dev_info(&pdev->dev, "MTX-1 Watchdog driver\n"); return 0; } diff --git a/drivers/watchdog/of_xilinx_wdt.c b/drivers/watchdog/of_xilinx_wdt.c new file mode 100644 index 000000000000..4ec741ac952c --- /dev/null +++ b/drivers/watchdog/of_xilinx_wdt.c @@ -0,0 +1,433 @@ +/* +* of_xilinx_wdt.c 1.01 A Watchdog Device Driver for Xilinx xps_timebase_wdt +* +* (C) Copyright 2011 (Alejandro Cabrera <aldaya@gmail.com>) +* +* ----------------------- +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License +* as published by the Free Software Foundation; either version +* 2 of the License, or (at your option) any later version. 
+* +* ----------------------- +* 30-May-2011 Alejandro Cabrera <aldaya@gmail.com> +* - If "xlnx,wdt-enable-once" wasn't found on device tree the +* module will use CONFIG_WATCHDOG_NOWAYOUT +* - If the device tree parameters ("clock-frequency" and +* "xlnx,wdt-interval") wasn't found the driver won't +* know the wdt reset interval +*/ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/watchdog.h> +#include <linux/io.h> +#include <linux/uaccess.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_address.h> + +/* Register offsets for the Wdt device */ +#define XWT_TWCSR0_OFFSET 0x0 /* Control/Status Register0 */ +#define XWT_TWCSR1_OFFSET 0x4 /* Control/Status Register1 */ +#define XWT_TBR_OFFSET 0x8 /* Timebase Register Offset */ + +/* Control/Status Register Masks */ +#define XWT_CSR0_WRS_MASK 0x00000008 /* Reset status */ +#define XWT_CSR0_WDS_MASK 0x00000004 /* Timer state */ +#define XWT_CSR0_EWDT1_MASK 0x00000002 /* Enable bit 1 */ + +/* Control/Status Register 0/1 bits */ +#define XWT_CSRX_EWDT2_MASK 0x00000001 /* Enable bit 2 */ + +/* SelfTest constants */ +#define XWT_MAX_SELFTEST_LOOP_COUNT 0x00010000 +#define XWT_TIMER_FAILED 0xFFFFFFFF + +#define WATCHDOG_NAME "Xilinx Watchdog" +#define PFX WATCHDOG_NAME ": " + +struct xwdt_device { + struct resource res; + void __iomem *base; + u32 nowayout; + u32 wdt_interval; + u32 boot_status; +}; + +static struct xwdt_device xdev; + +static u32 timeout; +static u32 control_status_reg; +static u8 expect_close; +static u8 no_timeout; +static unsigned long driver_open; + +static DEFINE_SPINLOCK(spinlock); + +static void xwdt_start(void) +{ + spin_lock(&spinlock); + + /* Clean previous status and enable the watchdog timer */ + control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET); + control_status_reg |= (XWT_CSR0_WRS_MASK | 
XWT_CSR0_WDS_MASK); + + iowrite32((control_status_reg | XWT_CSR0_EWDT1_MASK), + xdev.base + XWT_TWCSR0_OFFSET); + + iowrite32(XWT_CSRX_EWDT2_MASK, xdev.base + XWT_TWCSR1_OFFSET); + + spin_unlock(&spinlock); +} + +static void xwdt_stop(void) +{ + spin_lock(&spinlock); + + control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET); + + iowrite32((control_status_reg & ~XWT_CSR0_EWDT1_MASK), + xdev.base + XWT_TWCSR0_OFFSET); + + iowrite32(0, xdev.base + XWT_TWCSR1_OFFSET); + + spin_unlock(&spinlock); + printk(KERN_INFO PFX "Stopped!\n"); +} + +static void xwdt_keepalive(void) +{ + spin_lock(&spinlock); + + control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET); + control_status_reg |= (XWT_CSR0_WRS_MASK | XWT_CSR0_WDS_MASK); + iowrite32(control_status_reg, xdev.base + XWT_TWCSR0_OFFSET); + + spin_unlock(&spinlock); +} + +static void xwdt_get_status(int *status) +{ + int new_status; + + spin_lock(&spinlock); + + control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET); + new_status = ((control_status_reg & + (XWT_CSR0_WRS_MASK | XWT_CSR0_WDS_MASK)) != 0); + spin_unlock(&spinlock); + + *status = 0; + if (new_status & 1) + *status |= WDIOF_CARDRESET; +} + +static u32 xwdt_selftest(void) +{ + int i; + u32 timer_value1; + u32 timer_value2; + + spin_lock(&spinlock); + + timer_value1 = ioread32(xdev.base + XWT_TBR_OFFSET); + timer_value2 = ioread32(xdev.base + XWT_TBR_OFFSET); + + for (i = 0; + ((i <= XWT_MAX_SELFTEST_LOOP_COUNT) && + (timer_value2 == timer_value1)); i++) { + timer_value2 = ioread32(xdev.base + XWT_TBR_OFFSET); + } + + spin_unlock(&spinlock); + + if (timer_value2 != timer_value1) + return ~XWT_TIMER_FAILED; + else + return XWT_TIMER_FAILED; +} + +static int xwdt_open(struct inode *inode, struct file *file) +{ + /* Only one process can handle the wdt at a time */ + if (test_and_set_bit(0, &driver_open)) + return -EBUSY; + + /* Make sure that the module are always loaded...*/ + if (xdev.nowayout) + __module_get(THIS_MODULE); + + xwdt_start(); + 
printk(KERN_INFO PFX "Started...\n"); + + return nonseekable_open(inode, file); +} + +static int xwdt_release(struct inode *inode, struct file *file) +{ + if (expect_close == 42) { + xwdt_stop(); + } else { + printk(KERN_CRIT PFX + "Unexpected close, not stopping watchdog!\n"); + xwdt_keepalive(); + } + + clear_bit(0, &driver_open); + expect_close = 0; + return 0; +} + +/* + * xwdt_write: + * @file: file handle to the watchdog + * @buf: buffer to write (unused as data does not matter here + * @count: count of bytes + * @ppos: pointer to the position to write. No seeks allowed + * + * A write to a watchdog device is defined as a keepalive signal. Any + * write of data will do, as we don't define content meaning. + */ +static ssize_t xwdt_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + if (len) { + if (!xdev.nowayout) { + size_t i; + + /* In case it was set long ago */ + expect_close = 0; + + for (i = 0; i != len; i++) { + char c; + + if (get_user(c, buf + i)) + return -EFAULT; + if (c == 'V') + expect_close = 42; + } + } + xwdt_keepalive(); + } + return len; +} + +static const struct watchdog_info ident = { + .options = WDIOF_MAGICCLOSE | + WDIOF_KEEPALIVEPING, + .firmware_version = 1, + .identity = WATCHDOG_NAME, +}; + +/* + * xwdt_ioctl: + * @file: file handle to the device + * @cmd: watchdog command + * @arg: argument pointer + * + * The watchdog API defines a common set of functions for all watchdogs + * according to their available features. + */ +static long xwdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int status; + + union { + struct watchdog_info __user *ident; + int __user *i; + } uarg; + + uarg.i = (int __user *)arg; + + switch (cmd) { + case WDIOC_GETSUPPORT: + return copy_to_user(uarg.ident, &ident, + sizeof(ident)) ? 
-EFAULT : 0; + + case WDIOC_GETBOOTSTATUS: + return put_user(xdev.boot_status, uarg.i); + + case WDIOC_GETSTATUS: + xwdt_get_status(&status); + return put_user(status, uarg.i); + + case WDIOC_KEEPALIVE: + xwdt_keepalive(); + return 0; + + case WDIOC_GETTIMEOUT: + if (no_timeout) + return -ENOTTY; + else + return put_user(timeout, uarg.i); + + default: + return -ENOTTY; + } +} + +static const struct file_operations xwdt_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .write = xwdt_write, + .open = xwdt_open, + .release = xwdt_release, + .unlocked_ioctl = xwdt_ioctl, +}; + +static struct miscdevice xwdt_miscdev = { + .minor = WATCHDOG_MINOR, + .name = "watchdog", + .fops = &xwdt_fops, +}; + +static int __devinit xwdt_probe(struct platform_device *pdev) +{ + int rc; + u32 *tmptr; + u32 *pfreq; + + no_timeout = 0; + + pfreq = (u32 *)of_get_property(pdev->dev.of_node->parent, + "clock-frequency", NULL); + + if (pfreq == NULL) { + printk(KERN_WARNING PFX + "The watchdog clock frequency cannot be obtained!\n"); + no_timeout = 1; + } + + rc = of_address_to_resource(pdev->dev.of_node, 0, &xdev.res); + if (rc) { + printk(KERN_WARNING PFX "invalid address!\n"); + return rc; + } + + tmptr = (u32 *)of_get_property(pdev->dev.of_node, + "xlnx,wdt-interval", NULL); + if (tmptr == NULL) { + printk(KERN_WARNING PFX "Parameter \"xlnx,wdt-interval\"" + " not found in device tree!\n"); + no_timeout = 1; + } else { + xdev.wdt_interval = *tmptr; + } + + tmptr = (u32 *)of_get_property(pdev->dev.of_node, + "xlnx,wdt-enable-once", NULL); + if (tmptr == NULL) { + printk(KERN_WARNING PFX "Parameter \"xlnx,wdt-enable-once\"" + " not found in device tree!\n"); + xdev.nowayout = WATCHDOG_NOWAYOUT; + } + +/* + * Twice of the 2^wdt_interval / freq because the first wdt overflow is + * ignored (interrupt), reset is only generated at second wdt overflow + */ + if (!no_timeout) + timeout = 2 * ((1<<xdev.wdt_interval) / *pfreq); + + if (!request_mem_region(xdev.res.start, + xdev.res.end - 
xdev.res.start + 1, WATCHDOG_NAME)) { + rc = -ENXIO; + printk(KERN_ERR PFX "memory request failure!\n"); + goto err_out; + } + + xdev.base = ioremap(xdev.res.start, xdev.res.end - xdev.res.start + 1); + if (xdev.base == NULL) { + rc = -ENOMEM; + printk(KERN_ERR PFX "ioremap failure!\n"); + goto release_mem; + } + + rc = xwdt_selftest(); + if (rc == XWT_TIMER_FAILED) { + printk(KERN_ERR PFX "SelfTest routine error!\n"); + goto unmap_io; + } + + xwdt_get_status(&xdev.boot_status); + + rc = misc_register(&xwdt_miscdev); + if (rc) { + printk(KERN_ERR PFX + "cannot register miscdev on minor=%d (err=%d)\n", + xwdt_miscdev.minor, rc); + goto unmap_io; + } + + if (no_timeout) + printk(KERN_INFO PFX + "driver loaded (timeout=? sec, nowayout=%d)\n", + xdev.nowayout); + else + printk(KERN_INFO PFX + "driver loaded (timeout=%d sec, nowayout=%d)\n", + timeout, xdev.nowayout); + + expect_close = 0; + clear_bit(0, &driver_open); + + return 0; + +unmap_io: + iounmap(xdev.base); +release_mem: + release_mem_region(xdev.res.start, resource_size(&xdev.res)); +err_out: + return rc; +} + +static int __devexit xwdt_remove(struct platform_device *dev) +{ + misc_deregister(&xwdt_miscdev); + iounmap(xdev.base); + release_mem_region(xdev.res.start, resource_size(&xdev.res)); + + return 0; +} + +/* Match table for of_platform binding */ +static struct of_device_id __devinitdata xwdt_of_match[] = { + { .compatible = "xlnx,xps-timebase-wdt-1.01.a", }, + {}, +}; +MODULE_DEVICE_TABLE(of, xwdt_of_match); + +static struct platform_driver xwdt_driver = { + .probe = xwdt_probe, + .remove = __devexit_p(xwdt_remove), + .driver = { + .owner = THIS_MODULE, + .name = WATCHDOG_NAME, + .of_match_table = xwdt_of_match, + }, +}; + +static int __init xwdt_init(void) +{ + return platform_driver_register(&xwdt_driver); +} + +static void __exit xwdt_exit(void) +{ + platform_driver_unregister(&xwdt_driver); +} + +module_init(xwdt_init); +module_exit(xwdt_exit); + +MODULE_AUTHOR("Alejandro Cabrera 
<aldaya@gmail.com>"); +MODULE_DESCRIPTION("Xilinx Watchdog driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/watchdog/pc87413_wdt.c b/drivers/watchdog/pc87413_wdt.c index b7c139051575..e78d89986768 100644 --- a/drivers/watchdog/pc87413_wdt.c +++ b/drivers/watchdog/pc87413_wdt.c @@ -56,6 +56,7 @@ #define IO_DEFAULT 0x2E /* Address used on Portwell Boards */ static int io = IO_DEFAULT; +static int swc_base_addr = -1; static int timeout = DEFAULT_TIMEOUT; /* timeout value */ static unsigned long timer_enabled; /* is the timer enabled? */ @@ -116,9 +117,8 @@ static inline void pc87413_enable_swc(void) /* Read SWC I/O base address */ -static inline unsigned int pc87413_get_swc_base(void) +static void pc87413_get_swc_base_addr(void) { - unsigned int swc_base_addr = 0; unsigned char addr_l, addr_h = 0; /* Step 3: Read SWC I/O Base Address */ @@ -136,12 +136,11 @@ static inline unsigned int pc87413_get_swc_base(void) "Read SWC I/O Base Address: low %d, high %d, res %d\n", addr_l, addr_h, swc_base_addr); #endif - return swc_base_addr; } /* Select Bank 3 of SWC */ -static inline void pc87413_swc_bank3(unsigned int swc_base_addr) +static inline void pc87413_swc_bank3(void) { /* Step 4: Select Bank3 of SWC */ outb_p(inb(swc_base_addr + 0x0f) | 0x03, swc_base_addr + 0x0f); @@ -152,8 +151,7 @@ static inline void pc87413_swc_bank3(unsigned int swc_base_addr) /* Set watchdog timeout to x minutes */ -static inline void pc87413_programm_wdto(unsigned int swc_base_addr, - char pc87413_time) +static inline void pc87413_programm_wdto(char pc87413_time) { /* Step 5: Programm WDTO, Twd. 
*/ outb_p(pc87413_time, swc_base_addr + WDTO); @@ -164,7 +162,7 @@ static inline void pc87413_programm_wdto(unsigned int swc_base_addr, /* Enable WDEN */ -static inline void pc87413_enable_wden(unsigned int swc_base_addr) +static inline void pc87413_enable_wden(void) { /* Step 6: Enable WDEN */ outb_p(inb(swc_base_addr + WDCTL) | 0x01, swc_base_addr + WDCTL); @@ -174,7 +172,7 @@ static inline void pc87413_enable_wden(unsigned int swc_base_addr) } /* Enable SW_WD_TREN */ -static inline void pc87413_enable_sw_wd_tren(unsigned int swc_base_addr) +static inline void pc87413_enable_sw_wd_tren(void) { /* Enable SW_WD_TREN */ outb_p(inb(swc_base_addr + WDCFG) | 0x80, swc_base_addr + WDCFG); @@ -185,7 +183,7 @@ static inline void pc87413_enable_sw_wd_tren(unsigned int swc_base_addr) /* Disable SW_WD_TREN */ -static inline void pc87413_disable_sw_wd_tren(unsigned int swc_base_addr) +static inline void pc87413_disable_sw_wd_tren(void) { /* Disable SW_WD_TREN */ outb_p(inb(swc_base_addr + WDCFG) & 0x7f, swc_base_addr + WDCFG); @@ -196,7 +194,7 @@ static inline void pc87413_disable_sw_wd_tren(unsigned int swc_base_addr) /* Enable SW_WD_TRG */ -static inline void pc87413_enable_sw_wd_trg(unsigned int swc_base_addr) +static inline void pc87413_enable_sw_wd_trg(void) { /* Enable SW_WD_TRG */ outb_p(inb(swc_base_addr + WDCTL) | 0x80, swc_base_addr + WDCTL); @@ -207,7 +205,7 @@ static inline void pc87413_enable_sw_wd_trg(unsigned int swc_base_addr) /* Disable SW_WD_TRG */ -static inline void pc87413_disable_sw_wd_trg(unsigned int swc_base_addr) +static inline void pc87413_disable_sw_wd_trg(void) { /* Disable SW_WD_TRG */ outb_p(inb(swc_base_addr + WDCTL) & 0x7f, swc_base_addr + WDCTL); @@ -222,18 +220,13 @@ static inline void pc87413_disable_sw_wd_trg(unsigned int swc_base_addr) static void pc87413_enable(void) { - unsigned int swc_base_addr; - spin_lock(&io_lock); - pc87413_select_wdt_out(); - pc87413_enable_swc(); - swc_base_addr = pc87413_get_swc_base(); - 
pc87413_swc_bank3(swc_base_addr); - pc87413_programm_wdto(swc_base_addr, timeout); - pc87413_enable_wden(swc_base_addr); - pc87413_enable_sw_wd_tren(swc_base_addr); - pc87413_enable_sw_wd_trg(swc_base_addr); + pc87413_swc_bank3(); + pc87413_programm_wdto(timeout); + pc87413_enable_wden(); + pc87413_enable_sw_wd_tren(); + pc87413_enable_sw_wd_trg(); spin_unlock(&io_lock); } @@ -242,17 +235,12 @@ static void pc87413_enable(void) static void pc87413_disable(void) { - unsigned int swc_base_addr; - spin_lock(&io_lock); - pc87413_select_wdt_out(); - pc87413_enable_swc(); - swc_base_addr = pc87413_get_swc_base(); - pc87413_swc_bank3(swc_base_addr); - pc87413_disable_sw_wd_tren(swc_base_addr); - pc87413_disable_sw_wd_trg(swc_base_addr); - pc87413_programm_wdto(swc_base_addr, 0); + pc87413_swc_bank3(); + pc87413_disable_sw_wd_tren(); + pc87413_disable_sw_wd_trg(); + pc87413_programm_wdto(0); spin_unlock(&io_lock); } @@ -261,20 +249,15 @@ static void pc87413_disable(void) static void pc87413_refresh(void) { - unsigned int swc_base_addr; - spin_lock(&io_lock); - pc87413_select_wdt_out(); - pc87413_enable_swc(); - swc_base_addr = pc87413_get_swc_base(); - pc87413_swc_bank3(swc_base_addr); - pc87413_disable_sw_wd_tren(swc_base_addr); - pc87413_disable_sw_wd_trg(swc_base_addr); - pc87413_programm_wdto(swc_base_addr, timeout); - pc87413_enable_wden(swc_base_addr); - pc87413_enable_sw_wd_tren(swc_base_addr); - pc87413_enable_sw_wd_trg(swc_base_addr); + pc87413_swc_bank3(); + pc87413_disable_sw_wd_tren(); + pc87413_disable_sw_wd_trg(); + pc87413_programm_wdto(timeout); + pc87413_enable_wden(); + pc87413_enable_sw_wd_tren(); + pc87413_enable_sw_wd_trg(); spin_unlock(&io_lock); } @@ -528,7 +511,8 @@ static int __init pc87413_init(void) printk(KERN_INFO PFX "Version " VERSION " at io 0x%X\n", WDT_INDEX_IO_PORT); - /* request_region(io, 2, "pc87413"); */ + if (!request_muxed_region(io, 2, MODNAME)) + return -EBUSY; ret = register_reboot_notifier(&pc87413_notifier); if (ret != 0) { @@ 
-541,12 +525,32 @@ static int __init pc87413_init(void) printk(KERN_ERR PFX "cannot register miscdev on minor=%d (err=%d)\n", WATCHDOG_MINOR, ret); - unregister_reboot_notifier(&pc87413_notifier); - return ret; + goto reboot_unreg; } printk(KERN_INFO PFX "initialized. timeout=%d min \n", timeout); + + pc87413_select_wdt_out(); + pc87413_enable_swc(); + pc87413_get_swc_base_addr(); + + if (!request_region(swc_base_addr, 0x20, MODNAME)) { + printk(KERN_ERR PFX + "cannot request SWC region at 0x%x\n", swc_base_addr); + ret = -EBUSY; + goto misc_unreg; + } + pc87413_enable(); + + release_region(io, 2); return 0; + +misc_unreg: + misc_deregister(&pc87413_miscdev); +reboot_unreg: + unregister_reboot_notifier(&pc87413_notifier); + release_region(io, 2); + return ret; } /** @@ -569,7 +573,7 @@ static void __exit pc87413_exit(void) misc_deregister(&pc87413_miscdev); unregister_reboot_notifier(&pc87413_notifier); - /* release_region(io, 2); */ + release_region(swc_base_addr, 0x20); printk(KERN_INFO MODNAME " watchdog component driver removed.\n"); } diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index f7f5aa00df60..30da88f47cd3 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -589,6 +589,15 @@ static int s3c2410wdt_resume(struct platform_device *dev) #define s3c2410wdt_resume NULL #endif /* CONFIG_PM */ +#ifdef CONFIG_OF +static const struct of_device_id s3c2410_wdt_match[] = { + { .compatible = "samsung,s3c2410-wdt" }, + {}, +}; +MODULE_DEVICE_TABLE(of, s3c2410_wdt_match); +#else +#define s3c2410_wdt_match NULL +#endif static struct platform_driver s3c2410wdt_driver = { .probe = s3c2410wdt_probe, @@ -599,6 +608,7 @@ static struct platform_driver s3c2410wdt_driver = { .driver = { .owner = THIS_MODULE, .name = "s3c2410-wdt", + .of_match_table = s3c2410_wdt_match, }, }; diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c index c7cf4b01f58d..029467e34636 100644 --- 
a/drivers/watchdog/sch311x_wdt.c +++ b/drivers/watchdog/sch311x_wdt.c @@ -472,15 +472,10 @@ static void sch311x_wdt_shutdown(struct platform_device *dev) sch311x_wdt_stop(); } -#define sch311x_wdt_suspend NULL -#define sch311x_wdt_resume NULL - static struct platform_driver sch311x_wdt_driver = { .probe = sch311x_wdt_probe, .remove = __devexit_p(sch311x_wdt_remove), .shutdown = sch311x_wdt_shutdown, - .suspend = sch311x_wdt_suspend, - .resume = sch311x_wdt_resume, .driver = { .owner = THIS_MODULE, .name = DRV_NAME, diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c index 0d80e08b6439..cc2cfbe33b30 100644 --- a/drivers/watchdog/sp805_wdt.c +++ b/drivers/watchdog/sp805_wdt.c @@ -134,6 +134,8 @@ static void wdt_enable(void) writel(INT_ENABLE | RESET_ENABLE, wdt->base + WDTCONTROL); writel(LOCK, wdt->base + WDTLOCK); + /* Flush posted writes. */ + readl(wdt->base + WDTLOCK); spin_unlock(&wdt->lock); } @@ -144,9 +146,10 @@ static void wdt_disable(void) writel(UNLOCK, wdt->base + WDTLOCK); writel(0, wdt->base + WDTCONTROL); - writel(0, wdt->base + WDTLOAD); writel(LOCK, wdt->base + WDTLOCK); + /* Flush posted writes. */ + readl(wdt->base + WDTLOCK); spin_unlock(&wdt->lock); } diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c new file mode 100644 index 000000000000..cfa1a1518aad --- /dev/null +++ b/drivers/watchdog/watchdog_core.c @@ -0,0 +1,111 @@ +/* + * watchdog_core.c + * + * (c) Copyright 2008-2011 Alan Cox <alan@lxorguk.ukuu.org.uk>, + * All Rights Reserved. + * + * (c) Copyright 2008-2011 Wim Van Sebroeck <wim@iguana.be>. + * + * This source code is part of the generic code that can be used + * by all the watchdog timer drivers. 
+ * + * Based on source code of the following authors: + * Matt Domsch <Matt_Domsch@dell.com>, + * Rob Radez <rob@osinvestor.com>, + * Rusty Lynch <rusty@linux.co.intel.com> + * Satyam Sharma <satyam@infradead.org> + * Randy Dunlap <randy.dunlap@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw. + * admit liability nor provide warranty for any of this software. + * This material is provided "AS-IS" and at no charge. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> /* For EXPORT_SYMBOL/module stuff/... */ +#include <linux/types.h> /* For standard types */ +#include <linux/errno.h> /* For the -ENODEV/... values */ +#include <linux/kernel.h> /* For printk/panic/... */ +#include <linux/watchdog.h> /* For watchdog specific items */ +#include <linux/init.h> /* For __init/__exit/... */ + +#include "watchdog_dev.h" /* For watchdog_dev_register/... */ + +/** + * watchdog_register_device() - register a watchdog device + * @wdd: watchdog device + * + * Register a watchdog device with the kernel so that the + * watchdog timer can be accessed from userspace. + * + * A zero is returned on success and a negative errno code for + * failure. 
+ */ +int watchdog_register_device(struct watchdog_device *wdd) +{ + int ret; + + if (wdd == NULL || wdd->info == NULL || wdd->ops == NULL) + return -EINVAL; + + /* Mandatory operations need to be supported */ + if (wdd->ops->start == NULL || wdd->ops->stop == NULL) + return -EINVAL; + + /* + * Check that we have valid min and max timeout values, if + * not reset them both to 0 (=not used or unknown) + */ + if (wdd->min_timeout > wdd->max_timeout) { + pr_info("Invalid min and max timeout values, resetting to 0!\n"); + wdd->min_timeout = 0; + wdd->max_timeout = 0; + } + + /* + * Note: now that all watchdog_device data has been verified, we + * will not check this anymore in other functions. If data gets + * corrupted in a later stage then we expect a kernel panic! + */ + + /* We only support 1 watchdog device via the /dev/watchdog interface */ + ret = watchdog_dev_register(wdd); + if (ret) { + pr_err("error registering /dev/watchdog (err=%d).\n", ret); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(watchdog_register_device); + +/** + * watchdog_unregister_device() - unregister a watchdog device + * @wdd: watchdog device to unregister + * + * Unregister a watchdog device that was previously successfully + * registered with watchdog_register_device(). 
+ */ +void watchdog_unregister_device(struct watchdog_device *wdd) +{ + int ret; + + if (wdd == NULL) + return; + + ret = watchdog_dev_unregister(wdd); + if (ret) + pr_err("error unregistering /dev/watchdog (err=%d).\n", ret); +} +EXPORT_SYMBOL_GPL(watchdog_unregister_device); + +MODULE_AUTHOR("Alan Cox <alan@lxorguk.ukuu.org.uk>"); +MODULE_AUTHOR("Wim Van Sebroeck <wim@iguana.be>"); +MODULE_DESCRIPTION("WatchDog Timer Driver Core"); +MODULE_LICENSE("GPL"); diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c new file mode 100644 index 000000000000..d33520d0b4c9 --- /dev/null +++ b/drivers/watchdog/watchdog_dev.c @@ -0,0 +1,395 @@ +/* + * watchdog_dev.c + * + * (c) Copyright 2008-2011 Alan Cox <alan@lxorguk.ukuu.org.uk>, + * All Rights Reserved. + * + * (c) Copyright 2008-2011 Wim Van Sebroeck <wim@iguana.be>. + * + * + * This source code is part of the generic code that can be used + * by all the watchdog timer drivers. + * + * This part of the generic code takes care of the following + * misc device: /dev/watchdog. + * + * Based on source code of the following authors: + * Matt Domsch <Matt_Domsch@dell.com>, + * Rob Radez <rob@osinvestor.com>, + * Rusty Lynch <rusty@linux.co.intel.com> + * Satyam Sharma <satyam@infradead.org> + * Randy Dunlap <randy.dunlap@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw. + * admit liability nor provide warranty for any of this software. + * This material is provided "AS-IS" and at no charge. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> /* For module stuff/... */ +#include <linux/types.h> /* For standard types (like size_t) */ +#include <linux/errno.h> /* For the -ENODEV/... 
values */ +#include <linux/kernel.h> /* For printk/panic/... */ +#include <linux/fs.h> /* For file operations */ +#include <linux/watchdog.h> /* For watchdog specific items */ +#include <linux/miscdevice.h> /* For handling misc devices */ +#include <linux/init.h> /* For __init/__exit/... */ +#include <linux/uaccess.h> /* For copy_to_user/put_user/... */ + +/* make sure we only register one /dev/watchdog device */ +static unsigned long watchdog_dev_busy; +/* the watchdog device behind /dev/watchdog */ +static struct watchdog_device *wdd; + +/* + * watchdog_ping: ping the watchdog. + * @wddev: the watchdog device to ping + * + * If the watchdog has no own ping operation then it needs to be + * restarted via the start operation. This wrapper function does + * exactly that. + * We only ping when the watchdog device is running. + */ + +static int watchdog_ping(struct watchdog_device *wddev) +{ + if (test_bit(WDOG_ACTIVE, &wdd->status)) { + if (wddev->ops->ping) + return wddev->ops->ping(wddev); /* ping the watchdog */ + else + return wddev->ops->start(wddev); /* restart watchdog */ + } + return 0; +} + +/* + * watchdog_start: wrapper to start the watchdog. + * @wddev: the watchdog device to start + * + * Start the watchdog if it is not active and mark it active. + * This function returns zero on success or a negative errno code for + * failure. + */ + +static int watchdog_start(struct watchdog_device *wddev) +{ + int err; + + if (!test_bit(WDOG_ACTIVE, &wdd->status)) { + err = wddev->ops->start(wddev); + if (err < 0) + return err; + + set_bit(WDOG_ACTIVE, &wdd->status); + } + return 0; +} + +/* + * watchdog_stop: wrapper to stop the watchdog. + * @wddev: the watchdog device to stop + * + * Stop the watchdog if it is still active and unmark it active. + * This function returns zero on success or a negative errno code for + * failure. + * If the 'nowayout' feature was set, the watchdog cannot be stopped. 
+ */ + +static int watchdog_stop(struct watchdog_device *wddev) +{ + int err = -EBUSY; + + if (test_bit(WDOG_NO_WAY_OUT, &wdd->status)) { + pr_info("%s: nowayout prevents watchdog to be stopped!\n", + wdd->info->identity); + return err; + } + + if (test_bit(WDOG_ACTIVE, &wdd->status)) { + err = wddev->ops->stop(wddev); + if (err < 0) + return err; + + clear_bit(WDOG_ACTIVE, &wdd->status); + } + return 0; +} + +/* + * watchdog_write: writes to the watchdog. + * @file: file from VFS + * @data: user address of data + * @len: length of data + * @ppos: pointer to the file offset + * + * A write to a watchdog device is defined as a keepalive ping. + * Writing the magic 'V' sequence allows the next close to turn + * off the watchdog (if 'nowayout' is not set). + */ + +static ssize_t watchdog_write(struct file *file, const char __user *data, + size_t len, loff_t *ppos) +{ + size_t i; + char c; + + if (len == 0) + return 0; + + /* + * Note: just in case someone wrote the magic character + * five months ago... + */ + clear_bit(WDOG_ALLOW_RELEASE, &wdd->status); + + /* scan to see whether or not we got the magic character */ + for (i = 0; i != len; i++) { + if (get_user(c, data + i)) + return -EFAULT; + if (c == 'V') + set_bit(WDOG_ALLOW_RELEASE, &wdd->status); + } + + /* someone wrote to us, so we send the watchdog a keepalive ping */ + watchdog_ping(wdd); + + return len; +} + +/* + * watchdog_ioctl: handle the different ioctl's for the watchdog device. + * @file: file handle to the device + * @cmd: watchdog command + * @arg: argument pointer + * + * The watchdog API defines a common set of functions for all watchdogs + * according to their available features. 
+ */ + +static long watchdog_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + void __user *argp = (void __user *)arg; + int __user *p = argp; + unsigned int val; + int err; + + if (wdd->ops->ioctl) { + err = wdd->ops->ioctl(wdd, cmd, arg); + if (err != -ENOIOCTLCMD) + return err; + } + + switch (cmd) { + case WDIOC_GETSUPPORT: + return copy_to_user(argp, wdd->info, + sizeof(struct watchdog_info)) ? -EFAULT : 0; + case WDIOC_GETSTATUS: + val = wdd->ops->status ? wdd->ops->status(wdd) : 0; + return put_user(val, p); + case WDIOC_GETBOOTSTATUS: + return put_user(wdd->bootstatus, p); + case WDIOC_SETOPTIONS: + if (get_user(val, p)) + return -EFAULT; + if (val & WDIOS_DISABLECARD) { + err = watchdog_stop(wdd); + if (err < 0) + return err; + } + if (val & WDIOS_ENABLECARD) { + err = watchdog_start(wdd); + if (err < 0) + return err; + } + return 0; + case WDIOC_KEEPALIVE: + if (!(wdd->info->options & WDIOF_KEEPALIVEPING)) + return -EOPNOTSUPP; + watchdog_ping(wdd); + return 0; + case WDIOC_SETTIMEOUT: + if ((wdd->ops->set_timeout == NULL) || + !(wdd->info->options & WDIOF_SETTIMEOUT)) + return -EOPNOTSUPP; + if (get_user(val, p)) + return -EFAULT; + if ((wdd->max_timeout != 0) && + (val < wdd->min_timeout || val > wdd->max_timeout)) + return -EINVAL; + err = wdd->ops->set_timeout(wdd, val); + if (err < 0) + return err; + wdd->timeout = val; + /* If the watchdog is active then we send a keepalive ping + * to make sure that the watchdog keep's running (and if + * possible that it takes the new timeout) */ + watchdog_ping(wdd); + /* Fall */ + case WDIOC_GETTIMEOUT: + /* timeout == 0 means that we don't know the timeout */ + if (wdd->timeout == 0) + return -EOPNOTSUPP; + return put_user(wdd->timeout, p); + default: + return -ENOTTY; + } +} + +/* + * watchdog_open: open the /dev/watchdog device. + * @inode: inode of device + * @file: file handle to device + * + * When the /dev/watchdog device gets opened, we start the watchdog. 
+ * Watch out: the /dev/watchdog device is single open, so we make sure + * it can only be opened once. + */ + +static int watchdog_open(struct inode *inode, struct file *file) +{ + int err = -EBUSY; + + /* the watchdog is single open! */ + if (test_and_set_bit(WDOG_DEV_OPEN, &wdd->status)) + return -EBUSY; + + /* + * If the /dev/watchdog device is open, we don't want the module + * to be unloaded. + */ + if (!try_module_get(wdd->ops->owner)) + goto out; + + err = watchdog_start(wdd); + if (err < 0) + goto out_mod; + + /* dev/watchdog is a virtual (and thus non-seekable) filesystem */ + return nonseekable_open(inode, file); + +out_mod: + module_put(wdd->ops->owner); +out: + clear_bit(WDOG_DEV_OPEN, &wdd->status); + return err; +} + +/* + * watchdog_release: release the /dev/watchdog device. + * @inode: inode of device + * @file: file handle to device + * + * This is the code for when /dev/watchdog gets closed. We will only + * stop the watchdog when we have received the magic char (and nowayout + * was not set), else the watchdog will keep running. + */ + +static int watchdog_release(struct inode *inode, struct file *file) +{ + int err = -EBUSY; + + /* + * We only stop the watchdog if we received the magic character + * or if WDIOF_MAGICCLOSE is not set. If nowayout was set then + * watchdog_stop will fail. 
+ */ + if (test_and_clear_bit(WDOG_ALLOW_RELEASE, &wdd->status) || + !(wdd->info->options & WDIOF_MAGICCLOSE)) + err = watchdog_stop(wdd); + + /* If the watchdog was not stopped, send a keepalive ping */ + if (err < 0) { + pr_crit("%s: watchdog did not stop!\n", wdd->info->identity); + watchdog_ping(wdd); + } + + /* Allow the owner module to be unloaded again */ + module_put(wdd->ops->owner); + + /* make sure that /dev/watchdog can be re-opened */ + clear_bit(WDOG_DEV_OPEN, &wdd->status); + + return 0; +} + +static const struct file_operations watchdog_fops = { + .owner = THIS_MODULE, + .write = watchdog_write, + .unlocked_ioctl = watchdog_ioctl, + .open = watchdog_open, + .release = watchdog_release, +}; + +static struct miscdevice watchdog_miscdev = { + .minor = WATCHDOG_MINOR, + .name = "watchdog", + .fops = &watchdog_fops, +}; + +/* + * watchdog_dev_register: + * @watchdog: watchdog device + * + * Register a watchdog device as /dev/watchdog. /dev/watchdog + * is actually a miscdevice and thus we set it up like that. + */ + +int watchdog_dev_register(struct watchdog_device *watchdog) +{ + int err; + + /* Only one device can register for /dev/watchdog */ + if (test_and_set_bit(0, &watchdog_dev_busy)) { + pr_err("only one watchdog can use /dev/watchdog.\n"); + return -EBUSY; + } + + wdd = watchdog; + + err = misc_register(&watchdog_miscdev); + if (err != 0) { + pr_err("%s: cannot register miscdev on minor=%d (err=%d).\n", + watchdog->info->identity, WATCHDOG_MINOR, err); + goto out; + } + + return 0; + +out: + wdd = NULL; + clear_bit(0, &watchdog_dev_busy); + return err; +} + +/* + * watchdog_dev_unregister: + * @watchdog: watchdog device + * + * Deregister the /dev/watchdog device. 
+ */ + +int watchdog_dev_unregister(struct watchdog_device *watchdog) +{ + /* Check that a watchdog device was registered in the past */ + if (!test_bit(0, &watchdog_dev_busy) || !wdd) + return -ENODEV; + + /* We can only unregister the watchdog device that was registered */ + if (watchdog != wdd) { + pr_err("%s: watchdog was not registered as /dev/watchdog.\n", + watchdog->info->identity); + return -ENODEV; + } + + misc_deregister(&watchdog_miscdev); + wdd = NULL; + clear_bit(0, &watchdog_dev_busy); + return 0; +} diff --git a/drivers/watchdog/watchdog_dev.h b/drivers/watchdog/watchdog_dev.h new file mode 100644 index 000000000000..bc7612be25ce --- /dev/null +++ b/drivers/watchdog/watchdog_dev.h @@ -0,0 +1,33 @@ +/* + * watchdog_core.h + * + * (c) Copyright 2008-2011 Alan Cox <alan@lxorguk.ukuu.org.uk>, + * All Rights Reserved. + * + * (c) Copyright 2008-2011 Wim Van Sebroeck <wim@iguana.be>. + * + * This source code is part of the generic code that can be used + * by all the watchdog timer drivers. + * + * Based on source code of the following authors: + * Matt Domsch <Matt_Domsch@dell.com>, + * Rob Radez <rob@osinvestor.com>, + * Rusty Lynch <rusty@linux.co.intel.com> + * Satyam Sharma <satyam@infradead.org> + * Randy Dunlap <randy.dunlap@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw. + * admit liability nor provide warranty for any of this software. + * This material is provided "AS-IS" and at no charge. + */ + +/* + * Functions/procedures to be called by the core + */ +int watchdog_dev_register(struct watchdog_device *); +int watchdog_dev_unregister(struct watchdog_device *); |