415 files changed, 29586 insertions, 8121 deletions
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 320006019e68..92ed9692c47e 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -266,7 +266,8 @@ config ACPI_CUSTOM_DSDT
 	default ACPI_CUSTOM_DSDT_FILE != ""
 
 config ACPI_INITRD_TABLE_OVERRIDE
-	bool "ACPI tables can be passed via uncompressed cpio in initrd"
+	bool "ACPI tables override via initrd"
+	depends on BLK_DEV_INITRD && X86
 	default n
 	help
 	  This option provides functionality to override arbitrary ACPI tables
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 7ae2750bb457..d668a8ae602b 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -48,8 +48,8 @@
 #include <linux/genalloc.h>
 #include <linux/pci.h>
 #include <linux/aer.h>
-#include <acpi/apei.h>
-#include <acpi/hed.h>
+
+#include <acpi/ghes.h>
 #include <asm/mce.h>
 #include <asm/tlbflush.h>
 #include <asm/nmi.h>
@@ -84,42 +84,6 @@
 	((struct acpi_hest_generic_status *)				\
 	 ((struct ghes_estatus_node *)(estatus_node) + 1))
 
-/*
- * One struct ghes is created for each generic hardware error source.
- * It provides the context for APEI hardware error timer/IRQ/SCI/NMI
- * handler.
- *
- * estatus: memory buffer for error status block, allocated during
- * HEST parsing.
- */
-#define GHES_TO_CLEAR		0x0001
-#define GHES_EXITING		0x0002
-
-struct ghes {
-	struct acpi_hest_generic *generic;
-	struct acpi_hest_generic_status *estatus;
-	u64 buffer_paddr;
-	unsigned long flags;
-	union {
-		struct list_head list;
-		struct timer_list timer;
-		unsigned int irq;
-	};
-};
-
-struct ghes_estatus_node {
-	struct llist_node llnode;
-	struct acpi_hest_generic *generic;
-};
-
-struct ghes_estatus_cache {
-	u32 estatus_len;
-	atomic_t count;
-	struct acpi_hest_generic *generic;
-	unsigned long long time_in;
-	struct rcu_head rcu;
-};
-
 bool ghes_disable;
 module_param_named(disable, ghes_disable, bool, 0);
 
@@ -333,13 +297,6 @@ static void ghes_fini(struct ghes *ghes)
 	apei_unmap_generic_address(&ghes->generic->error_status_address);
 }
 
-enum {
-	GHES_SEV_NO = 0x0,
-	GHES_SEV_CORRECTED = 0x1,
-	GHES_SEV_RECOVERABLE = 0x2,
-	GHES_SEV_PANIC = 0x3,
-};
-
 static inline int ghes_severity(int severity)
 {
 	switch (severity) {
@@ -452,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes)
 	ghes->flags &= ~GHES_TO_CLEAR;
 }
 
-static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
+static void ghes_do_proc(struct ghes *ghes,
+			 const struct acpi_hest_generic_status *estatus)
 {
 	int sev, sec_sev;
 	struct acpi_hest_generic_data *gdata;
@@ -464,6 +422,8 @@ static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
 				 CPER_SEC_PLATFORM_MEM)) {
 			struct cper_sec_mem_err *mem_err;
 			mem_err = (struct cper_sec_mem_err *)(gdata+1);
+			ghes_edac_report_mem_error(ghes, sev, mem_err);
+
 #ifdef CONFIG_X86_MCE
 			apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
 						  mem_err);
@@ -682,7 +642,7 @@ static int ghes_proc(struct ghes *ghes)
 		if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
 			ghes_estatus_cache_add(ghes->generic, ghes->estatus);
 	}
-	ghes_do_proc(ghes->estatus);
+	ghes_do_proc(ghes, ghes->estatus);
 out:
 	ghes_clear_estatus(ghes);
 	return 0;
@@ -775,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
 		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
 		len = apei_estatus_len(estatus);
 		node_len = GHES_ESTATUS_NODE_LEN(len);
-		ghes_do_proc(estatus);
+		ghes_do_proc(estatus_node->ghes, estatus);
 		if (!ghes_estatus_cached(estatus)) {
 			generic = estatus_node->generic;
 			if (ghes_print_estatus(NULL, generic, estatus))
@@ -864,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
 		estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
 						      node_len);
 		if (estatus_node) {
+			estatus_node->ghes = ghes;
 			estatus_node->generic = ghes->generic;
 			estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
 			memcpy(estatus, ghes->estatus, len);
@@ -942,6 +903,11 @@ static int ghes_probe(struct platform_device *ghes_dev)
 		ghes = NULL;
 		goto err;
 	}
+
+	rc = ghes_edac_register(ghes, &ghes_dev->dev);
+	if (rc < 0)
+		goto err;
+
 	switch (generic->notify.type) {
 	case ACPI_HEST_NOTIFY_POLLED:
 		ghes->timer.function = ghes_poll_func;
@@ -954,13 +920,13 @@ static int ghes_probe(struct platform_device *ghes_dev)
 		if (acpi_gsi_to_irq(generic->notify.vector, &ghes->irq)) {
 			pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
 			       generic->header.source_id);
-			goto err;
+			goto err_edac_unreg;
 		}
 		if (request_irq(ghes->irq, ghes_irq_func,
 				0, "GHES IRQ", ghes)) {
 			pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
 			       generic->header.source_id);
-			goto err;
+			goto err_edac_unreg;
 		}
 		break;
 	case ACPI_HEST_NOTIFY_SCI:
@@ -986,6 +952,8 @@ static int ghes_probe(struct platform_device *ghes_dev)
 	platform_set_drvdata(ghes_dev, ghes);
 
 	return 0;
+err_edac_unreg:
+	ghes_edac_unregister(ghes);
 err:
 	if (ghes) {
 		ghes_fini(ghes);
@@ -1038,6 +1006,9 @@ static int ghes_remove(struct platform_device *ghes_dev)
 	}
 
 	ghes_fini(ghes);
+
+	ghes_edac_unregister(ghes);
+
 	kfree(ghes);
 
 	platform_set_drvdata(ghes_dev, NULL);
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 59844ee149be..33e609f63585 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -282,10 +282,10 @@ acpi_table_parse_srat(enum acpi_srat_type id,
 					    handler, max_entries);
 }
 
-static int srat_mem_cnt;
-
-void __init early_parse_srat(void)
+int __init acpi_numa_init(void)
 {
+	int cnt = 0;
+
 	/*
 	 * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
 	 * SRAT cpu entries could have different order with that in MADT.
@@ -295,24 +295,21 @@ void __init early_parse_srat(void)
 	/* SRAT: Static Resource Affinity Table */
 	if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
 		acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
-				      acpi_parse_x2apic_affinity, 0);
+				     acpi_parse_x2apic_affinity, 0);
 		acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
-				      acpi_parse_processor_affinity, 0);
-		srat_mem_cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
-						     acpi_parse_memory_affinity,
-						     NR_NODE_MEMBLKS);
+				     acpi_parse_processor_affinity, 0);
+		cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
+					    acpi_parse_memory_affinity,
+					    NR_NODE_MEMBLKS);
 	}
-}
 
-int __init acpi_numa_init(void)
-{
 	/* SLIT: System Locality Information Table */
 	acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);
 
 	acpi_numa_arch_fixup();
 
-	if (srat_mem_cnt < 0)
-		return srat_mem_cnt;
+	if (cnt < 0)
+		return cnt;
 	else if (!parsed_numa_memblks)
 		return -ENOENT;
 	return 0;
diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c
index ab92785f54dc..093c43554963 100644
--- a/drivers/amba/tegra-ahb.c
+++ b/drivers/amba/tegra-ahb.c
@@ -130,7 +130,7 @@ static inline void gizmo_writel(struct tegra_ahb *ahb, u32 value, u32 offset)
 	writel(value, ahb->regs + offset);
 }
 
-#ifdef CONFIG_ARCH_TEGRA_3x_SOC
+#ifdef CONFIG_TEGRA_IOMMU_SMMU
 static int tegra_ahb_match_by_smmu(struct device *dev, void *data)
 {
 	struct tegra_ahb *ahb = dev_get_drvdata(dev);
diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c
index b22d71cac54c..0e3f8f9dcd29 100644
--- a/drivers/atm/atmtcp.c
+++ b/drivers/atm/atmtcp.c
@@ -157,7 +157,6 @@ static int atmtcp_v_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg)
 {
 	struct atm_cirange ci;
 	struct atm_vcc *vcc;
-	struct hlist_node *node;
 	struct sock *s;
 	int i;
 
@@ -171,7 +170,7 @@ static int atmtcp_v_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg)
 	for(i = 0; i < VCC_HTABLE_SIZE; ++i) {
 		struct hlist_head *head = &vcc_hash[i];
 
-		sk_for_each(s, node, head) {
+		sk_for_each(s, head) {
 			vcc = atm_sk(s);
 			if (vcc->dev != dev)
 				continue;
@@ -264,12 +263,11 @@ static struct atm_vcc *find_vcc(struct atm_dev *dev, short vpi, int vci)
 {
         struct hlist_head *head;
         struct atm_vcc *vcc;
-        struct hlist_node *node;
         struct sock *s;
 
         head = &vcc_hash[vci & (VCC_HTABLE_SIZE -1)];
 
-        sk_for_each(s, node, head) {
+	sk_for_each(s, head) {
                 vcc = atm_sk(s);
                 if (vcc->dev == dev &&
                     vcc->vci == vci && vcc->vpi == vpi &&
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index c1eb6fa8ac35..b1955ba40d63 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -2093,7 +2093,6 @@ static unsigned char eni_phy_get(struct atm_dev *dev,unsigned long addr)
 
 static int eni_proc_read(struct atm_dev *dev,loff_t *pos,char *page)
 {
-	struct hlist_node *node;
 	struct sock *s;
 	static const char *signal[] = { "LOST","unknown","okay" };
 	struct eni_dev *eni_dev = ENI_DEV(dev);
@@ -2171,7 +2170,7 @@ static int eni_proc_read(struct atm_dev *dev,loff_t *pos,char *page)
 	for(i = 0; i < VCC_HTABLE_SIZE; ++i) {
 		struct hlist_head *head = &vcc_hash[i];
 
-		sk_for_each(s, node, head) {
+		sk_for_each(s, head) {
 			struct eni_vcc *eni_vcc;
 			int length;
 
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 72b6960fa95f..d6891267f5bb 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -329,7 +329,6 @@ __find_vcc(struct he_dev *he_dev, unsigned cid)
 {
 	struct hlist_head *head;
 	struct atm_vcc *vcc;
-	struct hlist_node *node;
 	struct sock *s;
 	short vpi;
 	int vci;
@@ -338,7 +337,7 @@ __find_vcc(struct he_dev *he_dev, unsigned cid)
 	vci = cid & ((1 << he_dev->vcibits) - 1);
 	head = &vcc_hash[vci & (VCC_HTABLE_SIZE -1)];
 
-	sk_for_each(s, node, head) {
+	sk_for_each(s, head) {
 		vcc = atm_sk(s);
 		if (vcc->dev == he_dev->atm_dev &&
 		    vcc->vci == vci && vcc->vpi == vpi &&
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index ed1d2b7f923b..6587dc295eb0 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -251,7 +251,6 @@ static void nicstar_remove_one(struct pci_dev *pcidev)
 		if (card->scd2vc[j] != NULL)
 			free_scq(card, card->scd2vc[j]->scq, card->scd2vc[j]->tx_vcc);
 	}
-	idr_remove_all(&card->idr);
 	idr_destroy(&card->idr);
 	pci_free_consistent(card->pcidev, NS_RSQSIZE + NS_RSQ_ALIGNMENT,
 			    card->rsq.org, card->rsq.dma);
@@ -950,11 +949,10 @@ static void free_scq(ns_dev *card, scq_info *scq, struct atm_vcc *vcc)
 static void push_rxbufs(ns_dev * card, struct sk_buff *skb)
 {
 	struct sk_buff *handle1, *handle2;
-	u32 id1 = 0, id2 = 0;
+	int id1, id2;
 	u32 addr1, addr2;
 	u32 stat;
 	unsigned long flags;
-	int err;
 
 	/* *BARF* */
 	handle2 = NULL;
@@ -1027,23 +1025,12 @@ static void push_rxbufs(ns_dev * card, struct sk_buff *skb)
 				card->lbfqc += 2;
 		}
 
-		do {
-			if (!idr_pre_get(&card->idr, GFP_ATOMIC)) {
-				printk(KERN_ERR
-				       "nicstar%d: no free memory for idr\n",
-				       card->index);
-				goto out;
-			}
-
-			if (!id1)
-				err = idr_get_new_above(&card->idr, handle1, 0, &id1);
-
-			if (!id2 && err == 0)
-				err = idr_get_new_above(&card->idr, handle2, 0, &id2);
-
-		} while (err == -EAGAIN);
+		id1 = idr_alloc(&card->idr, handle1, 0, 0, GFP_ATOMIC);
+		if (id1 < 0)
+			goto out;
 
-		if (err)
+		id2 = idr_alloc(&card->idr, handle2, 0, 0, GFP_ATOMIC);
+		if (id2 < 0)
 			goto out;
 
 		spin_lock_irqsave(&card->res_lock, flags);
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 0474a89170b9..32784d18d1f7 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -896,12 +896,11 @@ static struct atm_vcc *find_vcc(struct atm_dev *dev, short vpi, int vci)
 {
 	struct hlist_head *head;
 	struct atm_vcc *vcc = NULL;
-	struct hlist_node *node;
 	struct sock *s;
 
 	read_lock(&vcc_sklist_lock);
 	head = &vcc_hash[vci & (VCC_HTABLE_SIZE -1)];
-	sk_for_each(s, node, head) {
+	sk_for_each(s, head) {
 		vcc = atm_sk(s);
 		if (vcc->dev == dev && vcc->vci == vci &&
 		    vcc->vpi == vpi && vcc->qos.rxtp.traffic_class != ATM_NONE &&
diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c
index ff5b745c4705..2a7cb0df176b 100644
--- a/drivers/base/dma-buf.c
+++ b/drivers/base/dma-buf.c
@@ -39,6 +39,8 @@ static int dma_buf_release(struct inode *inode, struct file *file)
 
 	dmabuf = file->private_data;
 
+	BUG_ON(dmabuf->vmapping_counter);
+
 	dmabuf->ops->release(dmabuf);
 	kfree(dmabuf);
 	return 0;
@@ -445,6 +447,9 @@ EXPORT_SYMBOL_GPL(dma_buf_kunmap);
 int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
 		 unsigned long pgoff)
 {
+	struct file *oldfile;
+	int ret;
+
 	if (WARN_ON(!dmabuf || !vma))
 		return -EINVAL;
 
@@ -458,14 +463,22 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
 		return -EINVAL;
 
 	/* readjust the vma */
-	if (vma->vm_file)
-		fput(vma->vm_file);
-
-	vma->vm_file = get_file(dmabuf->file);
-
+	get_file(dmabuf->file);
+	oldfile = vma->vm_file;
+	vma->vm_file = dmabuf->file;
 	vma->vm_pgoff = pgoff;
 
-	return dmabuf->ops->mmap(dmabuf, vma);
+	ret = dmabuf->ops->mmap(dmabuf, vma);
+	if (ret) {
+		/* restore old parameters on failure */
+		vma->vm_file = oldfile;
+		fput(dmabuf->file);
+	} else {
+		if (oldfile)
+			fput(oldfile);
+	}
+	return ret;
+
 }
 EXPORT_SYMBOL_GPL(dma_buf_mmap);
 
@@ -481,12 +494,34 @@ EXPORT_SYMBOL_GPL(dma_buf_mmap);
  */
 void *dma_buf_vmap(struct dma_buf *dmabuf)
 {
+	void *ptr;
+
 	if (WARN_ON(!dmabuf))
 		return NULL;
 
-	if (dmabuf->ops->vmap)
-		return dmabuf->ops->vmap(dmabuf);
-	return NULL;
+	if (!dmabuf->ops->vmap)
+		return NULL;
+
+	mutex_lock(&dmabuf->lock);
+	if (dmabuf->vmapping_counter) {
+		dmabuf->vmapping_counter++;
+		BUG_ON(!dmabuf->vmap_ptr);
+		ptr = dmabuf->vmap_ptr;
+		goto out_unlock;
+	}
+
+	BUG_ON(dmabuf->vmap_ptr);
+
+	ptr = dmabuf->ops->vmap(dmabuf);
+	if (IS_ERR_OR_NULL(ptr))
+		goto out_unlock;
+
+	dmabuf->vmap_ptr = ptr;
+	dmabuf->vmapping_counter = 1;
+
+out_unlock:
+	mutex_unlock(&dmabuf->lock);
+	return ptr;
 }
 EXPORT_SYMBOL_GPL(dma_buf_vmap);
 
@@ -500,7 +535,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
 	if (WARN_ON(!dmabuf))
 		return;
 
-	if (dmabuf->ops->vunmap)
-		dmabuf->ops->vunmap(dmabuf, vaddr);
+	BUG_ON(!dmabuf->vmap_ptr);
+	BUG_ON(dmabuf->vmapping_counter == 0);
+	BUG_ON(dmabuf->vmap_ptr != vaddr);
+
+	mutex_lock(&dmabuf->lock);
+	if (--dmabuf->vmapping_counter == 0) {
+		if (dmabuf->ops->vunmap)
+			dmabuf->ops->vunmap(dmabuf, vaddr);
+		dmabuf->vmap_ptr = NULL;
+	}
+	mutex_unlock(&dmabuf->lock);
 }
 EXPORT_SYMBOL_GPL(dma_buf_vunmap);
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 8f12dc78a848..5b5ee79ff236 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -7054,6 +7054,7 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
 	else
 		ErrorCode =  0;
       }
+      break;
       default:
 	ErrorCode = -ENOTTY;
     }
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 824e09c4d0d7..5dc0daed8fac 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -63,19 +63,6 @@ config AMIGA_Z2RAM
 	  To compile this driver as a module, choose M here: the
 	  module will be called z2ram.
 
-config BLK_DEV_XD
-	tristate "XT hard disk support"
-	depends on ISA && ISA_DMA_API
-	select CHECK_SIGNATURE
-	help
-	  Very old 8 bit hard disk controllers used in the IBM XT computer
-	  will be supported if you say Y here.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called xd.
-
-	  It's pretty unlikely that you have one of these: say N.
-
 config GDROM
 	tristate "SEGA Dreamcast GD-ROM drive"
 	depends on SH_DREAMCAST
@@ -544,4 +531,14 @@ config BLK_DEV_RBD
 
 	  If unsure, say N.
 
+config BLK_DEV_RSXX
+	tristate "RamSam PCIe Flash SSD Device Driver"
+	depends on PCI
+	help
+	  Device driver for IBM's high speed PCIe SSD
+	  storage devices: RamSan-70 and RamSan-80.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called rsxx.
+
 endif # BLK_DEV
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 17e82df3df74..a3b40232c6ab 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -15,7 +15,6 @@ obj-$(CONFIG_ATARI_FLOPPY)	+= ataflop.o
 obj-$(CONFIG_AMIGA_Z2RAM)	+= z2ram.o
 obj-$(CONFIG_BLK_DEV_RAM)	+= brd.o
 obj-$(CONFIG_BLK_DEV_LOOP)	+= loop.o
-obj-$(CONFIG_BLK_DEV_XD)	+= xd.o
 obj-$(CONFIG_BLK_CPQ_DA)	+= cpqarray.o
 obj-$(CONFIG_BLK_CPQ_CISS_DA)  += cciss.o
 obj-$(CONFIG_BLK_DEV_DAC960)	+= DAC960.o
@@ -41,4 +40,6 @@ obj-$(CONFIG_BLK_DEV_DRBD)     += drbd/
 obj-$(CONFIG_BLK_DEV_RBD)     += rbd.o
 obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX)	+= mtip32xx/
 
+obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
+
 swim_mod-y	:= swim.o swim_asm.o
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 8c13eeb83c53..e98da675f0c1 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2660,25 +2660,24 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor,
 	mdev->read_requests = RB_ROOT;
 	mdev->write_requests = RB_ROOT;
 
-	if (!idr_pre_get(&minors, GFP_KERNEL))
-		goto out_no_minor_idr;
-	if (idr_get_new_above(&minors, mdev, minor, &minor_got))
+	minor_got = idr_alloc(&minors, mdev, minor, minor + 1, GFP_KERNEL);
+	if (minor_got < 0) {
+		if (minor_got == -ENOSPC) {
+			err = ERR_MINOR_EXISTS;
+			drbd_msg_put_info("requested minor exists already");
+		}
 		goto out_no_minor_idr;
-	if (minor_got != minor) {
-		err = ERR_MINOR_EXISTS;
-		drbd_msg_put_info("requested minor exists already");
-		goto out_idr_remove_minor;
 	}
 
-	if (!idr_pre_get(&tconn->volumes, GFP_KERNEL))
-		goto out_idr_remove_minor;
-	if (idr_get_new_above(&tconn->volumes, mdev, vnr, &vnr_got))
+	vnr_got = idr_alloc(&tconn->volumes, mdev, vnr, vnr + 1, GFP_KERNEL);
+	if (vnr_got < 0) {
+		if (vnr_got == -ENOSPC) {
+			err = ERR_INVALID_REQUEST;
+			drbd_msg_put_info("requested volume exists already");
+		}
 		goto out_idr_remove_minor;
-	if (vnr_got != vnr) {
-		err = ERR_INVALID_REQUEST;
-		drbd_msg_put_info("requested volume exists already");
-		goto out_idr_remove_vol;
 	}
+
 	add_disk(disk);
 	kref_init(&mdev->kref); /* one ref for both idrs and the the add_disk */
 
@@ -2689,8 +2688,6 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor,
 
 	return NO_ERROR;
 
-out_idr_remove_vol:
-	idr_remove(&tconn->volumes, vnr_got);
 out_idr_remove_minor:
 	idr_remove(&minors, minor_got);
 	synchronize_rcu();
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 8031a8cdd698..747bb2af69dc 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -162,12 +162,13 @@ static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
 
 static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file)
 {
-	loff_t size, loopsize;
+	loff_t loopsize;
 
 	/* Compute loopsize in bytes */
-	size = i_size_read(file->f_mapping->host);
-	loopsize = size - offset;
-	/* offset is beyond i_size, wierd but possible */
+	loopsize = i_size_read(file->f_mapping->host);
+	if (offset > 0)
+		loopsize -= offset;
+	/* offset is beyond i_size, weird but possible */
 	if (loopsize < 0)
 		return 0;
 
@@ -190,6 +191,7 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
 {
 	loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
 	sector_t x = (sector_t)size;
+	struct block_device *bdev = lo->lo_device;
 
 	if (unlikely((loff_t)x != size))
 		return -EFBIG;
@@ -198,6 +200,9 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
 	if (lo->lo_sizelimit != sizelimit)
 		lo->lo_sizelimit = sizelimit;
 	set_capacity(lo->lo_disk, x);
+	bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
+	/* let user-space know about the new size */
+	kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
 	return 0;
 }
 
@@ -1091,10 +1096,10 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 		return err;
 
 	if (lo->lo_offset != info->lo_offset ||
-	    lo->lo_sizelimit != info->lo_sizelimit) {
+	    lo->lo_sizelimit != info->lo_sizelimit)
 		if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit))
 			return -EFBIG;
-	}
+
 	loop_config_discard(lo);
 
 	memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
@@ -1271,28 +1276,10 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
 
 static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
 {
-	int err;
-	sector_t sec;
-	loff_t sz;
-
-	err = -ENXIO;
 	if (unlikely(lo->lo_state != Lo_bound))
-		goto out;
-	err = figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
-	if (unlikely(err))
-		goto out;
-	sec = get_capacity(lo->lo_disk);
-	/* the width of sector_t may be narrow for bit-shift */
-	sz = sec;
-	sz <<= 9;
-	mutex_lock(&bdev->bd_mutex);
-	bd_set_size(bdev, sz);
-	/* let user-space know about the new size */
-	kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
-	mutex_unlock(&bdev->bd_mutex);
+		return -ENXIO;
 
- out:
-	return err;
+	return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
 }
 
 static int lo_ioctl(struct block_device *bdev, fmode_t mode,
@@ -1624,30 +1611,17 @@ static int loop_add(struct loop_device **l, int i)
 	if (!lo)
 		goto out;
 
-	if (!idr_pre_get(&loop_index_idr, GFP_KERNEL))
-		goto out_free_dev;
-
+	/* allocate id, if @id >= 0, we're requesting that specific id */
 	if (i >= 0) {
-		int m;
-
-		/* create specific i in the index */
-		err = idr_get_new_above(&loop_index_idr, lo, i, &m);
-		if (err >= 0 && i != m) {
-			idr_remove(&loop_index_idr, m);
+		err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL);
+		if (err == -ENOSPC)
 			err = -EEXIST;
-		}
-	} else if (i == -1) {
-		int m;
-
-		/* get next free nr */
-		err = idr_get_new(&loop_index_idr, lo, &m);
-		if (err >= 0)
-			i = m;
 	} else {
-		err = -EINVAL;
+		err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL);
 	}
 	if (err < 0)
 		goto out_free_dev;
+	i = err;
 
 	lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
 	if (!lo->lo_queue)
@@ -1858,11 +1832,15 @@ static int __init loop_init(void)
 		max_part = (1UL << part_shift) - 1;
 	}
 
-	if ((1UL << part_shift) > DISK_MAX_PARTS)
-		return -EINVAL;
+	if ((1UL << part_shift) > DISK_MAX_PARTS) {
+		err = -EINVAL;
+		goto misc_out;
+	}
 
-	if (max_loop > 1UL << (MINORBITS - part_shift))
-		return -EINVAL;
+	if (max_loop > 1UL << (MINORBITS - part_shift)) {
+		err = -EINVAL;
+		goto misc_out;
+	}
 
 	/*
 	 * If max_loop is specified, create that many devices upfront.
@@ -1880,8 +1858,10 @@ static int __init loop_init(void)
 		range = 1UL << MINORBITS;
 	}
 
-	if (register_blkdev(LOOP_MAJOR, "loop"))
-		return -EIO;
+	if (register_blkdev(LOOP_MAJOR, "loop")) {
+		err = -EIO;
+		goto misc_out;
+	}
 
 	blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
 				  THIS_MODULE, loop_probe, NULL, NULL);
@@ -1894,6 +1874,10 @@ static int __init loop_init(void)
 
 	printk(KERN_INFO "loop: module loaded\n");
 	return 0;
+
+misc_out:
+	misc_deregister(&loop_misc);
+	return err;
 }
 
 static int loop_exit_cb(int id, void *ptr, void *data)
@@ -1911,7 +1895,6 @@ static void __exit loop_exit(void)
 	range = max_loop ? max_loop << part_shift : 1UL << MINORBITS;
 
 	idr_for_each(&loop_index_idr, &loop_exit_cb, NULL);
-	idr_remove_all(&loop_index_idr);
 	idr_destroy(&loop_index_idr);
 
 	blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
diff --git a/drivers/block/mtip32xx/Kconfig b/drivers/block/mtip32xx/Kconfig
index 0ba837fc62a8..1fca1f996b45 100644
--- a/drivers/block/mtip32xx/Kconfig
+++ b/drivers/block/mtip32xx/Kconfig
@@ -4,6 +4,6 @@
 
 config BLK_DEV_PCIESSD_MTIP32XX
 	tristate "Block Device Driver for Micron PCIe SSDs"
-	depends on PCI
+	depends on PCI && GENERIC_HARDIRQS
 	help
           This enables the block driver for Micron PCIe SSDs.
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 3fd100990453..11cc9522cdd4 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -88,6 +88,8 @@ static int instance;
 static int mtip_major;
 static struct dentry *dfs_parent;
 
+static u32 cpu_use[NR_CPUS];
+
 static DEFINE_SPINLOCK(rssd_index_lock);
 static DEFINE_IDA(rssd_index_ida);
 
@@ -296,16 +298,17 @@ static int hba_reset_nosleep(struct driver_data *dd)
  */
 static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag)
 {
-	atomic_set(&port->commands[tag].active, 1);
+	int group = tag >> 5;
 
-	spin_lock(&port->cmd_issue_lock);
+	atomic_set(&port->commands[tag].active, 1);
 
+	/* guard SACT and CI registers */
+	spin_lock(&port->cmd_issue_lock[group]);
 	writel((1 << MTIP_TAG_BIT(tag)),
 			port->s_active[MTIP_TAG_INDEX(tag)]);
 	writel((1 << MTIP_TAG_BIT(tag)),
 			port->cmd_issue[MTIP_TAG_INDEX(tag)]);
-
-	spin_unlock(&port->cmd_issue_lock);
+	spin_unlock(&port->cmd_issue_lock[group]);
 
 	/* Set the command's timeout value.*/
 	port->commands[tag].comp_time = jiffies + msecs_to_jiffies(
@@ -964,56 +967,56 @@ handle_tfe_exit:
 /*
  * Handle a set device bits interrupt
  */
-static inline void mtip_process_sdbf(struct driver_data *dd)
+static inline void mtip_workq_sdbfx(struct mtip_port *port, int group,
+							u32 completed)
 {
-	struct mtip_port  *port = dd->port;
-	int group, tag, bit;
-	u32 completed;
+	struct driver_data *dd = port->dd;
+	int tag, bit;
 	struct mtip_cmd *command;
 
-	/* walk all bits in all slot groups */
-	for (group = 0; group < dd->slot_groups; group++) {
-		completed = readl(port->completed[group]);
-		if (!completed)
-			continue;
+	if (!completed) {
+		WARN_ON_ONCE(!completed);
+		return;
+	}
+	/* clear completed status register in the hardware.*/
+	writel(completed, port->completed[group]);
 
-		/* clear completed status register in the hardware.*/
-		writel(completed, port->completed[group]);
+	/* Process completed commands. */
+	for (bit = 0; (bit < 32) && completed; bit++) {
+		if (completed & 0x01) {
+			tag = (group << 5) | bit;
 
-		/* Process completed commands. */
-		for (bit = 0;
-		     (bit < 32) && completed;
-		     bit++, completed >>= 1) {
-			if (completed & 0x01) {
-				tag = (group << 5) | bit;
+			/* skip internal command slot. */
+			if (unlikely(tag == MTIP_TAG_INTERNAL))
+				continue;
 
-				/* skip internal command slot. */
-				if (unlikely(tag == MTIP_TAG_INTERNAL))
-					continue;
+			command = &port->commands[tag];
+			/* make internal callback */
+			if (likely(command->comp_func)) {
+				command->comp_func(
+					port,
+					tag,
+					command->comp_data,
+					0);
+			} else {
+				dev_warn(&dd->pdev->dev,
+					"Null completion "
+					"for tag %d",
+					tag);
 
-				command = &port->commands[tag];
-				/* make internal callback */
-				if (likely(command->comp_func)) {
-					command->comp_func(
-						port,
-						tag,
-						command->comp_data,
-						0);
-				} else {
-					dev_warn(&dd->pdev->dev,
-						"Null completion "
-						"for tag %d",
-						tag);
-
-					if (mtip_check_surprise_removal(
-						dd->pdev)) {
-						mtip_command_cleanup(dd);
-						return;
-					}
+				if (mtip_check_surprise_removal(
+					dd->pdev)) {
+					mtip_command_cleanup(dd);
+					return;
 				}
 			}
 		}
+		completed >>= 1;
 	}
+
+	/* If last, re-enable interrupts */
+	if (atomic_dec_return(&dd->irq_workers_active) == 0)
+		writel(0xffffffff, dd->mmio + HOST_IRQ_STAT);
 }
 
 /*
@@ -1072,6 +1075,8 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
 	struct mtip_port *port = dd->port;
 	u32 hba_stat, port_stat;
 	int rv = IRQ_NONE;
+	int do_irq_enable = 1, i, workers;
+	struct mtip_work *twork;
 
 	hba_stat = readl(dd->mmio + HOST_IRQ_STAT);
 	if (hba_stat) {
@@ -1082,8 +1087,42 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
 		writel(port_stat, port->mmio + PORT_IRQ_STAT);
 
 		/* Demux port status */
-		if (likely(port_stat & PORT_IRQ_SDB_FIS))
-			mtip_process_sdbf(dd);
+		if (likely(port_stat & PORT_IRQ_SDB_FIS)) {
+			do_irq_enable = 0;
+			WARN_ON_ONCE(atomic_read(&dd->irq_workers_active) != 0);
+
+			/* Start at 1: group zero is always local? */
+			for (i = 0, workers = 0; i < MTIP_MAX_SLOT_GROUPS;
+									i++) {
+				twork = &dd->work[i];
+				twork->completed = readl(port->completed[i]);
+				if (twork->completed)
+					workers++;
+			}
+
+			atomic_set(&dd->irq_workers_active, workers);
+			if (workers) {
+				for (i = 1; i < MTIP_MAX_SLOT_GROUPS; i++) {
+					twork = &dd->work[i];
+					if (twork->completed)
+						queue_work_on(
+							twork->cpu_binding,
+							dd->isr_workq,
+							&twork->work);
+				}
+
+				if (likely(dd->work[0].completed))
+					mtip_workq_sdbfx(port, 0,
+							dd->work[0].completed);
+
+			} else {
+				/*
+				 * Chip quirk: SDB interrupt but nothing
+				 * to complete
+				 */
+				do_irq_enable = 1;
+			}
+		}
 
 		if (unlikely(port_stat & PORT_IRQ_ERR)) {
 			if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
@@ -1103,21 +1142,13 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
 	}
 
 	/* acknowledge interrupt */
-	writel(hba_stat, dd->mmio + HOST_IRQ_STAT);
+	if (unlikely(do_irq_enable))
+		writel(hba_stat, dd->mmio + HOST_IRQ_STAT);
 
 	return rv;
 }
 
 /*
- * Wrapper for mtip_handle_irq
- * (ignores return code)
- */
-static void mtip_tasklet(unsigned long data)
-{
-	mtip_handle_irq((struct driver_data *) data);
-}
-
-/*
  * HBA interrupt subroutine.
  *
  * @irq		IRQ number.
@@ -1130,8 +1161,8 @@ static void mtip_tasklet(unsigned long data)
 static irqreturn_t mtip_irq_handler(int irq, void *instance)
 {
 	struct driver_data *dd = instance;
-	tasklet_schedule(&dd->tasklet);
-	return IRQ_HANDLED;
+
+	return mtip_handle_irq(dd);
 }
 
 static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag)
@@ -1489,6 +1520,12 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
 	}
 #endif
 
+	/* Demux ID.DRAT & ID.RZAT to determine trim support */
+	if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5))
+		port->dd->trim_supp = true;
+	else
+		port->dd->trim_supp = false;
+
 	/* Set the identify buffer as valid. */
 	port->identify_valid = 1;
 
@@ -1676,6 +1713,81 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
 }
 
 /*
+ * Trim unused sectors
+ *
+ * @dd		pointer to driver_data structure
+ * @lba		starting lba
+ * @len		# of 512b sectors to trim
+ *
+ * return value
+ *      -ENOMEM		Out of dma memory
+ *      -EINVAL		Invalid parameters passed in, trim not supported
+ *      -EIO		Error submitting trim request to hw
+ */
+static int mtip_send_trim(struct driver_data *dd, unsigned int lba, unsigned int len)
+{
+	int i, rv = 0;
+	u64 tlba, tlen, sect_left;
+	struct mtip_trim_entry *buf;
+	dma_addr_t dma_addr;
+	struct host_to_dev_fis fis;
+
+	if (!len || dd->trim_supp == false)
+		return -EINVAL;
+
+	/* Trim request too big */
+	WARN_ON(len > (MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES));
+
+	/* Trim request not aligned on 4k boundary */
+	WARN_ON(len % 8 != 0);
+
+	/* Warn if vu_trim structure is too big */
+	WARN_ON(sizeof(struct mtip_trim) > ATA_SECT_SIZE);
+
+	/* Allocate a DMA buffer for the trim structure */
+	buf = dmam_alloc_coherent(&dd->pdev->dev, ATA_SECT_SIZE, &dma_addr,
+								GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	memset(buf, 0, ATA_SECT_SIZE);
+
+	for (i = 0, sect_left = len, tlba = lba;
+			i < MTIP_MAX_TRIM_ENTRIES && sect_left;
+			i++) {
+		tlen = (sect_left >= MTIP_MAX_TRIM_ENTRY_LEN ?
+					MTIP_MAX_TRIM_ENTRY_LEN :
+					sect_left);
+		buf[i].lba = __force_bit2int cpu_to_le32(tlba);
+		buf[i].range = __force_bit2int cpu_to_le16(tlen);
+		tlba += tlen;
+		sect_left -= tlen;
+	}
+	WARN_ON(sect_left != 0);
+
+	/* Build the fis */
+	memset(&fis, 0, sizeof(struct host_to_dev_fis));
+	fis.type       = 0x27;
+	fis.opts       = 1 << 7;
+	fis.command    = 0xfb;
+	fis.features   = 0x60;
+	fis.sect_count = 1;
+	fis.device     = ATA_DEVICE_OBS;
+
+	if (mtip_exec_internal_command(dd->port,
+					&fis,
+					5,
+					dma_addr,
+					ATA_SECT_SIZE,
+					0,
+					GFP_KERNEL,
+					MTIP_TRIM_TIMEOUT_MS) < 0)
+		rv = -EIO;
+
+	dmam_free_coherent(&dd->pdev->dev, ATA_SECT_SIZE, buf, dma_addr);
+	return rv;
+}
+
+/*
  * Get the drive capacity.
  *
  * @dd      Pointer to the device data structure.
@@ -3005,20 +3117,24 @@ static int mtip_hw_init(struct driver_data *dd)
 
 	hba_setup(dd);
 
-	tasklet_init(&dd->tasklet, mtip_tasklet, (unsigned long)dd);
-
-	dd->port = kzalloc(sizeof(struct mtip_port), GFP_KERNEL);
+	dd->port = kzalloc_node(sizeof(struct mtip_port), GFP_KERNEL,
+				dd->numa_node);
 	if (!dd->port) {
 		dev_err(&dd->pdev->dev,
 			"Memory allocation: port structure\n");
 		return -ENOMEM;
 	}
 
+	/* Continue workqueue setup */
+	for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
+		dd->work[i].port = dd->port;
+
 	/* Counting semaphore to track command slot usage */
 	sema_init(&dd->port->cmd_slot, num_command_slots - 1);
 
 	/* Spinlock to prevent concurrent issue */
-	spin_lock_init(&dd->port->cmd_issue_lock);
+	for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
+		spin_lock_init(&dd->port->cmd_issue_lock[i]);
 
 	/* Set the port mmio base address. */
 	dd->port->mmio	= dd->mmio + PORT_OFFSET;
@@ -3165,6 +3281,7 @@ static int mtip_hw_init(struct driver_data *dd)
 			"Unable to allocate IRQ %d\n", dd->pdev->irq);
 		goto out2;
 	}
+	irq_set_affinity_hint(dd->pdev->irq, get_cpu_mask(dd->isr_binding));
 
 	/* Enable interrupts on the HBA. */
 	writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
@@ -3241,7 +3358,8 @@ out3:
 	writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
 			dd->mmio + HOST_CTL);
 
-	/*Release the IRQ. */
+	/* Release the IRQ. */
+	irq_set_affinity_hint(dd->pdev->irq, NULL);
 	devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
 
 out2:
@@ -3291,11 +3409,9 @@ static int mtip_hw_exit(struct driver_data *dd)
 	del_timer_sync(&dd->port->cmd_timer);
 
 	/* Release the IRQ. */
+	irq_set_affinity_hint(dd->pdev->irq, NULL);
 	devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
 
-	/* Stop the bottom half tasklet. */
-	tasklet_kill(&dd->tasklet);
-
 	/* Free the command/command header memory. */
 	dmam_free_coherent(&dd->pdev->dev,
 			HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
@@ -3641,6 +3757,12 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
 		}
 	}
 
+	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
+		bio_endio(bio, mtip_send_trim(dd, bio->bi_sector,
+						bio_sectors(bio)));
+		return;
+	}
+
 	if (unlikely(!bio_has_data(bio))) {
 		blk_queue_flush(queue, 0);
 		bio_endio(bio, 0);
@@ -3711,7 +3833,7 @@ static int mtip_block_initialize(struct driver_data *dd)
 		goto protocol_init_error;
 	}
 
-	dd->disk = alloc_disk(MTIP_MAX_MINORS);
+	dd->disk = alloc_disk_node(MTIP_MAX_MINORS, dd->numa_node);
 	if (dd->disk  == NULL) {
 		dev_err(&dd->pdev->dev,
 			"Unable to allocate gendisk structure\n");
@@ -3755,7 +3877,7 @@ static int mtip_block_initialize(struct driver_data *dd)
 
 skip_create_disk:
 	/* Allocate the request queue. */
-	dd->queue = blk_alloc_queue(GFP_KERNEL);
+	dd->queue = blk_alloc_queue_node(GFP_KERNEL, dd->numa_node);
 	if (dd->queue == NULL) {
 		dev_err(&dd->pdev->dev,
 			"Unable to allocate request queue\n");
@@ -3783,6 +3905,15 @@ skip_create_disk:
 	 */
 	blk_queue_flush(dd->queue, 0);
 
+	/* Signal trim support */
+	if (dd->trim_supp == true) {
+		set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags);
+		dd->queue->limits.discard_granularity = 4096;
+		blk_queue_max_discard_sectors(dd->queue,
+			MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES);
+		dd->queue->limits.discard_zeroes_data = 0;
+	}
+
 	/* Set the capacity of the device in 512 byte sectors. */
 	if (!(mtip_hw_get_capacity(dd, &capacity))) {
 		dev_warn(&dd->pdev->dev,
@@ -3813,9 +3944,8 @@ skip_create_disk:
 
 start_service_thread:
 	sprintf(thd_name, "mtip_svc_thd_%02d", index);
-
-	dd->mtip_svc_handler = kthread_run(mtip_service_thread,
-						dd, thd_name);
+	dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread,
+						dd, dd->numa_node, thd_name);
 
 	if (IS_ERR(dd->mtip_svc_handler)) {
 		dev_err(&dd->pdev->dev, "service thread failed to start\n");
@@ -3823,7 +3953,7 @@ start_service_thread:
 		rv = -EFAULT;
 		goto kthread_run_error;
 	}
-
+	wake_up_process(dd->mtip_svc_handler);
 	if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
 		rv = wait_for_rebuild;
 
@@ -3963,6 +4093,56 @@ static int mtip_block_resume(struct driver_data *dd)
 	return 0;
 }
 
+static void drop_cpu(int cpu)
+{
+	cpu_use[cpu]--;
+}
+
+static int get_least_used_cpu_on_node(int node)
+{
+	int cpu, least_used_cpu, least_cnt;
+	const struct cpumask *node_mask;
+
+	node_mask = cpumask_of_node(node);
+	least_used_cpu = cpumask_first(node_mask);
+	least_cnt = cpu_use[least_used_cpu];
+	cpu = least_used_cpu;
+
+	for_each_cpu(cpu, node_mask) {
+		if (cpu_use[cpu] < least_cnt) {
+			least_used_cpu = cpu;
+			least_cnt = cpu_use[cpu];
+		}
+	}
+	cpu_use[least_used_cpu]++;
+	return least_used_cpu;
+}
+
+/* Helper for selecting a node in round robin mode */
+static inline int mtip_get_next_rr_node(void)
+{
+	static int next_node = -1;
+
+	if (next_node == -1) {
+		next_node = first_online_node;
+		return next_node;
+	}
+
+	next_node = next_online_node(next_node);
+	if (next_node == MAX_NUMNODES)
+		next_node = first_online_node;
+	return next_node;
+}
+
+static DEFINE_HANDLER(0);
+static DEFINE_HANDLER(1);
+static DEFINE_HANDLER(2);
+static DEFINE_HANDLER(3);
+static DEFINE_HANDLER(4);
+static DEFINE_HANDLER(5);
+static DEFINE_HANDLER(6);
+static DEFINE_HANDLER(7);
+
 /*
  * Called for each supported PCI device detected.
  *
@@ -3977,9 +4157,25 @@ static int mtip_pci_probe(struct pci_dev *pdev,
 {
 	int rv = 0;
 	struct driver_data *dd = NULL;
+	char cpu_list[256];
+	const struct cpumask *node_mask;
+	int cpu, i = 0, j = 0;
+	int my_node = NUMA_NO_NODE;
 
 	/* Allocate memory for this devices private data. */
-	dd = kzalloc(sizeof(struct driver_data), GFP_KERNEL);
+	my_node = pcibus_to_node(pdev->bus);
+	if (my_node != NUMA_NO_NODE) {
+		if (!node_online(my_node))
+			my_node = mtip_get_next_rr_node();
+	} else {
+		dev_info(&pdev->dev, "Kernel not reporting proximity, choosing a node\n");
+		my_node = mtip_get_next_rr_node();
+	}
+	dev_info(&pdev->dev, "NUMA node %d (closest: %d,%d, probe on %d:%d)\n",
+		my_node, pcibus_to_node(pdev->bus), dev_to_node(&pdev->dev),
+		cpu_to_node(smp_processor_id()), smp_processor_id());
+
+	dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node);
 	if (dd == NULL) {
 		dev_err(&pdev->dev,
 			"Unable to allocate memory for driver data\n");
@@ -4016,19 +4212,82 @@ static int mtip_pci_probe(struct pci_dev *pdev,
 		}
 	}
 
-	pci_set_master(pdev);
+	/* Copy the info we may need later into the private data structure. */
+	dd->major	= mtip_major;
+	dd->instance	= instance;
+	dd->pdev	= pdev;
+	dd->numa_node	= my_node;
 
+	memset(dd->workq_name, 0, 32);
+	snprintf(dd->workq_name, 31, "mtipq%d", dd->instance);
+
+	dd->isr_workq = create_workqueue(dd->workq_name);
+	if (!dd->isr_workq) {
+		dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance);
+		goto block_initialize_err;
+	}
+
+	memset(cpu_list, 0, sizeof(cpu_list));
+
+	node_mask = cpumask_of_node(dd->numa_node);
+	if (!cpumask_empty(node_mask)) {
+		for_each_cpu(cpu, node_mask)
+		{
+			snprintf(&cpu_list[j], 256 - j, "%d ", cpu);
+			j = strlen(cpu_list);
+		}
+
+		dev_info(&pdev->dev, "Node %d on package %d has %d cpu(s): %s\n",
+			dd->numa_node,
+			topology_physical_package_id(cpumask_first(node_mask)),
+			nr_cpus_node(dd->numa_node),
+			cpu_list);
+	} else
+		dev_dbg(&pdev->dev, "mtip32xx: node_mask empty\n");
+
+	dd->isr_binding = get_least_used_cpu_on_node(dd->numa_node);
+	dev_info(&pdev->dev, "Initial IRQ binding node:cpu %d:%d\n",
+		cpu_to_node(dd->isr_binding), dd->isr_binding);
+
+	/* first worker context always runs in ISR */
+	dd->work[0].cpu_binding = dd->isr_binding;
+	dd->work[1].cpu_binding = get_least_used_cpu_on_node(dd->numa_node);
+	dd->work[2].cpu_binding = get_least_used_cpu_on_node(dd->numa_node);
+	dd->work[3].cpu_binding = dd->work[0].cpu_binding;
+	dd->work[4].cpu_binding = dd->work[1].cpu_binding;
+	dd->work[5].cpu_binding = dd->work[2].cpu_binding;
+	dd->work[6].cpu_binding = dd->work[2].cpu_binding;
+	dd->work[7].cpu_binding = dd->work[1].cpu_binding;
+
+	/* Log the bindings */
+	for_each_present_cpu(cpu) {
+		memset(cpu_list, 0, sizeof(cpu_list));
+		for (i = 0, j = 0; i < MTIP_MAX_SLOT_GROUPS; i++) {
+			if (dd->work[i].cpu_binding == cpu) {
+				snprintf(&cpu_list[j], 256 - j, "%d ", i);
+				j = strlen(cpu_list);
+			}
+		}
+		if (j)
+			dev_info(&pdev->dev, "CPU %d: WQs %s\n", cpu, cpu_list);
+	}
+
+	INIT_WORK(&dd->work[0].work, mtip_workq_sdbf0);
+	INIT_WORK(&dd->work[1].work, mtip_workq_sdbf1);
+	INIT_WORK(&dd->work[2].work, mtip_workq_sdbf2);
+	INIT_WORK(&dd->work[3].work, mtip_workq_sdbf3);
+	INIT_WORK(&dd->work[4].work, mtip_workq_sdbf4);
+	INIT_WORK(&dd->work[5].work, mtip_workq_sdbf5);
+	INIT_WORK(&dd->work[6].work, mtip_workq_sdbf6);
+	INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7);
+
+	pci_set_master(pdev);
 	if (pci_enable_msi(pdev)) {
 		dev_warn(&pdev->dev,
 			"Unable to enable MSI interrupt.\n");
 		goto block_initialize_err;
 	}
 
-	/* Copy the info we may need later into the private data structure. */
-	dd->major	= mtip_major;
-	dd->instance	= instance;
-	dd->pdev	= pdev;
-
 	/* Initialize the block layer. */
 	rv = mtip_block_initialize(dd);
 	if (rv < 0) {
@@ -4048,7 +4307,13 @@ static int mtip_pci_probe(struct pci_dev *pdev,
 
 block_initialize_err:
 	pci_disable_msi(pdev);
-
+	if (dd->isr_workq) {
+		flush_workqueue(dd->isr_workq);
+		destroy_workqueue(dd->isr_workq);
+		drop_cpu(dd->work[0].cpu_binding);
+		drop_cpu(dd->work[1].cpu_binding);
+		drop_cpu(dd->work[2].cpu_binding);
+	}
 setmask_err:
 	pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
 
@@ -4089,6 +4354,14 @@ static void mtip_pci_remove(struct pci_dev *pdev)
 	/* Clean up the block layer. */
 	mtip_block_remove(dd);
 
+	if (dd->isr_workq) {
+		flush_workqueue(dd->isr_workq);
+		destroy_workqueue(dd->isr_workq);
+		drop_cpu(dd->work[0].cpu_binding);
+		drop_cpu(dd->work[1].cpu_binding);
+		drop_cpu(dd->work[2].cpu_binding);
+	}
+
 	pci_disable_msi(pdev);
 
 	kfree(dd);
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index b1742640556a..3bffff5f670c 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -164,6 +164,35 @@ struct smart_attr {
 	u8 res[3];
 } __packed;
 
+struct mtip_work {
+	struct work_struct work;
+	void *port;
+	int cpu_binding;
+	u32 completed;
+} ____cacheline_aligned_in_smp;
+
+#define DEFINE_HANDLER(group)                                  \
+	void mtip_workq_sdbf##group(struct work_struct *work)       \
+	{                                                      \
+		struct mtip_work *w = (struct mtip_work *) work;         \
+		mtip_workq_sdbfx(w->port, group, w->completed);     \
+	}
+
+#define MTIP_TRIM_TIMEOUT_MS		240000
+#define MTIP_MAX_TRIM_ENTRIES		8
+#define MTIP_MAX_TRIM_ENTRY_LEN 	0xfff8
+
+struct mtip_trim_entry {
+	u32 lba;   /* starting lba of region */
+	u16 rsvd;  /* unused */
+	u16 range; /* # of 512b blocks to trim */
+} __packed;
+
+struct mtip_trim {
+	/* Array of regions to trim */
+	struct mtip_trim_entry entry[MTIP_MAX_TRIM_ENTRIES];
+} __packed;
+
 /* Register Frame Information Structure (FIS), host to device. */
 struct host_to_dev_fis {
 	/*
@@ -424,7 +453,7 @@ struct mtip_port {
 	 */
 	struct semaphore cmd_slot;
 	/* Spinlock for working around command-issue bug. */
-	spinlock_t cmd_issue_lock;
+	spinlock_t cmd_issue_lock[MTIP_MAX_SLOT_GROUPS];
 };
 
 /*
@@ -447,9 +476,6 @@ struct driver_data {
 
 	struct mtip_port *port; /* Pointer to the port data structure. */
 
-	/* Tasklet used to process the bottom half of the ISR. */
-	struct tasklet_struct tasklet;
-
 	unsigned product_type; /* magic value declaring the product type */
 
 	unsigned slot_groups; /* number of slot groups the product supports */
@@ -461,6 +487,20 @@ struct driver_data {
 	struct task_struct *mtip_svc_handler; /* task_struct of svc thd */
 
 	struct dentry *dfs_node;
+
+	bool trim_supp; /* flag indicating trim support */
+
+	int numa_node; /* NUMA support */
+
+	char workq_name[32];
+
+	struct workqueue_struct *isr_workq;
+
+	struct mtip_work work[MTIP_MAX_SLOT_GROUPS];
+
+	atomic_t irq_workers_active;
+
+	int isr_binding;
 };
 
 #endif
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index ade146bf65e5..7fecc784be01 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -98,6 +98,7 @@ static const char *nbdcmd_to_ascii(int cmd)
 	case  NBD_CMD_READ: return "read";
 	case NBD_CMD_WRITE: return "write";
 	case  NBD_CMD_DISC: return "disconnect";
+	case NBD_CMD_FLUSH: return "flush";
 	case  NBD_CMD_TRIM: return "trim/discard";
 	}
 	return "invalid";
@@ -244,8 +245,15 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
 
 	request.magic = htonl(NBD_REQUEST_MAGIC);
 	request.type = htonl(nbd_cmd(req));
-	request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
-	request.len = htonl(size);
+
+	if (nbd_cmd(req) == NBD_CMD_FLUSH) {
+		/* Other values are reserved for FLUSH requests.  */
+		request.from = 0;
+		request.len = 0;
+	} else {
+		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
+		request.len = htonl(size);
+	}
 	memcpy(request.handle, &req, sizeof(req));
 
 	dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n",
@@ -482,6 +490,11 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
 		}
 	}
 
+	if (req->cmd_flags & REQ_FLUSH) {
+		BUG_ON(unlikely(blk_rq_sectors(req)));
+		nbd_cmd(req) = NBD_CMD_FLUSH;
+	}
+
 	req->errors = 0;
 
 	mutex_lock(&nbd->tx_lock);
@@ -551,6 +564,7 @@ static int nbd_thread(void *data)
  */
 
 static void do_nbd_request(struct request_queue *q)
+		__releases(q->queue_lock) __acquires(q->queue_lock)
 {
 	struct request *req;
 	
@@ -595,12 +609,20 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		struct request sreq;
 
 		dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
+		if (!nbd->sock)
+			return -EINVAL;
 
+		mutex_unlock(&nbd->tx_lock);
+		fsync_bdev(bdev);
+		mutex_lock(&nbd->tx_lock);
 		blk_rq_init(NULL, &sreq);
 		sreq.cmd_type = REQ_TYPE_SPECIAL;
 		nbd_cmd(&sreq) = NBD_CMD_DISC;
+
+		/* Check again after getting mutex back.  */
 		if (!nbd->sock)
 			return -EINVAL;
+
 		nbd_send_req(nbd, &sreq);
                 return 0;
 	}
@@ -614,6 +636,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		nbd_clear_que(nbd);
 		BUG_ON(!list_empty(&nbd->queue_head));
 		BUG_ON(!list_empty(&nbd->waiting_queue));
+		kill_bdev(bdev);
 		if (file)
 			fput(file);
 		return 0;
@@ -681,9 +704,15 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 
 		mutex_unlock(&nbd->tx_lock);
 
+		if (nbd->flags & NBD_FLAG_READ_ONLY)
+			set_device_ro(bdev, true);
 		if (nbd->flags & NBD_FLAG_SEND_TRIM)
 			queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
 				nbd->disk->queue);
+		if (nbd->flags & NBD_FLAG_SEND_FLUSH)
+			blk_queue_flush(nbd->disk->queue, REQ_FLUSH);
+		else
+			blk_queue_flush(nbd->disk->queue, 0);
 
 		thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name);
 		if (IS_ERR(thread)) {
@@ -702,9 +731,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		nbd->file = NULL;
 		nbd_clear_que(nbd);
 		dev_warn(disk_to_dev(nbd->disk), "queue cleared\n");
+		kill_bdev(bdev);
 		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
+		set_device_ro(bdev, false);
 		if (file)
 			fput(file);
+		nbd->flags = 0;
 		nbd->bytesize = 0;
 		bdev->bd_inode->i_size = 0;
 		set_capacity(nbd->disk, 0);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 89576a0b3f2e..6c81a4c040b9 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -52,9 +52,12 @@
 #define	SECTOR_SHIFT	9
 #define	SECTOR_SIZE	(1ULL << SECTOR_SHIFT)
 
-/* It might be useful to have this defined elsewhere too */
+/* It might be useful to have these defined elsewhere */
 
-#define	U64_MAX	((u64) (~0ULL))
+#define	U8_MAX	((u8)	(~0U))
+#define	U16_MAX	((u16)	(~0U))
+#define	U32_MAX	((u32)	(~0U))
+#define	U64_MAX	((u64)	(~0ULL))
 
 #define RBD_DRV_NAME "rbd"
 #define RBD_DRV_NAME_LONG "rbd (rados block device)"
@@ -66,7 +69,6 @@
 			(NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
 
 #define RBD_MAX_SNAP_COUNT	510	/* allows max snapc to fit in 4KB */
-#define RBD_MAX_OPT_LEN		1024
 
 #define RBD_SNAP_HEAD_NAME	"-"
 
@@ -93,8 +95,6 @@
 #define DEV_NAME_LEN		32
 #define MAX_INT_FORMAT_WIDTH	((5 * sizeof (int)) / 2 + 1)
 
-#define RBD_READ_ONLY_DEFAULT		false
-
 /*
  * block device image metadata (in-memory version)
  */
@@ -119,16 +119,33 @@ struct rbd_image_header {
  * An rbd image specification.
  *
  * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely
- * identify an image.
+ * identify an image.  Each rbd_dev structure includes a pointer to
+ * an rbd_spec structure that encapsulates this identity.
+ *
+ * Each of the id's in an rbd_spec has an associated name.  For a
+ * user-mapped image, the names are supplied and the id's associated
+ * with them are looked up.  For a layered image, a parent image is
+ * defined by the tuple, and the names are looked up.
+ *
+ * An rbd_dev structure contains a parent_spec pointer which is
+ * non-null if the image it represents is a child in a layered
+ * image.  This pointer will refer to the rbd_spec structure used
+ * by the parent rbd_dev for its own identity (i.e., the structure
+ * is shared between the parent and child).
+ *
+ * Since these structures are populated once, during the discovery
+ * phase of image construction, they are effectively immutable so
+ * we make no effort to synchronize access to them.
+ *
+ * Note that code herein does not assume the image name is known (it
+ * could be a null pointer).
  */
 struct rbd_spec {
 	u64		pool_id;
 	char		*pool_name;
 
 	char		*image_id;
-	size_t		image_id_len;
 	char		*image_name;
-	size_t		image_name_len;
 
 	u64		snap_id;
 	char		*snap_name;
@@ -136,10 +153,6 @@ struct rbd_spec {
 	struct kref	kref;
 };
 
-struct rbd_options {
-	bool	read_only;
-};
-
 /*
  * an instance of the client.  multiple devices may share an rbd client.
  */
@@ -149,37 +162,76 @@ struct rbd_client {
 	struct list_head	node;
 };
 
-/*
- * a request completion status
- */
-struct rbd_req_status {
-	int done;
-	int rc;
-	u64 bytes;
+struct rbd_img_request;
+typedef void (*rbd_img_callback_t)(struct rbd_img_request *);
+
+#define	BAD_WHICH	U32_MAX		/* Good which or bad which, which? */
+
+struct rbd_obj_request;
+typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *);
+
+enum obj_request_type {
+	OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES
 };
 
-/*
- * a collection of requests
- */
-struct rbd_req_coll {
-	int			total;
-	int			num_done;
+struct rbd_obj_request {
+	const char		*object_name;
+	u64			offset;		/* object start byte */
+	u64			length;		/* bytes from offset */
+
+	struct rbd_img_request	*img_request;
+	struct list_head	links;		/* img_request->obj_requests */
+	u32			which;		/* posn image request list */
+
+	enum obj_request_type	type;
+	union {
+		struct bio	*bio_list;
+		struct {
+			struct page	**pages;
+			u32		page_count;
+		};
+	};
+
+	struct ceph_osd_request	*osd_req;
+
+	u64			xferred;	/* bytes transferred */
+	u64			version;
+	int			result;
+	atomic_t		done;
+
+	rbd_obj_callback_t	callback;
+	struct completion	completion;
+
 	struct kref		kref;
-	struct rbd_req_status	status[0];
 };
 
-/*
- * a single io request
- */
-struct rbd_request {
-	struct request		*rq;		/* blk layer request */
-	struct bio		*bio;		/* cloned bio */
-	struct page		**pages;	/* list of used pages */
-	u64			len;
-	int			coll_index;
-	struct rbd_req_coll	*coll;
+struct rbd_img_request {
+	struct request		*rq;
+	struct rbd_device	*rbd_dev;
+	u64			offset;	/* starting image byte offset */
+	u64			length;	/* byte count from offset */
+	bool			write_request;	/* false for read */
+	union {
+		struct ceph_snap_context *snapc;	/* for writes */
+		u64		snap_id;		/* for reads */
+	};
+	spinlock_t		completion_lock;/* protects next_completion */
+	u32			next_completion;
+	rbd_img_callback_t	callback;
+
+	u32			obj_request_count;
+	struct list_head	obj_requests;	/* rbd_obj_request structs */
+
+	struct kref		kref;
 };
 
+#define for_each_obj_request(ireq, oreq) \
+	list_for_each_entry(oreq, &(ireq)->obj_requests, links)
+#define for_each_obj_request_from(ireq, oreq) \
+	list_for_each_entry_from(oreq, &(ireq)->obj_requests, links)
+#define for_each_obj_request_safe(ireq, oreq, n) \
+	list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links)
+
 struct rbd_snap {
 	struct	device		dev;
 	const char		*name;
@@ -209,16 +261,18 @@ struct rbd_device {
 
 	char			name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
 
-	spinlock_t		lock;		/* queue lock */
+	spinlock_t		lock;		/* queue, flags, open_count */
 
 	struct rbd_image_header	header;
-	bool                    exists;
+	unsigned long		flags;		/* possibly lock protected */
 	struct rbd_spec		*spec;
 
 	char			*header_name;
 
+	struct ceph_file_layout	layout;
+
 	struct ceph_osd_event   *watch_event;
-	struct ceph_osd_request *watch_request;
+	struct rbd_obj_request	*watch_request;
 
 	struct rbd_spec		*parent_spec;
 	u64			parent_overlap;
@@ -235,7 +289,19 @@ struct rbd_device {
 
 	/* sysfs related */
 	struct device		dev;
-	unsigned long		open_count;
+	unsigned long		open_count;	/* protected by lock */
+};
+
+/*
+ * Flag bits for rbd_dev->flags.  If atomicity is required,
+ * rbd_dev->lock is used to protect access.
+ *
+ * Currently, only the "removing" flag (which is coupled with the
+ * "open_count" field) requires atomic access.
+ */
+enum rbd_dev_flags {
+	RBD_DEV_FLAG_EXISTS,	/* mapped snapshot has not been deleted */
+	RBD_DEV_FLAG_REMOVING,	/* this mapping is being removed */
 };
 
 static DEFINE_MUTEX(ctl_mutex);	  /* Serialize open/close/setup/teardown */
@@ -277,6 +343,33 @@ static struct device rbd_root_dev = {
 	.release =      rbd_root_dev_release,
 };
 
+static __printf(2, 3)
+void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	if (!rbd_dev)
+		printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf);
+	else if (rbd_dev->disk)
+		printk(KERN_WARNING "%s: %s: %pV\n",
+			RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf);
+	else if (rbd_dev->spec && rbd_dev->spec->image_name)
+		printk(KERN_WARNING "%s: image %s: %pV\n",
+			RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf);
+	else if (rbd_dev->spec && rbd_dev->spec->image_id)
+		printk(KERN_WARNING "%s: id %s: %pV\n",
+			RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf);
+	else	/* punt */
+		printk(KERN_WARNING "%s: rbd_dev %p: %pV\n",
+			RBD_DRV_NAME, rbd_dev, &vaf);
+	va_end(args);
+}
+
 #ifdef RBD_DEBUG
 #define rbd_assert(expr)						\
 		if (unlikely(!(expr))) {				\
@@ -296,14 +389,23 @@ static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver);
 static int rbd_open(struct block_device *bdev, fmode_t mode)
 {
 	struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
+	bool removing = false;
 
 	if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only)
 		return -EROFS;
 
+	spin_lock_irq(&rbd_dev->lock);
+	if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags))
+		removing = true;
+	else
+		rbd_dev->open_count++;
+	spin_unlock_irq(&rbd_dev->lock);
+	if (removing)
+		return -ENOENT;
+
 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
 	(void) get_device(&rbd_dev->dev);
 	set_device_ro(bdev, rbd_dev->mapping.read_only);
-	rbd_dev->open_count++;
 	mutex_unlock(&ctl_mutex);
 
 	return 0;
@@ -312,10 +414,14 @@ static int rbd_open(struct block_device *bdev, fmode_t mode)
 static int rbd_release(struct gendisk *disk, fmode_t mode)
 {
 	struct rbd_device *rbd_dev = disk->private_data;
+	unsigned long open_count_before;
+
+	spin_lock_irq(&rbd_dev->lock);
+	open_count_before = rbd_dev->open_count--;
+	spin_unlock_irq(&rbd_dev->lock);
+	rbd_assert(open_count_before > 0);
 
 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-	rbd_assert(rbd_dev->open_count > 0);
-	rbd_dev->open_count--;
 	put_device(&rbd_dev->dev);
 	mutex_unlock(&ctl_mutex);
 
@@ -337,7 +443,7 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts)
 	struct rbd_client *rbdc;
 	int ret = -ENOMEM;
 
-	dout("rbd_client_create\n");
+	dout("%s:\n", __func__);
 	rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL);
 	if (!rbdc)
 		goto out_opt;
@@ -361,8 +467,8 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts)
 	spin_unlock(&rbd_client_list_lock);
 
 	mutex_unlock(&ctl_mutex);
+	dout("%s: rbdc %p\n", __func__, rbdc);
 
-	dout("rbd_client_create created %p\n", rbdc);
 	return rbdc;
 
 out_err:
@@ -373,6 +479,8 @@ out_mutex:
 out_opt:
 	if (ceph_opts)
 		ceph_destroy_options(ceph_opts);
+	dout("%s: error %d\n", __func__, ret);
+
 	return ERR_PTR(ret);
 }
 
@@ -426,6 +534,12 @@ static match_table_t rbd_opts_tokens = {
 	{-1, NULL}
 };
 
+struct rbd_options {
+	bool	read_only;
+};
+
+#define RBD_READ_ONLY_DEFAULT	false
+
 static int parse_rbd_opts_token(char *c, void *private)
 {
 	struct rbd_options *rbd_opts = private;
@@ -493,7 +607,7 @@ static void rbd_client_release(struct kref *kref)
 {
 	struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
 
-	dout("rbd_release_client %p\n", rbdc);
+	dout("%s: rbdc %p\n", __func__, rbdc);
 	spin_lock(&rbd_client_list_lock);
 	list_del(&rbdc->node);
 	spin_unlock(&rbd_client_list_lock);
@@ -512,18 +626,6 @@ static void rbd_put_client(struct rbd_client *rbdc)
 		kref_put(&rbdc->kref, rbd_client_release);
 }
 
-/*
- * Destroy requests collection
- */
-static void rbd_coll_release(struct kref *kref)
-{
-	struct rbd_req_coll *coll =
-		container_of(kref, struct rbd_req_coll, kref);
-
-	dout("rbd_coll_release %p\n", coll);
-	kfree(coll);
-}
-
 static bool rbd_image_format_valid(u32 image_format)
 {
 	return image_format == 1 || image_format == 2;
@@ -707,7 +809,8 @@ static int rbd_dev_set_mapping(struct rbd_device *rbd_dev)
 			goto done;
 		rbd_dev->mapping.read_only = true;
 	}
-	rbd_dev->exists = true;
+	set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
+
 done:
 	return ret;
 }
@@ -724,7 +827,7 @@ static void rbd_header_free(struct rbd_image_header *header)
 	header->snapc = NULL;
 }
 
-static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
+static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
 {
 	char *name;
 	u64 segment;
@@ -767,23 +870,6 @@ static u64 rbd_segment_length(struct rbd_device *rbd_dev,
 	return length;
 }
 
-static int rbd_get_num_segments(struct rbd_image_header *header,
-				u64 ofs, u64 len)
-{
-	u64 start_seg;
-	u64 end_seg;
-
-	if (!len)
-		return 0;
-	if (len - 1 > U64_MAX - ofs)
-		return -ERANGE;
-
-	start_seg = ofs >> header->obj_order;
-	end_seg = (ofs + len - 1) >> header->obj_order;
-
-	return end_seg - start_seg + 1;
-}
-
 /*
  * returns the size of an object in the image
  */
@@ -949,8 +1035,10 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src,
 		unsigned int bi_size;
 		struct bio *bio;
 
-		if (!bi)
+		if (!bi) {
+			rbd_warn(NULL, "bio_chain exhausted with %u left", len);
 			goto out_err;	/* EINVAL; ran out of bio's */
+		}
 		bi_size = min_t(unsigned int, bi->bi_size - off, len);
 		bio = bio_clone_range(bi, off, bi_size, gfpmask);
 		if (!bio)
@@ -976,399 +1064,721 @@ out_err:
 	return NULL;
 }
 
-/*
- * helpers for osd request op vectors.
- */
-static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops,
-					int opcode, u32 payload_len)
+static void rbd_obj_request_get(struct rbd_obj_request *obj_request)
 {
-	struct ceph_osd_req_op *ops;
+	dout("%s: obj %p (was %d)\n", __func__, obj_request,
+		atomic_read(&obj_request->kref.refcount));
+	kref_get(&obj_request->kref);
+}
+
+static void rbd_obj_request_destroy(struct kref *kref);
+static void rbd_obj_request_put(struct rbd_obj_request *obj_request)
+{
+	rbd_assert(obj_request != NULL);
+	dout("%s: obj %p (was %d)\n", __func__, obj_request,
+		atomic_read(&obj_request->kref.refcount));
+	kref_put(&obj_request->kref, rbd_obj_request_destroy);
+}
+
+static void rbd_img_request_get(struct rbd_img_request *img_request)
+{
+	dout("%s: img %p (was %d)\n", __func__, img_request,
+		atomic_read(&img_request->kref.refcount));
+	kref_get(&img_request->kref);
+}
+
+static void rbd_img_request_destroy(struct kref *kref);
+static void rbd_img_request_put(struct rbd_img_request *img_request)
+{
+	rbd_assert(img_request != NULL);
+	dout("%s: img %p (was %d)\n", __func__, img_request,
+		atomic_read(&img_request->kref.refcount));
+	kref_put(&img_request->kref, rbd_img_request_destroy);
+}
+
+static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request,
+					struct rbd_obj_request *obj_request)
+{
+	rbd_assert(obj_request->img_request == NULL);
+
+	rbd_obj_request_get(obj_request);
+	obj_request->img_request = img_request;
+	obj_request->which = img_request->obj_request_count;
+	rbd_assert(obj_request->which != BAD_WHICH);
+	img_request->obj_request_count++;
+	list_add_tail(&obj_request->links, &img_request->obj_requests);
+	dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request,
+		obj_request->which);
+}
 
-	ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO);
-	if (!ops)
+static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request,
+					struct rbd_obj_request *obj_request)
+{
+	rbd_assert(obj_request->which != BAD_WHICH);
+
+	dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request,
+		obj_request->which);
+	list_del(&obj_request->links);
+	rbd_assert(img_request->obj_request_count > 0);
+	img_request->obj_request_count--;
+	rbd_assert(obj_request->which == img_request->obj_request_count);
+	obj_request->which = BAD_WHICH;
+	rbd_assert(obj_request->img_request == img_request);
+	obj_request->img_request = NULL;
+	obj_request->callback = NULL;
+	rbd_obj_request_put(obj_request);
+}
+
+static bool obj_request_type_valid(enum obj_request_type type)
+{
+	switch (type) {
+	case OBJ_REQUEST_NODATA:
+	case OBJ_REQUEST_BIO:
+	case OBJ_REQUEST_PAGES:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static struct ceph_osd_req_op *rbd_osd_req_op_create(u16 opcode, ...)
+{
+	struct ceph_osd_req_op *op;
+	va_list args;
+	size_t size;
+
+	op = kzalloc(sizeof (*op), GFP_NOIO);
+	if (!op)
 		return NULL;
+	op->op = opcode;
+	va_start(args, opcode);
+	switch (opcode) {
+	case CEPH_OSD_OP_READ:
+	case CEPH_OSD_OP_WRITE:
+		/* rbd_osd_req_op_create(READ, offset, length) */
+		/* rbd_osd_req_op_create(WRITE, offset, length) */
+		op->extent.offset = va_arg(args, u64);
+		op->extent.length = va_arg(args, u64);
+		if (opcode == CEPH_OSD_OP_WRITE)
+			op->payload_len = op->extent.length;
+		break;
+	case CEPH_OSD_OP_STAT:
+		break;
+	case CEPH_OSD_OP_CALL:
+		/* rbd_osd_req_op_create(CALL, class, method, data, datalen) */
+		op->cls.class_name = va_arg(args, char *);
+		size = strlen(op->cls.class_name);
+		rbd_assert(size <= (size_t) U8_MAX);
+		op->cls.class_len = size;
+		op->payload_len = size;
+
+		op->cls.method_name = va_arg(args, char *);
+		size = strlen(op->cls.method_name);
+		rbd_assert(size <= (size_t) U8_MAX);
+		op->cls.method_len = size;
+		op->payload_len += size;
+
+		op->cls.argc = 0;
+		op->cls.indata = va_arg(args, void *);
+		size = va_arg(args, size_t);
+		rbd_assert(size <= (size_t) U32_MAX);
+		op->cls.indata_len = (u32) size;
+		op->payload_len += size;
+		break;
+	case CEPH_OSD_OP_NOTIFY_ACK:
+	case CEPH_OSD_OP_WATCH:
+		/* rbd_osd_req_op_create(NOTIFY_ACK, cookie, version) */
+		/* rbd_osd_req_op_create(WATCH, cookie, version, flag) */
+		op->watch.cookie = va_arg(args, u64);
+		op->watch.ver = va_arg(args, u64);
+		op->watch.ver = cpu_to_le64(op->watch.ver);
+		if (opcode == CEPH_OSD_OP_WATCH && va_arg(args, int))
+			op->watch.flag = (u8) 1;
+		break;
+	default:
+		rbd_warn(NULL, "unsupported opcode %hu\n", opcode);
+		kfree(op);
+		op = NULL;
+		break;
+	}
+	va_end(args);
 
-	ops[0].op = opcode;
+	return op;
+}
 
-	/*
-	 * op extent offset and length will be set later on
-	 * in calc_raw_layout()
-	 */
-	ops[0].payload_len = payload_len;
+static void rbd_osd_req_op_destroy(struct ceph_osd_req_op *op)
+{
+	kfree(op);
+}
+
+static int rbd_obj_request_submit(struct ceph_osd_client *osdc,
+				struct rbd_obj_request *obj_request)
+{
+	dout("%s: osdc %p obj %p\n", __func__, osdc, obj_request);
 
-	return ops;
+	return ceph_osdc_start_request(osdc, obj_request->osd_req, false);
 }
 
-static void rbd_destroy_ops(struct ceph_osd_req_op *ops)
+static void rbd_img_request_complete(struct rbd_img_request *img_request)
 {
-	kfree(ops);
+	dout("%s: img %p\n", __func__, img_request);
+	if (img_request->callback)
+		img_request->callback(img_request);
+	else
+		rbd_img_request_put(img_request);
 }
 
-static void rbd_coll_end_req_index(struct request *rq,
-				   struct rbd_req_coll *coll,
-				   int index,
-				   int ret, u64 len)
+/* Caller is responsible for rbd_obj_request_destroy(obj_request) */
+
+static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
 {
-	struct request_queue *q;
-	int min, max, i;
+	dout("%s: obj %p\n", __func__, obj_request);
 
-	dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n",
-	     coll, index, ret, (unsigned long long) len);
+	return wait_for_completion_interruptible(&obj_request->completion);
+}
 
-	if (!rq)
-		return;
+static void obj_request_done_init(struct rbd_obj_request *obj_request)
+{
+	atomic_set(&obj_request->done, 0);
+	smp_wmb();
+}
 
-	if (!coll) {
-		blk_end_request(rq, ret, len);
-		return;
+static void obj_request_done_set(struct rbd_obj_request *obj_request)
+{
+	int done;
+
+	done = atomic_inc_return(&obj_request->done);
+	if (done > 1) {
+		struct rbd_img_request *img_request = obj_request->img_request;
+		struct rbd_device *rbd_dev;
+
+		rbd_dev = img_request ? img_request->rbd_dev : NULL;
+		rbd_warn(rbd_dev, "obj_request %p was already done\n",
+			obj_request);
 	}
+}
 
-	q = rq->q;
-
-	spin_lock_irq(q->queue_lock);
-	coll->status[index].done = 1;
-	coll->status[index].rc = ret;
-	coll->status[index].bytes = len;
-	max = min = coll->num_done;
-	while (max < coll->total && coll->status[max].done)
-		max++;
-
-	for (i = min; i<max; i++) {
-		__blk_end_request(rq, coll->status[i].rc,
-				  coll->status[i].bytes);
-		coll->num_done++;
-		kref_put(&coll->kref, rbd_coll_release);
+static bool obj_request_done_test(struct rbd_obj_request *obj_request)
+{
+	smp_mb();
+	return atomic_read(&obj_request->done) != 0;
+}
+
+static void rbd_obj_request_complete(struct rbd_obj_request *obj_request)
+{
+	dout("%s: obj %p cb %p\n", __func__, obj_request,
+		obj_request->callback);
+	if (obj_request->callback)
+		obj_request->callback(obj_request);
+	else
+		complete_all(&obj_request->completion);
+}
+
+static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request)
+{
+	dout("%s: obj %p\n", __func__, obj_request);
+	obj_request_done_set(obj_request);
+}
+
+static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)
+{
+	dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request,
+		obj_request->result, obj_request->xferred, obj_request->length);
+	/*
+	 * ENOENT means a hole in the object.  We zero-fill the
+	 * entire length of the request.  A short read also implies
+	 * zero-fill to the end of the request.  Either way we
+	 * update the xferred count to indicate the whole request
+	 * was satisfied.
+	 */
+	if (obj_request->result == -ENOENT) {
+		zero_bio_chain(obj_request->bio_list, 0);
+		obj_request->result = 0;
+		obj_request->xferred = obj_request->length;
+	} else if (obj_request->xferred < obj_request->length &&
+			!obj_request->result) {
+		zero_bio_chain(obj_request->bio_list, obj_request->xferred);
+		obj_request->xferred = obj_request->length;
 	}
-	spin_unlock_irq(q->queue_lock);
+	obj_request_done_set(obj_request);
 }
 
-static void rbd_coll_end_req(struct rbd_request *req,
-			     int ret, u64 len)
+static void rbd_osd_write_callback(struct rbd_obj_request *obj_request)
 {
-	rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len);
+	dout("%s: obj %p result %d %llu\n", __func__, obj_request,
+		obj_request->result, obj_request->length);
+	/*
+	 * There is no such thing as a successful short write.
+	 * Our xferred value is the number of bytes transferred
+	 * back.  Set it to our originally-requested length.
+	 */
+	obj_request->xferred = obj_request->length;
+	obj_request_done_set(obj_request);
 }
 
 /*
- * Send ceph osd request
+ * For a simple stat call there's nothing to do.  We'll do more if
+ * this is part of a write sequence for a layered image.
  */
-static int rbd_do_request(struct request *rq,
-			  struct rbd_device *rbd_dev,
-			  struct ceph_snap_context *snapc,
-			  u64 snapid,
-			  const char *object_name, u64 ofs, u64 len,
-			  struct bio *bio,
-			  struct page **pages,
-			  int num_pages,
-			  int flags,
-			  struct ceph_osd_req_op *ops,
-			  struct rbd_req_coll *coll,
-			  int coll_index,
-			  void (*rbd_cb)(struct ceph_osd_request *req,
-					 struct ceph_msg *msg),
-			  struct ceph_osd_request **linger_req,
-			  u64 *ver)
-{
-	struct ceph_osd_request *req;
-	struct ceph_file_layout *layout;
-	int ret;
-	u64 bno;
-	struct timespec mtime = CURRENT_TIME;
-	struct rbd_request *req_data;
-	struct ceph_osd_request_head *reqhead;
-	struct ceph_osd_client *osdc;
+static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request)
+{
+	dout("%s: obj %p\n", __func__, obj_request);
+	obj_request_done_set(obj_request);
+}
 
-	req_data = kzalloc(sizeof(*req_data), GFP_NOIO);
-	if (!req_data) {
-		if (coll)
-			rbd_coll_end_req_index(rq, coll, coll_index,
-					       -ENOMEM, len);
-		return -ENOMEM;
+static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
+				struct ceph_msg *msg)
+{
+	struct rbd_obj_request *obj_request = osd_req->r_priv;
+	u16 opcode;
+
+	dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg);
+	rbd_assert(osd_req == obj_request->osd_req);
+	rbd_assert(!!obj_request->img_request ^
+				(obj_request->which == BAD_WHICH));
+
+	if (osd_req->r_result < 0)
+		obj_request->result = osd_req->r_result;
+	obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version);
+
+	WARN_ON(osd_req->r_num_ops != 1);	/* For now */
+
+	/*
+	 * We support a 64-bit length, but ultimately it has to be
+	 * passed to blk_end_request(), which takes an unsigned int.
+	 */
+	obj_request->xferred = osd_req->r_reply_op_len[0];
+	rbd_assert(obj_request->xferred < (u64) UINT_MAX);
+	opcode = osd_req->r_request_ops[0].op;
+	switch (opcode) {
+	case CEPH_OSD_OP_READ:
+		rbd_osd_read_callback(obj_request);
+		break;
+	case CEPH_OSD_OP_WRITE:
+		rbd_osd_write_callback(obj_request);
+		break;
+	case CEPH_OSD_OP_STAT:
+		rbd_osd_stat_callback(obj_request);
+		break;
+	case CEPH_OSD_OP_CALL:
+	case CEPH_OSD_OP_NOTIFY_ACK:
+	case CEPH_OSD_OP_WATCH:
+		rbd_osd_trivial_callback(obj_request);
+		break;
+	default:
+		rbd_warn(NULL, "%s: unsupported op %hu\n",
+			obj_request->object_name, (unsigned short) opcode);
+		break;
 	}
 
-	if (coll) {
-		req_data->coll = coll;
-		req_data->coll_index = coll_index;
+	if (obj_request_done_test(obj_request))
+		rbd_obj_request_complete(obj_request);
+}
+
+static struct ceph_osd_request *rbd_osd_req_create(
+					struct rbd_device *rbd_dev,
+					bool write_request,
+					struct rbd_obj_request *obj_request,
+					struct ceph_osd_req_op *op)
+{
+	struct rbd_img_request *img_request = obj_request->img_request;
+	struct ceph_snap_context *snapc = NULL;
+	struct ceph_osd_client *osdc;
+	struct ceph_osd_request *osd_req;
+	struct timespec now;
+	struct timespec *mtime;
+	u64 snap_id = CEPH_NOSNAP;
+	u64 offset = obj_request->offset;
+	u64 length = obj_request->length;
+
+	if (img_request) {
+		rbd_assert(img_request->write_request == write_request);
+		if (img_request->write_request)
+			snapc = img_request->snapc;
+		else
+			snap_id = img_request->snap_id;
 	}
 
-	dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n",
-		object_name, (unsigned long long) ofs,
-		(unsigned long long) len, coll, coll_index);
+	/* Allocate and initialize the request, for the single op */
 
 	osdc = &rbd_dev->rbd_client->client->osdc;
-	req = ceph_osdc_alloc_request(osdc, flags, snapc, ops,
-					false, GFP_NOIO, pages, bio);
-	if (!req) {
-		ret = -ENOMEM;
-		goto done_pages;
+	osd_req = ceph_osdc_alloc_request(osdc, snapc, 1, false, GFP_ATOMIC);
+	if (!osd_req)
+		return NULL;	/* ENOMEM */
+
+	rbd_assert(obj_request_type_valid(obj_request->type));
+	switch (obj_request->type) {
+	case OBJ_REQUEST_NODATA:
+		break;		/* Nothing to do */
+	case OBJ_REQUEST_BIO:
+		rbd_assert(obj_request->bio_list != NULL);
+		osd_req->r_bio = obj_request->bio_list;
+		break;
+	case OBJ_REQUEST_PAGES:
+		osd_req->r_pages = obj_request->pages;
+		osd_req->r_num_pages = obj_request->page_count;
+		osd_req->r_page_alignment = offset & ~PAGE_MASK;
+		break;
 	}
 
-	req->r_callback = rbd_cb;
+	if (write_request) {
+		osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
+		now = CURRENT_TIME;
+		mtime = &now;
+	} else {
+		osd_req->r_flags = CEPH_OSD_FLAG_READ;
+		mtime = NULL;	/* not needed for reads */
+		offset = 0;	/* These are not used... */
+		length = 0;	/* ...for osd read requests */
+	}
 
-	req_data->rq = rq;
-	req_data->bio = bio;
-	req_data->pages = pages;
-	req_data->len = len;
+	osd_req->r_callback = rbd_osd_req_callback;
+	osd_req->r_priv = obj_request;
 
-	req->r_priv = req_data;
+	osd_req->r_oid_len = strlen(obj_request->object_name);
+	rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid));
+	memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len);
 
-	reqhead = req->r_request->front.iov_base;
-	reqhead->snapid = cpu_to_le64(CEPH_NOSNAP);
+	osd_req->r_file_layout = rbd_dev->layout;	/* struct */
 
-	strncpy(req->r_oid, object_name, sizeof(req->r_oid));
-	req->r_oid_len = strlen(req->r_oid);
+	/* osd_req will get its own reference to snapc (if non-null) */
 
-	layout = &req->r_file_layout;
-	memset(layout, 0, sizeof(*layout));
-	layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
-	layout->fl_stripe_count = cpu_to_le32(1);
-	layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
-	layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->spec->pool_id);
-	ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
-				   req, ops);
-	rbd_assert(ret == 0);
+	ceph_osdc_build_request(osd_req, offset, length, 1, op,
+				snapc, snap_id, mtime);
 
-	ceph_osdc_build_request(req, ofs, &len,
-				ops,
-				snapc,
-				&mtime,
-				req->r_oid, req->r_oid_len);
+	return osd_req;
+}
 
-	if (linger_req) {
-		ceph_osdc_set_request_linger(osdc, req);
-		*linger_req = req;
-	}
+static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req)
+{
+	ceph_osdc_put_request(osd_req);
+}
 
-	ret = ceph_osdc_start_request(osdc, req, false);
-	if (ret < 0)
-		goto done_err;
-
-	if (!rbd_cb) {
-		ret = ceph_osdc_wait_request(osdc, req);
-		if (ver)
-			*ver = le64_to_cpu(req->r_reassert_version.version);
-		dout("reassert_ver=%llu\n",
-			(unsigned long long)
-				le64_to_cpu(req->r_reassert_version.version));
-		ceph_osdc_put_request(req);
+/* object_name is assumed to be a non-null pointer and NUL-terminated */
+
+static struct rbd_obj_request *rbd_obj_request_create(const char *object_name,
+						u64 offset, u64 length,
+						enum obj_request_type type)
+{
+	struct rbd_obj_request *obj_request;
+	size_t size;
+	char *name;
+
+	rbd_assert(obj_request_type_valid(type));
+
+	size = strlen(object_name) + 1;
+	obj_request = kzalloc(sizeof (*obj_request) + size, GFP_KERNEL);
+	if (!obj_request)
+		return NULL;
+
+	name = (char *)(obj_request + 1);
+	obj_request->object_name = memcpy(name, object_name, size);
+	obj_request->offset = offset;
+	obj_request->length = length;
+	obj_request->which = BAD_WHICH;
+	obj_request->type = type;
+	INIT_LIST_HEAD(&obj_request->links);
+	obj_request_done_init(obj_request);
+	init_completion(&obj_request->completion);
+	kref_init(&obj_request->kref);
+
+	dout("%s: \"%s\" %llu/%llu %d -> obj %p\n", __func__, object_name,
+		offset, length, (int)type, obj_request);
+
+	return obj_request;
+}
+
+static void rbd_obj_request_destroy(struct kref *kref)
+{
+	struct rbd_obj_request *obj_request;
+
+	obj_request = container_of(kref, struct rbd_obj_request, kref);
+
+	dout("%s: obj %p\n", __func__, obj_request);
+
+	rbd_assert(obj_request->img_request == NULL);
+	rbd_assert(obj_request->which == BAD_WHICH);
+
+	if (obj_request->osd_req)
+		rbd_osd_req_destroy(obj_request->osd_req);
+
+	rbd_assert(obj_request_type_valid(obj_request->type));
+	switch (obj_request->type) {
+	case OBJ_REQUEST_NODATA:
+		break;		/* Nothing to do */
+	case OBJ_REQUEST_BIO:
+		if (obj_request->bio_list)
+			bio_chain_put(obj_request->bio_list);
+		break;
+	case OBJ_REQUEST_PAGES:
+		if (obj_request->pages)
+			ceph_release_page_vector(obj_request->pages,
+						obj_request->page_count);
+		break;
 	}
-	return ret;
 
-done_err:
-	bio_chain_put(req_data->bio);
-	ceph_osdc_put_request(req);
-done_pages:
-	rbd_coll_end_req(req_data, ret, len);
-	kfree(req_data);
-	return ret;
+	kfree(obj_request);
 }
 
 /*
- * Ceph osd op callback
+ * Caller is responsible for filling in the list of object requests
+ * that comprises the image request, and the Linux request pointer
+ * (if there is one).
  */
-static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
-{
-	struct rbd_request *req_data = req->r_priv;
-	struct ceph_osd_reply_head *replyhead;
-	struct ceph_osd_op *op;
-	__s32 rc;
-	u64 bytes;
-	int read_op;
-
-	/* parse reply */
-	replyhead = msg->front.iov_base;
-	WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
-	op = (void *)(replyhead + 1);
-	rc = le32_to_cpu(replyhead->result);
-	bytes = le64_to_cpu(op->extent.length);
-	read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ);
-
-	dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n",
-		(unsigned long long) bytes, read_op, (int) rc);
-
-	if (rc == -ENOENT && read_op) {
-		zero_bio_chain(req_data->bio, 0);
-		rc = 0;
-	} else if (rc == 0 && read_op && bytes < req_data->len) {
-		zero_bio_chain(req_data->bio, bytes);
-		bytes = req_data->len;
-	}
+static struct rbd_img_request *rbd_img_request_create(
+					struct rbd_device *rbd_dev,
+					u64 offset, u64 length,
+					bool write_request)
+{
+	struct rbd_img_request *img_request;
+	struct ceph_snap_context *snapc = NULL;
 
-	rbd_coll_end_req(req_data, rc, bytes);
+	img_request = kmalloc(sizeof (*img_request), GFP_ATOMIC);
+	if (!img_request)
+		return NULL;
 
-	if (req_data->bio)
-		bio_chain_put(req_data->bio);
+	if (write_request) {
+		down_read(&rbd_dev->header_rwsem);
+		snapc = ceph_get_snap_context(rbd_dev->header.snapc);
+		up_read(&rbd_dev->header_rwsem);
+		if (WARN_ON(!snapc)) {
+			kfree(img_request);
+			return NULL;	/* Shouldn't happen */
+		}
+	}
 
-	ceph_osdc_put_request(req);
-	kfree(req_data);
+	img_request->rq = NULL;
+	img_request->rbd_dev = rbd_dev;
+	img_request->offset = offset;
+	img_request->length = length;
+	img_request->write_request = write_request;
+	if (write_request)
+		img_request->snapc = snapc;
+	else
+		img_request->snap_id = rbd_dev->spec->snap_id;
+	spin_lock_init(&img_request->completion_lock);
+	img_request->next_completion = 0;
+	img_request->callback = NULL;
+	img_request->obj_request_count = 0;
+	INIT_LIST_HEAD(&img_request->obj_requests);
+	kref_init(&img_request->kref);
+
+	rbd_img_request_get(img_request);	/* Avoid a warning */
+	rbd_img_request_put(img_request);	/* TEMPORARY */
+
+	dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev,
+		write_request ? "write" : "read", offset, length,
+		img_request);
+
+	return img_request;
 }
 
-static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
+static void rbd_img_request_destroy(struct kref *kref)
 {
-	ceph_osdc_put_request(req);
+	struct rbd_img_request *img_request;
+	struct rbd_obj_request *obj_request;
+	struct rbd_obj_request *next_obj_request;
+
+	img_request = container_of(kref, struct rbd_img_request, kref);
+
+	dout("%s: img %p\n", __func__, img_request);
+
+	for_each_obj_request_safe(img_request, obj_request, next_obj_request)
+		rbd_img_obj_request_del(img_request, obj_request);
+	rbd_assert(img_request->obj_request_count == 0);
+
+	if (img_request->write_request)
+		ceph_put_snap_context(img_request->snapc);
+
+	kfree(img_request);
 }
 
-/*
- * Do a synchronous ceph osd operation
- */
-static int rbd_req_sync_op(struct rbd_device *rbd_dev,
-			   struct ceph_snap_context *snapc,
-			   u64 snapid,
-			   int flags,
-			   struct ceph_osd_req_op *ops,
-			   const char *object_name,
-			   u64 ofs, u64 inbound_size,
-			   char *inbound,
-			   struct ceph_osd_request **linger_req,
-			   u64 *ver)
+static int rbd_img_request_fill_bio(struct rbd_img_request *img_request,
+					struct bio *bio_list)
 {
-	int ret;
-	struct page **pages;
-	int num_pages;
-
-	rbd_assert(ops != NULL);
+	struct rbd_device *rbd_dev = img_request->rbd_dev;
+	struct rbd_obj_request *obj_request = NULL;
+	struct rbd_obj_request *next_obj_request;
+	unsigned int bio_offset;
+	u64 image_offset;
+	u64 resid;
+	u16 opcode;
+
+	dout("%s: img %p bio %p\n", __func__, img_request, bio_list);
+
+	opcode = img_request->write_request ? CEPH_OSD_OP_WRITE
+					      : CEPH_OSD_OP_READ;
+	bio_offset = 0;
+	image_offset = img_request->offset;
+	rbd_assert(image_offset == bio_list->bi_sector << SECTOR_SHIFT);
+	resid = img_request->length;
+	rbd_assert(resid > 0);
+	while (resid) {
+		const char *object_name;
+		unsigned int clone_size;
+		struct ceph_osd_req_op *op;
+		u64 offset;
+		u64 length;
+
+		object_name = rbd_segment_name(rbd_dev, image_offset);
+		if (!object_name)
+			goto out_unwind;
+		offset = rbd_segment_offset(rbd_dev, image_offset);
+		length = rbd_segment_length(rbd_dev, image_offset, resid);
+		obj_request = rbd_obj_request_create(object_name,
+						offset, length,
+						OBJ_REQUEST_BIO);
+		kfree(object_name);	/* object request has its own copy */
+		if (!obj_request)
+			goto out_unwind;
+
+		rbd_assert(length <= (u64) UINT_MAX);
+		clone_size = (unsigned int) length;
+		obj_request->bio_list = bio_chain_clone_range(&bio_list,
+						&bio_offset, clone_size,
+						GFP_ATOMIC);
+		if (!obj_request->bio_list)
+			goto out_partial;
 
-	num_pages = calc_pages_for(ofs, inbound_size);
-	pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
-	if (IS_ERR(pages))
-		return PTR_ERR(pages);
+		/*
+		 * Build up the op to use in building the osd
+		 * request.  Note that the contents of the op are
+		 * copied by rbd_osd_req_create().
+		 */
+		op = rbd_osd_req_op_create(opcode, offset, length);
+		if (!op)
+			goto out_partial;
+		obj_request->osd_req = rbd_osd_req_create(rbd_dev,
+						img_request->write_request,
+						obj_request, op);
+		rbd_osd_req_op_destroy(op);
+		if (!obj_request->osd_req)
+			goto out_partial;
+		/* status and version are initially zero-filled */
+
+		rbd_img_obj_request_add(img_request, obj_request);
+
+		image_offset += length;
+		resid -= length;
+	}
 
-	ret = rbd_do_request(NULL, rbd_dev, snapc, snapid,
-			  object_name, ofs, inbound_size, NULL,
-			  pages, num_pages,
-			  flags,
-			  ops,
-			  NULL, 0,
-			  NULL,
-			  linger_req, ver);
-	if (ret < 0)
-		goto done;
+	return 0;
 
-	if ((flags & CEPH_OSD_FLAG_READ) && inbound)
-		ret = ceph_copy_from_page_vector(pages, inbound, ofs, ret);
+out_partial:
+	rbd_obj_request_put(obj_request);
+out_unwind:
+	for_each_obj_request_safe(img_request, obj_request, next_obj_request)
+		rbd_obj_request_put(obj_request);
 
-done:
-	ceph_release_page_vector(pages, num_pages);
-	return ret;
+	return -ENOMEM;
 }
 
-/*
- * Do an asynchronous ceph osd operation
- */
-static int rbd_do_op(struct request *rq,
-		     struct rbd_device *rbd_dev,
-		     struct ceph_snap_context *snapc,
-		     u64 ofs, u64 len,
-		     struct bio *bio,
-		     struct rbd_req_coll *coll,
-		     int coll_index)
-{
-	char *seg_name;
-	u64 seg_ofs;
-	u64 seg_len;
-	int ret;
-	struct ceph_osd_req_op *ops;
-	u32 payload_len;
-	int opcode;
-	int flags;
-	u64 snapid;
-
-	seg_name = rbd_segment_name(rbd_dev, ofs);
-	if (!seg_name)
-		return -ENOMEM;
-	seg_len = rbd_segment_length(rbd_dev, ofs, len);
-	seg_ofs = rbd_segment_offset(rbd_dev, ofs);
-
-	if (rq_data_dir(rq) == WRITE) {
-		opcode = CEPH_OSD_OP_WRITE;
-		flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK;
-		snapid = CEPH_NOSNAP;
-		payload_len = seg_len;
-	} else {
-		opcode = CEPH_OSD_OP_READ;
-		flags = CEPH_OSD_FLAG_READ;
-		snapc = NULL;
-		snapid = rbd_dev->spec->snap_id;
-		payload_len = 0;
+static void rbd_img_obj_callback(struct rbd_obj_request *obj_request)
+{
+	struct rbd_img_request *img_request;
+	u32 which = obj_request->which;
+	bool more = true;
+
+	img_request = obj_request->img_request;
+
+	dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
+	rbd_assert(img_request != NULL);
+	rbd_assert(img_request->rq != NULL);
+	rbd_assert(img_request->obj_request_count > 0);
+	rbd_assert(which != BAD_WHICH);
+	rbd_assert(which < img_request->obj_request_count);
+	rbd_assert(which >= img_request->next_completion);
+
+	spin_lock_irq(&img_request->completion_lock);
+	if (which != img_request->next_completion)
+		goto out;
+
+	for_each_obj_request_from(img_request, obj_request) {
+		unsigned int xferred;
+		int result;
+
+		rbd_assert(more);
+		rbd_assert(which < img_request->obj_request_count);
+
+		if (!obj_request_done_test(obj_request))
+			break;
+
+		rbd_assert(obj_request->xferred <= (u64) UINT_MAX);
+		xferred = (unsigned int) obj_request->xferred;
+		result = (int) obj_request->result;
+		if (result)
+			rbd_warn(NULL, "obj_request %s result %d xferred %u\n",
+				img_request->write_request ? "write" : "read",
+				result, xferred);
+
+		more = blk_end_request(img_request->rq, result, xferred);
+		which++;
 	}
 
-	ret = -ENOMEM;
-	ops = rbd_create_rw_ops(1, opcode, payload_len);
-	if (!ops)
-		goto done;
+	rbd_assert(more ^ (which == img_request->obj_request_count));
+	img_request->next_completion = which;
+out:
+	spin_unlock_irq(&img_request->completion_lock);
 
-	/* we've taken care of segment sizes earlier when we
-	   cloned the bios. We should never have a segment
-	   truncated at this point */
-	rbd_assert(seg_len == len);
-
-	ret = rbd_do_request(rq, rbd_dev, snapc, snapid,
-			     seg_name, seg_ofs, seg_len,
-			     bio,
-			     NULL, 0,
-			     flags,
-			     ops,
-			     coll, coll_index,
-			     rbd_req_cb, 0, NULL);
-
-	rbd_destroy_ops(ops);
-done:
-	kfree(seg_name);
-	return ret;
+	if (!more)
+		rbd_img_request_complete(img_request);
 }
 
-/*
- * Request sync osd read
- */
-static int rbd_req_sync_read(struct rbd_device *rbd_dev,
-			  u64 snapid,
-			  const char *object_name,
-			  u64 ofs, u64 len,
-			  char *buf,
-			  u64 *ver)
-{
-	struct ceph_osd_req_op *ops;
-	int ret;
+static int rbd_img_request_submit(struct rbd_img_request *img_request)
+{
+	struct rbd_device *rbd_dev = img_request->rbd_dev;
+	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+	struct rbd_obj_request *obj_request;
 
-	ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0);
-	if (!ops)
-		return -ENOMEM;
+	dout("%s: img %p\n", __func__, img_request);
+	for_each_obj_request(img_request, obj_request) {
+		int ret;
 
-	ret = rbd_req_sync_op(rbd_dev, NULL,
-			       snapid,
-			       CEPH_OSD_FLAG_READ,
-			       ops, object_name, ofs, len, buf, NULL, ver);
-	rbd_destroy_ops(ops);
+		obj_request->callback = rbd_img_obj_callback;
+		ret = rbd_obj_request_submit(osdc, obj_request);
+		if (ret)
+			return ret;
+		/*
+		 * The image request has its own reference to each
+		 * of its object requests, so we can safely drop the
+		 * initial one here.
+		 */
+		rbd_obj_request_put(obj_request);
+	}
 
-	return ret;
+	return 0;
 }
 
-/*
- * Request sync osd watch
- */
-static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev,
-				   u64 ver,
-				   u64 notify_id)
+static int rbd_obj_notify_ack(struct rbd_device *rbd_dev,
+				   u64 ver, u64 notify_id)
 {
-	struct ceph_osd_req_op *ops;
+	struct rbd_obj_request *obj_request;
+	struct ceph_osd_req_op *op;
+	struct ceph_osd_client *osdc;
 	int ret;
 
-	ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0);
-	if (!ops)
+	obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
+							OBJ_REQUEST_NODATA);
+	if (!obj_request)
 		return -ENOMEM;
 
-	ops[0].watch.ver = cpu_to_le64(ver);
-	ops[0].watch.cookie = notify_id;
-	ops[0].watch.flag = 0;
+	ret = -ENOMEM;
+	op = rbd_osd_req_op_create(CEPH_OSD_OP_NOTIFY_ACK, notify_id, ver);
+	if (!op)
+		goto out;
+	obj_request->osd_req = rbd_osd_req_create(rbd_dev, false,
+						obj_request, op);
+	rbd_osd_req_op_destroy(op);
+	if (!obj_request->osd_req)
+		goto out;
 
-	ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP,
-			  rbd_dev->header_name, 0, 0, NULL,
-			  NULL, 0,
-			  CEPH_OSD_FLAG_READ,
-			  ops,
-			  NULL, 0,
-			  rbd_simple_req_cb, 0, NULL);
+	osdc = &rbd_dev->rbd_client->client->osdc;
+	obj_request->callback = rbd_obj_request_put;
+	ret = rbd_obj_request_submit(osdc, obj_request);
+out:
+	if (ret)
+		rbd_obj_request_put(obj_request);
 
-	rbd_destroy_ops(ops);
 	return ret;
 }
 
@@ -1381,95 +1791,103 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
 	if (!rbd_dev)
 		return;
 
-	dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n",
+	dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__,
 		rbd_dev->header_name, (unsigned long long) notify_id,
 		(unsigned int) opcode);
 	rc = rbd_dev_refresh(rbd_dev, &hver);
 	if (rc)
-		pr_warning(RBD_DRV_NAME "%d got notification but failed to "
-			   " update snaps: %d\n", rbd_dev->major, rc);
+		rbd_warn(rbd_dev, "got notification but failed to "
+			   " update snaps: %d\n", rc);
 
-	rbd_req_sync_notify_ack(rbd_dev, hver, notify_id);
+	rbd_obj_notify_ack(rbd_dev, hver, notify_id);
 }
 
 /*
- * Request sync osd watch
+ * Request sync osd watch/unwatch.  The value of "start" determines
+ * whether a watch request is being initiated or torn down.
  */
-static int rbd_req_sync_watch(struct rbd_device *rbd_dev)
+static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start)
 {
-	struct ceph_osd_req_op *ops;
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+	struct rbd_obj_request *obj_request;
+	struct ceph_osd_req_op *op;
 	int ret;
 
-	ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0);
-	if (!ops)
-		return -ENOMEM;
+	rbd_assert(start ^ !!rbd_dev->watch_event);
+	rbd_assert(start ^ !!rbd_dev->watch_request);
 
-	ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0,
-				     (void *)rbd_dev, &rbd_dev->watch_event);
-	if (ret < 0)
-		goto fail;
+	if (start) {
+		ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev,
+						&rbd_dev->watch_event);
+		if (ret < 0)
+			return ret;
+		rbd_assert(rbd_dev->watch_event != NULL);
+	}
 
-	ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version);
-	ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie);
-	ops[0].watch.flag = 1;
+	ret = -ENOMEM;
+	obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
+							OBJ_REQUEST_NODATA);
+	if (!obj_request)
+		goto out_cancel;
+
+	op = rbd_osd_req_op_create(CEPH_OSD_OP_WATCH,
+				rbd_dev->watch_event->cookie,
+				rbd_dev->header.obj_version, start);
+	if (!op)
+		goto out_cancel;
+	obj_request->osd_req = rbd_osd_req_create(rbd_dev, true,
+							obj_request, op);
+	rbd_osd_req_op_destroy(op);
+	if (!obj_request->osd_req)
+		goto out_cancel;
+
+	if (start)
+		ceph_osdc_set_request_linger(osdc, obj_request->osd_req);
+	else
+		ceph_osdc_unregister_linger_request(osdc,
+					rbd_dev->watch_request->osd_req);
+	ret = rbd_obj_request_submit(osdc, obj_request);
+	if (ret)
+		goto out_cancel;
+	ret = rbd_obj_request_wait(obj_request);
+	if (ret)
+		goto out_cancel;
+	ret = obj_request->result;
+	if (ret)
+		goto out_cancel;
 
-	ret = rbd_req_sync_op(rbd_dev, NULL,
-			      CEPH_NOSNAP,
-			      CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
-			      ops,
-			      rbd_dev->header_name,
-			      0, 0, NULL,
-			      &rbd_dev->watch_request, NULL);
+	/*
+	 * A watch request is set to linger, so the underlying osd
+	 * request won't go away until we unregister it.  We retain
+	 * a pointer to the object request during that time (in
+	 * rbd_dev->watch_request), so we'll keep a reference to
+	 * it.  We'll drop that reference (below) after we've
+	 * unregistered it.
+	 */
+	if (start) {
+		rbd_dev->watch_request = obj_request;
 
-	if (ret < 0)
-		goto fail_event;
+		return 0;
+	}
 
-	rbd_destroy_ops(ops);
-	return 0;
+	/* We have successfully torn down the watch request */
 
-fail_event:
+	rbd_obj_request_put(rbd_dev->watch_request);
+	rbd_dev->watch_request = NULL;
+out_cancel:
+	/* Cancel the event if we're tearing down, or on error */
 	ceph_osdc_cancel_event(rbd_dev->watch_event);
 	rbd_dev->watch_event = NULL;
-fail:
-	rbd_destroy_ops(ops);
-	return ret;
-}
+	if (obj_request)
+		rbd_obj_request_put(obj_request);
 
-/*
- * Request sync osd unwatch
- */
-static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev)
-{
-	struct ceph_osd_req_op *ops;
-	int ret;
-
-	ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0);
-	if (!ops)
-		return -ENOMEM;
-
-	ops[0].watch.ver = 0;
-	ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie);
-	ops[0].watch.flag = 0;
-
-	ret = rbd_req_sync_op(rbd_dev, NULL,
-			      CEPH_NOSNAP,
-			      CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
-			      ops,
-			      rbd_dev->header_name,
-			      0, 0, NULL, NULL, NULL);
-
-
-	rbd_destroy_ops(ops);
-	ceph_osdc_cancel_event(rbd_dev->watch_event);
-	rbd_dev->watch_event = NULL;
 	return ret;
 }
 
 /*
  * Synchronous osd object method call
  */
-static int rbd_req_sync_exec(struct rbd_device *rbd_dev,
+static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
 			     const char *object_name,
 			     const char *class_name,
 			     const char *method_name,
@@ -1477,169 +1895,154 @@ static int rbd_req_sync_exec(struct rbd_device *rbd_dev,
 			     size_t outbound_size,
 			     char *inbound,
 			     size_t inbound_size,
-			     int flags,
-			     u64 *ver)
+			     u64 *version)
 {
-	struct ceph_osd_req_op *ops;
-	int class_name_len = strlen(class_name);
-	int method_name_len = strlen(method_name);
-	int payload_size;
+	struct rbd_obj_request *obj_request;
+	struct ceph_osd_client *osdc;
+	struct ceph_osd_req_op *op;
+	struct page **pages;
+	u32 page_count;
 	int ret;
 
 	/*
-	 * Any input parameters required by the method we're calling
-	 * will be sent along with the class and method names as
-	 * part of the message payload.  That data and its size are
-	 * supplied via the indata and indata_len fields (named from
-	 * the perspective of the server side) in the OSD request
-	 * operation.
+	 * Method calls are ultimately read operations but they
+	 * don't involve object data (so no offset or length).
+	 * The result should placed into the inbound buffer
+	 * provided.  They also supply outbound data--parameters for
+	 * the object method.  Currently if this is present it will
+	 * be a snapshot id.
 	 */
-	payload_size = class_name_len + method_name_len + outbound_size;
-	ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, payload_size);
-	if (!ops)
-		return -ENOMEM;
+	page_count = (u32) calc_pages_for(0, inbound_size);
+	pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
 
-	ops[0].cls.class_name = class_name;
-	ops[0].cls.class_len = (__u8) class_name_len;
-	ops[0].cls.method_name = method_name;
-	ops[0].cls.method_len = (__u8) method_name_len;
-	ops[0].cls.argc = 0;
-	ops[0].cls.indata = outbound;
-	ops[0].cls.indata_len = outbound_size;
+	ret = -ENOMEM;
+	obj_request = rbd_obj_request_create(object_name, 0, 0,
+							OBJ_REQUEST_PAGES);
+	if (!obj_request)
+		goto out;
 
-	ret = rbd_req_sync_op(rbd_dev, NULL,
-			       CEPH_NOSNAP,
-			       flags, ops,
-			       object_name, 0, inbound_size, inbound,
-			       NULL, ver);
+	obj_request->pages = pages;
+	obj_request->page_count = page_count;
 
-	rbd_destroy_ops(ops);
+	op = rbd_osd_req_op_create(CEPH_OSD_OP_CALL, class_name,
+					method_name, outbound, outbound_size);
+	if (!op)
+		goto out;
+	obj_request->osd_req = rbd_osd_req_create(rbd_dev, false,
+						obj_request, op);
+	rbd_osd_req_op_destroy(op);
+	if (!obj_request->osd_req)
+		goto out;
 
-	dout("cls_exec returned %d\n", ret);
-	return ret;
-}
+	osdc = &rbd_dev->rbd_client->client->osdc;
+	ret = rbd_obj_request_submit(osdc, obj_request);
+	if (ret)
+		goto out;
+	ret = rbd_obj_request_wait(obj_request);
+	if (ret)
+		goto out;
 
-static struct rbd_req_coll *rbd_alloc_coll(int num_reqs)
-{
-	struct rbd_req_coll *coll =
-			kzalloc(sizeof(struct rbd_req_coll) +
-			        sizeof(struct rbd_req_status) * num_reqs,
-				GFP_ATOMIC);
+	ret = obj_request->result;
+	if (ret < 0)
+		goto out;
+	ret = 0;
+	ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred);
+	if (version)
+		*version = obj_request->version;
+out:
+	if (obj_request)
+		rbd_obj_request_put(obj_request);
+	else
+		ceph_release_page_vector(pages, page_count);
 
-	if (!coll)
-		return NULL;
-	coll->total = num_reqs;
-	kref_init(&coll->kref);
-	return coll;
+	return ret;
 }
 
-/*
- * block device queue callback
- */
-static void rbd_rq_fn(struct request_queue *q)
+static void rbd_request_fn(struct request_queue *q)
+		__releases(q->queue_lock) __acquires(q->queue_lock)
 {
 	struct rbd_device *rbd_dev = q->queuedata;
+	bool read_only = rbd_dev->mapping.read_only;
 	struct request *rq;
+	int result;
 
 	while ((rq = blk_fetch_request(q))) {
-		struct bio *bio;
-		bool do_write;
-		unsigned int size;
-		u64 ofs;
-		int num_segs, cur_seg = 0;
-		struct rbd_req_coll *coll;
-		struct ceph_snap_context *snapc;
-		unsigned int bio_offset;
-
-		dout("fetched request\n");
-
-		/* filter out block requests we don't understand */
-		if ((rq->cmd_type != REQ_TYPE_FS)) {
-			__blk_end_request_all(rq, 0);
-			continue;
-		}
+		bool write_request = rq_data_dir(rq) == WRITE;
+		struct rbd_img_request *img_request;
+		u64 offset;
+		u64 length;
+
+		/* Ignore any non-FS requests that filter through. */
 
-		/* deduce our operation (read, write) */
-		do_write = (rq_data_dir(rq) == WRITE);
-		if (do_write && rbd_dev->mapping.read_only) {
-			__blk_end_request_all(rq, -EROFS);
+		if (rq->cmd_type != REQ_TYPE_FS) {
+			dout("%s: non-fs request type %d\n", __func__,
+				(int) rq->cmd_type);
+			__blk_end_request_all(rq, 0);
 			continue;
 		}
 
-		spin_unlock_irq(q->queue_lock);
+		/* Ignore/skip any zero-length requests */
 
-		down_read(&rbd_dev->header_rwsem);
+		offset = (u64) blk_rq_pos(rq) << SECTOR_SHIFT;
+		length = (u64) blk_rq_bytes(rq);
 
-		if (!rbd_dev->exists) {
-			rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
-			up_read(&rbd_dev->header_rwsem);
-			dout("request for non-existent snapshot");
-			spin_lock_irq(q->queue_lock);
-			__blk_end_request_all(rq, -ENXIO);
+		if (!length) {
+			dout("%s: zero-length request\n", __func__);
+			__blk_end_request_all(rq, 0);
 			continue;
 		}
 
-		snapc = ceph_get_snap_context(rbd_dev->header.snapc);
-
-		up_read(&rbd_dev->header_rwsem);
-
-		size = blk_rq_bytes(rq);
-		ofs = blk_rq_pos(rq) * SECTOR_SIZE;
-		bio = rq->bio;
+		spin_unlock_irq(q->queue_lock);
 
-		dout("%s 0x%x bytes at 0x%llx\n",
-		     do_write ? "write" : "read",
-		     size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE);
+		/* Disallow writes to a read-only device */
 
-		num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size);
-		if (num_segs <= 0) {
-			spin_lock_irq(q->queue_lock);
-			__blk_end_request_all(rq, num_segs);
-			ceph_put_snap_context(snapc);
-			continue;
+		if (write_request) {
+			result = -EROFS;
+			if (read_only)
+				goto end_request;
+			rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP);
 		}
-		coll = rbd_alloc_coll(num_segs);
-		if (!coll) {
-			spin_lock_irq(q->queue_lock);
-			__blk_end_request_all(rq, -ENOMEM);
-			ceph_put_snap_context(snapc);
-			continue;
-		}
-
-		bio_offset = 0;
-		do {
-			u64 limit = rbd_segment_length(rbd_dev, ofs, size);
-			unsigned int chain_size;
-			struct bio *bio_chain;
-
-			BUG_ON(limit > (u64) UINT_MAX);
-			chain_size = (unsigned int) limit;
-			dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt);
 
-			kref_get(&coll->kref);
+		/*
+		 * Quit early if the mapped snapshot no longer
+		 * exists.  It's still possible the snapshot will
+		 * have disappeared by the time our request arrives
+		 * at the osd, but there's no sense in sending it if
+		 * we already know.
+		 */
+		if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) {
+			dout("request for non-existent snapshot");
+			rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
+			result = -ENXIO;
+			goto end_request;
+		}
 
-			/* Pass a cloned bio chain via an osd request */
+		result = -EINVAL;
+		if (WARN_ON(offset && length > U64_MAX - offset + 1))
+			goto end_request;	/* Shouldn't happen */
 
-			bio_chain = bio_chain_clone_range(&bio,
-						&bio_offset, chain_size,
-						GFP_ATOMIC);
-			if (bio_chain)
-				(void) rbd_do_op(rq, rbd_dev, snapc,
-						ofs, chain_size,
-						bio_chain, coll, cur_seg);
-			else
-				rbd_coll_end_req_index(rq, coll, cur_seg,
-						       -ENOMEM, chain_size);
-			size -= chain_size;
-			ofs += chain_size;
+		result = -ENOMEM;
+		img_request = rbd_img_request_create(rbd_dev, offset, length,
+							write_request);
+		if (!img_request)
+			goto end_request;
 
-			cur_seg++;
-		} while (size > 0);
-		kref_put(&coll->kref, rbd_coll_release);
+		img_request->rq = rq;
 
+		result = rbd_img_request_fill_bio(img_request, rq->bio);
+		if (!result)
+			result = rbd_img_request_submit(img_request);
+		if (result)
+			rbd_img_request_put(img_request);
+end_request:
 		spin_lock_irq(q->queue_lock);
-
-		ceph_put_snap_context(snapc);
+		if (result < 0) {
+			rbd_warn(rbd_dev, "obj_request %s result %d\n",
+				write_request ? "write" : "read", result);
+			__blk_end_request_all(rq, result);
+		}
 	}
 }
 
@@ -1703,6 +2106,71 @@ static void rbd_free_disk(struct rbd_device *rbd_dev)
 	put_disk(disk);
 }
 
+static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
+				const char *object_name,
+				u64 offset, u64 length,
+				char *buf, u64 *version)
+
+{
+	struct ceph_osd_req_op *op;
+	struct rbd_obj_request *obj_request;
+	struct ceph_osd_client *osdc;
+	struct page **pages = NULL;
+	u32 page_count;
+	size_t size;
+	int ret;
+
+	page_count = (u32) calc_pages_for(offset, length);
+	pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+	if (IS_ERR(pages))
+		ret = PTR_ERR(pages);
+
+	ret = -ENOMEM;
+	obj_request = rbd_obj_request_create(object_name, offset, length,
+							OBJ_REQUEST_PAGES);
+	if (!obj_request)
+		goto out;
+
+	obj_request->pages = pages;
+	obj_request->page_count = page_count;
+
+	op = rbd_osd_req_op_create(CEPH_OSD_OP_READ, offset, length);
+	if (!op)
+		goto out;
+	obj_request->osd_req = rbd_osd_req_create(rbd_dev, false,
+						obj_request, op);
+	rbd_osd_req_op_destroy(op);
+	if (!obj_request->osd_req)
+		goto out;
+
+	osdc = &rbd_dev->rbd_client->client->osdc;
+	ret = rbd_obj_request_submit(osdc, obj_request);
+	if (ret)
+		goto out;
+	ret = rbd_obj_request_wait(obj_request);
+	if (ret)
+		goto out;
+
+	ret = obj_request->result;
+	if (ret < 0)
+		goto out;
+
+	rbd_assert(obj_request->xferred <= (u64) SIZE_MAX);
+	size = (size_t) obj_request->xferred;
+	ceph_copy_from_page_vector(pages, buf, 0, size);
+	rbd_assert(size <= (size_t) INT_MAX);
+	ret = (int) size;
+	if (version)
+		*version = obj_request->version;
+out:
+	if (obj_request)
+		rbd_obj_request_put(obj_request);
+	else
+		ceph_release_page_vector(pages, page_count);
+
+	return ret;
+}
+
 /*
  * Read the complete header for the given rbd device.
  *
@@ -1741,24 +2209,20 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version)
 		if (!ondisk)
 			return ERR_PTR(-ENOMEM);
 
-		ret = rbd_req_sync_read(rbd_dev, CEPH_NOSNAP,
-				       rbd_dev->header_name,
+		ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name,
 				       0, size,
 				       (char *) ondisk, version);
-
 		if (ret < 0)
 			goto out_err;
 		if (WARN_ON((size_t) ret < size)) {
 			ret = -ENXIO;
-			pr_warning("short header read for image %s"
-					" (want %zd got %d)\n",
-				rbd_dev->spec->image_name, size, ret);
+			rbd_warn(rbd_dev, "short header read (want %zd got %d)",
+				size, ret);
 			goto out_err;
 		}
 		if (!rbd_dev_ondisk_valid(ondisk)) {
 			ret = -ENXIO;
-			pr_warning("invalid header for image %s\n",
-				rbd_dev->spec->image_name);
+			rbd_warn(rbd_dev, "invalid header");
 			goto out_err;
 		}
 
@@ -1895,8 +2359,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 	disk->fops = &rbd_bd_ops;
 	disk->private_data = rbd_dev;
 
-	/* init rq */
-	q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock);
+	q = blk_init_queue(rbd_request_fn, &rbd_dev->lock);
 	if (!q)
 		goto out_disk;
 
@@ -2233,7 +2696,7 @@ static void rbd_spec_free(struct kref *kref)
 	kfree(spec);
 }
 
-struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
+static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
 				struct rbd_spec *spec)
 {
 	struct rbd_device *rbd_dev;
@@ -2243,6 +2706,7 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
 		return NULL;
 
 	spin_lock_init(&rbd_dev->lock);
+	rbd_dev->flags = 0;
 	INIT_LIST_HEAD(&rbd_dev->node);
 	INIT_LIST_HEAD(&rbd_dev->snaps);
 	init_rwsem(&rbd_dev->header_rwsem);
@@ -2250,6 +2714,13 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
 	rbd_dev->spec = spec;
 	rbd_dev->rbd_client = rbdc;
 
+	/* Initialize the layout used for all rbd requests */
+
+	rbd_dev->layout.fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
+	rbd_dev->layout.fl_stripe_count = cpu_to_le32(1);
+	rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
+	rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id);
+
 	return rbd_dev;
 }
 
@@ -2360,12 +2831,11 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
 		__le64 size;
 	} __attribute__ ((packed)) size_buf = { 0 };
 
-	ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+	ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
 				"rbd", "get_size",
 				(char *) &snapid, sizeof (snapid),
-				(char *) &size_buf, sizeof (size_buf),
-				CEPH_OSD_FLAG_READ, NULL);
-	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+				(char *) &size_buf, sizeof (size_buf), NULL);
+	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
 	if (ret < 0)
 		return ret;
 
@@ -2396,15 +2866,13 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
 	if (!reply_buf)
 		return -ENOMEM;
 
-	ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+	ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
 				"rbd", "get_object_prefix",
 				NULL, 0,
-				reply_buf, RBD_OBJ_PREFIX_LEN_MAX,
-				CEPH_OSD_FLAG_READ, NULL);
-	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+				reply_buf, RBD_OBJ_PREFIX_LEN_MAX, NULL);
+	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
 	if (ret < 0)
 		goto out;
-	ret = 0;    /* rbd_req_sync_exec() can return positive */
 
 	p = reply_buf;
 	rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
@@ -2435,12 +2903,12 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
 	u64 incompat;
 	int ret;
 
-	ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+	ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
 				"rbd", "get_features",
 				(char *) &snapid, sizeof (snapid),
 				(char *) &features_buf, sizeof (features_buf),
-				CEPH_OSD_FLAG_READ, NULL);
-	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+				NULL);
+	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
 	if (ret < 0)
 		return ret;
 
@@ -2474,7 +2942,6 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 	void *end;
 	char *image_id;
 	u64 overlap;
-	size_t len = 0;
 	int ret;
 
 	parent_spec = rbd_spec_alloc();
@@ -2492,12 +2959,11 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 	}
 
 	snapid = cpu_to_le64(CEPH_NOSNAP);
-	ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+	ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
 				"rbd", "get_parent",
 				(char *) &snapid, sizeof (snapid),
-				(char *) reply_buf, size,
-				CEPH_OSD_FLAG_READ, NULL);
-	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+				(char *) reply_buf, size, NULL);
+	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
 	if (ret < 0)
 		goto out_err;
 
@@ -2508,13 +2974,18 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 	if (parent_spec->pool_id == CEPH_NOPOOL)
 		goto out;	/* No parent?  No problem. */
 
-	image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL);
+	/* The ceph file layout needs to fit pool id in 32 bits */
+
+	ret = -EIO;
+	if (WARN_ON(parent_spec->pool_id > (u64) U32_MAX))
+		goto out;
+
+	image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
 	if (IS_ERR(image_id)) {
 		ret = PTR_ERR(image_id);
 		goto out_err;
 	}
 	parent_spec->image_id = image_id;
-	parent_spec->image_id_len = len;
 	ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err);
 	ceph_decode_64_safe(&p, end, overlap, out_err);
 
@@ -2544,26 +3015,25 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev)
 
 	rbd_assert(!rbd_dev->spec->image_name);
 
-	image_id_size = sizeof (__le32) + rbd_dev->spec->image_id_len;
+	len = strlen(rbd_dev->spec->image_id);
+	image_id_size = sizeof (__le32) + len;
 	image_id = kmalloc(image_id_size, GFP_KERNEL);
 	if (!image_id)
 		return NULL;
 
 	p = image_id;
 	end = (char *) image_id + image_id_size;
-	ceph_encode_string(&p, end, rbd_dev->spec->image_id,
-				(u32) rbd_dev->spec->image_id_len);
+	ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32) len);
 
 	size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX;
 	reply_buf = kmalloc(size, GFP_KERNEL);
 	if (!reply_buf)
 		goto out;
 
-	ret = rbd_req_sync_exec(rbd_dev, RBD_DIRECTORY,
+	ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY,
 				"rbd", "dir_get_name",
 				image_id, image_id_size,
-				(char *) reply_buf, size,
-				CEPH_OSD_FLAG_READ, NULL);
+				(char *) reply_buf, size, NULL);
 	if (ret < 0)
 		goto out;
 	p = reply_buf;
@@ -2602,8 +3072,11 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev)
 
 	osdc = &rbd_dev->rbd_client->client->osdc;
 	name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id);
-	if (!name)
-		return -EIO;	/* pool id too large (>= 2^31) */
+	if (!name) {
+		rbd_warn(rbd_dev, "there is no pool with id %llu",
+			rbd_dev->spec->pool_id);	/* Really a BUG() */
+		return -EIO;
+	}
 
 	rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL);
 	if (!rbd_dev->spec->pool_name)
@@ -2612,19 +3085,17 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev)
 	/* Fetch the image name; tolerate failure here */
 
 	name = rbd_dev_image_name(rbd_dev);
-	if (name) {
-		rbd_dev->spec->image_name_len = strlen(name);
+	if (name)
 		rbd_dev->spec->image_name = (char *) name;
-	} else {
-		pr_warning(RBD_DRV_NAME "%d "
-			"unable to get image name for image id %s\n",
-			rbd_dev->major, rbd_dev->spec->image_id);
-	}
+	else
+		rbd_warn(rbd_dev, "unable to get image name");
 
 	/* Look up the snapshot name. */
 
 	name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id);
 	if (!name) {
+		rbd_warn(rbd_dev, "no snapshot with id %llu",
+			rbd_dev->spec->snap_id);	/* Really a BUG() */
 		ret = -EIO;
 		goto out_err;
 	}
@@ -2665,12 +3136,11 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver)
 	if (!reply_buf)
 		return -ENOMEM;
 
-	ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+	ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
 				"rbd", "get_snapcontext",
 				NULL, 0,
-				reply_buf, size,
-				CEPH_OSD_FLAG_READ, ver);
-	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+				reply_buf, size, ver);
+	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
 	if (ret < 0)
 		goto out;
 
@@ -2735,12 +3205,11 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which)
 		return ERR_PTR(-ENOMEM);
 
 	snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]);
-	ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+	ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
 				"rbd", "get_snapshot_name",
 				(char *) &snap_id, sizeof (snap_id),
-				reply_buf, size,
-				CEPH_OSD_FLAG_READ, NULL);
-	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+				reply_buf, size, NULL);
+	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
 	if (ret < 0)
 		goto out;
 
@@ -2766,7 +3235,7 @@ out:
 static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which,
 		u64 *snap_size, u64 *snap_features)
 {
-	__le64 snap_id;
+	u64 snap_id;
 	u8 order;
 	int ret;
 
@@ -2865,10 +3334,17 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev)
 		if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) {
 			struct list_head *next = links->next;
 
-			/* Existing snapshot not in the new snap context */
-
+			/*
+			 * A previously-existing snapshot is not in
+			 * the new snap context.
+			 *
+			 * If the now missing snapshot is the one the
+			 * image is mapped to, clear its exists flag
+			 * so we can avoid sending any more requests
+			 * to it.
+			 */
 			if (rbd_dev->spec->snap_id == snap->id)
-				rbd_dev->exists = false;
+				clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
 			rbd_remove_snap_dev(snap);
 			dout("%ssnap id %llu has been removed\n",
 				rbd_dev->spec->snap_id == snap->id ?
@@ -2942,7 +3418,7 @@ static int rbd_dev_snaps_register(struct rbd_device *rbd_dev)
 	struct rbd_snap *snap;
 	int ret = 0;
 
-	dout("%s called\n", __func__);
+	dout("%s:\n", __func__);
 	if (WARN_ON(!device_is_registered(&rbd_dev->dev)))
 		return -EIO;
 
@@ -2983,22 +3459,6 @@ static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
 	device_unregister(&rbd_dev->dev);
 }
 
-static int rbd_init_watch_dev(struct rbd_device *rbd_dev)
-{
-	int ret, rc;
-
-	do {
-		ret = rbd_req_sync_watch(rbd_dev);
-		if (ret == -ERANGE) {
-			rc = rbd_dev_refresh(rbd_dev, NULL);
-			if (rc < 0)
-				return rc;
-		}
-	} while (ret == -ERANGE);
-
-	return ret;
-}
-
 static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0);
 
 /*
@@ -3138,11 +3598,9 @@ static inline char *dup_token(const char **buf, size_t *lenp)
 	size_t len;
 
 	len = next_token(buf);
-	dup = kmalloc(len + 1, GFP_KERNEL);
+	dup = kmemdup(*buf, len + 1, GFP_KERNEL);
 	if (!dup)
 		return NULL;
-
-	memcpy(dup, *buf, len);
 	*(dup + len) = '\0';
 	*buf += len;
 
@@ -3210,8 +3668,10 @@ static int rbd_add_parse_args(const char *buf,
 	/* The first four tokens are required */
 
 	len = next_token(&buf);
-	if (!len)
-		return -EINVAL;	/* Missing monitor address(es) */
+	if (!len) {
+		rbd_warn(NULL, "no monitor address(es) provided");
+		return -EINVAL;
+	}
 	mon_addrs = buf;
 	mon_addrs_size = len + 1;
 	buf += len;
@@ -3220,8 +3680,10 @@ static int rbd_add_parse_args(const char *buf,
 	options = dup_token(&buf, NULL);
 	if (!options)
 		return -ENOMEM;
-	if (!*options)
-		goto out_err;	/* Missing options */
+	if (!*options) {
+		rbd_warn(NULL, "no options provided");
+		goto out_err;
+	}
 
 	spec = rbd_spec_alloc();
 	if (!spec)
@@ -3230,14 +3692,18 @@ static int rbd_add_parse_args(const char *buf,
 	spec->pool_name = dup_token(&buf, NULL);
 	if (!spec->pool_name)
 		goto out_mem;
-	if (!*spec->pool_name)
-		goto out_err;	/* Missing pool name */
+	if (!*spec->pool_name) {
+		rbd_warn(NULL, "no pool name provided");
+		goto out_err;
+	}
 
-	spec->image_name = dup_token(&buf, &spec->image_name_len);
+	spec->image_name = dup_token(&buf, NULL);
 	if (!spec->image_name)
 		goto out_mem;
-	if (!*spec->image_name)
-		goto out_err;	/* Missing image name */
+	if (!*spec->image_name) {
+		rbd_warn(NULL, "no image name provided");
+		goto out_err;
+	}
 
 	/*
 	 * Snapshot name is optional; default is to use "-"
@@ -3251,10 +3717,9 @@ static int rbd_add_parse_args(const char *buf,
 		ret = -ENAMETOOLONG;
 		goto out_err;
 	}
-	spec->snap_name = kmalloc(len + 1, GFP_KERNEL);
+	spec->snap_name = kmemdup(buf, len + 1, GFP_KERNEL);
 	if (!spec->snap_name)
 		goto out_mem;
-	memcpy(spec->snap_name, buf, len);
 	*(spec->snap_name + len) = '\0';
 
 	/* Initialize all rbd options to the defaults */
@@ -3323,7 +3788,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
 	 * First, see if the format 2 image id file exists, and if
 	 * so, get the image's persistent id from it.
 	 */
-	size = sizeof (RBD_ID_PREFIX) + rbd_dev->spec->image_name_len;
+	size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name);
 	object_name = kmalloc(size, GFP_NOIO);
 	if (!object_name)
 		return -ENOMEM;
@@ -3339,21 +3804,18 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
 		goto out;
 	}
 
-	ret = rbd_req_sync_exec(rbd_dev, object_name,
+	ret = rbd_obj_method_sync(rbd_dev, object_name,
 				"rbd", "get_id",
 				NULL, 0,
-				response, RBD_IMAGE_ID_LEN_MAX,
-				CEPH_OSD_FLAG_READ, NULL);
-	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+				response, RBD_IMAGE_ID_LEN_MAX, NULL);
+	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
 	if (ret < 0)
 		goto out;
-	ret = 0;    /* rbd_req_sync_exec() can return positive */
 
 	p = response;
 	rbd_dev->spec->image_id = ceph_extract_encoded_string(&p,
 						p + RBD_IMAGE_ID_LEN_MAX,
-						&rbd_dev->spec->image_id_len,
-						GFP_NOIO);
+						NULL, GFP_NOIO);
 	if (IS_ERR(rbd_dev->spec->image_id)) {
 		ret = PTR_ERR(rbd_dev->spec->image_id);
 		rbd_dev->spec->image_id = NULL;
@@ -3377,11 +3839,10 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev)
 	rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL);
 	if (!rbd_dev->spec->image_id)
 		return -ENOMEM;
-	rbd_dev->spec->image_id_len = 0;
 
 	/* Record the header object name for this rbd image. */
 
-	size = rbd_dev->spec->image_name_len + sizeof (RBD_SUFFIX);
+	size = strlen(rbd_dev->spec->image_name) + sizeof (RBD_SUFFIX);
 	rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
 	if (!rbd_dev->header_name) {
 		ret = -ENOMEM;
@@ -3427,7 +3888,7 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev)
 	 * Image id was filled in by the caller.  Record the header
 	 * object name for this rbd image.
 	 */
-	size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->spec->image_id_len;
+	size = sizeof (RBD_HEADER_PREFIX) + strlen(rbd_dev->spec->image_id);
 	rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
 	if (!rbd_dev->header_name)
 		return -ENOMEM;
@@ -3542,7 +4003,7 @@ static int rbd_dev_probe_finish(struct rbd_device *rbd_dev)
 	if (ret)
 		goto err_out_bus;
 
-	ret = rbd_init_watch_dev(rbd_dev);
+	ret = rbd_dev_header_watch_sync(rbd_dev, 1);
 	if (ret)
 		goto err_out_bus;
 
@@ -3638,6 +4099,13 @@ static ssize_t rbd_add(struct bus_type *bus,
 		goto err_out_client;
 	spec->pool_id = (u64) rc;
 
+	/* The ceph file layout needs to fit pool id in 32 bits */
+
+	if (WARN_ON(spec->pool_id > (u64) U32_MAX)) {
+		rc = -EIO;
+		goto err_out_client;
+	}
+
 	rbd_dev = rbd_dev_create(rbdc, spec);
 	if (!rbd_dev)
 		goto err_out_client;
@@ -3691,15 +4159,8 @@ static void rbd_dev_release(struct device *dev)
 {
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
 
-	if (rbd_dev->watch_request) {
-		struct ceph_client *client = rbd_dev->rbd_client->client;
-
-		ceph_osdc_unregister_linger_request(&client->osdc,
-						    rbd_dev->watch_request);
-	}
 	if (rbd_dev->watch_event)
-		rbd_req_sync_unwatch(rbd_dev);
-
+		rbd_dev_header_watch_sync(rbd_dev, 0);
 
 	/* clean up and free blkdev */
 	rbd_free_disk(rbd_dev);
@@ -3743,10 +4204,14 @@ static ssize_t rbd_remove(struct bus_type *bus,
 		goto done;
 	}
 
-	if (rbd_dev->open_count) {
+	spin_lock_irq(&rbd_dev->lock);
+	if (rbd_dev->open_count)
 		ret = -EBUSY;
+	else
+		set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags);
+	spin_unlock_irq(&rbd_dev->lock);
+	if (ret < 0)
 		goto done;
-	}
 
 	rbd_remove_all_snaps(rbd_dev);
 	rbd_bus_del_dev(rbd_dev);
@@ -3782,10 +4247,15 @@ static void rbd_sysfs_cleanup(void)
 	device_unregister(&rbd_root_dev);
 }
 
-int __init rbd_init(void)
+static int __init rbd_init(void)
 {
 	int rc;
 
+	if (!libceph_compatible(NULL)) {
+		rbd_warn(NULL, "libceph incompatibility (quitting)");
+
+		return -EINVAL;
+	}
 	rc = rbd_sysfs_init();
 	if (rc)
 		return rc;
@@ -3793,7 +4263,7 @@ int __init rbd_init(void)
 	return 0;
 }
 
-void __exit rbd_exit(void)
+static void __exit rbd_exit(void)
 {
 	rbd_sysfs_cleanup();
 }
diff --git a/drivers/block/rsxx/Makefile b/drivers/block/rsxx/Makefile
new file mode 100644
index 000000000000..f35cd0b71f7b
--- /dev/null
+++ b/drivers/block/rsxx/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_BLK_DEV_RSXX) += rsxx.o
+rsxx-y := config.o core.o cregs.o dev.o dma.o
diff --git a/drivers/block/rsxx/config.c b/drivers/block/rsxx/config.c
new file mode 100644
index 000000000000..a295e7e9ee41
--- /dev/null
+++ b/drivers/block/rsxx/config.c
@@ -0,0 +1,213 @@
+/*
+* Filename: config.c
+*
+*
+* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
+*	Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+*
+* (C) Copyright 2013 IBM Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of the
+* License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <linux/types.h>
+#include <linux/crc32.h>
+#include <linux/swab.h>
+
+#include "rsxx_priv.h"
+#include "rsxx_cfg.h"
+
+static void initialize_config(void *config)
+{
+	struct rsxx_card_cfg *cfg = config;
+
+	cfg->hdr.version = RSXX_CFG_VERSION;
+
+	cfg->data.block_size        = RSXX_HW_BLK_SIZE;
+	cfg->data.stripe_size       = RSXX_HW_BLK_SIZE;
+	cfg->data.vendor_id         = RSXX_VENDOR_ID_TMS_IBM;
+	cfg->data.cache_order       = (-1);
+	cfg->data.intr_coal.mode    = RSXX_INTR_COAL_DISABLED;
+	cfg->data.intr_coal.count   = 0;
+	cfg->data.intr_coal.latency = 0;
+}
+
+static u32 config_data_crc32(struct rsxx_card_cfg *cfg)
+{
+	/*
+	 * Return the compliment of the CRC to ensure compatibility
+	 * (i.e. this is how early rsxx drivers did it.)
+	 */
+
+	return ~crc32(~0, &cfg->data, sizeof(cfg->data));
+}
+
+
+/*----------------- Config Byte Swap Functions -------------------*/
+static void config_hdr_be_to_cpu(struct card_cfg_hdr *hdr)
+{
+	hdr->version = be32_to_cpu((__force __be32) hdr->version);
+	hdr->crc     = be32_to_cpu((__force __be32) hdr->crc);
+}
+
+static void config_hdr_cpu_to_be(struct card_cfg_hdr *hdr)
+{
+	hdr->version = (__force u32) cpu_to_be32(hdr->version);
+	hdr->crc     = (__force u32) cpu_to_be32(hdr->crc);
+}
+
+static void config_data_swab(struct rsxx_card_cfg *cfg)
+{
+	u32 *data = (u32 *) &cfg->data;
+	int i;
+
+	for (i = 0; i < (sizeof(cfg->data) / 4); i++)
+		data[i] = swab32(data[i]);
+}
+
+static void config_data_le_to_cpu(struct rsxx_card_cfg *cfg)
+{
+	u32 *data = (u32 *) &cfg->data;
+	int i;
+
+	for (i = 0; i < (sizeof(cfg->data) / 4); i++)
+		data[i] = le32_to_cpu((__force __le32) data[i]);
+}
+
+static void config_data_cpu_to_le(struct rsxx_card_cfg *cfg)
+{
+	u32 *data = (u32 *) &cfg->data;
+	int i;
+
+	for (i = 0; i < (sizeof(cfg->data) / 4); i++)
+		data[i] = (__force u32) cpu_to_le32(data[i]);
+}
+
+
+/*----------------- Config Operations ------------------*/
+static int rsxx_save_config(struct rsxx_cardinfo *card)
+{
+	struct rsxx_card_cfg cfg;
+	int st;
+
+	memcpy(&cfg, &card->config, sizeof(cfg));
+
+	if (unlikely(cfg.hdr.version != RSXX_CFG_VERSION)) {
+		dev_err(CARD_TO_DEV(card),
+			"Cannot save config with invalid version %d\n",
+			cfg.hdr.version);
+		return -EINVAL;
+	}
+
+	/* Convert data to little endian for the CRC calculation. */
+	config_data_cpu_to_le(&cfg);
+
+	cfg.hdr.crc = config_data_crc32(&cfg);
+
+	/*
+	 * Swap the data from little endian to big endian so it can be
+	 * stored.
+	 */
+	config_data_swab(&cfg);
+	config_hdr_cpu_to_be(&cfg.hdr);
+
+	st = rsxx_creg_write(card, CREG_ADD_CONFIG, sizeof(cfg), &cfg, 1);
+	if (st)
+		return st;
+
+	return 0;
+}
+
+int rsxx_load_config(struct rsxx_cardinfo *card)
+{
+	int st;
+	u32 crc;
+
+	st = rsxx_creg_read(card, CREG_ADD_CONFIG, sizeof(card->config),
+				&card->config, 1);
+	if (st) {
+		dev_err(CARD_TO_DEV(card),
+			"Failed reading card config.\n");
+		return st;
+	}
+
+	config_hdr_be_to_cpu(&card->config.hdr);
+
+	if (card->config.hdr.version == RSXX_CFG_VERSION) {
+		/*
+		 * We calculate the CRC with the data in little endian, because
+		 * early drivers did not take big endian CPUs into account.
+		 * The data is always stored in big endian, so we need to byte
+		 * swap it before calculating the CRC.
+		 */
+
+		config_data_swab(&card->config);
+
+		/* Check the CRC */
+		crc = config_data_crc32(&card->config);
+		if (crc != card->config.hdr.crc) {
+			dev_err(CARD_TO_DEV(card),
+				"Config corruption detected!\n");
+			dev_info(CARD_TO_DEV(card),
+				"CRC (sb x%08x is x%08x)\n",
+				card->config.hdr.crc, crc);
+			return -EIO;
+		}
+
+		/* Convert the data to CPU byteorder */
+		config_data_le_to_cpu(&card->config);
+
+	} else if (card->config.hdr.version != 0) {
+		dev_err(CARD_TO_DEV(card),
+			"Invalid config version %d.\n",
+			card->config.hdr.version);
+		/*
+		 * Config version changes require special handling from the
+		 * user
+		 */
+		return -EINVAL;
+	} else {
+		dev_info(CARD_TO_DEV(card),
+			"Initializing card configuration.\n");
+		initialize_config(card);
+		st = rsxx_save_config(card);
+		if (st)
+			return st;
+	}
+
+	card->config_valid = 1;
+
+	dev_dbg(CARD_TO_DEV(card), "version:     x%08x\n",
+		card->config.hdr.version);
+	dev_dbg(CARD_TO_DEV(card), "crc:         x%08x\n",
+		card->config.hdr.crc);
+	dev_dbg(CARD_TO_DEV(card), "block_size:  x%08x\n",
+		card->config.data.block_size);
+	dev_dbg(CARD_TO_DEV(card), "stripe_size: x%08x\n",
+		card->config.data.stripe_size);
+	dev_dbg(CARD_TO_DEV(card), "vendor_id:   x%08x\n",
+		card->config.data.vendor_id);
+	dev_dbg(CARD_TO_DEV(card), "cache_order: x%08x\n",
+		card->config.data.cache_order);
+	dev_dbg(CARD_TO_DEV(card), "mode:        x%08x\n",
+		card->config.data.intr_coal.mode);
+	dev_dbg(CARD_TO_DEV(card), "count:       x%08x\n",
+		card->config.data.intr_coal.count);
+	dev_dbg(CARD_TO_DEV(card), "latency:     x%08x\n",
+		 card->config.data.intr_coal.latency);
+
+	return 0;
+}
+
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
new file mode 100644
index 000000000000..e5162487686a
--- /dev/null
+++ b/drivers/block/rsxx/core.c
@@ -0,0 +1,649 @@
+/*
+* Filename: core.c
+*
+*
+* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
+*	Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+*
+* (C) Copyright 2013 IBM Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of the
+* License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+
+#include <linux/genhd.h>
+#include <linux/idr.h>
+
+#include "rsxx_priv.h"
+#include "rsxx_cfg.h"
+
+#define NO_LEGACY 0
+
+MODULE_DESCRIPTION("IBM RamSan PCIe Flash SSD Device Driver");
+MODULE_AUTHOR("IBM <support@ramsan.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRIVER_VERSION);
+
+static unsigned int force_legacy = NO_LEGACY;
+module_param(force_legacy, uint, 0444);
+MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts");
+
+static DEFINE_IDA(rsxx_disk_ida);
+static DEFINE_SPINLOCK(rsxx_ida_lock);
+
+/*----------------- Interrupt Control & Handling -------------------*/
+static void __enable_intr(unsigned int *mask, unsigned int intr)
+{
+	*mask |= intr;
+}
+
+static void __disable_intr(unsigned int *mask, unsigned int intr)
+{
+	*mask &= ~intr;
+}
+
+/*
+ * NOTE: Disabling the IER will disable the hardware interrupt.
+ * Disabling the ISR will disable the software handling of the ISR bit.
+ *
+ * Enable/Disable interrupt functions assume the card->irq_lock
+ * is held by the caller.
+ */
+void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr)
+{
+	if (unlikely(card->halt))
+		return;
+
+	__enable_intr(&card->ier_mask, intr);
+	iowrite32(card->ier_mask, card->regmap + IER);
+}
+
+void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr)
+{
+	__disable_intr(&card->ier_mask, intr);
+	iowrite32(card->ier_mask, card->regmap + IER);
+}
+
+void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card,
+				 unsigned int intr)
+{
+	if (unlikely(card->halt))
+		return;
+
+	__enable_intr(&card->isr_mask, intr);
+	__enable_intr(&card->ier_mask, intr);
+	iowrite32(card->ier_mask, card->regmap + IER);
+}
+void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card,
+				  unsigned int intr)
+{
+	__disable_intr(&card->isr_mask, intr);
+	__disable_intr(&card->ier_mask, intr);
+	iowrite32(card->ier_mask, card->regmap + IER);
+}
+
+static irqreturn_t rsxx_isr(int irq, void *pdata)
+{
+	struct rsxx_cardinfo *card = pdata;
+	unsigned int isr;
+	int handled = 0;
+	int reread_isr;
+	int i;
+
+	spin_lock(&card->irq_lock);
+
+	do {
+		reread_isr = 0;
+
+		isr = ioread32(card->regmap + ISR);
+		if (isr == 0xffffffff) {
+			/*
+			 * A few systems seem to have an intermittent issue
+			 * where PCI reads return all Fs, but retrying the read
+			 * a little later will return as expected.
+			 */
+			dev_info(CARD_TO_DEV(card),
+				"ISR = 0xFFFFFFFF, retrying later\n");
+			break;
+		}
+
+		isr &= card->isr_mask;
+		if (!isr)
+			break;
+
+		for (i = 0; i < card->n_targets; i++) {
+			if (isr & CR_INTR_DMA(i)) {
+				if (card->ier_mask & CR_INTR_DMA(i)) {
+					rsxx_disable_ier(card, CR_INTR_DMA(i));
+					reread_isr = 1;
+				}
+				queue_work(card->ctrl[i].done_wq,
+					   &card->ctrl[i].dma_done_work);
+				handled++;
+			}
+		}
+
+		if (isr & CR_INTR_CREG) {
+			schedule_work(&card->creg_ctrl.done_work);
+			handled++;
+		}
+
+		if (isr & CR_INTR_EVENT) {
+			schedule_work(&card->event_work);
+			rsxx_disable_ier_and_isr(card, CR_INTR_EVENT);
+			handled++;
+		}
+	} while (reread_isr);
+
+	spin_unlock(&card->irq_lock);
+
+	return handled ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/*----------------- Card Event Handler -------------------*/
+static char *rsxx_card_state_to_str(unsigned int state)
+{
+	static char *state_strings[] = {
+		"Unknown", "Shutdown", "Starting", "Formatting",
+		"Uninitialized", "Good", "Shutting Down",
+		"Fault", "Read Only Fault", "dStroying"
+	};
+
+	return state_strings[ffs(state)];
+}
+
+static void card_state_change(struct rsxx_cardinfo *card,
+			      unsigned int new_state)
+{
+	int st;
+
+	dev_info(CARD_TO_DEV(card),
+		"card state change detected.(%s -> %s)\n",
+		rsxx_card_state_to_str(card->state),
+		rsxx_card_state_to_str(new_state));
+
+	card->state = new_state;
+
+	/* Don't attach DMA interfaces if the card has an invalid config */
+	if (!card->config_valid)
+		return;
+
+	switch (new_state) {
+	case CARD_STATE_RD_ONLY_FAULT:
+		dev_crit(CARD_TO_DEV(card),
+			"Hardware has entered read-only mode!\n");
+		/*
+		 * Fall through so the DMA devices can be attached and
+		 * the user can attempt to pull off their data.
+		 */
+	case CARD_STATE_GOOD:
+		st = rsxx_get_card_size8(card, &card->size8);
+		if (st)
+			dev_err(CARD_TO_DEV(card),
+				"Failed attaching DMA devices\n");
+
+		if (card->config_valid)
+			set_capacity(card->gendisk, card->size8 >> 9);
+		break;
+
+	case CARD_STATE_FAULT:
+		dev_crit(CARD_TO_DEV(card),
+			"Hardware Fault reported!\n");
+		/* Fall through. */
+
+	/* Everything else, detach DMA interface if it's attached. */
+	case CARD_STATE_SHUTDOWN:
+	case CARD_STATE_STARTING:
+	case CARD_STATE_FORMATTING:
+	case CARD_STATE_UNINITIALIZED:
+	case CARD_STATE_SHUTTING_DOWN:
+	/*
+	 * dStroy is a term coined by marketing to represent the low level
+	 * secure erase.
+	 */
+	case CARD_STATE_DSTROYING:
+		set_capacity(card->gendisk, 0);
+		break;
+	}
+}
+
+static void card_event_handler(struct work_struct *work)
+{
+	struct rsxx_cardinfo *card;
+	unsigned int state;
+	unsigned long flags;
+	int st;
+
+	card = container_of(work, struct rsxx_cardinfo, event_work);
+
+	if (unlikely(card->halt))
+		return;
+
+	/*
+	 * Enable the interrupt now to avoid any weird race conditions where a
+	 * state change might occur while rsxx_get_card_state() is
+	 * processing a returned creg cmd.
+	 */
+	spin_lock_irqsave(&card->irq_lock, flags);
+	rsxx_enable_ier_and_isr(card, CR_INTR_EVENT);
+	spin_unlock_irqrestore(&card->irq_lock, flags);
+
+	st = rsxx_get_card_state(card, &state);
+	if (st) {
+		dev_info(CARD_TO_DEV(card),
+			"Failed reading state after event.\n");
+		return;
+	}
+
+	if (card->state != state)
+		card_state_change(card, state);
+
+	if (card->creg_ctrl.creg_stats.stat & CREG_STAT_LOG_PENDING)
+		rsxx_read_hw_log(card);
+}
+
+/*----------------- Card Operations -------------------*/
+static int card_shutdown(struct rsxx_cardinfo *card)
+{
+	unsigned int state;
+	signed long start;
+	const int timeout = msecs_to_jiffies(120000);
+	int st;
+
+	/* We can't issue a shutdown if the card is in a transition state */
+	start = jiffies;
+	do {
+		st = rsxx_get_card_state(card, &state);
+		if (st)
+			return st;
+	} while (state == CARD_STATE_STARTING &&
+		 (jiffies - start < timeout));
+
+	if (state == CARD_STATE_STARTING)
+		return -ETIMEDOUT;
+
+	/* Only issue a shutdown if we need to */
+	if ((state != CARD_STATE_SHUTTING_DOWN) &&
+	    (state != CARD_STATE_SHUTDOWN)) {
+		st = rsxx_issue_card_cmd(card, CARD_CMD_SHUTDOWN);
+		if (st)
+			return st;
+	}
+
+	start = jiffies;
+	do {
+		st = rsxx_get_card_state(card, &state);
+		if (st)
+			return st;
+	} while (state != CARD_STATE_SHUTDOWN &&
+		 (jiffies - start < timeout));
+
+	if (state != CARD_STATE_SHUTDOWN)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/*----------------- Driver Initialization & Setup -------------------*/
+/* Returns:   0 if the driver is compatible with the device
+	     -1 if the driver is NOT compatible with the device */
+static int rsxx_compatibility_check(struct rsxx_cardinfo *card)
+{
+	unsigned char pci_rev;
+
+	pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev);
+
+	if (pci_rev > RS70_PCI_REV_SUPPORTED)
+		return -1;
+	return 0;
+}
+
+static int rsxx_pci_probe(struct pci_dev *dev,
+					const struct pci_device_id *id)
+{
+	struct rsxx_cardinfo *card;
+	int st;
+
+	dev_info(&dev->dev, "PCI-Flash SSD discovered\n");
+
+	card = kzalloc(sizeof(*card), GFP_KERNEL);
+	if (!card)
+		return -ENOMEM;
+
+	card->dev = dev;
+	pci_set_drvdata(dev, card);
+
+	do {
+		if (!ida_pre_get(&rsxx_disk_ida, GFP_KERNEL)) {
+			st = -ENOMEM;
+			goto failed_ida_get;
+		}
+
+		spin_lock(&rsxx_ida_lock);
+		st = ida_get_new(&rsxx_disk_ida, &card->disk_id);
+		spin_unlock(&rsxx_ida_lock);
+	} while (st == -EAGAIN);
+
+	if (st)
+		goto failed_ida_get;
+
+	st = pci_enable_device(dev);
+	if (st)
+		goto failed_enable;
+
+	pci_set_master(dev);
+	pci_set_dma_max_seg_size(dev, RSXX_HW_BLK_SIZE);
+
+	st = pci_set_dma_mask(dev, DMA_BIT_MASK(64));
+	if (st) {
+		dev_err(CARD_TO_DEV(card),
+			"No usable DMA configuration,aborting\n");
+		goto failed_dma_mask;
+	}
+
+	st = pci_request_regions(dev, DRIVER_NAME);
+	if (st) {
+		dev_err(CARD_TO_DEV(card),
+			"Failed to request memory region\n");
+		goto failed_request_regions;
+	}
+
+	if (pci_resource_len(dev, 0) == 0) {
+		dev_err(CARD_TO_DEV(card), "BAR0 has length 0!\n");
+		st = -ENOMEM;
+		goto failed_iomap;
+	}
+
+	card->regmap = pci_iomap(dev, 0, 0);
+	if (!card->regmap) {
+		dev_err(CARD_TO_DEV(card), "Failed to map BAR0\n");
+		st = -ENOMEM;
+		goto failed_iomap;
+	}
+
+	spin_lock_init(&card->irq_lock);
+	card->halt = 0;
+
+	spin_lock_irq(&card->irq_lock);
+	rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
+	spin_unlock_irq(&card->irq_lock);
+
+	if (!force_legacy) {
+		st = pci_enable_msi(dev);
+		if (st)
+			dev_warn(CARD_TO_DEV(card),
+				"Failed to enable MSI\n");
+	}
+
+	st = request_irq(dev->irq, rsxx_isr, IRQF_DISABLED | IRQF_SHARED,
+			 DRIVER_NAME, card);
+	if (st) {
+		dev_err(CARD_TO_DEV(card),
+			"Failed requesting IRQ%d\n", dev->irq);
+		goto failed_irq;
+	}
+
+	/************* Setup Processor Command Interface *************/
+	rsxx_creg_setup(card);
+
+	spin_lock_irq(&card->irq_lock);
+	rsxx_enable_ier_and_isr(card, CR_INTR_CREG);
+	spin_unlock_irq(&card->irq_lock);
+
+	st = rsxx_compatibility_check(card);
+	if (st) {
+		dev_warn(CARD_TO_DEV(card),
+			"Incompatible driver detected. Please update the driver.\n");
+		st = -EINVAL;
+		goto failed_compatiblity_check;
+	}
+
+	/************* Load Card Config *************/
+	st = rsxx_load_config(card);
+	if (st)
+		dev_err(CARD_TO_DEV(card),
+			"Failed loading card config\n");
+
+	/************* Setup DMA Engine *************/
+	st = rsxx_get_num_targets(card, &card->n_targets);
+	if (st)
+		dev_info(CARD_TO_DEV(card),
+			"Failed reading the number of DMA targets\n");
+
+	card->ctrl = kzalloc(card->n_targets * sizeof(*card->ctrl), GFP_KERNEL);
+	if (!card->ctrl) {
+		st = -ENOMEM;
+		goto failed_dma_setup;
+	}
+
+	st = rsxx_dma_setup(card);
+	if (st) {
+		dev_info(CARD_TO_DEV(card),
+			"Failed to setup DMA engine\n");
+		goto failed_dma_setup;
+	}
+
+	/************* Setup Card Event Handler *************/
+	INIT_WORK(&card->event_work, card_event_handler);
+
+	st = rsxx_setup_dev(card);
+	if (st)
+		goto failed_create_dev;
+
+	rsxx_get_card_state(card, &card->state);
+
+	dev_info(CARD_TO_DEV(card),
+		"card state: %s\n",
+		rsxx_card_state_to_str(card->state));
+
+	/*
+	 * Now that the DMA Engine and devices have been setup,
+	 * we can enable the event interrupt(it kicks off actions in
+	 * those layers so we couldn't enable it right away.)
+	 */
+	spin_lock_irq(&card->irq_lock);
+	rsxx_enable_ier_and_isr(card, CR_INTR_EVENT);
+	spin_unlock_irq(&card->irq_lock);
+
+	if (card->state == CARD_STATE_SHUTDOWN) {
+		st = rsxx_issue_card_cmd(card, CARD_CMD_STARTUP);
+		if (st)
+			dev_crit(CARD_TO_DEV(card),
+				"Failed issuing card startup\n");
+	} else if (card->state == CARD_STATE_GOOD ||
+		   card->state == CARD_STATE_RD_ONLY_FAULT) {
+		st = rsxx_get_card_size8(card, &card->size8);
+		if (st)
+			card->size8 = 0;
+	}
+
+	rsxx_attach_dev(card);
+
+	return 0;
+
+failed_create_dev:
+	rsxx_dma_destroy(card);
+failed_dma_setup:
+failed_compatiblity_check:
+	spin_lock_irq(&card->irq_lock);
+	rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
+	spin_unlock_irq(&card->irq_lock);
+	free_irq(dev->irq, card);
+	if (!force_legacy)
+		pci_disable_msi(dev);
+failed_irq:
+	pci_iounmap(dev, card->regmap);
+failed_iomap:
+	pci_release_regions(dev);
+failed_request_regions:
+failed_dma_mask:
+	pci_disable_device(dev);
+failed_enable:
+	spin_lock(&rsxx_ida_lock);
+	ida_remove(&rsxx_disk_ida, card->disk_id);
+	spin_unlock(&rsxx_ida_lock);
+failed_ida_get:
+	kfree(card);
+
+	return st;
+}
+
+static void rsxx_pci_remove(struct pci_dev *dev)
+{
+	struct rsxx_cardinfo *card = pci_get_drvdata(dev);
+	unsigned long flags;
+	int st;
+	int i;
+
+	if (!card)
+		return;
+
+	dev_info(CARD_TO_DEV(card),
+		"Removing PCI-Flash SSD.\n");
+
+	rsxx_detach_dev(card);
+
+	for (i = 0; i < card->n_targets; i++) {
+		spin_lock_irqsave(&card->irq_lock, flags);
+		rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i));
+		spin_unlock_irqrestore(&card->irq_lock, flags);
+	}
+
+	st = card_shutdown(card);
+	if (st)
+		dev_crit(CARD_TO_DEV(card), "Shutdown failed!\n");
+
+	/* Sync outstanding event handlers. */
+	spin_lock_irqsave(&card->irq_lock, flags);
+	rsxx_disable_ier_and_isr(card, CR_INTR_EVENT);
+	spin_unlock_irqrestore(&card->irq_lock, flags);
+
+	/* Prevent work_structs from re-queuing themselves. */
+	card->halt = 1;
+
+	cancel_work_sync(&card->event_work);
+
+	rsxx_destroy_dev(card);
+	rsxx_dma_destroy(card);
+
+	spin_lock_irqsave(&card->irq_lock, flags);
+	rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
+	spin_unlock_irqrestore(&card->irq_lock, flags);
+	free_irq(dev->irq, card);
+
+	if (!force_legacy)
+		pci_disable_msi(dev);
+
+	rsxx_creg_destroy(card);
+
+	pci_iounmap(dev, card->regmap);
+
+	pci_disable_device(dev);
+	pci_release_regions(dev);
+
+	kfree(card);
+}
+
+static int rsxx_pci_suspend(struct pci_dev *dev, pm_message_t state)
+{
+	/* We don't support suspend at this time. */
+	return -ENOSYS;
+}
+
+static void rsxx_pci_shutdown(struct pci_dev *dev)
+{
+	struct rsxx_cardinfo *card = pci_get_drvdata(dev);
+	unsigned long flags;
+	int i;
+
+	if (!card)
+		return;
+
+	dev_info(CARD_TO_DEV(card), "Shutting down PCI-Flash SSD.\n");
+
+	rsxx_detach_dev(card);
+
+	for (i = 0; i < card->n_targets; i++) {
+		spin_lock_irqsave(&card->irq_lock, flags);
+		rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i));
+		spin_unlock_irqrestore(&card->irq_lock, flags);
+	}
+
+	card_shutdown(card);
+}
+
+static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = {
+	{PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70_FLASH)},
+	{PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70D_FLASH)},
+	{PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS80_FLASH)},
+	{PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS81_FLASH)},
+	{0,},
+};
+
+MODULE_DEVICE_TABLE(pci, rsxx_pci_ids);
+
+static struct pci_driver rsxx_pci_driver = {
+	.name		= DRIVER_NAME,
+	.id_table	= rsxx_pci_ids,
+	.probe		= rsxx_pci_probe,
+	.remove		= rsxx_pci_remove,
+	.suspend	= rsxx_pci_suspend,
+	.shutdown	= rsxx_pci_shutdown,
+};
+
+static int __init rsxx_core_init(void)
+{
+	int st;
+
+	st = rsxx_dev_init();
+	if (st)
+		return st;
+
+	st = rsxx_dma_init();
+	if (st)
+		goto dma_init_failed;
+
+	st = rsxx_creg_init();
+	if (st)
+		goto creg_init_failed;
+
+	return pci_register_driver(&rsxx_pci_driver);
+
+creg_init_failed:
+	rsxx_dma_cleanup();
+dma_init_failed:
+	rsxx_dev_cleanup();
+
+	return st;
+}
+
+static void __exit rsxx_core_cleanup(void)
+{
+	pci_unregister_driver(&rsxx_pci_driver);
+	rsxx_creg_cleanup();
+	rsxx_dma_cleanup();
+	rsxx_dev_cleanup();
+}
+
+module_init(rsxx_core_init);
+module_exit(rsxx_core_cleanup);
diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c
new file mode 100644
index 000000000000..80bbe639fccd
--- /dev/null
+++ b/drivers/block/rsxx/cregs.c
@@ -0,0 +1,758 @@
+/*
+* Filename: cregs.c
+*
+*
+* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
+*	Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+*
+* (C) Copyright 2013 IBM Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of the
+* License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <linux/completion.h>
+#include <linux/slab.h>
+
+#include "rsxx_priv.h"
+
+#define CREG_TIMEOUT_MSEC	10000
+
+typedef void (*creg_cmd_cb)(struct rsxx_cardinfo *card,
+			    struct creg_cmd *cmd,
+			    int st);
+
+struct creg_cmd {
+	struct list_head list;
+	creg_cmd_cb cb;
+	void *cb_private;
+	unsigned int op;
+	unsigned int addr;
+	int cnt8;
+	void *buf;
+	unsigned int stream;
+	unsigned int status;
+};
+
+static struct kmem_cache *creg_cmd_pool;
+
+
+/*------------ Private Functions --------------*/
+
+#if defined(__LITTLE_ENDIAN)
+#define LITTLE_ENDIAN 1
+#elif defined(__BIG_ENDIAN)
+#define LITTLE_ENDIAN 0
+#else
+#error Unknown endianess!!! Aborting...
+#endif
+
+static void copy_to_creg_data(struct rsxx_cardinfo *card,
+			      int cnt8,
+			      void *buf,
+			      unsigned int stream)
+{
+	int i = 0;
+	u32 *data = buf;
+
+	for (i = 0; cnt8 > 0; i++, cnt8 -= 4) {
+		/*
+		 * Firmware implementation makes it necessary to byte swap on
+		 * little endian processors.
+		 */
+		if (LITTLE_ENDIAN && stream)
+			iowrite32be(data[i], card->regmap + CREG_DATA(i));
+		else
+			iowrite32(data[i], card->regmap + CREG_DATA(i));
+	}
+}
+
+
+static void copy_from_creg_data(struct rsxx_cardinfo *card,
+				int cnt8,
+				void *buf,
+				unsigned int stream)
+{
+	int i = 0;
+	u32 *data = buf;
+
+	for (i = 0; cnt8 > 0; i++, cnt8 -= 4) {
+		/*
+		 * Firmware implementation makes it necessary to byte swap on
+		 * little endian processors.
+		 */
+		if (LITTLE_ENDIAN && stream)
+			data[i] = ioread32be(card->regmap + CREG_DATA(i));
+		else
+			data[i] = ioread32(card->regmap + CREG_DATA(i));
+	}
+}
+
+static struct creg_cmd *pop_active_cmd(struct rsxx_cardinfo *card)
+{
+	struct creg_cmd *cmd;
+
+	/*
+	 * Spin lock is needed because this can be called in atomic/interrupt
+	 * context.
+	 */
+	spin_lock_bh(&card->creg_ctrl.lock);
+	cmd = card->creg_ctrl.active_cmd;
+	card->creg_ctrl.active_cmd = NULL;
+	spin_unlock_bh(&card->creg_ctrl.lock);
+
+	return cmd;
+}
+
+static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd)
+{
+	iowrite32(cmd->addr, card->regmap + CREG_ADD);
+	iowrite32(cmd->cnt8, card->regmap + CREG_CNT);
+
+	if (cmd->op == CREG_OP_WRITE) {
+		if (cmd->buf)
+			copy_to_creg_data(card, cmd->cnt8,
+					  cmd->buf, cmd->stream);
+	}
+
+	/*
+	 * Data copy must complete before initiating the command. This is
+	 * needed for weakly ordered processors (i.e. PowerPC), so that all
+	 * neccessary registers are written before we kick the hardware.
+	 */
+	wmb();
+
+	/* Setting the valid bit will kick off the command. */
+	iowrite32(cmd->op, card->regmap + CREG_CMD);
+}
+
+static void creg_kick_queue(struct rsxx_cardinfo *card)
+{
+	if (card->creg_ctrl.active || list_empty(&card->creg_ctrl.queue))
+		return;
+
+	card->creg_ctrl.active = 1;
+	card->creg_ctrl.active_cmd = list_first_entry(&card->creg_ctrl.queue,
+						      struct creg_cmd, list);
+	list_del(&card->creg_ctrl.active_cmd->list);
+	card->creg_ctrl.q_depth--;
+
+	/*
+	 * We have to set the timer before we push the new command. Otherwise,
+	 * we could create a race condition that would occur if the timer
+	 * was not canceled, and expired after the new command was pushed,
+	 * but before the command was issued to hardware.
+	 */
+	mod_timer(&card->creg_ctrl.cmd_timer,
+				jiffies + msecs_to_jiffies(CREG_TIMEOUT_MSEC));
+
+	creg_issue_cmd(card, card->creg_ctrl.active_cmd);
+}
+
+static int creg_queue_cmd(struct rsxx_cardinfo *card,
+			  unsigned int op,
+			  unsigned int addr,
+			  unsigned int cnt8,
+			  void *buf,
+			  int stream,
+			  creg_cmd_cb callback,
+			  void *cb_private)
+{
+	struct creg_cmd *cmd;
+
+	/* Don't queue stuff up if we're halted. */
+	if (unlikely(card->halt))
+		return -EINVAL;
+
+	if (card->creg_ctrl.reset)
+		return -EAGAIN;
+
+	if (cnt8 > MAX_CREG_DATA8)
+		return -EINVAL;
+
+	cmd = kmem_cache_alloc(creg_cmd_pool, GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&cmd->list);
+
+	cmd->op		= op;
+	cmd->addr	= addr;
+	cmd->cnt8	= cnt8;
+	cmd->buf	= buf;
+	cmd->stream	= stream;
+	cmd->cb		= callback;
+	cmd->cb_private = cb_private;
+	cmd->status	= 0;
+
+	spin_lock(&card->creg_ctrl.lock);
+	list_add_tail(&cmd->list, &card->creg_ctrl.queue);
+	card->creg_ctrl.q_depth++;
+	creg_kick_queue(card);
+	spin_unlock(&card->creg_ctrl.lock);
+
+	return 0;
+}
+
+static void creg_cmd_timed_out(unsigned long data)
+{
+	struct rsxx_cardinfo *card = (struct rsxx_cardinfo *) data;
+	struct creg_cmd *cmd;
+
+	cmd = pop_active_cmd(card);
+	if (cmd == NULL) {
+		card->creg_ctrl.creg_stats.creg_timeout++;
+		dev_warn(CARD_TO_DEV(card),
+			"No active command associated with timeout!\n");
+		return;
+	}
+
+	if (cmd->cb)
+		cmd->cb(card, cmd, -ETIMEDOUT);
+
+	kmem_cache_free(creg_cmd_pool, cmd);
+
+
+	spin_lock(&card->creg_ctrl.lock);
+	card->creg_ctrl.active = 0;
+	creg_kick_queue(card);
+	spin_unlock(&card->creg_ctrl.lock);
+}
+
+
+static void creg_cmd_done(struct work_struct *work)
+{
+	struct rsxx_cardinfo *card;
+	struct creg_cmd *cmd;
+	int st = 0;
+
+	card = container_of(work, struct rsxx_cardinfo,
+			    creg_ctrl.done_work);
+
+	/*
+	 * The timer could not be cancelled for some reason,
+	 * race to pop the active command.
+	 */
+	if (del_timer_sync(&card->creg_ctrl.cmd_timer) == 0)
+		card->creg_ctrl.creg_stats.failed_cancel_timer++;
+
+	cmd = pop_active_cmd(card);
+	if (cmd == NULL) {
+		dev_err(CARD_TO_DEV(card),
+			"Spurious creg interrupt!\n");
+		return;
+	}
+
+	card->creg_ctrl.creg_stats.stat = ioread32(card->regmap + CREG_STAT);
+	cmd->status = card->creg_ctrl.creg_stats.stat;
+	if ((cmd->status & CREG_STAT_STATUS_MASK) == 0) {
+		dev_err(CARD_TO_DEV(card),
+			"Invalid status on creg command\n");
+		/*
+		 * At this point we're probably reading garbage from HW. Don't
+		 * do anything else that could mess up the system and let
+		 * the sync function return an error.
+		 */
+		st = -EIO;
+		goto creg_done;
+	} else if (cmd->status & CREG_STAT_ERROR) {
+		st = -EIO;
+	}
+
+	if ((cmd->op == CREG_OP_READ)) {
+		unsigned int cnt8 = ioread32(card->regmap + CREG_CNT);
+
+		/* Paranoid Sanity Checks */
+		if (!cmd->buf) {
+			dev_err(CARD_TO_DEV(card),
+				"Buffer not given for read.\n");
+			st = -EIO;
+			goto creg_done;
+		}
+		if (cnt8 != cmd->cnt8) {
+			dev_err(CARD_TO_DEV(card),
+				"count mismatch\n");
+			st = -EIO;
+			goto creg_done;
+		}
+
+		copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream);
+	}
+
+creg_done:
+	if (cmd->cb)
+		cmd->cb(card, cmd, st);
+
+	kmem_cache_free(creg_cmd_pool, cmd);
+
+	spin_lock(&card->creg_ctrl.lock);
+	card->creg_ctrl.active = 0;
+	creg_kick_queue(card);
+	spin_unlock(&card->creg_ctrl.lock);
+}
+
+static void creg_reset(struct rsxx_cardinfo *card)
+{
+	struct creg_cmd *cmd = NULL;
+	struct creg_cmd *tmp;
+	unsigned long flags;
+
+	/*
+	 * mutex_trylock is used here because if reset_lock is taken then a
+	 * reset is already happening. So, we can just go ahead and return.
+	 */
+	if (!mutex_trylock(&card->creg_ctrl.reset_lock))
+		return;
+
+	card->creg_ctrl.reset = 1;
+	spin_lock_irqsave(&card->irq_lock, flags);
+	rsxx_disable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT);
+	spin_unlock_irqrestore(&card->irq_lock, flags);
+
+	dev_warn(CARD_TO_DEV(card),
+		"Resetting creg interface for recovery\n");
+
+	/* Cancel outstanding commands */
+	spin_lock(&card->creg_ctrl.lock);
+	list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) {
+		list_del(&cmd->list);
+		card->creg_ctrl.q_depth--;
+		if (cmd->cb)
+			cmd->cb(card, cmd, -ECANCELED);
+		kmem_cache_free(creg_cmd_pool, cmd);
+	}
+
+	cmd = card->creg_ctrl.active_cmd;
+	card->creg_ctrl.active_cmd = NULL;
+	if (cmd) {
+		if (timer_pending(&card->creg_ctrl.cmd_timer))
+			del_timer_sync(&card->creg_ctrl.cmd_timer);
+
+		if (cmd->cb)
+			cmd->cb(card, cmd, -ECANCELED);
+		kmem_cache_free(creg_cmd_pool, cmd);
+
+		card->creg_ctrl.active = 0;
+	}
+	spin_unlock(&card->creg_ctrl.lock);
+
+	card->creg_ctrl.reset = 0;
+	spin_lock_irqsave(&card->irq_lock, flags);
+	rsxx_enable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT);
+	spin_unlock_irqrestore(&card->irq_lock, flags);
+
+	mutex_unlock(&card->creg_ctrl.reset_lock);
+}
+
+/* Used for synchronous accesses */
+struct creg_completion {
+	struct completion	*cmd_done;
+	int			st;
+	u32			creg_status;
+};
+
+static void creg_cmd_done_cb(struct rsxx_cardinfo *card,
+			     struct creg_cmd *cmd,
+			     int st)
+{
+	struct creg_completion *cmd_completion;
+
+	cmd_completion = cmd->cb_private;
+	BUG_ON(!cmd_completion);
+
+	cmd_completion->st = st;
+	cmd_completion->creg_status = cmd->status;
+	complete(cmd_completion->cmd_done);
+}
+
+static int __issue_creg_rw(struct rsxx_cardinfo *card,
+			   unsigned int op,
+			   unsigned int addr,
+			   unsigned int cnt8,
+			   void *buf,
+			   int stream,
+			   unsigned int *hw_stat)
+{
+	DECLARE_COMPLETION_ONSTACK(cmd_done);
+	struct creg_completion completion;
+	unsigned long timeout;
+	int st;
+
+	completion.cmd_done = &cmd_done;
+	completion.st = 0;
+	completion.creg_status = 0;
+
+	st = creg_queue_cmd(card, op, addr, cnt8, buf, stream, creg_cmd_done_cb,
+			    &completion);
+	if (st)
+		return st;
+
+	/*
+	 * This timeout is neccessary for unresponsive hardware. The additional
+	 * 20 seconds to used to guarantee that each cregs requests has time to
+	 * complete.
+	 */
+	timeout = msecs_to_jiffies((CREG_TIMEOUT_MSEC *
+				card->creg_ctrl.q_depth) + 20000);
+
+	/*
+	 * The creg interface is guaranteed to complete. It has a timeout
+	 * mechanism that will kick in if hardware does not respond.
+	 */
+	st = wait_for_completion_timeout(completion.cmd_done, timeout);
+	if (st == 0) {
+		/*
+		 * This is really bad, because the kernel timer did not
+		 * expire and notify us of a timeout!
+		 */
+		dev_crit(CARD_TO_DEV(card),
+			"cregs timer failed\n");
+		creg_reset(card);
+		return -EIO;
+	}
+
+	*hw_stat = completion.creg_status;
+
+	if (completion.st) {
+		dev_warn(CARD_TO_DEV(card),
+			"creg command failed(%d x%08x)\n",
+			completion.st, addr);
+		return completion.st;
+	}
+
+	return 0;
+}
+
+static int issue_creg_rw(struct rsxx_cardinfo *card,
+			 u32 addr,
+			 unsigned int size8,
+			 void *data,
+			 int stream,
+			 int read)
+{
+	unsigned int hw_stat;
+	unsigned int xfer;
+	unsigned int op;
+	int st;
+
+	op = read ? CREG_OP_READ : CREG_OP_WRITE;
+
+	do {
+		xfer = min_t(unsigned int, size8, MAX_CREG_DATA8);
+
+		st = __issue_creg_rw(card, op, addr, xfer,
+				     data, stream, &hw_stat);
+		if (st)
+			return st;
+
+		data   = (char *)data + xfer;
+		addr  += xfer;
+		size8 -= xfer;
+	} while (size8);
+
+	return 0;
+}
+
+/* ---------------------------- Public API ---------------------------------- */
+int rsxx_creg_write(struct rsxx_cardinfo *card,
+			u32 addr,
+			unsigned int size8,
+			void *data,
+			int byte_stream)
+{
+	return issue_creg_rw(card, addr, size8, data, byte_stream, 0);
+}
+
+int rsxx_creg_read(struct rsxx_cardinfo *card,
+		       u32 addr,
+		       unsigned int size8,
+		       void *data,
+		       int byte_stream)
+{
+	return issue_creg_rw(card, addr, size8, data, byte_stream, 1);
+}
+
+int rsxx_get_card_state(struct rsxx_cardinfo *card, unsigned int *state)
+{
+	return rsxx_creg_read(card, CREG_ADD_CARD_STATE,
+				  sizeof(*state), state, 0);
+}
+
+int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8)
+{
+	unsigned int size;
+	int st;
+
+	st = rsxx_creg_read(card, CREG_ADD_CARD_SIZE,
+				sizeof(size), &size, 0);
+	if (st)
+		return st;
+
+	*size8 = (u64)size * RSXX_HW_BLK_SIZE;
+	return 0;
+}
+
+int rsxx_get_num_targets(struct rsxx_cardinfo *card,
+			     unsigned int *n_targets)
+{
+	return rsxx_creg_read(card, CREG_ADD_NUM_TARGETS,
+				  sizeof(*n_targets), n_targets, 0);
+}
+
+int rsxx_get_card_capabilities(struct rsxx_cardinfo *card,
+				   u32 *capabilities)
+{
+	return rsxx_creg_read(card, CREG_ADD_CAPABILITIES,
+				  sizeof(*capabilities), capabilities, 0);
+}
+
+int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd)
+{
+	return rsxx_creg_write(card, CREG_ADD_CARD_CMD,
+				   sizeof(cmd), &cmd, 0);
+}
+
+
+/*----------------- HW Log Functions -------------------*/
+static void hw_log_msg(struct rsxx_cardinfo *card, const char *str, int len)
+{
+	static char level;
+
+	/*
+	 * New messages start with "<#>", where # is the log level. Messages
+	 * that extend past the log buffer will use the previous level
+	 */
+	if ((len > 3) && (str[0] == '<') && (str[2] == '>')) {
+		level = str[1];
+		str += 3; /* Skip past the log level. */
+		len -= 3;
+	}
+
+	switch (level) {
+	case '0':
+		dev_emerg(CARD_TO_DEV(card), "HW: %.*s", len, str);
+		break;
+	case '1':
+		dev_alert(CARD_TO_DEV(card), "HW: %.*s", len, str);
+		break;
+	case '2':
+		dev_crit(CARD_TO_DEV(card), "HW: %.*s", len, str);
+		break;
+	case '3':
+		dev_err(CARD_TO_DEV(card), "HW: %.*s", len, str);
+		break;
+	case '4':
+		dev_warn(CARD_TO_DEV(card), "HW: %.*s", len, str);
+		break;
+	case '5':
+		dev_notice(CARD_TO_DEV(card), "HW: %.*s", len, str);
+		break;
+	case '6':
+		dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str);
+		break;
+	case '7':
+		dev_dbg(CARD_TO_DEV(card), "HW: %.*s", len, str);
+		break;
+	default:
+		dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str);
+		break;
+	}
+}
+
+/*
+ * The substrncpy function copies the src string (which includes the
+ * terminating '\0' character), up to the count into the dest pointer.
+ * Returns the number of bytes copied to dest.
+ */
+static int substrncpy(char *dest, const char *src, int count)
+{
+	int max_cnt = count;
+
+	while (count) {
+		count--;
+		*dest = *src;
+		if (*dest == '\0')
+			break;
+		src++;
+		dest++;
+	}
+	return max_cnt - count;
+}
+
+
+static void read_hw_log_done(struct rsxx_cardinfo *card,
+			     struct creg_cmd *cmd,
+			     int st)
+{
+	char *buf;
+	char *log_str;
+	int cnt;
+	int len;
+	int off;
+
+	buf = cmd->buf;
+	off = 0;
+
+	/* Failed getting the log message */
+	if (st)
+		return;
+
+	while (off < cmd->cnt8) {
+		log_str = &card->log.buf[card->log.buf_len];
+		cnt = min(cmd->cnt8 - off, LOG_BUF_SIZE8 - card->log.buf_len);
+		len = substrncpy(log_str, &buf[off], cnt);
+
+		off += len;
+		card->log.buf_len += len;
+
+		/*
+		 * Flush the log if we've hit the end of a message or if we've
+		 * run out of buffer space.
+		 */
+		if ((log_str[len - 1] == '\0')  ||
+		    (card->log.buf_len == LOG_BUF_SIZE8)) {
+			if (card->log.buf_len != 1) /* Don't log blank lines. */
+				hw_log_msg(card, card->log.buf,
+					   card->log.buf_len);
+			card->log.buf_len = 0;
+		}
+
+	}
+
+	if (cmd->status & CREG_STAT_LOG_PENDING)
+		rsxx_read_hw_log(card);
+}
+
+int rsxx_read_hw_log(struct rsxx_cardinfo *card)
+{
+	int st;
+
+	st = creg_queue_cmd(card, CREG_OP_READ, CREG_ADD_LOG,
+			    sizeof(card->log.tmp), card->log.tmp,
+			    1, read_hw_log_done, NULL);
+	if (st)
+		dev_err(CARD_TO_DEV(card),
+			"Failed getting log text\n");
+
+	return st;
+}
+
+/*-------------- IOCTL REG Access ------------------*/
+static int issue_reg_cmd(struct rsxx_cardinfo *card,
+			 struct rsxx_reg_access *cmd,
+			 int read)
+{
+	unsigned int op = read ? CREG_OP_READ : CREG_OP_WRITE;
+
+	return __issue_creg_rw(card, op, cmd->addr, cmd->cnt, cmd->data,
+			       cmd->stream, &cmd->stat);
+}
+
+int rsxx_reg_access(struct rsxx_cardinfo *card,
+			struct rsxx_reg_access __user *ucmd,
+			int read)
+{
+	struct rsxx_reg_access cmd;
+	int st;
+
+	st = copy_from_user(&cmd, ucmd, sizeof(cmd));
+	if (st)
+		return -EFAULT;
+
+	if (cmd.cnt > RSXX_MAX_REG_CNT)
+		return -EFAULT;
+
+	st = issue_reg_cmd(card, &cmd, read);
+	if (st)
+		return st;
+
+	st = put_user(cmd.stat, &ucmd->stat);
+	if (st)
+		return -EFAULT;
+
+	if (read) {
+		st = copy_to_user(ucmd->data, cmd.data, cmd.cnt);
+		if (st)
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+/*------------ Initialization & Setup --------------*/
+int rsxx_creg_setup(struct rsxx_cardinfo *card)
+{
+	card->creg_ctrl.active_cmd = NULL;
+
+	INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done);
+	mutex_init(&card->creg_ctrl.reset_lock);
+	INIT_LIST_HEAD(&card->creg_ctrl.queue);
+	spin_lock_init(&card->creg_ctrl.lock);
+	setup_timer(&card->creg_ctrl.cmd_timer, creg_cmd_timed_out,
+		    (unsigned long) card);
+
+	return 0;
+}
+
+void rsxx_creg_destroy(struct rsxx_cardinfo *card)
+{
+	struct creg_cmd *cmd;
+	struct creg_cmd *tmp;
+	int cnt = 0;
+
+	/* Cancel outstanding commands */
+	spin_lock(&card->creg_ctrl.lock);
+	list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) {
+		list_del(&cmd->list);
+		if (cmd->cb)
+			cmd->cb(card, cmd, -ECANCELED);
+		kmem_cache_free(creg_cmd_pool, cmd);
+		cnt++;
+	}
+
+	if (cnt)
+		dev_info(CARD_TO_DEV(card),
+			"Canceled %d queue creg commands\n", cnt);
+
+	cmd = card->creg_ctrl.active_cmd;
+	card->creg_ctrl.active_cmd = NULL;
+	if (cmd) {
+		if (timer_pending(&card->creg_ctrl.cmd_timer))
+			del_timer_sync(&card->creg_ctrl.cmd_timer);
+
+		if (cmd->cb)
+			cmd->cb(card, cmd, -ECANCELED);
+		dev_info(CARD_TO_DEV(card),
+			"Canceled active creg command\n");
+		kmem_cache_free(creg_cmd_pool, cmd);
+	}
+	spin_unlock(&card->creg_ctrl.lock);
+
+	cancel_work_sync(&card->creg_ctrl.done_work);
+}
+
+
+int rsxx_creg_init(void)
+{
+	creg_cmd_pool = KMEM_CACHE(creg_cmd, SLAB_HWCACHE_ALIGN);
+	if (!creg_cmd_pool)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void rsxx_creg_cleanup(void)
+{
+	kmem_cache_destroy(creg_cmd_pool);
+}
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
new file mode 100644
index 000000000000..4346d17d2949
--- /dev/null
+++ b/drivers/block/rsxx/dev.c
@@ -0,0 +1,367 @@
+/*
+* Filename: dev.c
+*
+*
+* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
+*	Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+*
+* (C) Copyright 2013 IBM Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of the
+* License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+
+#include <linux/hdreg.h>
+#include <linux/genhd.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+
+#include <linux/fs.h>
+
+#include "rsxx_priv.h"
+
+static unsigned int blkdev_minors = 64;
+module_param(blkdev_minors, uint, 0444);
+MODULE_PARM_DESC(blkdev_minors, "Number of minors(partitions)");
+
+/*
+ * For now I'm making this tweakable in case any applications hit this limit.
+ * If you see a "bio too big" error in the log you will need to raise this
+ * value.
+ */
+static unsigned int blkdev_max_hw_sectors = 1024;
+module_param(blkdev_max_hw_sectors, uint, 0444);
+MODULE_PARM_DESC(blkdev_max_hw_sectors, "Max hw sectors for a single BIO");
+
+static unsigned int enable_blkdev = 1;
+module_param(enable_blkdev , uint, 0444);
+MODULE_PARM_DESC(enable_blkdev, "Enable block device interfaces");
+
+
+struct rsxx_bio_meta {
+	struct bio	*bio;
+	atomic_t	pending_dmas;
+	atomic_t	error;
+	unsigned long	start_time;
+};
+
+static struct kmem_cache *bio_meta_pool;
+
+/*----------------- Block Device Operations -----------------*/
+static int rsxx_blkdev_ioctl(struct block_device *bdev,
+				 fmode_t mode,
+				 unsigned int cmd,
+				 unsigned long arg)
+{
+	struct rsxx_cardinfo *card = bdev->bd_disk->private_data;
+
+	switch (cmd) {
+	case RSXX_GETREG:
+		return rsxx_reg_access(card, (void __user *)arg, 1);
+	case RSXX_SETREG:
+		return rsxx_reg_access(card, (void __user *)arg, 0);
+	}
+
+	return -ENOTTY;
+}
+
+static int rsxx_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+	struct rsxx_cardinfo *card = bdev->bd_disk->private_data;
+	u64 blocks = card->size8 >> 9;
+
+	/*
+	 * get geometry: Fake it. I haven't found any drivers that set
+	 * geo->start, so we won't either.
+	 */
+	if (card->size8) {
+		geo->heads = 64;
+		geo->sectors = 16;
+		do_div(blocks, (geo->heads * geo->sectors));
+		geo->cylinders = blocks;
+	} else {
+		geo->heads = 0;
+		geo->sectors = 0;
+		geo->cylinders = 0;
+	}
+	return 0;
+}
+
+static const struct block_device_operations rsxx_fops = {
+	.owner		= THIS_MODULE,
+	.getgeo		= rsxx_getgeo,
+	.ioctl		= rsxx_blkdev_ioctl,
+};
+
+static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio)
+{
+	struct hd_struct *part0 = &card->gendisk->part0;
+	int rw = bio_data_dir(bio);
+	int cpu;
+
+	cpu = part_stat_lock();
+
+	part_round_stats(cpu, part0);
+	part_inc_in_flight(part0, rw);
+
+	part_stat_unlock();
+}
+
+static void disk_stats_complete(struct rsxx_cardinfo *card,
+				struct bio *bio,
+				unsigned long start_time)
+{
+	struct hd_struct *part0 = &card->gendisk->part0;
+	unsigned long duration = jiffies - start_time;
+	int rw = bio_data_dir(bio);
+	int cpu;
+
+	cpu = part_stat_lock();
+
+	part_stat_add(cpu, part0, sectors[rw], bio_sectors(bio));
+	part_stat_inc(cpu, part0, ios[rw]);
+	part_stat_add(cpu, part0, ticks[rw], duration);
+
+	part_round_stats(cpu, part0);
+	part_dec_in_flight(part0, rw);
+
+	part_stat_unlock();
+}
+
+static void bio_dma_done_cb(struct rsxx_cardinfo *card,
+			    void *cb_data,
+			    unsigned int error)
+{
+	struct rsxx_bio_meta *meta = cb_data;
+
+	if (error)
+		atomic_set(&meta->error, 1);
+
+	if (atomic_dec_and_test(&meta->pending_dmas)) {
+		disk_stats_complete(card, meta->bio, meta->start_time);
+
+		bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0);
+		kmem_cache_free(bio_meta_pool, meta);
+	}
+}
+
+static void rsxx_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct rsxx_cardinfo *card = q->queuedata;
+	struct rsxx_bio_meta *bio_meta;
+	int st = -EINVAL;
+
+	might_sleep();
+
+	if (unlikely(card->halt)) {
+		st = -EFAULT;
+		goto req_err;
+	}
+
+	if (unlikely(card->dma_fault)) {
+		st = (-EFAULT);
+		goto req_err;
+	}
+
+	if (bio->bi_size == 0) {
+		dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
+		goto req_err;
+	}
+
+	bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL);
+	if (!bio_meta) {
+		st = -ENOMEM;
+		goto req_err;
+	}
+
+	bio_meta->bio = bio;
+	atomic_set(&bio_meta->error, 0);
+	atomic_set(&bio_meta->pending_dmas, 0);
+	bio_meta->start_time = jiffies;
+
+	disk_stats_start(card, bio);
+
+	dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n",
+		 bio_data_dir(bio) ? 'W' : 'R', bio_meta,
+		 (u64)bio->bi_sector << 9, bio->bi_size);
+
+	st = rsxx_dma_queue_bio(card, bio, &bio_meta->pending_dmas,
+				    bio_dma_done_cb, bio_meta);
+	if (st)
+		goto queue_err;
+
+	return;
+
+queue_err:
+	kmem_cache_free(bio_meta_pool, bio_meta);
+req_err:
+	bio_endio(bio, st);
+}
+
+/*----------------- Device Setup -------------------*/
+static bool rsxx_discard_supported(struct rsxx_cardinfo *card)
+{
+	unsigned char pci_rev;
+
+	pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev);
+
+	return (pci_rev >= RSXX_DISCARD_SUPPORT);
+}
+
+static unsigned short rsxx_get_logical_block_size(
+					struct rsxx_cardinfo *card)
+{
+	u32 capabilities = 0;
+	int st;
+
+	st = rsxx_get_card_capabilities(card, &capabilities);
+	if (st)
+		dev_warn(CARD_TO_DEV(card),
+			"Failed reading card capabilities register\n");
+
+	/* Earlier firmware did not have support for 512 byte accesses */
+	if (capabilities & CARD_CAP_SUBPAGE_WRITES)
+		return 512;
+	else
+		return RSXX_HW_BLK_SIZE;
+}
+
+int rsxx_attach_dev(struct rsxx_cardinfo *card)
+{
+	mutex_lock(&card->dev_lock);
+
+	/* The block device requires the stripe size from the config. */
+	if (enable_blkdev) {
+		if (card->config_valid)
+			set_capacity(card->gendisk, card->size8 >> 9);
+		else
+			set_capacity(card->gendisk, 0);
+		add_disk(card->gendisk);
+
+		card->bdev_attached = 1;
+	}
+
+	mutex_unlock(&card->dev_lock);
+
+	return 0;
+}
+
+void rsxx_detach_dev(struct rsxx_cardinfo *card)
+{
+	mutex_lock(&card->dev_lock);
+
+	if (card->bdev_attached) {
+		del_gendisk(card->gendisk);
+		card->bdev_attached = 0;
+	}
+
+	mutex_unlock(&card->dev_lock);
+}
+
+int rsxx_setup_dev(struct rsxx_cardinfo *card)
+{
+	unsigned short blk_size;
+
+	mutex_init(&card->dev_lock);
+
+	if (!enable_blkdev)
+		return 0;
+
+	card->major = register_blkdev(0, DRIVER_NAME);
+	if (card->major < 0) {
+		dev_err(CARD_TO_DEV(card), "Failed to get major number\n");
+		return -ENOMEM;
+	}
+
+	card->queue = blk_alloc_queue(GFP_KERNEL);
+	if (!card->queue) {
+		dev_err(CARD_TO_DEV(card), "Failed queue alloc\n");
+		unregister_blkdev(card->major, DRIVER_NAME);
+		return -ENOMEM;
+	}
+
+	card->gendisk = alloc_disk(blkdev_minors);
+	if (!card->gendisk) {
+		dev_err(CARD_TO_DEV(card), "Failed disk alloc\n");
+		blk_cleanup_queue(card->queue);
+		unregister_blkdev(card->major, DRIVER_NAME);
+		return -ENOMEM;
+	}
+
+	blk_size = rsxx_get_logical_block_size(card);
+
+	blk_queue_make_request(card->queue, rsxx_make_request);
+	blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
+	blk_queue_dma_alignment(card->queue, blk_size - 1);
+	blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
+	blk_queue_logical_block_size(card->queue, blk_size);
+	blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
+
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue);
+	if (rsxx_discard_supported(card)) {
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, card->queue);
+		blk_queue_max_discard_sectors(card->queue,
+						RSXX_HW_BLK_SIZE >> 9);
+		card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE;
+		card->queue->limits.discard_alignment   = RSXX_HW_BLK_SIZE;
+		card->queue->limits.discard_zeroes_data = 1;
+	}
+
+	card->queue->queuedata = card;
+
+	snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name),
+		 "rsxx%d", card->disk_id);
+	card->gendisk->driverfs_dev = &card->dev->dev;
+	card->gendisk->major = card->major;
+	card->gendisk->first_minor = 0;
+	card->gendisk->fops = &rsxx_fops;
+	card->gendisk->private_data = card;
+	card->gendisk->queue = card->queue;
+
+	return 0;
+}
+
+void rsxx_destroy_dev(struct rsxx_cardinfo *card)
+{
+	if (!enable_blkdev)
+		return;
+
+	put_disk(card->gendisk);
+	card->gendisk = NULL;
+
+	blk_cleanup_queue(card->queue);
+	unregister_blkdev(card->major, DRIVER_NAME);
+}
+
+int rsxx_dev_init(void)
+{
+	bio_meta_pool = KMEM_CACHE(rsxx_bio_meta, SLAB_HWCACHE_ALIGN);
+	if (!bio_meta_pool)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void rsxx_dev_cleanup(void)
+{
+	kmem_cache_destroy(bio_meta_pool);
+}
+
+
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
new file mode 100644
index 000000000000..63176e67662f
--- /dev/null
+++ b/drivers/block/rsxx/dma.c
@@ -0,0 +1,998 @@
+/*
+* Filename: dma.c
+*
+*
+* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
+*	Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+*
+* (C) Copyright 2013 IBM Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of the
+* License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <linux/slab.h>
+#include "rsxx_priv.h"
+
+struct rsxx_dma {
+	struct list_head	 list;
+	u8			 cmd;
+	unsigned int		 laddr;     /* Logical address on the ramsan */
+	struct {
+		u32		 off;
+		u32		 cnt;
+	} sub_page;
+	dma_addr_t		 dma_addr;
+	struct page		 *page;
+	unsigned int		 pg_off;    /* Page Offset */
+	rsxx_dma_cb		 cb;
+	void			 *cb_data;
+};
+
+/* This timeout is used to detect a stalled DMA channel */
+#define DMA_ACTIVITY_TIMEOUT	msecs_to_jiffies(10000)
+
+struct hw_status {
+	u8	status;
+	u8	tag;
+	__le16	count;
+	__le32	_rsvd2;
+	__le64	_rsvd3;
+} __packed;
+
+enum rsxx_dma_status {
+	DMA_SW_ERR    = 0x1,
+	DMA_HW_FAULT  = 0x2,
+	DMA_CANCELLED = 0x4,
+};
+
+struct hw_cmd {
+	u8	command;
+	u8	tag;
+	u8	_rsvd;
+	u8	sub_page; /* Bit[0:2]: 512byte offset */
+			  /* Bit[4:6]: 512byte count */
+	__le32	device_addr;
+	__le64	host_addr;
+} __packed;
+
+enum rsxx_hw_cmd {
+	HW_CMD_BLK_DISCARD	= 0x70,
+	HW_CMD_BLK_WRITE	= 0x80,
+	HW_CMD_BLK_READ		= 0xC0,
+	HW_CMD_BLK_RECON_READ	= 0xE0,
+};
+
+enum rsxx_hw_status {
+	HW_STATUS_CRC		= 0x01,
+	HW_STATUS_HARD_ERR	= 0x02,
+	HW_STATUS_SOFT_ERR	= 0x04,
+	HW_STATUS_FAULT		= 0x08,
+};
+
+#define STATUS_BUFFER_SIZE8     4096
+#define COMMAND_BUFFER_SIZE8    4096
+
+static struct kmem_cache *rsxx_dma_pool;
+
+struct dma_tracker {
+	int			next_tag;
+	struct rsxx_dma	*dma;
+};
+
+#define DMA_TRACKER_LIST_SIZE8 (sizeof(struct dma_tracker_list) + \
+		(sizeof(struct dma_tracker) * RSXX_MAX_OUTSTANDING_CMDS))
+
+struct dma_tracker_list {
+	spinlock_t		lock;
+	int			head;
+	struct dma_tracker	list[0];
+};
+
+
+/*----------------- Misc Utility Functions -------------------*/
+static unsigned int rsxx_addr8_to_laddr(u64 addr8, struct rsxx_cardinfo *card)
+{
+	unsigned long long tgt_addr8;
+
+	tgt_addr8 = ((addr8 >> card->_stripe.upper_shift) &
+		      card->_stripe.upper_mask) |
+		    ((addr8) & card->_stripe.lower_mask);
+	do_div(tgt_addr8, RSXX_HW_BLK_SIZE);
+	return tgt_addr8;
+}
+
+static unsigned int rsxx_get_dma_tgt(struct rsxx_cardinfo *card, u64 addr8)
+{
+	unsigned int tgt;
+
+	tgt = (addr8 >> card->_stripe.target_shift) & card->_stripe.target_mask;
+
+	return tgt;
+}
+
+static void rsxx_dma_queue_reset(struct rsxx_cardinfo *card)
+{
+	/* Reset all DMA Command/Status Queues */
+	iowrite32(DMA_QUEUE_RESET, card->regmap + RESET);
+}
+
+static unsigned int get_dma_size(struct rsxx_dma *dma)
+{
+	if (dma->sub_page.cnt)
+		return dma->sub_page.cnt << 9;
+	else
+		return RSXX_HW_BLK_SIZE;
+}
+
+
+/*----------------- DMA Tracker -------------------*/
+static void set_tracker_dma(struct dma_tracker_list *trackers,
+			    int tag,
+			    struct rsxx_dma *dma)
+{
+	trackers->list[tag].dma = dma;
+}
+
+static struct rsxx_dma *get_tracker_dma(struct dma_tracker_list *trackers,
+					    int tag)
+{
+	return trackers->list[tag].dma;
+}
+
+static int pop_tracker(struct dma_tracker_list *trackers)
+{
+	int tag;
+
+	spin_lock(&trackers->lock);
+	tag = trackers->head;
+	if (tag != -1) {
+		trackers->head = trackers->list[tag].next_tag;
+		trackers->list[tag].next_tag = -1;
+	}
+	spin_unlock(&trackers->lock);
+
+	return tag;
+}
+
+static void push_tracker(struct dma_tracker_list *trackers, int tag)
+{
+	spin_lock(&trackers->lock);
+	trackers->list[tag].next_tag = trackers->head;
+	trackers->head = tag;
+	trackers->list[tag].dma = NULL;
+	spin_unlock(&trackers->lock);
+}
+
+
+/*----------------- Interrupt Coalescing -------------*/
+/*
+ * Interrupt Coalescing Register Format:
+ * Interrupt Timer (64ns units) [15:0]
+ * Interrupt Count [24:16]
+ * Reserved [31:25]
+*/
+#define INTR_COAL_LATENCY_MASK       (0x0000ffff)
+
+#define INTR_COAL_COUNT_SHIFT        16
+#define INTR_COAL_COUNT_BITS         9
+#define INTR_COAL_COUNT_MASK         (((1 << INTR_COAL_COUNT_BITS) - 1) << \
+					INTR_COAL_COUNT_SHIFT)
+#define INTR_COAL_LATENCY_UNITS_NS   64
+
+
+static u32 dma_intr_coal_val(u32 mode, u32 count, u32 latency)
+{
+	u32 latency_units = latency / INTR_COAL_LATENCY_UNITS_NS;
+
+	if (mode == RSXX_INTR_COAL_DISABLED)
+		return 0;
+
+	return ((count << INTR_COAL_COUNT_SHIFT) & INTR_COAL_COUNT_MASK) |
+			(latency_units & INTR_COAL_LATENCY_MASK);
+
+}
+
+static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card)
+{
+	int i;
+	u32 q_depth = 0;
+	u32 intr_coal;
+
+	if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE)
+		return;
+
+	for (i = 0; i < card->n_targets; i++)
+		q_depth += atomic_read(&card->ctrl[i].stats.hw_q_depth);
+
+	intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode,
+				      q_depth / 2,
+				      card->config.data.intr_coal.latency);
+	iowrite32(intr_coal, card->regmap + INTR_COAL);
+}
+
+/*----------------- RSXX DMA Handling -------------------*/
+static void rsxx_complete_dma(struct rsxx_cardinfo *card,
+				  struct rsxx_dma *dma,
+				  unsigned int status)
+{
+	if (status & DMA_SW_ERR)
+		printk_ratelimited(KERN_ERR
+				   "SW Error in DMA(cmd x%02x, laddr x%08x)\n",
+				   dma->cmd, dma->laddr);
+	if (status & DMA_HW_FAULT)
+		printk_ratelimited(KERN_ERR
+				   "HW Fault in DMA(cmd x%02x, laddr x%08x)\n",
+				   dma->cmd, dma->laddr);
+	if (status & DMA_CANCELLED)
+		printk_ratelimited(KERN_ERR
+				   "DMA Cancelled(cmd x%02x, laddr x%08x)\n",
+				   dma->cmd, dma->laddr);
+
+	if (dma->dma_addr)
+		pci_unmap_page(card->dev, dma->dma_addr, get_dma_size(dma),
+			       dma->cmd == HW_CMD_BLK_WRITE ?
+					   PCI_DMA_TODEVICE :
+					   PCI_DMA_FROMDEVICE);
+
+	if (dma->cb)
+		dma->cb(card, dma->cb_data, status ? 1 : 0);
+
+	kmem_cache_free(rsxx_dma_pool, dma);
+}
+
+static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl,
+				 struct rsxx_dma *dma)
+{
+	/*
+	 * Requeued DMAs go to the front of the queue so they are issued
+	 * first.
+	 */
+	spin_lock(&ctrl->queue_lock);
+	list_add(&dma->list, &ctrl->queue);
+	spin_unlock(&ctrl->queue_lock);
+}
+
+static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl,
+				      struct rsxx_dma *dma,
+				      u8 hw_st)
+{
+	unsigned int status = 0;
+	int requeue_cmd = 0;
+
+	dev_dbg(CARD_TO_DEV(ctrl->card),
+		"Handling DMA error(cmd x%02x, laddr x%08x st:x%02x)\n",
+		dma->cmd, dma->laddr, hw_st);
+
+	if (hw_st & HW_STATUS_CRC)
+		ctrl->stats.crc_errors++;
+	if (hw_st & HW_STATUS_HARD_ERR)
+		ctrl->stats.hard_errors++;
+	if (hw_st & HW_STATUS_SOFT_ERR)
+		ctrl->stats.soft_errors++;
+
+	switch (dma->cmd) {
+	case HW_CMD_BLK_READ:
+		if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) {
+			if (ctrl->card->scrub_hard) {
+				dma->cmd = HW_CMD_BLK_RECON_READ;
+				requeue_cmd = 1;
+				ctrl->stats.reads_retried++;
+			} else {
+				status |= DMA_HW_FAULT;
+				ctrl->stats.reads_failed++;
+			}
+		} else if (hw_st & HW_STATUS_FAULT) {
+			status |= DMA_HW_FAULT;
+			ctrl->stats.reads_failed++;
+		}
+
+		break;
+	case HW_CMD_BLK_RECON_READ:
+		if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) {
+			/* Data could not be reconstructed. */
+			status |= DMA_HW_FAULT;
+			ctrl->stats.reads_failed++;
+		}
+
+		break;
+	case HW_CMD_BLK_WRITE:
+		status |= DMA_HW_FAULT;
+		ctrl->stats.writes_failed++;
+
+		break;
+	case HW_CMD_BLK_DISCARD:
+		status |= DMA_HW_FAULT;
+		ctrl->stats.discards_failed++;
+
+		break;
+	default:
+		dev_err(CARD_TO_DEV(ctrl->card),
+			"Unknown command in DMA!(cmd: x%02x "
+			   "laddr x%08x st: x%02x\n",
+			   dma->cmd, dma->laddr, hw_st);
+		status |= DMA_SW_ERR;
+
+		break;
+	}
+
+	if (requeue_cmd)
+		rsxx_requeue_dma(ctrl, dma);
+	else
+		rsxx_complete_dma(ctrl->card, dma, status);
+}
+
+static void dma_engine_stalled(unsigned long data)
+{
+	struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data;
+
+	if (atomic_read(&ctrl->stats.hw_q_depth) == 0)
+		return;
+
+	if (ctrl->cmd.idx != ioread32(ctrl->regmap + SW_CMD_IDX)) {
+		/*
+		 * The dma engine was stalled because the SW_CMD_IDX write
+		 * was lost. Issue it again to recover.
+		 */
+		dev_warn(CARD_TO_DEV(ctrl->card),
+			"SW_CMD_IDX write was lost, re-writing...\n");
+		iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX);
+		mod_timer(&ctrl->activity_timer,
+			  jiffies + DMA_ACTIVITY_TIMEOUT);
+	} else {
+		dev_warn(CARD_TO_DEV(ctrl->card),
+			"DMA channel %d has stalled, faulting interface.\n",
+			ctrl->id);
+		ctrl->card->dma_fault = 1;
+	}
+}
+
+static void rsxx_issue_dmas(struct work_struct *work)
+{
+	struct rsxx_dma_ctrl *ctrl;
+	struct rsxx_dma *dma;
+	int tag;
+	int cmds_pending = 0;
+	struct hw_cmd *hw_cmd_buf;
+
+	ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);
+	hw_cmd_buf = ctrl->cmd.buf;
+
+	if (unlikely(ctrl->card->halt))
+		return;
+
+	while (1) {
+		spin_lock(&ctrl->queue_lock);
+		if (list_empty(&ctrl->queue)) {
+			spin_unlock(&ctrl->queue_lock);
+			break;
+		}
+		spin_unlock(&ctrl->queue_lock);
+
+		tag = pop_tracker(ctrl->trackers);
+		if (tag == -1)
+			break;
+
+		spin_lock(&ctrl->queue_lock);
+		dma = list_entry(ctrl->queue.next, struct rsxx_dma, list);
+		list_del(&dma->list);
+		ctrl->stats.sw_q_depth--;
+		spin_unlock(&ctrl->queue_lock);
+
+		/*
+		 * This will catch any DMAs that slipped in right before the
+		 * fault, but was queued after all the other DMAs were
+		 * cancelled.
+		 */
+		if (unlikely(ctrl->card->dma_fault)) {
+			push_tracker(ctrl->trackers, tag);
+			rsxx_complete_dma(ctrl->card, dma, DMA_CANCELLED);
+			continue;
+		}
+
+		set_tracker_dma(ctrl->trackers, tag, dma);
+		hw_cmd_buf[ctrl->cmd.idx].command  = dma->cmd;
+		hw_cmd_buf[ctrl->cmd.idx].tag      = tag;
+		hw_cmd_buf[ctrl->cmd.idx]._rsvd    = 0;
+		hw_cmd_buf[ctrl->cmd.idx].sub_page =
+					((dma->sub_page.cnt & 0x7) << 4) |
+					 (dma->sub_page.off & 0x7);
+
+		hw_cmd_buf[ctrl->cmd.idx].device_addr =
+					cpu_to_le32(dma->laddr);
+
+		hw_cmd_buf[ctrl->cmd.idx].host_addr =
+					cpu_to_le64(dma->dma_addr);
+
+		dev_dbg(CARD_TO_DEV(ctrl->card),
+			"Issue DMA%d(laddr %d tag %d) to idx %d\n",
+			ctrl->id, dma->laddr, tag, ctrl->cmd.idx);
+
+		ctrl->cmd.idx = (ctrl->cmd.idx + 1) & RSXX_CS_IDX_MASK;
+		cmds_pending++;
+
+		if (dma->cmd == HW_CMD_BLK_WRITE)
+			ctrl->stats.writes_issued++;
+		else if (dma->cmd == HW_CMD_BLK_DISCARD)
+			ctrl->stats.discards_issued++;
+		else
+			ctrl->stats.reads_issued++;
+	}
+
+	/* Let HW know we've queued commands. */
+	if (cmds_pending) {
+		/*
+		 * We must guarantee that the CPU writes to 'ctrl->cmd.buf'
+		 * (which is in PCI-consistent system-memory) from the loop
+		 * above make it into the coherency domain before the
+		 * following PIO "trigger" updating the cmd.idx.  A WMB is
+		 * sufficient. We need not explicitly CPU cache-flush since
+		 * the memory is a PCI-consistent (ie; coherent) mapping.
+		 */
+		wmb();
+
+		atomic_add(cmds_pending, &ctrl->stats.hw_q_depth);
+		mod_timer(&ctrl->activity_timer,
+			  jiffies + DMA_ACTIVITY_TIMEOUT);
+		iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX);
+	}
+}
+
+static void rsxx_dma_done(struct work_struct *work)
+{
+	struct rsxx_dma_ctrl *ctrl;
+	struct rsxx_dma *dma;
+	unsigned long flags;
+	u16 count;
+	u8 status;
+	u8 tag;
+	struct hw_status *hw_st_buf;
+
+	ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work);
+	hw_st_buf = ctrl->status.buf;
+
+	if (unlikely(ctrl->card->halt) ||
+	    unlikely(ctrl->card->dma_fault))
+		return;
+
+	count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count);
+
+	while (count == ctrl->e_cnt) {
+		/*
+		 * The read memory-barrier is necessary to keep aggressive
+		 * processors/optimizers (such as the PPC Apple G5) from
+		 * reordering the following status-buffer tag & status read
+		 * *before* the count read on subsequent iterations of the
+		 * loop!
+		 */
+		rmb();
+
+		status = hw_st_buf[ctrl->status.idx].status;
+		tag    = hw_st_buf[ctrl->status.idx].tag;
+
+		dma = get_tracker_dma(ctrl->trackers, tag);
+		if (dma == NULL) {
+			spin_lock_irqsave(&ctrl->card->irq_lock, flags);
+			rsxx_disable_ier(ctrl->card, CR_INTR_DMA_ALL);
+			spin_unlock_irqrestore(&ctrl->card->irq_lock, flags);
+
+			dev_err(CARD_TO_DEV(ctrl->card),
+				"No tracker for tag %d "
+				"(idx %d id %d)\n",
+				tag, ctrl->status.idx, ctrl->id);
+			return;
+		}
+
+		dev_dbg(CARD_TO_DEV(ctrl->card),
+			"Completing DMA%d"
+			"(laddr x%x tag %d st: x%x cnt: x%04x) from idx %d.\n",
+			ctrl->id, dma->laddr, tag, status, count,
+			ctrl->status.idx);
+
+		atomic_dec(&ctrl->stats.hw_q_depth);
+
+		mod_timer(&ctrl->activity_timer,
+			  jiffies + DMA_ACTIVITY_TIMEOUT);
+
+		if (status)
+			rsxx_handle_dma_error(ctrl, dma, status);
+		else
+			rsxx_complete_dma(ctrl->card, dma, 0);
+
+		push_tracker(ctrl->trackers, tag);
+
+		ctrl->status.idx = (ctrl->status.idx + 1) &
+				   RSXX_CS_IDX_MASK;
+		ctrl->e_cnt++;
+
+		count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count);
+	}
+
+	dma_intr_coal_auto_tune(ctrl->card);
+
+	if (atomic_read(&ctrl->stats.hw_q_depth) == 0)
+		del_timer_sync(&ctrl->activity_timer);
+
+	spin_lock_irqsave(&ctrl->card->irq_lock, flags);
+	rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id));
+	spin_unlock_irqrestore(&ctrl->card->irq_lock, flags);
+
+	spin_lock(&ctrl->queue_lock);
+	if (ctrl->stats.sw_q_depth)
+		queue_work(ctrl->issue_wq, &ctrl->issue_dma_work);
+	spin_unlock(&ctrl->queue_lock);
+}
+
+static int rsxx_cleanup_dma_queue(struct rsxx_cardinfo *card,
+				      struct list_head *q)
+{
+	struct rsxx_dma *dma;
+	struct rsxx_dma *tmp;
+	int cnt = 0;
+
+	list_for_each_entry_safe(dma, tmp, q, list) {
+		list_del(&dma->list);
+
+		if (dma->dma_addr)
+			pci_unmap_page(card->dev, dma->dma_addr,
+				       get_dma_size(dma),
+				       (dma->cmd == HW_CMD_BLK_WRITE) ?
+				       PCI_DMA_TODEVICE :
+				       PCI_DMA_FROMDEVICE);
+		kmem_cache_free(rsxx_dma_pool, dma);
+		cnt++;
+	}
+
+	return cnt;
+}
+
+static int rsxx_queue_discard(struct rsxx_cardinfo *card,
+				  struct list_head *q,
+				  unsigned int laddr,
+				  rsxx_dma_cb cb,
+				  void *cb_data)
+{
+	struct rsxx_dma *dma;
+
+	dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
+	if (!dma)
+		return -ENOMEM;
+
+	dma->cmd          = HW_CMD_BLK_DISCARD;
+	dma->laddr        = laddr;
+	dma->dma_addr     = 0;
+	dma->sub_page.off = 0;
+	dma->sub_page.cnt = 0;
+	dma->page         = NULL;
+	dma->pg_off       = 0;
+	dma->cb	          = cb;
+	dma->cb_data      = cb_data;
+
+	dev_dbg(CARD_TO_DEV(card), "Queuing[D] laddr %x\n", dma->laddr);
+
+	list_add_tail(&dma->list, q);
+
+	return 0;
+}
+
+static int rsxx_queue_dma(struct rsxx_cardinfo *card,
+			      struct list_head *q,
+			      int dir,
+			      unsigned int dma_off,
+			      unsigned int dma_len,
+			      unsigned int laddr,
+			      struct page *page,
+			      unsigned int pg_off,
+			      rsxx_dma_cb cb,
+			      void *cb_data)
+{
+	struct rsxx_dma *dma;
+
+	dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
+	if (!dma)
+		return -ENOMEM;
+
+	dma->dma_addr = pci_map_page(card->dev, page, pg_off, dma_len,
+				     dir ? PCI_DMA_TODEVICE :
+				     PCI_DMA_FROMDEVICE);
+	if (!dma->dma_addr) {
+		kmem_cache_free(rsxx_dma_pool, dma);
+		return -ENOMEM;
+	}
+
+	dma->cmd          = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ;
+	dma->laddr        = laddr;
+	dma->sub_page.off = (dma_off >> 9);
+	dma->sub_page.cnt = (dma_len >> 9);
+	dma->page         = page;
+	dma->pg_off       = pg_off;
+	dma->cb	          = cb;
+	dma->cb_data      = cb_data;
+
+	dev_dbg(CARD_TO_DEV(card),
+		"Queuing[%c] laddr %x off %d cnt %d page %p pg_off %d\n",
+		dir ? 'W' : 'R', dma->laddr, dma->sub_page.off,
+		dma->sub_page.cnt, dma->page, dma->pg_off);
+
+	/* Queue the DMA */
+	list_add_tail(&dma->list, q);
+
+	return 0;
+}
+
+int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+			   struct bio *bio,
+			   atomic_t *n_dmas,
+			   rsxx_dma_cb cb,
+			   void *cb_data)
+{
+	struct list_head dma_list[RSXX_MAX_TARGETS];
+	struct bio_vec *bvec;
+	unsigned long long addr8;
+	unsigned int laddr;
+	unsigned int bv_len;
+	unsigned int bv_off;
+	unsigned int dma_off;
+	unsigned int dma_len;
+	int dma_cnt[RSXX_MAX_TARGETS];
+	int tgt;
+	int st;
+	int i;
+
+	addr8 = bio->bi_sector << 9; /* sectors are 512 bytes */
+	atomic_set(n_dmas, 0);
+
+	for (i = 0; i < card->n_targets; i++) {
+		INIT_LIST_HEAD(&dma_list[i]);
+		dma_cnt[i] = 0;
+	}
+
+	if (bio->bi_rw & REQ_DISCARD) {
+		bv_len = bio->bi_size;
+
+		while (bv_len > 0) {
+			tgt   = rsxx_get_dma_tgt(card, addr8);
+			laddr = rsxx_addr8_to_laddr(addr8, card);
+
+			st = rsxx_queue_discard(card, &dma_list[tgt], laddr,
+						    cb, cb_data);
+			if (st)
+				goto bvec_err;
+
+			dma_cnt[tgt]++;
+			atomic_inc(n_dmas);
+			addr8  += RSXX_HW_BLK_SIZE;
+			bv_len -= RSXX_HW_BLK_SIZE;
+		}
+	} else {
+		bio_for_each_segment(bvec, bio, i) {
+			bv_len = bvec->bv_len;
+			bv_off = bvec->bv_offset;
+
+			while (bv_len > 0) {
+				tgt   = rsxx_get_dma_tgt(card, addr8);
+				laddr = rsxx_addr8_to_laddr(addr8, card);
+				dma_off = addr8 & RSXX_HW_BLK_MASK;
+				dma_len = min(bv_len,
+					      RSXX_HW_BLK_SIZE - dma_off);
+
+				st = rsxx_queue_dma(card, &dma_list[tgt],
+							bio_data_dir(bio),
+							dma_off, dma_len,
+							laddr, bvec->bv_page,
+							bv_off, cb, cb_data);
+				if (st)
+					goto bvec_err;
+
+				dma_cnt[tgt]++;
+				atomic_inc(n_dmas);
+				addr8  += dma_len;
+				bv_off += dma_len;
+				bv_len -= dma_len;
+			}
+		}
+	}
+
+	for (i = 0; i < card->n_targets; i++) {
+		if (!list_empty(&dma_list[i])) {
+			spin_lock(&card->ctrl[i].queue_lock);
+			card->ctrl[i].stats.sw_q_depth += dma_cnt[i];
+			list_splice_tail(&dma_list[i], &card->ctrl[i].queue);
+			spin_unlock(&card->ctrl[i].queue_lock);
+
+			queue_work(card->ctrl[i].issue_wq,
+				   &card->ctrl[i].issue_dma_work);
+		}
+	}
+
+	return 0;
+
+bvec_err:
+	for (i = 0; i < card->n_targets; i++)
+		rsxx_cleanup_dma_queue(card, &dma_list[i]);
+
+	return st;
+}
+
+
+/*----------------- DMA Engine Initialization & Setup -------------------*/
+static int rsxx_dma_ctrl_init(struct pci_dev *dev,
+				  struct rsxx_dma_ctrl *ctrl)
+{
+	int i;
+
+	memset(&ctrl->stats, 0, sizeof(ctrl->stats));
+
+	ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8,
+						&ctrl->status.dma_addr);
+	ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8,
+					     &ctrl->cmd.dma_addr);
+	if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL)
+		return -ENOMEM;
+
+	ctrl->trackers = vmalloc(DMA_TRACKER_LIST_SIZE8);
+	if (!ctrl->trackers)
+		return -ENOMEM;
+
+	ctrl->trackers->head = 0;
+	for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) {
+		ctrl->trackers->list[i].next_tag = i + 1;
+		ctrl->trackers->list[i].dma = NULL;
+	}
+	ctrl->trackers->list[RSXX_MAX_OUTSTANDING_CMDS-1].next_tag = -1;
+	spin_lock_init(&ctrl->trackers->lock);
+
+	spin_lock_init(&ctrl->queue_lock);
+	INIT_LIST_HEAD(&ctrl->queue);
+
+	setup_timer(&ctrl->activity_timer, dma_engine_stalled,
+					(unsigned long)ctrl);
+
+	ctrl->issue_wq = alloc_ordered_workqueue(DRIVER_NAME"_issue", 0);
+	if (!ctrl->issue_wq)
+		return -ENOMEM;
+
+	ctrl->done_wq = alloc_ordered_workqueue(DRIVER_NAME"_done", 0);
+	if (!ctrl->done_wq)
+		return -ENOMEM;
+
+	INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas);
+	INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done);
+
+	memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8);
+	iowrite32(lower_32_bits(ctrl->status.dma_addr),
+		  ctrl->regmap + SB_ADD_LO);
+	iowrite32(upper_32_bits(ctrl->status.dma_addr),
+		  ctrl->regmap + SB_ADD_HI);
+
+	memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8);
+	iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO);
+	iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI);
+
+	ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT);
+	if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) {
+		dev_crit(&dev->dev, "Failed reading status cnt x%x\n",
+			 ctrl->status.idx);
+		return -EINVAL;
+	}
+	iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT);
+	iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT);
+
+	ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX);
+	if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) {
+		dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n",
+			 ctrl->status.idx);
+		return -EINVAL;
+	}
+	iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX);
+	iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX);
+
+	wmb();
+
+	return 0;
+}
+
+static int rsxx_dma_stripe_setup(struct rsxx_cardinfo *card,
+			      unsigned int stripe_size8)
+{
+	if (!is_power_of_2(stripe_size8)) {
+		dev_err(CARD_TO_DEV(card),
+			"stripe_size is NOT a power of 2!\n");
+		return -EINVAL;
+	}
+
+	card->_stripe.lower_mask = stripe_size8 - 1;
+
+	card->_stripe.upper_mask  = ~(card->_stripe.lower_mask);
+	card->_stripe.upper_shift = ffs(card->n_targets) - 1;
+
+	card->_stripe.target_mask = card->n_targets - 1;
+	card->_stripe.target_shift = ffs(stripe_size8) - 1;
+
+	dev_dbg(CARD_TO_DEV(card), "_stripe.lower_mask   = x%016llx\n",
+		card->_stripe.lower_mask);
+	dev_dbg(CARD_TO_DEV(card), "_stripe.upper_shift  = x%016llx\n",
+		card->_stripe.upper_shift);
+	dev_dbg(CARD_TO_DEV(card), "_stripe.upper_mask   = x%016llx\n",
+		card->_stripe.upper_mask);
+	dev_dbg(CARD_TO_DEV(card), "_stripe.target_mask  = x%016llx\n",
+		card->_stripe.target_mask);
+	dev_dbg(CARD_TO_DEV(card), "_stripe.target_shift = x%016llx\n",
+		card->_stripe.target_shift);
+
+	return 0;
+}
+
+static int rsxx_dma_configure(struct rsxx_cardinfo *card)
+{
+	u32 intr_coal;
+
+	intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode,
+				      card->config.data.intr_coal.count,
+				      card->config.data.intr_coal.latency);
+	iowrite32(intr_coal, card->regmap + INTR_COAL);
+
+	return rsxx_dma_stripe_setup(card, card->config.data.stripe_size);
+}
+
+int rsxx_dma_setup(struct rsxx_cardinfo *card)
+{
+	unsigned long flags;
+	int st;
+	int i;
+
+	dev_info(CARD_TO_DEV(card),
+		"Initializing %d DMA targets\n",
+		card->n_targets);
+
+	/* Regmap is divided up into 4K chunks. One for each DMA channel */
+	for (i = 0; i < card->n_targets; i++)
+		card->ctrl[i].regmap = card->regmap + (i * 4096);
+
+	card->dma_fault = 0;
+
+	/* Reset the DMA queues */
+	rsxx_dma_queue_reset(card);
+
+	/************* Setup DMA Control *************/
+	for (i = 0; i < card->n_targets; i++) {
+		st = rsxx_dma_ctrl_init(card->dev, &card->ctrl[i]);
+		if (st)
+			goto failed_dma_setup;
+
+		card->ctrl[i].card = card;
+		card->ctrl[i].id = i;
+	}
+
+	card->scrub_hard = 1;
+
+	if (card->config_valid)
+		rsxx_dma_configure(card);
+
+	/* Enable the interrupts after all setup has completed. */
+	for (i = 0; i < card->n_targets; i++) {
+		spin_lock_irqsave(&card->irq_lock, flags);
+		rsxx_enable_ier_and_isr(card, CR_INTR_DMA(i));
+		spin_unlock_irqrestore(&card->irq_lock, flags);
+	}
+
+	return 0;
+
+failed_dma_setup:
+	for (i = 0; i < card->n_targets; i++) {
+		struct rsxx_dma_ctrl *ctrl = &card->ctrl[i];
+
+		if (ctrl->issue_wq) {
+			destroy_workqueue(ctrl->issue_wq);
+			ctrl->issue_wq = NULL;
+		}
+
+		if (ctrl->done_wq) {
+			destroy_workqueue(ctrl->done_wq);
+			ctrl->done_wq = NULL;
+		}
+
+		if (ctrl->trackers)
+			vfree(ctrl->trackers);
+
+		if (ctrl->status.buf)
+			pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8,
+					    ctrl->status.buf,
+					    ctrl->status.dma_addr);
+		if (ctrl->cmd.buf)
+			pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8,
+					    ctrl->cmd.buf, ctrl->cmd.dma_addr);
+	}
+
+	return st;
+}
+
+
+void rsxx_dma_destroy(struct rsxx_cardinfo *card)
+{
+	struct rsxx_dma_ctrl *ctrl;
+	struct rsxx_dma *dma;
+	int i, j;
+	int cnt = 0;
+
+	for (i = 0; i < card->n_targets; i++) {
+		ctrl = &card->ctrl[i];
+
+		if (ctrl->issue_wq) {
+			destroy_workqueue(ctrl->issue_wq);
+			ctrl->issue_wq = NULL;
+		}
+
+		if (ctrl->done_wq) {
+			destroy_workqueue(ctrl->done_wq);
+			ctrl->done_wq = NULL;
+		}
+
+		if (timer_pending(&ctrl->activity_timer))
+			del_timer_sync(&ctrl->activity_timer);
+
+		/* Clean up the DMA queue */
+		spin_lock(&ctrl->queue_lock);
+		cnt = rsxx_cleanup_dma_queue(card, &ctrl->queue);
+		spin_unlock(&ctrl->queue_lock);
+
+		if (cnt)
+			dev_info(CARD_TO_DEV(card),
+				"Freed %d queued DMAs on channel %d\n",
+				cnt, i);
+
+		/* Clean up issued DMAs */
+		for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) {
+			dma = get_tracker_dma(ctrl->trackers, j);
+			if (dma) {
+				pci_unmap_page(card->dev, dma->dma_addr,
+					       get_dma_size(dma),
+					       (dma->cmd == HW_CMD_BLK_WRITE) ?
+					       PCI_DMA_TODEVICE :
+					       PCI_DMA_FROMDEVICE);
+				kmem_cache_free(rsxx_dma_pool, dma);
+				cnt++;
+			}
+		}
+
+		if (cnt)
+			dev_info(CARD_TO_DEV(card),
+				"Freed %d pending DMAs on channel %d\n",
+				cnt, i);
+
+		vfree(ctrl->trackers);
+
+		pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8,
+				    ctrl->status.buf, ctrl->status.dma_addr);
+		pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8,
+				    ctrl->cmd.buf, ctrl->cmd.dma_addr);
+	}
+}
+
+
+int rsxx_dma_init(void)
+{
+	rsxx_dma_pool = KMEM_CACHE(rsxx_dma, SLAB_HWCACHE_ALIGN);
+	if (!rsxx_dma_pool)
+		return -ENOMEM;
+
+	return 0;
+}
+
+
+void rsxx_dma_cleanup(void)
+{
+	kmem_cache_destroy(rsxx_dma_pool);
+}
+
diff --git a/drivers/block/rsxx/rsxx.h b/drivers/block/rsxx/rsxx.h
new file mode 100644
index 000000000000..2e50b65902b7
--- /dev/null
+++ b/drivers/block/rsxx/rsxx.h
@@ -0,0 +1,45 @@
+/*
+* Filename: rsxx.h
+*
+*
+* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
+*	Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+*
+* (C) Copyright 2013 IBM Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of the
+* License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#ifndef __RSXX_H__
+#define __RSXX_H__
+
+/*----------------- IOCTL Definitions -------------------*/
+
+struct rsxx_reg_access {
+	__u32 addr;
+	__u32 cnt;
+	__u32 stat;
+	__u32 stream;
+	__u32 data[8];
+};
+
+#define RSXX_MAX_REG_CNT	(8 * (sizeof(__u32)))
+
+#define RSXX_IOC_MAGIC 'r'
+
+#define RSXX_GETREG _IOWR(RSXX_IOC_MAGIC, 0x20, struct rsxx_reg_access)
+#define RSXX_SETREG _IOWR(RSXX_IOC_MAGIC, 0x21, struct rsxx_reg_access)
+
+#endif /* __RSXX_H_ */
diff --git a/drivers/block/rsxx/rsxx_cfg.h b/drivers/block/rsxx/rsxx_cfg.h
new file mode 100644
index 000000000000..c025fe5fdb70
--- /dev/null
+++ b/drivers/block/rsxx/rsxx_cfg.h
@@ -0,0 +1,72 @@
+/*
+* Filename: rsXX_cfg.h
+*
+*
+* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
+*	Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+*
+* (C) Copyright 2013 IBM Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of the
+* License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#ifndef __RSXX_CFG_H__
+#define __RSXX_CFG_H__
+
+/* NOTE: Config values will be saved in network byte order (i.e. Big endian) */
+#include <linux/types.h>
+
+/*
+ * The card config version must match the driver's expected version. If it does
+ * not, the DMA interfaces will not be attached and the user will need to
+ * initialize/upgrade the card configuration using the card config utility.
+ */
+#define RSXX_CFG_VERSION	4
+
+struct card_cfg_hdr {
+	__u32	version;
+	__u32	crc;
+};
+
+struct card_cfg_data {
+	__u32	block_size;
+	__u32	stripe_size;
+	__u32	vendor_id;
+	__u32	cache_order;
+	struct {
+		__u32	mode;	/* Disabled, manual, auto-tune... */
+		__u32	count;	/* Number of intr to coalesce     */
+		__u32	latency;/* Max wait time (in ns)          */
+	} intr_coal;
+};
+
+struct rsxx_card_cfg {
+	struct card_cfg_hdr	hdr;
+	struct card_cfg_data	data;
+};
+
+/* Vendor ID Values */
+#define RSXX_VENDOR_ID_TMS_IBM		0
+#define RSXX_VENDOR_ID_DSI		1
+#define RSXX_VENDOR_COUNT		2
+
+/* Interrupt Coalescing Values */
+#define RSXX_INTR_COAL_DISABLED           0
+#define RSXX_INTR_COAL_EXPLICIT           1
+#define RSXX_INTR_COAL_AUTO_TUNE          2
+
+
+#endif /* __RSXX_CFG_H__ */
+
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
new file mode 100644
index 000000000000..a1ac907d8f4c
--- /dev/null
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -0,0 +1,399 @@
+/*
+* Filename: rsxx_priv.h
+*
+*
+* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
+*	Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+*
+* (C) Copyright 2013 IBM Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of the
+* License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#ifndef __RSXX_PRIV_H__
+#define __RSXX_PRIV_H__
+
+#include <linux/version.h>
+#include <linux/semaphore.h>
+
+#include <linux/fs.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
+#include <linux/workqueue.h>
+#include <linux/bio.h>
+#include <linux/vmalloc.h>
+#include <linux/timer.h>
+#include <linux/ioctl.h>
+
+#include "rsxx.h"
+#include "rsxx_cfg.h"
+
+struct proc_cmd;
+
+#define PCI_VENDOR_ID_TMS_IBM		0x15B6
+#define PCI_DEVICE_ID_RS70_FLASH	0x0019
+#define PCI_DEVICE_ID_RS70D_FLASH	0x001A
+#define PCI_DEVICE_ID_RS80_FLASH	0x001C
+#define PCI_DEVICE_ID_RS81_FLASH	0x001E
+
+#define RS70_PCI_REV_SUPPORTED	4
+
+#define DRIVER_NAME "rsxx"
+#define DRIVER_VERSION "3.7"
+
+/* Block size is 4096 */
+#define RSXX_HW_BLK_SHIFT		12
+#define RSXX_HW_BLK_SIZE		(1 << RSXX_HW_BLK_SHIFT)
+#define RSXX_HW_BLK_MASK		(RSXX_HW_BLK_SIZE - 1)
+
+#define MAX_CREG_DATA8	32
+#define LOG_BUF_SIZE8	128
+
+#define RSXX_MAX_OUTSTANDING_CMDS	255
+#define RSXX_CS_IDX_MASK		0xff
+
+#define RSXX_MAX_TARGETS	8
+
+struct dma_tracker_list;
+
+/* DMA Command/Status Buffer structure */
+struct rsxx_cs_buffer {
+	dma_addr_t	dma_addr;
+	void		*buf;
+	u32		idx;
+};
+
+struct rsxx_dma_stats {
+	u32 crc_errors;
+	u32 hard_errors;
+	u32 soft_errors;
+	u32 writes_issued;
+	u32 writes_failed;
+	u32 reads_issued;
+	u32 reads_failed;
+	u32 reads_retried;
+	u32 discards_issued;
+	u32 discards_failed;
+	u32 done_rescheduled;
+	u32 issue_rescheduled;
+	u32 sw_q_depth;		/* Number of DMAs on the SW queue. */
+	atomic_t hw_q_depth;	/* Number of DMAs queued to HW. */
+};
+
+struct rsxx_dma_ctrl {
+	struct rsxx_cardinfo		*card;
+	int				id;
+	void				__iomem *regmap;
+	struct rsxx_cs_buffer		status;
+	struct rsxx_cs_buffer		cmd;
+	u16				e_cnt;
+	spinlock_t			queue_lock;
+	struct list_head		queue;
+	struct workqueue_struct		*issue_wq;
+	struct work_struct		issue_dma_work;
+	struct workqueue_struct		*done_wq;
+	struct work_struct		dma_done_work;
+	struct timer_list		activity_timer;
+	struct dma_tracker_list		*trackers;
+	struct rsxx_dma_stats		stats;
+};
+
+struct rsxx_cardinfo {
+	struct pci_dev		*dev;
+	unsigned int		halt;
+
+	void			__iomem *regmap;
+	spinlock_t		irq_lock;
+	unsigned int		isr_mask;
+	unsigned int		ier_mask;
+
+	struct rsxx_card_cfg	config;
+	int			config_valid;
+
+	/* Embedded CPU Communication */
+	struct {
+		spinlock_t		lock;
+		bool			active;
+		struct creg_cmd		*active_cmd;
+		struct work_struct	done_work;
+		struct list_head	queue;
+		unsigned int		q_depth;
+		/* Cache the creg status to prevent ioreads */
+		struct {
+			u32		stat;
+			u32		failed_cancel_timer;
+			u32		creg_timeout;
+		} creg_stats;
+		struct timer_list	cmd_timer;
+		struct mutex		reset_lock;
+		int			reset;
+	} creg_ctrl;
+
+	struct {
+		char tmp[MAX_CREG_DATA8];
+		char buf[LOG_BUF_SIZE8]; /* terminated */
+		int buf_len;
+	} log;
+
+	struct work_struct	event_work;
+	unsigned int		state;
+	u64			size8;
+
+	/* Lock the device attach/detach function */
+	struct mutex		dev_lock;
+
+	/* Block Device Variables */
+	bool			bdev_attached;
+	int			disk_id;
+	int			major;
+	struct request_queue	*queue;
+	struct gendisk		*gendisk;
+	struct {
+		/* Used to convert a byte address to a device address. */
+		u64 lower_mask;
+		u64 upper_shift;
+		u64 upper_mask;
+		u64 target_mask;
+		u64 target_shift;
+	} _stripe;
+	unsigned int		dma_fault;
+
+	int			scrub_hard;
+
+	int			n_targets;
+	struct rsxx_dma_ctrl	*ctrl;
+};
+
+enum rsxx_pci_regmap {
+	HWID		= 0x00,	/* Hardware Identification Register */
+	SCRATCH		= 0x04, /* Scratch/Debug Register */
+	RESET		= 0x08, /* Reset Register */
+	ISR		= 0x10, /* Interrupt Status Register */
+	IER		= 0x14, /* Interrupt Enable Register */
+	IPR		= 0x18, /* Interrupt Poll Register */
+	CB_ADD_LO	= 0x20, /* Command Host Buffer Address [31:0] */
+	CB_ADD_HI	= 0x24, /* Command Host Buffer Address [63:32]*/
+	HW_CMD_IDX	= 0x28, /* Hardware Processed Command Index */
+	SW_CMD_IDX	= 0x2C, /* Software Processed Command Index */
+	SB_ADD_LO	= 0x30, /* Status Host Buffer Address [31:0] */
+	SB_ADD_HI	= 0x34, /* Status Host Buffer Address [63:32] */
+	HW_STATUS_CNT	= 0x38, /* Hardware Status Counter */
+	SW_STATUS_CNT	= 0x3C, /* Deprecated */
+	CREG_CMD	= 0x40, /* CPU Command Register */
+	CREG_ADD	= 0x44, /* CPU Address Register */
+	CREG_CNT	= 0x48, /* CPU Count Register */
+	CREG_STAT	= 0x4C, /* CPU Status Register */
+	CREG_DATA0	= 0x50, /* CPU Data Registers */
+	CREG_DATA1	= 0x54,
+	CREG_DATA2	= 0x58,
+	CREG_DATA3	= 0x5C,
+	CREG_DATA4	= 0x60,
+	CREG_DATA5	= 0x64,
+	CREG_DATA6	= 0x68,
+	CREG_DATA7	= 0x6c,
+	INTR_COAL	= 0x70, /* Interrupt Coalescing Register */
+	HW_ERROR	= 0x74, /* Card Error Register */
+	PCI_DEBUG0	= 0x78, /* PCI Debug Registers */
+	PCI_DEBUG1	= 0x7C,
+	PCI_DEBUG2	= 0x80,
+	PCI_DEBUG3	= 0x84,
+	PCI_DEBUG4	= 0x88,
+	PCI_DEBUG5	= 0x8C,
+	PCI_DEBUG6	= 0x90,
+	PCI_DEBUG7	= 0x94,
+	PCI_POWER_THROTTLE = 0x98,
+	PERF_CTRL	= 0x9c,
+	PERF_TIMER_LO	= 0xa0,
+	PERF_TIMER_HI	= 0xa4,
+	PERF_RD512_LO	= 0xa8,
+	PERF_RD512_HI	= 0xac,
+	PERF_WR512_LO	= 0xb0,
+	PERF_WR512_HI	= 0xb4,
+};
+
+enum rsxx_intr {
+	CR_INTR_DMA0	= 0x00000001,
+	CR_INTR_CREG	= 0x00000002,
+	CR_INTR_DMA1	= 0x00000004,
+	CR_INTR_EVENT	= 0x00000008,
+	CR_INTR_DMA2	= 0x00000010,
+	CR_INTR_DMA3	= 0x00000020,
+	CR_INTR_DMA4	= 0x00000040,
+	CR_INTR_DMA5	= 0x00000080,
+	CR_INTR_DMA6	= 0x00000100,
+	CR_INTR_DMA7	= 0x00000200,
+	CR_INTR_DMA_ALL = 0x000003f5,
+	CR_INTR_ALL	= 0xffffffff,
+};
+
+static inline int CR_INTR_DMA(int N)
+{
+	static const unsigned int _CR_INTR_DMA[] = {
+		CR_INTR_DMA0, CR_INTR_DMA1, CR_INTR_DMA2, CR_INTR_DMA3,
+		CR_INTR_DMA4, CR_INTR_DMA5, CR_INTR_DMA6, CR_INTR_DMA7
+	};
+	return _CR_INTR_DMA[N];
+}
+enum rsxx_pci_reset {
+	DMA_QUEUE_RESET		= 0x00000001,
+};
+
+enum rsxx_pci_revision {
+	RSXX_DISCARD_SUPPORT = 2,
+};
+
+enum rsxx_creg_cmd {
+	CREG_CMD_TAG_MASK	= 0x0000FF00,
+	CREG_OP_WRITE		= 0x000000C0,
+	CREG_OP_READ		= 0x000000E0,
+};
+
+enum rsxx_creg_addr {
+	CREG_ADD_CARD_CMD		= 0x80001000,
+	CREG_ADD_CARD_STATE		= 0x80001004,
+	CREG_ADD_CARD_SIZE		= 0x8000100c,
+	CREG_ADD_CAPABILITIES		= 0x80001050,
+	CREG_ADD_LOG			= 0x80002000,
+	CREG_ADD_NUM_TARGETS		= 0x80003000,
+	CREG_ADD_CONFIG			= 0xB0000000,
+};
+
+enum rsxx_creg_card_cmd {
+	CARD_CMD_STARTUP		= 1,
+	CARD_CMD_SHUTDOWN		= 2,
+	CARD_CMD_LOW_LEVEL_FORMAT	= 3,
+	CARD_CMD_FPGA_RECONFIG_BR	= 4,
+	CARD_CMD_FPGA_RECONFIG_MAIN	= 5,
+	CARD_CMD_BACKUP			= 6,
+	CARD_CMD_RESET			= 7,
+	CARD_CMD_deprecated		= 8,
+	CARD_CMD_UNINITIALIZE		= 9,
+	CARD_CMD_DSTROY_EMERGENCY	= 10,
+	CARD_CMD_DSTROY_NORMAL		= 11,
+	CARD_CMD_DSTROY_EXTENDED	= 12,
+	CARD_CMD_DSTROY_ABORT		= 13,
+};
+
+enum rsxx_card_state {
+	CARD_STATE_SHUTDOWN		= 0x00000001,
+	CARD_STATE_STARTING		= 0x00000002,
+	CARD_STATE_FORMATTING		= 0x00000004,
+	CARD_STATE_UNINITIALIZED	= 0x00000008,
+	CARD_STATE_GOOD			= 0x00000010,
+	CARD_STATE_SHUTTING_DOWN	= 0x00000020,
+	CARD_STATE_FAULT		= 0x00000040,
+	CARD_STATE_RD_ONLY_FAULT	= 0x00000080,
+	CARD_STATE_DSTROYING		= 0x00000100,
+};
+
+enum rsxx_led {
+	LED_DEFAULT	= 0x0,
+	LED_IDENTIFY	= 0x1,
+	LED_SOAK	= 0x2,
+};
+
+enum rsxx_creg_flash_lock {
+	CREG_FLASH_LOCK		= 1,
+	CREG_FLASH_UNLOCK	= 2,
+};
+
+enum rsxx_card_capabilities {
+	CARD_CAP_SUBPAGE_WRITES = 0x00000080,
+};
+
+enum rsxx_creg_stat {
+	CREG_STAT_STATUS_MASK	= 0x00000003,
+	CREG_STAT_SUCCESS	= 0x1,
+	CREG_STAT_ERROR		= 0x2,
+	CREG_STAT_CHAR_PENDING	= 0x00000004, /* Character I/O pending bit */
+	CREG_STAT_LOG_PENDING	= 0x00000008, /* HW log message pending bit */
+	CREG_STAT_TAG_MASK	= 0x0000ff00,
+};
+
+static inline unsigned int CREG_DATA(int N)
+{
+	return CREG_DATA0 + (N << 2);
+}
+
+/*----------------- Convenient Log Wrappers -------------------*/
+#define CARD_TO_DEV(__CARD)	(&(__CARD)->dev->dev)
+
+/***** config.c *****/
+int rsxx_load_config(struct rsxx_cardinfo *card);
+
+/***** core.c *****/
+void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr);
+void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr);
+void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card,
+				 unsigned int intr);
+void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card,
+				  unsigned int intr);
+
+/***** dev.c *****/
+int rsxx_attach_dev(struct rsxx_cardinfo *card);
+void rsxx_detach_dev(struct rsxx_cardinfo *card);
+int rsxx_setup_dev(struct rsxx_cardinfo *card);
+void rsxx_destroy_dev(struct rsxx_cardinfo *card);
+int rsxx_dev_init(void);
+void rsxx_dev_cleanup(void);
+
+/***** dma.c ****/
+typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card,
+				void *cb_data,
+				unsigned int status);
+int rsxx_dma_setup(struct rsxx_cardinfo *card);
+void rsxx_dma_destroy(struct rsxx_cardinfo *card);
+int rsxx_dma_init(void);
+void rsxx_dma_cleanup(void);
+int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+			   struct bio *bio,
+			   atomic_t *n_dmas,
+			   rsxx_dma_cb cb,
+			   void *cb_data);
+
+/***** cregs.c *****/
+int rsxx_creg_write(struct rsxx_cardinfo *card, u32 addr,
+			unsigned int size8,
+			void *data,
+			int byte_stream);
+int rsxx_creg_read(struct rsxx_cardinfo *card,
+		       u32 addr,
+		       unsigned int size8,
+		       void *data,
+		       int byte_stream);
+int rsxx_read_hw_log(struct rsxx_cardinfo *card);
+int rsxx_get_card_state(struct rsxx_cardinfo *card,
+			    unsigned int *state);
+int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8);
+int rsxx_get_num_targets(struct rsxx_cardinfo *card,
+			     unsigned int *n_targets);
+int rsxx_get_card_capabilities(struct rsxx_cardinfo *card,
+				   u32 *capabilities);
+int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd);
+int rsxx_creg_setup(struct rsxx_cardinfo *card);
+void rsxx_creg_destroy(struct rsxx_cardinfo *card);
+int rsxx_creg_init(void);
+void rsxx_creg_cleanup(void);
+
+int rsxx_reg_access(struct rsxx_cardinfo *card,
+			struct rsxx_reg_access __user *ucmd,
+			int read);
+
+
+
+#endif /* __DRIVERS_BLOCK_RSXX_H__ */
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 57763c54363a..758f2ac878cf 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -1090,10 +1090,13 @@ static const struct block_device_operations floppy_fops = {
 static void swim3_mb_event(struct macio_dev* mdev, int mb_state)
 {
 	struct floppy_state *fs = macio_get_drvdata(mdev);
-	struct swim3 __iomem *sw = fs->swim3;
+	struct swim3 __iomem *sw;
 
 	if (!fs)
 		return;
+
+	sw = fs->swim3;
+
 	if (mb_state != MB_FD)
 		return;
 
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
deleted file mode 100644
index ff540520bada..000000000000
--- a/drivers/block/xd.c
+++ /dev/null
@@ -1,1123 +0,0 @@
-/*
- * This file contains the driver for an XT hard disk controller
- * (at least the DTC 5150X) for Linux.
- *
- * Author: Pat Mackinlay, pat@it.com.au
- * Date: 29/09/92
- * 
- * Revised: 01/01/93, ...
- *
- * Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler,
- *   kevinf@agora.rain.com)
- * Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and
- *   Wim Van Dorst.
- *
- * Revised: 04/04/94 by Risto Kankkunen
- *   Moved the detection code from xd_init() to xd_geninit() as it needed
- *   interrupts enabled and Linus didn't want to enable them in that first
- *   phase. xd_geninit() is the place to do these kinds of things anyway,
- *   he says.
- *
- * Modularized: 04/10/96 by Todd Fries, tfries@umr.edu
- *
- * Revised: 13/12/97 by Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl
- *   Fixed some problems with disk initialization and module initiation.
- *   Added support for manual geometry setting (except Seagate controllers)
- *   in form:
- *      xd_geo=<cyl_xda>,<head_xda>,<sec_xda>[,<cyl_xdb>,<head_xdb>,<sec_xdb>]
- *   Recovered DMA access. Abridged messages. Added support for DTC5051CX,
- *   WD1002-27X & XEBEC controllers. Driver uses now some jumper settings.
- *   Extended ioctl() support.
- *
- * Bugfix: 15/02/01, Paul G. - inform queue layer of tiny xd_maxsect.
- *
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/genhd.h>
-#include <linux/hdreg.h>
-#include <linux/ioport.h>
-#include <linux/init.h>
-#include <linux/wait.h>
-#include <linux/blkdev.h>
-#include <linux/mutex.h>
-#include <linux/blkpg.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <linux/gfp.h>
-
-#include <asm/uaccess.h>
-#include <asm/dma.h>
-
-#include "xd.h"
-
-static DEFINE_MUTEX(xd_mutex);
-static void __init do_xd_setup (int *integers);
-#ifdef MODULE
-static int xd[5] = { -1,-1,-1,-1, };
-#endif
-
-#define XD_DONT_USE_DMA		0  /* Initial value. may be overriden using
-				      "nodma" module option */
-#define XD_INIT_DISK_DELAY	(30)  /* 30 ms delay during disk initialization */
-
-/* Above may need to be increased if a problem with the 2nd drive detection
-   (ST11M controller) or resetting a controller (WD) appears */
-
-static XD_INFO xd_info[XD_MAXDRIVES];
-
-/* If you try this driver and find that your card is not detected by the driver at bootup, you need to add your BIOS
-   signature and details to the following list of signatures. A BIOS signature is a string embedded into the first
-   few bytes of your controller's on-board ROM BIOS. To find out what yours is, use something like MS-DOS's DEBUG
-   command. Run DEBUG, and then you can examine your BIOS signature with:
-
-	d xxxx:0000
-
-   where xxxx is the segment of your controller (like C800 or D000 or something). On the ASCII dump at the right, you should
-   be able to see a string mentioning the manufacturer's copyright etc. Add this string into the table below. The parameters
-   in the table are, in order:
-
-	offset			; this is the offset (in bytes) from the start of your ROM where the signature starts
-	signature		; this is the actual text of the signature
-	xd_?_init_controller	; this is the controller init routine used by your controller
-	xd_?_init_drive		; this is the drive init routine used by your controller
-
-   The controllers directly supported at the moment are: DTC 5150x, WD 1004A27X, ST11M/R and override. If your controller is
-   made by the same manufacturer as one of these, try using the same init routines as they do. If that doesn't work, your
-   best bet is to use the "override" routines. These routines use a "portable" method of getting the disk's geometry, and
-   may work with your card. If none of these seem to work, try sending me some email and I'll see what I can do <grin>.
-
-   NOTE: You can now specify your XT controller's parameters from the command line in the form xd=TYPE,IRQ,IO,DMA. The driver
-   should be able to detect your drive's geometry from this info. (eg: xd=0,5,0x320,3 is the "standard"). */
-
-#include <asm/page.h>
-#define xd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL,get_order(size))
-#define xd_dma_mem_free(addr, size) free_pages(addr, get_order(size))
-static char *xd_dma_buffer;
-
-static XD_SIGNATURE xd_sigs[] __initdata = {
-	{ 0x0000,"Override geometry handler",NULL,xd_override_init_drive,"n unknown" }, /* Pat Mackinlay, pat@it.com.au */
-	{ 0x0008,"[BXD06 (C) DTC 17-MAY-1985]",xd_dtc_init_controller,xd_dtc5150cx_init_drive," DTC 5150CX" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */
-	{ 0x000B,"CRD18A   Not an IBM rom. (C) Copyright Data Technology Corp. 05/31/88",xd_dtc_init_controller,xd_dtc_init_drive," DTC 5150X" }, /* Todd Fries, tfries@umr.edu */
-	{ 0x000B,"CXD23A Not an IBM ROM (C)Copyright Data Technology Corp 12/03/88",xd_dtc_init_controller,xd_dtc_init_drive," DTC 5150X" }, /* Pat Mackinlay, pat@it.com.au */
-	{ 0x0008,"07/15/86(C) Copyright 1986 Western Digital Corp.",xd_wd_init_controller,xd_wd_init_drive," Western Dig. 1002-27X" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */
-	{ 0x0008,"06/24/88(C) Copyright 1988 Western Digital Corp.",xd_wd_init_controller,xd_wd_init_drive," Western Dig. WDXT-GEN2" }, /* Dan Newcombe, newcombe@aa.csc.peachnet.edu */
-	{ 0x0015,"SEAGATE ST11 BIOS REVISION",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11M/R" }, /* Salvador Abreu, spa@fct.unl.pt */
-	{ 0x0010,"ST11R BIOS",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11M/R" }, /* Risto Kankkunen, risto.kankkunen@cs.helsinki.fi */
-	{ 0x0010,"ST11 BIOS v1.7",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11R" }, /* Alan Hourihane, alanh@fairlite.demon.co.uk */
-	{ 0x1000,"(c)Copyright 1987 SMS",xd_omti_init_controller,xd_omti_init_drive,"n OMTI 5520" }, /* Dirk Melchers, dirk@merlin.nbg.sub.org */
-	{ 0x0006,"COPYRIGHT XEBEC (C) 1984",xd_xebec_init_controller,xd_xebec_init_drive," XEBEC" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */
-	{ 0x0008,"(C) Copyright 1984 Western Digital Corp", xd_wd_init_controller, xd_wd_init_drive," Western Dig. 1002s-wx2" },
-	{ 0x0008,"(C) Copyright 1986 Western Digital Corporation", xd_wd_init_controller, xd_wd_init_drive," 1986 Western Digital" }, /* jfree@sovereign.org */
-};
-
-static unsigned int xd_bases[] __initdata =
-{
-	0xC8000, 0xCA000, 0xCC000,
-	0xCE000, 0xD0000, 0xD2000,
-	0xD4000, 0xD6000, 0xD8000,
-	0xDA000, 0xDC000, 0xDE000,
-	0xE0000
-};
-
-static DEFINE_SPINLOCK(xd_lock);
-
-static struct gendisk *xd_gendisk[2];
-
-static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
-
-static const struct block_device_operations xd_fops = {
-	.owner	= THIS_MODULE,
-	.ioctl	= xd_ioctl,
-	.getgeo = xd_getgeo,
-};
-static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int);
-static u_char xd_drives, xd_irq = 5, xd_dma = 3, xd_maxsectors;
-static u_char xd_override __initdata = 0, xd_type __initdata = 0;
-static u_short xd_iobase = 0x320;
-static int xd_geo[XD_MAXDRIVES*3] __initdata = { 0, };
-
-static volatile int xdc_busy;
-static struct timer_list xd_watchdog_int;
-
-static volatile u_char xd_error;
-static bool nodma = XD_DONT_USE_DMA;
-
-static struct request_queue *xd_queue;
-
-/* xd_init: register the block device number and set up pointer tables */
-static int __init xd_init(void)
-{
-	u_char i,controller;
-	unsigned int address;
-	int err;
-
-#ifdef MODULE
-	{
-		u_char count = 0;
-		for (i = 4; i > 0; i--)
-			if (((xd[i] = xd[i-1]) >= 0) && !count)
-				count = i;
-		if ((xd[0] = count))
-			do_xd_setup(xd);
-	}
-#endif
-
-	init_timer (&xd_watchdog_int); xd_watchdog_int.function = xd_watchdog;
-
-	err = -EBUSY;
-	if (register_blkdev(XT_DISK_MAJOR, "xd"))
-		goto out1;
-
-	err = -ENOMEM;
-	xd_queue = blk_init_queue(do_xd_request, &xd_lock);
-	if (!xd_queue)
-		goto out1a;
-
-	if (xd_detect(&controller,&address)) {
-
-		printk("Detected a%s controller (type %d) at address %06x\n",
-			xd_sigs[controller].name,controller,address);
-		if (!request_region(xd_iobase,4,"xd")) {
-			printk("xd: Ports at 0x%x are not available\n",
-				xd_iobase);
-			goto out2;
-		}
-		if (controller)
-			xd_sigs[controller].init_controller(address);
-		xd_drives = xd_initdrives(xd_sigs[controller].init_drive);
-		
-		printk("Detected %d hard drive%s (using IRQ%d & DMA%d)\n",
-			xd_drives,xd_drives == 1 ? "" : "s",xd_irq,xd_dma);
-	}
-
-	/*
-	 * With the drive detected, xd_maxsectors should now be known.
-	 * If xd_maxsectors is 0, nothing was detected and we fall through
-	 * to return -ENODEV
-	 */
-	if (!xd_dma_buffer && xd_maxsectors) {
-		xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200);
-		if (!xd_dma_buffer) {
-			printk(KERN_ERR "xd: Out of memory.\n");
-			goto out3;
-		}
-	}
-
-	err = -ENODEV;
-	if (!xd_drives)
-		goto out3;
-
-	for (i = 0; i < xd_drives; i++) {
-		XD_INFO *p = &xd_info[i];
-		struct gendisk *disk = alloc_disk(64);
-		if (!disk)
-			goto Enomem;
-		p->unit = i;
-		disk->major = XT_DISK_MAJOR;
-		disk->first_minor = i<<6;
-		sprintf(disk->disk_name, "xd%c", i+'a');
-		disk->fops = &xd_fops;
-		disk->private_data = p;
-		disk->queue = xd_queue;
-		set_capacity(disk, p->heads * p->cylinders * p->sectors);
-		printk(" %s: CHS=%d/%d/%d\n", disk->disk_name,
-			p->cylinders, p->heads, p->sectors);
-		xd_gendisk[i] = disk;
-	}
-
-	err = -EBUSY;
-	if (request_irq(xd_irq,xd_interrupt_handler, 0, "XT hard disk", NULL)) {
-		printk("xd: unable to get IRQ%d\n",xd_irq);
-		goto out4;
-	}
-
-	if (request_dma(xd_dma,"xd")) {
-		printk("xd: unable to get DMA%d\n",xd_dma);
-		goto out5;
-	}
-
-	/* xd_maxsectors depends on controller - so set after detection */
-	blk_queue_max_hw_sectors(xd_queue, xd_maxsectors);
-
-	for (i = 0; i < xd_drives; i++)
-		add_disk(xd_gendisk[i]);
-
-	return 0;
-
-out5:
-	free_irq(xd_irq, NULL);
-out4:
-	for (i = 0; i < xd_drives; i++)
-		put_disk(xd_gendisk[i]);
-out3:
-	if (xd_maxsectors)
-		release_region(xd_iobase,4);
-
-	if (xd_dma_buffer)
-		xd_dma_mem_free((unsigned long)xd_dma_buffer,
-				xd_maxsectors * 0x200);
-out2:
-	blk_cleanup_queue(xd_queue);
-out1a:
-	unregister_blkdev(XT_DISK_MAJOR, "xd");
-out1:
-	return err;
-Enomem:
-	err = -ENOMEM;
-	while (i--)
-		put_disk(xd_gendisk[i]);
-	goto out3;
-}
-
-/* xd_detect: scan the possible BIOS ROM locations for the signature strings */
-static u_char __init xd_detect (u_char *controller, unsigned int *address)
-{
-	int i, j;
-
-	if (xd_override)
-	{
-		*controller = xd_type;
-		*address = 0;
-		return(1);
-	}
-
-	for (i = 0; i < ARRAY_SIZE(xd_bases); i++) {
-		void __iomem *p = ioremap(xd_bases[i], 0x2000);
-		if (!p)
-			continue;
-		for (j = 1; j < ARRAY_SIZE(xd_sigs); j++) {
-			const char *s = xd_sigs[j].string;
-			if (check_signature(p + xd_sigs[j].offset, s, strlen(s))) {
-				*controller = j;
-				xd_type = j;
-				*address = xd_bases[i];
-				iounmap(p);
-				return 1;
-			}
-		}
-		iounmap(p);
-	}
-	return 0;
-}
-
-/* do_xd_request: handle an incoming request */
-static void do_xd_request (struct request_queue * q)
-{
-	struct request *req;
-
-	if (xdc_busy)
-		return;
-
-	req = blk_fetch_request(q);
-	while (req) {
-		unsigned block = blk_rq_pos(req);
-		unsigned count = blk_rq_cur_sectors(req);
-		XD_INFO *disk = req->rq_disk->private_data;
-		int res = -EIO;
-		int retry;
-
-		if (req->cmd_type != REQ_TYPE_FS)
-			goto done;
-		if (block + count > get_capacity(req->rq_disk))
-			goto done;
-		for (retry = 0; (retry < XD_RETRIES) && !res; retry++)
-			res = xd_readwrite(rq_data_dir(req), disk, req->buffer,
-					   block, count);
-	done:
-		/* wrap up, 0 = success, -errno = fail */
-		if (!__blk_end_request_cur(req, res))
-			req = blk_fetch_request(q);
-	}
-}
-
-static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-	XD_INFO *p = bdev->bd_disk->private_data;
-
-	geo->heads = p->heads;
-	geo->sectors = p->sectors;
-	geo->cylinders = p->cylinders;
-	return 0;
-}
-
-/* xd_ioctl: handle device ioctl's */
-static int xd_locked_ioctl(struct block_device *bdev, fmode_t mode, u_int cmd, u_long arg)
-{
-	switch (cmd) {
-		case HDIO_SET_DMA:
-			if (!capable(CAP_SYS_ADMIN)) return -EACCES;
-			if (xdc_busy) return -EBUSY;
-			nodma = !arg;
-			if (nodma && xd_dma_buffer) {
-				xd_dma_mem_free((unsigned long)xd_dma_buffer,
-						xd_maxsectors * 0x200);
-				xd_dma_buffer = NULL;
-			} else if (!nodma && !xd_dma_buffer) {
-				xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200);
-				if (!xd_dma_buffer) {
-					nodma = XD_DONT_USE_DMA;
-					return -ENOMEM;
-				}
-			}
-			return 0;
-		case HDIO_GET_DMA:
-			return put_user(!nodma, (long __user *) arg);
-		case HDIO_GET_MULTCOUNT:
-			return put_user(xd_maxsectors, (long __user *) arg);
-		default:
-			return -EINVAL;
-	}
-}
-
-static int xd_ioctl(struct block_device *bdev, fmode_t mode,
-			     unsigned int cmd, unsigned long param)
-{
-	int ret;
-
-	mutex_lock(&xd_mutex);
-	ret = xd_locked_ioctl(bdev, mode, cmd, param);
-	mutex_unlock(&xd_mutex);
-
-	return ret;
-}
-
-/* xd_readwrite: handle a read/write request */
-static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_int count)
-{
-	int drive = p->unit;
-	u_char cmdblk[6],sense[4];
-	u_short track,cylinder;
-	u_char head,sector,control,mode = PIO_MODE,temp;
-	char **real_buffer;
-	register int i;
-	
-#ifdef DEBUG_READWRITE
-	printk("xd_readwrite: operation = %s, drive = %d, buffer = 0x%X, block = %d, count = %d\n",operation == READ ? "read" : "write",drive,buffer,block,count);
-#endif /* DEBUG_READWRITE */
-
-	spin_unlock_irq(&xd_lock);
-
-	control = p->control;
-	if (!xd_dma_buffer)
-		xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200);
-	while (count) {
-		temp = count < xd_maxsectors ? count : xd_maxsectors;
-
-		track = block / p->sectors;
-		head = track % p->heads;
-		cylinder = track / p->heads;
-		sector = block % p->sectors;
-
-#ifdef DEBUG_READWRITE
-		printk("xd_readwrite: drive = %d, head = %d, cylinder = %d, sector = %d, count = %d\n",drive,head,cylinder,sector,temp);
-#endif /* DEBUG_READWRITE */
-
-		if (xd_dma_buffer) {
-			mode = xd_setup_dma(operation == READ ? DMA_MODE_READ : DMA_MODE_WRITE,(u_char *)(xd_dma_buffer),temp * 0x200);
-			real_buffer = &xd_dma_buffer;
-			for (i=0; i < (temp * 0x200); i++)
-				xd_dma_buffer[i] = buffer[i];
-		}
-		else
-			real_buffer = &buffer;
-
-		xd_build(cmdblk,operation == READ ? CMD_READ : CMD_WRITE,drive,head,cylinder,sector,temp & 0xFF,control);
-
-		switch (xd_command(cmdblk,mode,(u_char *)(*real_buffer),(u_char *)(*real_buffer),sense,XD_TIMEOUT)) {
-			case 1:
-				printk("xd%c: %s timeout, recalibrating drive\n",'a'+drive,(operation == READ ? "read" : "write"));
-				xd_recalibrate(drive);
-				spin_lock_irq(&xd_lock);
-				return -EIO;
-			case 2:
-				if (sense[0] & 0x30) {
-					printk("xd%c: %s - ",'a'+drive,(operation == READ ? "reading" : "writing"));
-					switch ((sense[0] & 0x30) >> 4) {
-					case 0: printk("drive error, code = 0x%X",sense[0] & 0x0F);
-						break;
-					case 1: printk("controller error, code = 0x%X",sense[0] & 0x0F);
-						break;
-					case 2: printk("command error, code = 0x%X",sense[0] & 0x0F);
-						break;
-					case 3: printk("miscellaneous error, code = 0x%X",sense[0] & 0x0F);
-						break;
-					}
-				}
-				if (sense[0] & 0x80)
-					printk(" - CHS = %d/%d/%d\n",((sense[2] & 0xC0) << 2) | sense[3],sense[1] & 0x1F,sense[2] & 0x3F);
-				/*	reported drive number = (sense[1] & 0xE0) >> 5 */
-				else
-					printk(" - no valid disk address\n");
-				spin_lock_irq(&xd_lock);
-				return -EIO;
-		}
-		if (xd_dma_buffer)
-			for (i=0; i < (temp * 0x200); i++)
-				buffer[i] = xd_dma_buffer[i];
-
-		count -= temp, buffer += temp * 0x200, block += temp;
-	}
-	spin_lock_irq(&xd_lock);
-	return 0;
-}
-
-/* xd_recalibrate: recalibrate a given drive and reset controller if necessary */
-static void xd_recalibrate (u_char drive)
-{
-	u_char cmdblk[6];
-	
-	xd_build(cmdblk,CMD_RECALIBRATE,drive,0,0,0,0,0);
-	if (xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 8))
-		printk("xd%c: warning! error recalibrating, controller may be unstable\n", 'a'+drive);
-}
-
-/* xd_interrupt_handler: interrupt service routine */
-static irqreturn_t xd_interrupt_handler(int irq, void *dev_id)
-{
-	if (inb(XD_STATUS) & STAT_INTERRUPT) {							/* check if it was our device */
-#ifdef DEBUG_OTHER
-		printk("xd_interrupt_handler: interrupt detected\n");
-#endif /* DEBUG_OTHER */
-		outb(0,XD_CONTROL);								/* acknowledge interrupt */
-		wake_up(&xd_wait_int);	/* and wake up sleeping processes */
-		return IRQ_HANDLED;
-	}
-	else
-		printk("xd: unexpected interrupt\n");
-	return IRQ_NONE;
-}
-
-/* xd_setup_dma: set up the DMA controller for a data transfer */
-static u_char xd_setup_dma (u_char mode,u_char *buffer,u_int count)
-{
-	unsigned long f;
-	
-	if (nodma)
-		return (PIO_MODE);
-	if (((unsigned long) buffer & 0xFFFF0000) != (((unsigned long) buffer + count) & 0xFFFF0000)) {
-#ifdef DEBUG_OTHER
-		printk("xd_setup_dma: using PIO, transfer overlaps 64k boundary\n");
-#endif /* DEBUG_OTHER */
-		return (PIO_MODE);
-	}
-	
-	f=claim_dma_lock();
-	disable_dma(xd_dma);
-	clear_dma_ff(xd_dma);
-	set_dma_mode(xd_dma,mode);
-	set_dma_addr(xd_dma, (unsigned long) buffer);
-	set_dma_count(xd_dma,count);
-	
-	release_dma_lock(f);
-
-	return (DMA_MODE);			/* use DMA and INT */
-}
-
-/* xd_build: put stuff into an array in a format suitable for the controller */
-static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control)
-{
-	cmdblk[0] = command;
-	cmdblk[1] = ((drive & 0x07) << 5) | (head & 0x1F);
-	cmdblk[2] = ((cylinder & 0x300) >> 2) | (sector & 0x3F);
-	cmdblk[3] = cylinder & 0xFF;
-	cmdblk[4] = count;
-	cmdblk[5] = control;
-	
-	return (cmdblk);
-}
-
-static void xd_watchdog (unsigned long unused)
-{
-	xd_error = 1;
-	wake_up(&xd_wait_int);
-}
-
-/* xd_waitport: waits until port & mask == flags or a timeout occurs. return 1 for a timeout */
-static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout)
-{
-	u_long expiry = jiffies + timeout;
-	int success;
-
-	xdc_busy = 1;
-	while ((success = ((inb(port) & mask) != flags)) && time_before(jiffies, expiry))
-		schedule_timeout_uninterruptible(1);
-	xdc_busy = 0;
-	return (success);
-}
-
-static inline u_int xd_wait_for_IRQ (void)
-{
-	unsigned long flags;
-	xd_watchdog_int.expires = jiffies + 8 * HZ;
-	add_timer(&xd_watchdog_int);
-	
-	flags=claim_dma_lock();
-	enable_dma(xd_dma);
-	release_dma_lock(flags);
-	
-	sleep_on(&xd_wait_int);
-	del_timer(&xd_watchdog_int);
-	xdc_busy = 0;
-	
-	flags=claim_dma_lock();
-	disable_dma(xd_dma);
-	release_dma_lock(flags);
-	
-	if (xd_error) {
-		printk("xd: missed IRQ - command aborted\n");
-		xd_error = 0;
-		return (1);
-	}
-	return (0);
-}
-
-/* xd_command: handle all data transfers necessary for a single command */
-static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout)
-{
-	u_char cmdblk[6],csb,complete = 0;
-
-#ifdef DEBUG_COMMAND
-	printk("xd_command: command = 0x%X, mode = 0x%X, indata = 0x%X, outdata = 0x%X, sense = 0x%X\n",command,mode,indata,outdata,sense);
-#endif /* DEBUG_COMMAND */
-
-	outb(0,XD_SELECT);
-	outb(mode,XD_CONTROL);
-
-	if (xd_waitport(XD_STATUS,STAT_SELECT,STAT_SELECT,timeout))
-		return (1);
-
-	while (!complete) {
-		if (xd_waitport(XD_STATUS,STAT_READY,STAT_READY,timeout))
-			return (1);
-
-		switch (inb(XD_STATUS) & (STAT_COMMAND | STAT_INPUT)) {
-			case 0:
-				if (mode == DMA_MODE) {
-					if (xd_wait_for_IRQ())
-						return (1);
-				} else
-					outb(outdata ? *outdata++ : 0,XD_DATA);
-				break;
-			case STAT_INPUT:
-				if (mode == DMA_MODE) {
-					if (xd_wait_for_IRQ())
-						return (1);
-				} else
-					if (indata)
-						*indata++ = inb(XD_DATA);
-					else
-						inb(XD_DATA);
-				break;
-			case STAT_COMMAND:
-				outb(command ? *command++ : 0,XD_DATA);
-				break;
-			case STAT_COMMAND | STAT_INPUT:
-				complete = 1;
-				break;
-		}
-	}
-	csb = inb(XD_DATA);
-
-	if (xd_waitport(XD_STATUS,0,STAT_SELECT,timeout))					/* wait until deselected */
-		return (1);
-
-	if (csb & CSB_ERROR) {									/* read sense data if error */
-		xd_build(cmdblk,CMD_SENSE,(csb & CSB_LUN) >> 5,0,0,0,0,0);
-		if (xd_command(cmdblk,0,sense,NULL,NULL,XD_TIMEOUT))
-			printk("xd: warning! sense command failed!\n");
-	}
-
-#ifdef DEBUG_COMMAND
-	printk("xd_command: completed with csb = 0x%X\n",csb);
-#endif /* DEBUG_COMMAND */
-
-	return (csb & CSB_ERROR);
-}
-
-static u_char __init xd_initdrives (void (*init_drive)(u_char drive))
-{
-	u_char cmdblk[6],i,count = 0;
-
-	for (i = 0; i < XD_MAXDRIVES; i++) {
-		xd_build(cmdblk,CMD_TESTREADY,i,0,0,0,0,0);
-		if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT*8)) {
-			msleep_interruptible(XD_INIT_DISK_DELAY);
-
-			init_drive(count);
-			count++;
-
-			msleep_interruptible(XD_INIT_DISK_DELAY);
-		}
-	}
-	return (count);
-}
-
-static void __init xd_manual_geo_set (u_char drive)
-{
-	xd_info[drive].heads = (u_char)(xd_geo[3 * drive + 1]);
-	xd_info[drive].cylinders = (u_short)(xd_geo[3 * drive]);
-	xd_info[drive].sectors = (u_char)(xd_geo[3 * drive + 2]);
-}
-
-static void __init xd_dtc_init_controller (unsigned int address)
-{
-	switch (address) {
-		case 0x00000:
-		case 0xC8000:	break;			/*initial: 0x320 */
-		case 0xCA000:	xd_iobase = 0x324; 
-		case 0xD0000:				/*5150CX*/
-		case 0xD8000:	break;			/*5150CX & 5150XL*/
-		default:        printk("xd_dtc_init_controller: unsupported BIOS address %06x\n",address);
-				break;
-	}
-	xd_maxsectors = 0x01;		/* my card seems to have trouble doing multi-block transfers? */
-
-	outb(0,XD_RESET);		/* reset the controller */
-}
-
-
-static void __init xd_dtc5150cx_init_drive (u_char drive)
-{
-	/* values from controller's BIOS - BIOS chip may be removed */
-	static u_short geometry_table[][4] = {
-		{0x200,8,0x200,0x100},
-		{0x267,2,0x267,0x267},
-		{0x264,4,0x264,0x80},
-		{0x132,4,0x132,0x0},
-		{0x132,2,0x80, 0x132},
-		{0x177,8,0x177,0x0},
-		{0x132,8,0x84, 0x0},
-		{},  /* not used */
-		{0x132,6,0x80, 0x100},
-		{0x200,6,0x100,0x100},
-		{0x264,2,0x264,0x80},
-		{0x280,4,0x280,0x100},
-		{0x2B9,3,0x2B9,0x2B9},
-		{0x2B9,5,0x2B9,0x2B9},
-		{0x280,6,0x280,0x100},
-		{0x132,4,0x132,0x0}};
-	u_char n;
-
-	n = inb(XD_JUMPER);
-	n = (drive ? n : (n >> 2)) & 0x33;
-	n = (n | (n >> 2)) & 0x0F;
-	if (xd_geo[3*drive])
-		xd_manual_geo_set(drive);
-	else
-		if (n != 7) {	
-			xd_info[drive].heads = (u_char)(geometry_table[n][1]);			/* heads */
-			xd_info[drive].cylinders = geometry_table[n][0];	/* cylinders */
-			xd_info[drive].sectors = 17;				/* sectors */
-#if 0
-			xd_info[drive].rwrite = geometry_table[n][2];	/* reduced write */
-			xd_info[drive].precomp = geometry_table[n][3]		/* write precomp */
-			xd_info[drive].ecc = 0x0B;				/* ecc length */
-#endif /* 0 */
-		}
-		else {
-			printk("xd%c: undetermined drive geometry\n",'a'+drive);
-			return;
-		}
-	xd_info[drive].control = 5;				/* control byte */
-	xd_setparam(CMD_DTCSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,geometry_table[n][2],geometry_table[n][3],0x0B);
-	xd_recalibrate(drive);
-}
-
-static void __init xd_dtc_init_drive (u_char drive)
-{
-	u_char cmdblk[6],buf[64];
-
-	xd_build(cmdblk,CMD_DTCGETGEOM,drive,0,0,0,0,0);
-	if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) {
-		xd_info[drive].heads = buf[0x0A];			/* heads */
-		xd_info[drive].cylinders = ((u_short *) (buf))[0x04];	/* cylinders */
-		xd_info[drive].sectors = 17;				/* sectors */
-		if (xd_geo[3*drive])
-			xd_manual_geo_set(drive);
-#if 0
-		xd_info[drive].rwrite = ((u_short *) (buf + 1))[0x05];	/* reduced write */
-		xd_info[drive].precomp = ((u_short *) (buf + 1))[0x06];	/* write precomp */
-		xd_info[drive].ecc = buf[0x0F];				/* ecc length */
-#endif /* 0 */
-		xd_info[drive].control = 0;				/* control byte */
-
-		xd_setparam(CMD_DTCSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,((u_short *) (buf + 1))[0x05],((u_short *) (buf + 1))[0x06],buf[0x0F]);
-		xd_build(cmdblk,CMD_DTCSETSTEP,drive,0,0,0,0,7);
-		if (xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 2))
-			printk("xd_dtc_init_drive: error setting step rate for xd%c\n", 'a'+drive);
-	}
-	else
-		printk("xd_dtc_init_drive: error reading geometry for xd%c\n", 'a'+drive);
-}
-
-static void __init xd_wd_init_controller (unsigned int address)
-{
-	switch (address) {
-		case 0x00000:
-		case 0xC8000:	break;			/*initial: 0x320 */
-		case 0xCA000:	xd_iobase = 0x324; break;
-		case 0xCC000:   xd_iobase = 0x328; break;
-		case 0xCE000:   xd_iobase = 0x32C; break;
-		case 0xD0000:	xd_iobase = 0x328; break; /* ? */
-		case 0xD8000:	xd_iobase = 0x32C; break; /* ? */
-		default:        printk("xd_wd_init_controller: unsupported BIOS address %06x\n",address);
-				break;
-	}
-	xd_maxsectors = 0x01;		/* this one doesn't wrap properly either... */
-
-	outb(0,XD_RESET);		/* reset the controller */
-
-	msleep(XD_INIT_DISK_DELAY);
-}
-
-static void __init xd_wd_init_drive (u_char drive)
-{
-	/* values from controller's BIOS - BIOS may be disabled */
-	static u_short geometry_table[][4] = {
-		{0x264,4,0x1C2,0x1C2},   /* common part */
-		{0x132,4,0x099,0x0},
-		{0x267,2,0x1C2,0x1C2},
-		{0x267,4,0x1C2,0x1C2},
-
-		{0x334,6,0x335,0x335},   /* 1004 series RLL */
-		{0x30E,4,0x30F,0x3DC},
-		{0x30E,2,0x30F,0x30F},
-		{0x267,4,0x268,0x268},
-
-		{0x3D5,5,0x3D6,0x3D6},   /* 1002 series RLL */
-		{0x3DB,7,0x3DC,0x3DC},
-		{0x264,4,0x265,0x265},
-		{0x267,4,0x268,0x268}};
-
-	u_char cmdblk[6],buf[0x200];
-	u_char n = 0,rll,jumper_state,use_jumper_geo;
-	u_char wd_1002 = (xd_sigs[xd_type].string[7] == '6');
-	
-	jumper_state = ~(inb(0x322));
-	if (jumper_state & 0x40)
-		xd_irq = 9;
-	rll = (jumper_state & 0x30) ? (0x04 << wd_1002) : 0;
-	xd_build(cmdblk,CMD_READ,drive,0,0,0,1,0);
-	if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) {
-		xd_info[drive].heads = buf[0x1AF];				/* heads */
-		xd_info[drive].cylinders = ((u_short *) (buf + 1))[0xD6];	/* cylinders */
-		xd_info[drive].sectors = 17;					/* sectors */
-		if (xd_geo[3*drive])
-			xd_manual_geo_set(drive);
-#if 0
-		xd_info[drive].rwrite = ((u_short *) (buf))[0xD8];		/* reduced write */
-		xd_info[drive].wprecomp = ((u_short *) (buf))[0xDA];		/* write precomp */
-		xd_info[drive].ecc = buf[0x1B4];				/* ecc length */
-#endif /* 0 */
-		xd_info[drive].control = buf[0x1B5];				/* control byte */
-		use_jumper_geo = !(xd_info[drive].heads) || !(xd_info[drive].cylinders);
-		if (xd_geo[3*drive]) {
-			xd_manual_geo_set(drive);
-			xd_info[drive].control = rll ? 7 : 5;
-		}
-		else if (use_jumper_geo) {
-			n = (((jumper_state & 0x0F) >> (drive << 1)) & 0x03) | rll;
-			xd_info[drive].cylinders = geometry_table[n][0];
-			xd_info[drive].heads = (u_char)(geometry_table[n][1]);
-			xd_info[drive].control = rll ? 7 : 5;
-#if 0
-			xd_info[drive].rwrite = geometry_table[n][2];
-			xd_info[drive].wprecomp = geometry_table[n][3];
-			xd_info[drive].ecc = 0x0B;
-#endif /* 0 */
-		}
-		if (!wd_1002) {
-			if (use_jumper_geo)
-				xd_setparam(CMD_WDSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,
-					geometry_table[n][2],geometry_table[n][3],0x0B);
-			else
-				xd_setparam(CMD_WDSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,
-					((u_short *) (buf))[0xD8],((u_short *) (buf))[0xDA],buf[0x1B4]);
-		}
-	/* 1002 based RLL controller requests converted addressing, but reports physical 
-	   (physical 26 sec., logical 17 sec.) 
-	   1004 based ???? */
-		if (rll & wd_1002) {
-			if ((xd_info[drive].cylinders *= 26,
-			     xd_info[drive].cylinders /= 17) > 1023)
-				xd_info[drive].cylinders = 1023;  /* 1024 ? */
-#if 0
-			xd_info[drive].rwrite *= 26; 
-			xd_info[drive].rwrite /= 17;
-			xd_info[drive].wprecomp *= 26
-			xd_info[drive].wprecomp /= 17;
-#endif /* 0 */
-		}
-	}
-	else
-		printk("xd_wd_init_drive: error reading geometry for xd%c\n",'a'+drive);	
-
-}
-
-static void __init xd_seagate_init_controller (unsigned int address)
-{
-	switch (address) {
-		case 0x00000:
-		case 0xC8000:	break;			/*initial: 0x320 */
-		case 0xD0000:	xd_iobase = 0x324; break;
-		case 0xD8000:	xd_iobase = 0x328; break;
-		case 0xE0000:	xd_iobase = 0x32C; break;
-		default:	printk("xd_seagate_init_controller: unsupported BIOS address %06x\n",address);
-				break;
-	}
-	xd_maxsectors = 0x40;
-
-	outb(0,XD_RESET);		/* reset the controller */
-}
-
-static void __init xd_seagate_init_drive (u_char drive)
-{
-	u_char cmdblk[6],buf[0x200];
-
-	xd_build(cmdblk,CMD_ST11GETGEOM,drive,0,0,0,1,0);
-	if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) {
-		xd_info[drive].heads = buf[0x04];				/* heads */
-		xd_info[drive].cylinders = (buf[0x02] << 8) | buf[0x03];	/* cylinders */
-		xd_info[drive].sectors = buf[0x05];				/* sectors */
-		xd_info[drive].control = 0;					/* control byte */
-	}
-	else
-		printk("xd_seagate_init_drive: error reading geometry from xd%c\n", 'a'+drive);
-}
-
-/* Omti support courtesy Dirk Melchers */
-static void __init xd_omti_init_controller (unsigned int address)
-{
-	switch (address) {
-		case 0x00000:
-		case 0xC8000:	break;			/*initial: 0x320 */
-		case 0xD0000:	xd_iobase = 0x324; break;
-		case 0xD8000:	xd_iobase = 0x328; break;
-		case 0xE0000:	xd_iobase = 0x32C; break;
-		default:	printk("xd_omti_init_controller: unsupported BIOS address %06x\n",address);
-				break;
-	}
-	
-	xd_maxsectors = 0x40;
-
-	outb(0,XD_RESET);		/* reset the controller */
-}
-
-static void __init xd_omti_init_drive (u_char drive)
-{
-	/* gets infos from drive */
-	xd_override_init_drive(drive);
-
-	/* set other parameters, Hardcoded, not that nice :-) */
-	xd_info[drive].control = 2;
-}
-
-/* Xebec support (AK) */
-static void __init xd_xebec_init_controller (unsigned int address)
-{
-/* iobase may be set manually in range 0x300 - 0x33C
-      irq may be set manually to 2(9),3,4,5,6,7
-      dma may be set manually to 1,2,3
-	(How to detect them ???)
-BIOS address may be set manually in range 0x0 - 0xF8000
-If you need non-standard settings use the xd=... command */
-
-	switch (address) {
-		case 0x00000:
-		case 0xC8000:	/* initially: xd_iobase==0x320 */
-		case 0xD0000:
-		case 0xD2000:
-		case 0xD4000:
-		case 0xD6000:
-		case 0xD8000:
-		case 0xDA000:
-		case 0xDC000:
-		case 0xDE000:
-		case 0xE0000:	break;
-		default:	printk("xd_xebec_init_controller: unsupported BIOS address %06x\n",address);
-				break;
-		}
-
-	xd_maxsectors = 0x01;
-	outb(0,XD_RESET);		/* reset the controller */
-
-	msleep(XD_INIT_DISK_DELAY);
-}
-
-static void __init xd_xebec_init_drive (u_char drive)
-{
-	/* values from controller's BIOS - BIOS chip may be removed */
-	static u_short geometry_table[][5] = {
-		{0x132,4,0x080,0x080,0x7},
-		{0x132,4,0x080,0x080,0x17},
-		{0x264,2,0x100,0x100,0x7},
-		{0x264,2,0x100,0x100,0x17},
-		{0x132,8,0x080,0x080,0x7},
-		{0x132,8,0x080,0x080,0x17},
-		{0x264,4,0x100,0x100,0x6},
-		{0x264,4,0x100,0x100,0x17},
-		{0x2BC,5,0x2BC,0x12C,0x6},
-		{0x3A5,4,0x3A5,0x3A5,0x7},
-		{0x26C,6,0x26C,0x26C,0x7},
-		{0x200,8,0x200,0x100,0x17},
-		{0x400,5,0x400,0x400,0x7},
-		{0x400,6,0x400,0x400,0x7},
-		{0x264,8,0x264,0x200,0x17},
-		{0x33E,7,0x33E,0x200,0x7}};
-	u_char n;
-
-	n = inb(XD_JUMPER) & 0x0F; /* BIOS's drive number: same geometry 
-					is assumed for BOTH drives */
-	if (xd_geo[3*drive])
-		xd_manual_geo_set(drive);
-	else {
-		xd_info[drive].heads = (u_char)(geometry_table[n][1]);			/* heads */
-		xd_info[drive].cylinders = geometry_table[n][0];	/* cylinders */
-		xd_info[drive].sectors = 17;				/* sectors */
-#if 0
-		xd_info[drive].rwrite = geometry_table[n][2];	/* reduced write */
-		xd_info[drive].precomp = geometry_table[n][3]		/* write precomp */
-		xd_info[drive].ecc = 0x0B;				/* ecc length */
-#endif /* 0 */
-	}
-	xd_info[drive].control = geometry_table[n][4];			/* control byte */
-	xd_setparam(CMD_XBSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,geometry_table[n][2],geometry_table[n][3],0x0B);
-	xd_recalibrate(drive);
-}
-
-/* xd_override_init_drive: this finds disk geometry in a "binary search" style, narrowing in on the "correct" number of heads
-   etc. by trying values until it gets the highest successful value. Idea courtesy Salvador Abreu (spa@fct.unl.pt). */
-static void __init xd_override_init_drive (u_char drive)
-{
-	u_short min[] = { 0,0,0 },max[] = { 16,1024,64 },test[] = { 0,0,0 };
-	u_char cmdblk[6],i;
-
-	if (xd_geo[3*drive])
-		xd_manual_geo_set(drive);
-	else {
-		for (i = 0; i < 3; i++) {
-			while (min[i] != max[i] - 1) {
-				test[i] = (min[i] + max[i]) / 2;
-				xd_build(cmdblk,CMD_SEEK,drive,(u_char) test[0],(u_short) test[1],(u_char) test[2],0,0);
-				if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 2))
-					min[i] = test[i];
-				else
-					max[i] = test[i];
-			}
-			test[i] = min[i];
-		}
-		xd_info[drive].heads = (u_char) min[0] + 1;
-		xd_info[drive].cylinders = (u_short) min[1] + 1;
-		xd_info[drive].sectors = (u_char) min[2] + 1;
-	}
-	xd_info[drive].control = 0;
-}
-
-/* xd_setup: initialise controller from command line parameters */
-static void __init do_xd_setup (int *integers)
-{
-	switch (integers[0]) {
-		case 4: if (integers[4] < 0)
-				nodma = 1;
-			else if (integers[4] < 8)
-				xd_dma = integers[4];
-		case 3: if ((integers[3] > 0) && (integers[3] <= 0x3FC))
-				xd_iobase = integers[3];
-		case 2: if ((integers[2] > 0) && (integers[2] < 16))
-				xd_irq = integers[2];
-		case 1: xd_override = 1;
-			if ((integers[1] >= 0) && (integers[1] < ARRAY_SIZE(xd_sigs)))
-				xd_type = integers[1];
-		case 0: break;
-		default:printk("xd: too many parameters for xd\n");
-	}
-	xd_maxsectors = 0x01;
-}
-
-/* xd_setparam: set the drive characteristics */
-static void __init xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc)
-{
-	u_char cmdblk[14];
-
-	xd_build(cmdblk,command,drive,0,0,0,0,0);
-	cmdblk[6] = (u_char) (cylinders >> 8) & 0x03;
-	cmdblk[7] = (u_char) (cylinders & 0xFF);
-	cmdblk[8] = heads & 0x1F;
-	cmdblk[9] = (u_char) (rwrite >> 8) & 0x03;
-	cmdblk[10] = (u_char) (rwrite & 0xFF);
-	cmdblk[11] = (u_char) (wprecomp >> 8) & 0x03;
-	cmdblk[12] = (u_char) (wprecomp & 0xFF);
-	cmdblk[13] = ecc;
-
-	/* Some controllers require geometry info as data, not command */
-
-	if (xd_command(cmdblk,PIO_MODE,NULL,&cmdblk[6],NULL,XD_TIMEOUT * 2))
-		printk("xd: error setting characteristics for xd%c\n", 'a'+drive);
-}
-
-
-#ifdef MODULE
-
-module_param_array(xd, int, NULL, 0);
-module_param_array(xd_geo, int, NULL, 0);
-module_param(nodma, bool, 0);
-
-MODULE_LICENSE("GPL");
-
-void cleanup_module(void)
-{
-	int i;
-	unregister_blkdev(XT_DISK_MAJOR, "xd");
-	for (i = 0; i < xd_drives; i++) {
-		del_gendisk(xd_gendisk[i]);
-		put_disk(xd_gendisk[i]);
-	}
-	blk_cleanup_queue(xd_queue);
-	release_region(xd_iobase,4);
-	if (xd_drives) {
-		free_irq(xd_irq, NULL);
-		free_dma(xd_dma);
-		if (xd_dma_buffer)
-			xd_dma_mem_free((unsigned long)xd_dma_buffer, xd_maxsectors * 0x200);
-	}
-}
-#else
-
-static int __init xd_setup (char *str)
-{
-	int ints[5];
-	get_options (str, ARRAY_SIZE (ints), ints);
-	do_xd_setup (ints);
-	return 1;
-}
-
-/* xd_manual_geo_init: initialise drive geometry from command line parameters
-   (used only for WD drives) */
-static int __init xd_manual_geo_init (char *str)
-{
-	int i, integers[1 + 3*XD_MAXDRIVES];
-
-	get_options (str, ARRAY_SIZE (integers), integers);
-	if (integers[0]%3 != 0) {
-		printk("xd: incorrect number of parameters for xd_geo\n");
-		return 1;
-	}
-	for (i = 0; (i < integers[0]) && (i < 3*XD_MAXDRIVES); i++)
-		xd_geo[i] = integers[i+1];
-	return 1;
-}
-
-__setup ("xd=", xd_setup);
-__setup ("xd_geo=", xd_manual_geo_init);
-
-#endif /* MODULE */
-
-module_init(xd_init);
-MODULE_ALIAS_BLOCKDEV_MAJOR(XT_DISK_MAJOR);
diff --git a/drivers/block/xd.h b/drivers/block/xd.h
deleted file mode 100644
index 37cacef16e93..000000000000
--- a/drivers/block/xd.h
+++ /dev/null
@@ -1,134 +0,0 @@
-#ifndef _LINUX_XD_H
-#define _LINUX_XD_H
-
-/*
- * This file contains the definitions for the IO ports and errors etc. for XT hard disk controllers (at least the DTC 5150X).
- *
- * Author: Pat Mackinlay, pat@it.com.au
- * Date: 29/09/92
- *
- * Revised: 01/01/93, ...
- *
- * Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler, kevinf@agora.rain.com)
- * Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and Wim Van Dorst.
- */
-
-#include <linux/interrupt.h>
-
-/* XT hard disk controller registers */
-#define XD_DATA		(xd_iobase + 0x00)	/* data RW register */
-#define XD_RESET	(xd_iobase + 0x01)	/* reset WO register */
-#define XD_STATUS	(xd_iobase + 0x01)	/* status RO register */
-#define XD_SELECT	(xd_iobase + 0x02)	/* select WO register */
-#define XD_JUMPER	(xd_iobase + 0x02)	/* jumper RO register */
-#define XD_CONTROL	(xd_iobase + 0x03)	/* DMAE/INTE WO register */
-#define XD_RESERVED	(xd_iobase + 0x03)	/* reserved */
-
-/* XT hard disk controller commands (incomplete list) */
-#define CMD_TESTREADY	0x00	/* test drive ready */
-#define CMD_RECALIBRATE	0x01	/* recalibrate drive */
-#define CMD_SENSE	0x03	/* request sense */
-#define CMD_FORMATDRV	0x04	/* format drive */
-#define CMD_VERIFY	0x05	/* read verify */
-#define CMD_FORMATTRK	0x06	/* format track */
-#define CMD_FORMATBAD	0x07	/* format bad track */
-#define CMD_READ	0x08	/* read */
-#define CMD_WRITE	0x0A	/* write */
-#define CMD_SEEK	0x0B	/* seek */
-
-/* Controller specific commands */
-#define CMD_DTCSETPARAM	0x0C	/* set drive parameters (DTC 5150X & CX only?) */
-#define CMD_DTCGETECC	0x0D	/* get ecc error length (DTC 5150X only?) */
-#define CMD_DTCREADBUF	0x0E	/* read sector buffer (DTC 5150X only?) */
-#define CMD_DTCWRITEBUF 0x0F	/* write sector buffer (DTC 5150X only?) */
-#define CMD_DTCREMAPTRK	0x11	/* assign alternate track (DTC 5150X only?) */
-#define CMD_DTCGETPARAM	0xFB	/* get drive parameters (DTC 5150X only?) */
-#define CMD_DTCSETSTEP	0xFC	/* set step rate (DTC 5150X only?) */
-#define CMD_DTCSETGEOM	0xFE	/* set geometry data (DTC 5150X only?) */
-#define CMD_DTCGETGEOM	0xFF	/* get geometry data (DTC 5150X only?) */
-#define CMD_ST11GETGEOM 0xF8	/* get geometry data (Seagate ST11R/M only?) */
-#define CMD_WDSETPARAM	0x0C	/* set drive parameters (WD 1004A27X only?) */
-#define CMD_XBSETPARAM	0x0C	/* set drive parameters (XEBEC only?) */
-
-/* Bits for command status byte */
-#define CSB_ERROR	0x02	/* error */
-#define CSB_LUN		0x20	/* logical Unit Number */
-
-/* XT hard disk controller status bits */
-#define STAT_READY	0x01	/* controller is ready */
-#define STAT_INPUT	0x02	/* data flowing from controller to host */
-#define STAT_COMMAND	0x04	/* controller in command phase */
-#define STAT_SELECT	0x08	/* controller is selected */
-#define STAT_REQUEST	0x10	/* controller requesting data */
-#define STAT_INTERRUPT	0x20	/* controller requesting interrupt */
-
-/* XT hard disk controller control bits */
-#define PIO_MODE	0x00	/* control bits to set for PIO */
-#define DMA_MODE	0x03	/* control bits to set for DMA & interrupt */
-
-#define XD_MAXDRIVES	2	/* maximum 2 drives */
-#define XD_TIMEOUT	HZ	/* 1 second timeout */
-#define XD_RETRIES	4	/* maximum 4 retries */
-
-#undef DEBUG			/* define for debugging output */
-
-#ifdef DEBUG
-	#define DEBUG_STARTUP	/* debug driver initialisation */
-	#define DEBUG_OVERRIDE	/* debug override geometry detection */
-	#define DEBUG_READWRITE	/* debug each read/write command */
-	#define DEBUG_OTHER	/* debug misc. interrupt/DMA stuff */
-	#define DEBUG_COMMAND	/* debug each controller command */
-#endif /* DEBUG */
-
-/* this structure defines the XT drives and their types */
-typedef struct {
-	u_char heads;
-	u_short cylinders;
-	u_char sectors;
-	u_char control;
-	int unit;
-} XD_INFO;
-
-/* this structure defines a ROM BIOS signature */
-typedef struct {
-	unsigned int offset;
-	const char *string;
-	void (*init_controller)(unsigned int address);
-	void (*init_drive)(u_char drive);
-	const char *name;
-} XD_SIGNATURE;
-
-#ifndef MODULE
-static int xd_manual_geo_init (char *command);
-#endif /* MODULE */
-static u_char xd_detect (u_char *controller, unsigned int *address);
-static u_char xd_initdrives (void (*init_drive)(u_char drive));
-
-static void do_xd_request (struct request_queue * q);
-static int xd_ioctl (struct block_device *bdev,fmode_t mode,unsigned int cmd,unsigned long arg);
-static int xd_readwrite (u_char operation,XD_INFO *disk,char *buffer,u_int block,u_int count);
-static void xd_recalibrate (u_char drive);
-
-static irqreturn_t xd_interrupt_handler(int irq, void *dev_id);
-static u_char xd_setup_dma (u_char opcode,u_char *buffer,u_int count);
-static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control);
-static void xd_watchdog (unsigned long unused);
-static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout);
-static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout);
-
-/* card specific setup and geometry gathering code */
-static void xd_dtc_init_controller (unsigned int address);
-static void xd_dtc5150cx_init_drive (u_char drive);
-static void xd_dtc_init_drive (u_char drive);
-static void xd_wd_init_controller (unsigned int address);
-static void xd_wd_init_drive (u_char drive);
-static void xd_seagate_init_controller (unsigned int address);
-static void xd_seagate_init_drive (u_char drive);
-static void xd_omti_init_controller (unsigned int address);
-static void xd_omti_init_drive (u_char drive);
-static void xd_xebec_init_controller (unsigned int address);
-static void xd_xebec_init_drive (u_char drive);
-static void xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc);
-static void xd_override_init_drive (u_char drive);
-
-#endif /* _LINUX_XD_H */
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 5ac841ff6cc7..de1f319f7bd7 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -46,6 +46,7 @@
 #include <xen/xen.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
+#include <xen/balloon.h>
 #include "common.h"
 
 /*
@@ -239,6 +240,7 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
 			ret = gnttab_unmap_refs(unmap, NULL, pages,
 				segs_to_unmap);
 			BUG_ON(ret);
+			free_xenballooned_pages(segs_to_unmap, pages);
 			segs_to_unmap = 0;
 		}
 
@@ -527,8 +529,8 @@ static int xen_blkbk_map(struct blkif_request *req,
 				GFP_KERNEL);
 			if (!persistent_gnt)
 				return -ENOMEM;
-			persistent_gnt->page = alloc_page(GFP_KERNEL);
-			if (!persistent_gnt->page) {
+			if (alloc_xenballooned_pages(1, &persistent_gnt->page,
+			    false)) {
 				kfree(persistent_gnt);
 				return -ENOMEM;
 			}
@@ -879,7 +881,6 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 		goto fail_response;
 	}
 
-	preq.dev           = req->u.rw.handle;
 	preq.sector_number = req->u.rw.sector_number;
 	preq.nr_sects      = 0;
 
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 63980722db41..5e237f630c47 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -367,6 +367,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
 		be->blkif = NULL;
 	}
 
+	kfree(be->mode);
 	kfree(be);
 	dev_set_drvdata(&dev->dev, NULL);
 	return 0;
@@ -502,6 +503,7 @@ static void backend_changed(struct xenbus_watch *watch,
 		= container_of(watch, struct backend_info, backend_watch);
 	struct xenbus_device *dev = be->dev;
 	int cdrom = 0;
+	unsigned long handle;
 	char *device_type;
 
 	DPRINTK("");
@@ -521,10 +523,10 @@ static void backend_changed(struct xenbus_watch *watch,
 		return;
 	}
 
-	if ((be->major || be->minor) &&
-	    ((be->major != major) || (be->minor != minor))) {
-		pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n",
-			be->major, be->minor, major, minor);
+	if (be->major | be->minor) {
+		if (be->major != major || be->minor != minor)
+			pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n",
+				be->major, be->minor, major, minor);
 		return;
 	}
 
@@ -542,36 +544,33 @@ static void backend_changed(struct xenbus_watch *watch,
 		kfree(device_type);
 	}
 
-	if (be->major == 0 && be->minor == 0) {
-		/* Front end dir is a number, which is used as the handle. */
-
-		char *p = strrchr(dev->otherend, '/') + 1;
-		long handle;
-		err = strict_strtoul(p, 0, &handle);
-		if (err)
-			return;
+	/* Front end dir is a number, which is used as the handle. */
+	err = strict_strtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
+	if (err)
+		return;
 
-		be->major = major;
-		be->minor = minor;
+	be->major = major;
+	be->minor = minor;
 
-		err = xen_vbd_create(be->blkif, handle, major, minor,
-				 (NULL == strchr(be->mode, 'w')), cdrom);
-		if (err) {
-			be->major = 0;
-			be->minor = 0;
-			xenbus_dev_fatal(dev, err, "creating vbd structure");
-			return;
-		}
+	err = xen_vbd_create(be->blkif, handle, major, minor,
+			     !strchr(be->mode, 'w'), cdrom);
 
+	if (err)
+		xenbus_dev_fatal(dev, err, "creating vbd structure");
+	else {
 		err = xenvbd_sysfs_addif(dev);
 		if (err) {
 			xen_vbd_free(&be->blkif->vbd);
-			be->major = 0;
-			be->minor = 0;
 			xenbus_dev_fatal(dev, err, "creating sysfs entries");
-			return;
 		}
+	}
 
+	if (err) {
+		kfree(be->mode);
+		be->mode = NULL;
+		be->major = 0;
+		be->minor = 0;
+	} else {
 		/* We're potentially connected now */
 		xen_update_blkif_status(be->blkif);
 	}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 11043c18ac5a..c3dae2e0f290 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -791,7 +791,7 @@ static void blkif_restart_queue(struct work_struct *work)
 static void blkif_free(struct blkfront_info *info, int suspend)
 {
 	struct llist_node *all_gnts;
-	struct grant *persistent_gnt;
+	struct grant *persistent_gnt, *tmp;
 	struct llist_node *n;
 
 	/* Prevent new requests being issued until we fix things up. */
@@ -805,10 +805,17 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	/* Remove all persistent grants */
 	if (info->persistent_gnts_c) {
 		all_gnts = llist_del_all(&info->persistent_gnts);
-		llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) {
+		persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node);
+		while (persistent_gnt) {
 			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
 			__free_page(pfn_to_page(persistent_gnt->pfn));
-			kfree(persistent_gnt);
+			tmp = persistent_gnt;
+			n = persistent_gnt->node.next;
+			if (n)
+				persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node);
+			else
+				persistent_gnt = NULL;
+			kfree(tmp);
 		}
 		info->persistent_gnts_c = 0;
 	}
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 1c7fdcd22a98..0ac9b45a585e 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1208,6 +1208,16 @@ static int smi_num; /* Used to sequence the SMIs */
 #define DEFAULT_REGSPACING	1
 #define DEFAULT_REGSIZE		1
 
+#ifdef CONFIG_ACPI
+static bool          si_tryacpi = 1;
+#endif
+#ifdef CONFIG_DMI
+static bool          si_trydmi = 1;
+#endif
+static bool          si_tryplatform = 1;
+#ifdef CONFIG_PCI
+static bool          si_trypci = 1;
+#endif
 static bool          si_trydefaults = 1;
 static char          *si_type[SI_MAX_PARMS];
 #define MAX_SI_TYPE_STR 30
@@ -1238,6 +1248,25 @@ MODULE_PARM_DESC(hotmod, "Add and remove interfaces.  See"
 		 " Documentation/IPMI.txt in the kernel sources for the"
 		 " gory details.");
 
+#ifdef CONFIG_ACPI
+module_param_named(tryacpi, si_tryacpi, bool, 0);
+MODULE_PARM_DESC(tryacpi, "Setting this to zero will disable the"
+		 " default scan of the interfaces identified via ACPI");
+#endif
+#ifdef CONFIG_DMI
+module_param_named(trydmi, si_trydmi, bool, 0);
+MODULE_PARM_DESC(trydmi, "Setting this to zero will disable the"
+		 " default scan of the interfaces identified via DMI");
+#endif
+module_param_named(tryplatform, si_tryplatform, bool, 0);
+MODULE_PARM_DESC(tryacpi, "Setting this to zero will disable the"
+		 " default scan of the interfaces identified via platform"
+		 " interfaces like openfirmware");
+#ifdef CONFIG_PCI
+module_param_named(trypci, si_trypci, bool, 0);
+MODULE_PARM_DESC(tryacpi, "Setting this to zero will disable the"
+		 " default scan of the interfaces identified via pci");
+#endif
 module_param_named(trydefaults, si_trydefaults, bool, 0);
 MODULE_PARM_DESC(trydefaults, "Setting this to 'false' will disable the"
 		 " default scan of the KCS and SMIC interface at the standard"
@@ -3371,13 +3400,15 @@ static int init_ipmi_si(void)
 		return 0;
 	initialized = 1;
 
-	rv = platform_driver_register(&ipmi_driver);
-	if (rv) {
-		printk(KERN_ERR PFX "Unable to register driver: %d\n", rv);
-		return rv;
+	if (si_tryplatform) {
+		rv = platform_driver_register(&ipmi_driver);
+		if (rv) {
+			printk(KERN_ERR PFX "Unable to register "
+			       "driver: %d\n", rv);
+			return rv;
+		}
 	}
 
-
 	/* Parse out the si_type string into its components. */
 	str = si_type_str;
 	if (*str != '\0') {
@@ -3400,24 +3431,31 @@ static int init_ipmi_si(void)
 		return 0;
 
 #ifdef CONFIG_PCI
-	rv = pci_register_driver(&ipmi_pci_driver);
-	if (rv)
-		printk(KERN_ERR PFX "Unable to register PCI driver: %d\n", rv);
-	else
-		pci_registered = 1;
+	if (si_trypci) {
+		rv = pci_register_driver(&ipmi_pci_driver);
+		if (rv)
+			printk(KERN_ERR PFX "Unable to register "
+			       "PCI driver: %d\n", rv);
+		else
+			pci_registered = 1;
+	}
 #endif
 
 #ifdef CONFIG_ACPI
-	pnp_register_driver(&ipmi_pnp_driver);
-	pnp_registered = 1;
+	if (si_tryacpi) {
+		pnp_register_driver(&ipmi_pnp_driver);
+		pnp_registered = 1;
+	}
 #endif
 
 #ifdef CONFIG_DMI
-	dmi_find_bmc();
+	if (si_trydmi)
+		dmi_find_bmc();
 #endif
 
 #ifdef CONFIG_ACPI
-	spmi_find_bmc();
+	if (si_tryacpi)
+		spmi_find_bmc();
 #endif
 
 	/* We prefer devices with interrupts, but in the case of a machine
diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index 522136d40843..190d4423653f 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -183,19 +183,12 @@ static const struct file_operations misc_fops = {
  
 int misc_register(struct miscdevice * misc)
 {
-	struct miscdevice *c;
 	dev_t dev;
 	int err = 0;
 
 	INIT_LIST_HEAD(&misc->list);
 
 	mutex_lock(&misc_mtx);
-	list_for_each_entry(c, &misc_list, list) {
-		if (c->minor == misc->minor) {
-			mutex_unlock(&misc_mtx);
-			return -EBUSY;
-		}
-	}
 
 	if (misc->minor == MISC_DYNAMIC_MINOR) {
 		int i = find_first_zero_bit(misc_minors, DYNAMIC_MINORS);
@@ -205,6 +198,15 @@ int misc_register(struct miscdevice * misc)
 		}
 		misc->minor = DYNAMIC_MINORS - i - 1;
 		set_bit(i, misc_minors);
+	} else {
+		struct miscdevice *c;
+
+		list_for_each_entry(c, &misc_list, list) {
+			if (c->minor == misc->minor) {
+				mutex_unlock(&misc_mtx);
+				return -EBUSY;
+			}
+		}
 	}
 
 	dev = MKDEV(MISC_MAJOR, misc->minor);
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index fabbfe1a9253..ed87b2405806 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -52,31 +52,29 @@ static void clk_summary_show_subtree(struct seq_file *s, struct clk *c,
 				     int level)
 {
 	struct clk *child;
-	struct hlist_node *tmp;
 
 	if (!c)
 		return;
 
 	clk_summary_show_one(s, c, level);
 
-	hlist_for_each_entry(child, tmp, &c->children, child_node)
+	hlist_for_each_entry(child, &c->children, child_node)
 		clk_summary_show_subtree(s, child, level + 1);
 }
 
 static int clk_summary_show(struct seq_file *s, void *data)
 {
 	struct clk *c;
-	struct hlist_node *tmp;
 
 	seq_printf(s, "   clock                        enable_cnt  prepare_cnt  rate\n");
 	seq_printf(s, "---------------------------------------------------------------------\n");
 
 	mutex_lock(&prepare_lock);
 
-	hlist_for_each_entry(c, tmp, &clk_root_list, child_node)
+	hlist_for_each_entry(c, &clk_root_list, child_node)
 		clk_summary_show_subtree(s, c, 0);
 
-	hlist_for_each_entry(c, tmp, &clk_orphan_list, child_node)
+	hlist_for_each_entry(c, &clk_orphan_list, child_node)
 		clk_summary_show_subtree(s, c, 0);
 
 	mutex_unlock(&prepare_lock);
@@ -111,14 +109,13 @@ static void clk_dump_one(struct seq_file *s, struct clk *c, int level)
 static void clk_dump_subtree(struct seq_file *s, struct clk *c, int level)
 {
 	struct clk *child;
-	struct hlist_node *tmp;
 
 	if (!c)
 		return;
 
 	clk_dump_one(s, c, level);
 
-	hlist_for_each_entry(child, tmp, &c->children, child_node) {
+	hlist_for_each_entry(child, &c->children, child_node) {
 		seq_printf(s, ",");
 		clk_dump_subtree(s, child, level + 1);
 	}
@@ -129,21 +126,20 @@ static void clk_dump_subtree(struct seq_file *s, struct clk *c, int level)
 static int clk_dump(struct seq_file *s, void *data)
 {
 	struct clk *c;
-	struct hlist_node *tmp;
 	bool first_node = true;
 
 	seq_printf(s, "{");
 
 	mutex_lock(&prepare_lock);
 
-	hlist_for_each_entry(c, tmp, &clk_root_list, child_node) {
+	hlist_for_each_entry(c, &clk_root_list, child_node) {
 		if (!first_node)
 			seq_printf(s, ",");
 		first_node = false;
 		clk_dump_subtree(s, c, 0);
 	}
 
-	hlist_for_each_entry(c, tmp, &clk_orphan_list, child_node) {
+	hlist_for_each_entry(c, &clk_orphan_list, child_node) {
 		seq_printf(s, ",");
 		clk_dump_subtree(s, c, 0);
 	}
@@ -222,7 +218,6 @@ out:
 static int clk_debug_create_subtree(struct clk *clk, struct dentry *pdentry)
 {
 	struct clk *child;
-	struct hlist_node *tmp;
 	int ret = -EINVAL;;
 
 	if (!clk || !pdentry)
@@ -233,7 +228,7 @@ static int clk_debug_create_subtree(struct clk *clk, struct dentry *pdentry)
 	if (ret)
 		goto out;
 
-	hlist_for_each_entry(child, tmp, &clk->children, child_node)
+	hlist_for_each_entry(child, &clk->children, child_node)
 		clk_debug_create_subtree(child, clk->dentry);
 
 	ret = 0;
@@ -299,7 +294,6 @@ out:
 static int __init clk_debug_init(void)
 {
 	struct clk *clk;
-	struct hlist_node *tmp;
 	struct dentry *d;
 
 	rootdir = debugfs_create_dir("clk", NULL);
@@ -324,10 +318,10 @@ static int __init clk_debug_init(void)
 
 	mutex_lock(&prepare_lock);
 
-	hlist_for_each_entry(clk, tmp, &clk_root_list, child_node)
+	hlist_for_each_entry(clk, &clk_root_list, child_node)
 		clk_debug_create_subtree(clk, rootdir);
 
-	hlist_for_each_entry(clk, tmp, &clk_orphan_list, child_node)
+	hlist_for_each_entry(clk, &clk_orphan_list, child_node)
 		clk_debug_create_subtree(clk, orphandir);
 
 	inited = 1;
@@ -345,13 +339,12 @@ static inline int clk_debug_register(struct clk *clk) { return 0; }
 static void clk_disable_unused_subtree(struct clk *clk)
 {
 	struct clk *child;
-	struct hlist_node *tmp;
 	unsigned long flags;
 
 	if (!clk)
 		goto out;
 
-	hlist_for_each_entry(child, tmp, &clk->children, child_node)
+	hlist_for_each_entry(child, &clk->children, child_node)
 		clk_disable_unused_subtree(child);
 
 	spin_lock_irqsave(&enable_lock, flags);
@@ -384,14 +377,13 @@ out:
 static int clk_disable_unused(void)
 {
 	struct clk *clk;
-	struct hlist_node *tmp;
 
 	mutex_lock(&prepare_lock);
 
-	hlist_for_each_entry(clk, tmp, &clk_root_list, child_node)
+	hlist_for_each_entry(clk, &clk_root_list, child_node)
 		clk_disable_unused_subtree(clk);
 
-	hlist_for_each_entry(clk, tmp, &clk_orphan_list, child_node)
+	hlist_for_each_entry(clk, &clk_orphan_list, child_node)
 		clk_disable_unused_subtree(clk);
 
 	mutex_unlock(&prepare_lock);
@@ -484,12 +476,11 @@ static struct clk *__clk_lookup_subtree(const char *name, struct clk *clk)
 {
 	struct clk *child;
 	struct clk *ret;
-	struct hlist_node *tmp;
 
 	if (!strcmp(clk->name, name))
 		return clk;
 
-	hlist_for_each_entry(child, tmp, &clk->children, child_node) {
+	hlist_for_each_entry(child, &clk->children, child_node) {
 		ret = __clk_lookup_subtree(name, child);
 		if (ret)
 			return ret;
@@ -502,20 +493,19 @@ struct clk *__clk_lookup(const char *name)
 {
 	struct clk *root_clk;
 	struct clk *ret;
-	struct hlist_node *tmp;
 
 	if (!name)
 		return NULL;
 
 	/* search the 'proper' clk tree first */
-	hlist_for_each_entry(root_clk, tmp, &clk_root_list, child_node) {
+	hlist_for_each_entry(root_clk, &clk_root_list, child_node) {
 		ret = __clk_lookup_subtree(name, root_clk);
 		if (ret)
 			return ret;
 	}
 
 	/* if not found, then search the orphan tree */
-	hlist_for_each_entry(root_clk, tmp, &clk_orphan_list, child_node) {
+	hlist_for_each_entry(root_clk, &clk_orphan_list, child_node) {
 		ret = __clk_lookup_subtree(name, root_clk);
 		if (ret)
 			return ret;
@@ -812,7 +802,6 @@ static void __clk_recalc_rates(struct clk *clk, unsigned long msg)
 {
 	unsigned long old_rate;
 	unsigned long parent_rate = 0;
-	struct hlist_node *tmp;
 	struct clk *child;
 
 	old_rate = clk->rate;
@@ -832,7 +821,7 @@ static void __clk_recalc_rates(struct clk *clk, unsigned long msg)
 	if (clk->notifier_count && msg)
 		__clk_notify(clk, msg, old_rate, clk->rate);
 
-	hlist_for_each_entry(child, tmp, &clk->children, child_node)
+	hlist_for_each_entry(child, &clk->children, child_node)
 		__clk_recalc_rates(child, msg);
 }
 
@@ -878,7 +867,6 @@ EXPORT_SYMBOL_GPL(clk_get_rate);
  */
 static int __clk_speculate_rates(struct clk *clk, unsigned long parent_rate)
 {
-	struct hlist_node *tmp;
 	struct clk *child;
 	unsigned long new_rate;
 	int ret = NOTIFY_DONE;
@@ -895,7 +883,7 @@ static int __clk_speculate_rates(struct clk *clk, unsigned long parent_rate)
 	if (ret == NOTIFY_BAD)
 		goto out;
 
-	hlist_for_each_entry(child, tmp, &clk->children, child_node) {
+	hlist_for_each_entry(child, &clk->children, child_node) {
 		ret = __clk_speculate_rates(child, new_rate);
 		if (ret == NOTIFY_BAD)
 			break;
@@ -908,11 +896,10 @@ out:
 static void clk_calc_subtree(struct clk *clk, unsigned long new_rate)
 {
 	struct clk *child;
-	struct hlist_node *tmp;
 
 	clk->new_rate = new_rate;
 
-	hlist_for_each_entry(child, tmp, &clk->children, child_node) {
+	hlist_for_each_entry(child, &clk->children, child_node) {
 		if (child->ops->recalc_rate)
 			child->new_rate = child->ops->recalc_rate(child->hw, new_rate);
 		else
@@ -983,7 +970,6 @@ out:
  */
 static struct clk *clk_propagate_rate_change(struct clk *clk, unsigned long event)
 {
-	struct hlist_node *tmp;
 	struct clk *child, *fail_clk = NULL;
 	int ret = NOTIFY_DONE;
 
@@ -996,7 +982,7 @@ static struct clk *clk_propagate_rate_change(struct clk *clk, unsigned long even
 			fail_clk = clk;
 	}
 
-	hlist_for_each_entry(child, tmp, &clk->children, child_node) {
+	hlist_for_each_entry(child, &clk->children, child_node) {
 		clk = clk_propagate_rate_change(child, event);
 		if (clk)
 			fail_clk = clk;
@@ -1014,7 +1000,6 @@ static void clk_change_rate(struct clk *clk)
 	struct clk *child;
 	unsigned long old_rate;
 	unsigned long best_parent_rate = 0;
-	struct hlist_node *tmp;
 
 	old_rate = clk->rate;
 
@@ -1032,7 +1017,7 @@ static void clk_change_rate(struct clk *clk)
 	if (clk->notifier_count && old_rate != clk->rate)
 		__clk_notify(clk, POST_RATE_CHANGE, old_rate, clk->rate);
 
-	hlist_for_each_entry(child, tmp, &clk->children, child_node)
+	hlist_for_each_entry(child, &clk->children, child_node)
 		clk_change_rate(child);
 }
 
@@ -1348,7 +1333,7 @@ int __clk_init(struct device *dev, struct clk *clk)
 {
 	int i, ret = 0;
 	struct clk *orphan;
-	struct hlist_node *tmp, *tmp2;
+	struct hlist_node *tmp2;
 
 	if (!clk)
 		return -EINVAL;
@@ -1448,7 +1433,7 @@ int __clk_init(struct device *dev, struct clk *clk)
 	 * walk the list of orphan clocks and reparent any that are children of
 	 * this clock
 	 */
-	hlist_for_each_entry_safe(orphan, tmp, tmp2, &clk_orphan_list, child_node) {
+	hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) {
 		if (orphan->ops->get_parent) {
 			i = orphan->ops->get_parent(orphan->hw);
 			if (!strcmp(clk->name, orphan->parent_names[i]))
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index e920cbe519fa..e507ab7df60b 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -62,3 +62,8 @@ config CLKSRC_DBX500_PRCMU_SCHED_CLOCK
 
 config ARM_ARCH_TIMER
 	bool
+
+config CLKSRC_METAG_GENERIC
+	def_bool y if METAG
+	help
+	  This option enables support for the Meta per-thread timers.
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 7d671b85a98e..4d8283aec5b5 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -21,3 +21,4 @@ obj-$(CONFIG_ARCH_TEGRA)	+= tegra20_timer.o
 obj-$(CONFIG_VT8500_TIMER)	+= vt8500_timer.o
 
 obj-$(CONFIG_ARM_ARCH_TIMER)		+= arm_arch_timer.o
+obj-$(CONFIG_CLKSRC_METAG_GENERIC)	+= metag_generic.o
diff --git a/drivers/clocksource/metag_generic.c b/drivers/clocksource/metag_generic.c
new file mode 100644
index 000000000000..ade7513a11d1
--- /dev/null
+++ b/drivers/clocksource/metag_generic.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2005-2013 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Support for Meta per-thread timers.
+ *
+ * Meta hardware threads have 2 timers. The background timer (TXTIMER) is used
+ * as a free-running time base (hz clocksource), and the interrupt timer
+ * (TXTIMERI) is used for the timer interrupt (clock event). Both counters
+ * traditionally count at approximately 1MHz.
+ */
+
+#include <clocksource/metag_generic.h>
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+
+#include <asm/clock.h>
+#include <asm/hwthread.h>
+#include <asm/core_reg.h>
+#include <asm/metag_mem.h>
+#include <asm/tbx.h>
+
+#define HARDWARE_FREQ		1000000	/* 1MHz */
+#define HARDWARE_DIV		1	/* divide by 1 = 1MHz clock */
+#define HARDWARE_TO_NS_SHIFT	10	/* convert ticks to ns */
+
+static unsigned int hwtimer_freq = HARDWARE_FREQ;
+static DEFINE_PER_CPU(struct clock_event_device, local_clockevent);
+static DEFINE_PER_CPU(char [11], local_clockevent_name);
+
+static int metag_timer_set_next_event(unsigned long delta,
+				      struct clock_event_device *dev)
+{
+	__core_reg_set(TXTIMERI, -delta);
+	return 0;
+}
+
+static void metag_timer_set_mode(enum clock_event_mode mode,
+				 struct clock_event_device *evt)
+{
+	switch (mode) {
+	case CLOCK_EVT_MODE_ONESHOT:
+	case CLOCK_EVT_MODE_RESUME:
+		break;
+
+	case CLOCK_EVT_MODE_SHUTDOWN:
+		/* We should disable the IRQ here */
+		break;
+
+	case CLOCK_EVT_MODE_PERIODIC:
+	case CLOCK_EVT_MODE_UNUSED:
+		WARN_ON(1);
+		break;
+	};
+}
+
+static cycle_t metag_clocksource_read(struct clocksource *cs)
+{
+	return __core_reg_get(TXTIMER);
+}
+
+static struct clocksource clocksource_metag = {
+	.name = "META",
+	.rating = 200,
+	.mask = CLOCKSOURCE_MASK(32),
+	.read = metag_clocksource_read,
+	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static irqreturn_t metag_timer_interrupt(int irq, void *dummy)
+{
+	struct clock_event_device *evt = &__get_cpu_var(local_clockevent);
+
+	evt->event_handler(evt);
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction metag_timer_irq = {
+	.name = "META core timer",
+	.handler = metag_timer_interrupt,
+	.flags = IRQF_TIMER | IRQF_IRQPOLL | IRQF_PERCPU,
+};
+
+unsigned long long sched_clock(void)
+{
+	unsigned long long ticks = __core_reg_get(TXTIMER);
+	return ticks << HARDWARE_TO_NS_SHIFT;
+}
+
+static void __cpuinit arch_timer_setup(unsigned int cpu)
+{
+	unsigned int txdivtime;
+	struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
+	char *name = per_cpu(local_clockevent_name, cpu);
+
+	txdivtime = __core_reg_get(TXDIVTIME);
+
+	txdivtime &= ~TXDIVTIME_DIV_BITS;
+	txdivtime |= (HARDWARE_DIV & TXDIVTIME_DIV_BITS);
+
+	__core_reg_set(TXDIVTIME, txdivtime);
+
+	sprintf(name, "META %d", cpu);
+	clk->name = name;
+	clk->features = CLOCK_EVT_FEAT_ONESHOT,
+
+	clk->rating = 200,
+	clk->shift = 12,
+	clk->irq = tbisig_map(TBID_SIGNUM_TRT),
+	clk->set_mode = metag_timer_set_mode,
+	clk->set_next_event = metag_timer_set_next_event,
+
+	clk->mult = div_sc(hwtimer_freq, NSEC_PER_SEC, clk->shift);
+	clk->max_delta_ns = clockevent_delta2ns(0x7fffffff, clk);
+	clk->min_delta_ns = clockevent_delta2ns(0xf, clk);
+	clk->cpumask = cpumask_of(cpu);
+
+	clockevents_register_device(clk);
+
+	/*
+	 * For all non-boot CPUs we need to synchronize our free
+	 * running clock (TXTIMER) with the boot CPU's clock.
+	 *
+	 * While this won't be accurate, it should be close enough.
+	 */
+	if (cpu) {
+		unsigned int thread0 = cpu_2_hwthread_id[0];
+		unsigned long val;
+
+		val = core_reg_read(TXUCT_ID, TXTIMER_REGNUM, thread0);
+		__core_reg_set(TXTIMER, val);
+	}
+}
+
+static int __cpuinit arch_timer_cpu_notify(struct notifier_block *self,
+					   unsigned long action, void *hcpu)
+{
+	int cpu = (long)hcpu;
+
+	switch (action) {
+	case CPU_STARTING:
+	case CPU_STARTING_FROZEN:
+		arch_timer_setup(cpu);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata arch_timer_cpu_nb = {
+	.notifier_call = arch_timer_cpu_notify,
+};
+
+int __init metag_generic_timer_init(void)
+{
+	/*
+	 * On Meta 2 SoCs, the actual frequency of the timer is based on the
+	 * Meta core clock speed divided by an integer, so it is only
+	 * approximately 1MHz. Calculating the real frequency here drastically
+	 * reduces clock skew on these SoCs.
+	 */
+#ifdef CONFIG_METAG_META21
+	hwtimer_freq = get_coreclock() / (metag_in32(EXPAND_TIMER_DIV) + 1);
+#endif
+	clocksource_register_hz(&clocksource_metag, hwtimer_freq);
+
+	setup_irq(tbisig_map(TBID_SIGNUM_TRT), &metag_timer_irq);
+
+	/* Configure timer on boot CPU */
+	arch_timer_setup(smp_processor_id());
+
+	/* Hook cpu boot to configure other CPU's timers */
+	register_cpu_notifier(&arch_timer_cpu_nb);
+
+	return 0;
+}
diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c
index a4605fd7e303..47a673070d70 100644
--- a/drivers/clocksource/time-armada-370-xp.c
+++ b/drivers/clocksource/time-armada-370-xp.c
@@ -27,8 +27,10 @@
 #include <linux/of_address.h>
 #include <linux/irq.h>
 #include <linux/module.h>
-#include <asm/sched_clock.h>
 
+#include <asm/sched_clock.h>
+#include <asm/localtimer.h>
+#include <linux/percpu.h>
 /*
  * Timer block registers.
  */
@@ -49,6 +51,7 @@
 #define TIMER1_RELOAD_OFF	0x0018
 #define TIMER1_VAL_OFF		0x001c
 
+#define LCL_TIMER_EVENTS_STATUS	0x0028
 /* Global timers are connected to the coherency fabric clock, and the
    below divider reduces their incrementing frequency. */
 #define TIMER_DIVIDER_SHIFT     5
@@ -57,14 +60,17 @@
 /*
  * SoC-specific data.
  */
-static void __iomem *timer_base;
-static int timer_irq;
+static void __iomem *timer_base, *local_base;
+static unsigned int timer_clk;
+static bool timer25Mhz = true;
 
 /*
  * Number of timer ticks per jiffy.
  */
 static u32 ticks_per_jiffy;
 
+static struct clock_event_device __percpu **percpu_armada_370_xp_evt;
+
 static u32 notrace armada_370_xp_read_sched_clock(void)
 {
 	return ~readl(timer_base + TIMER0_VAL_OFF);
@@ -78,24 +84,23 @@ armada_370_xp_clkevt_next_event(unsigned long delta,
 				struct clock_event_device *dev)
 {
 	u32 u;
-
 	/*
 	 * Clear clockevent timer interrupt.
 	 */
-	writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS);
+	writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS);
 
 	/*
 	 * Setup new clockevent timer value.
 	 */
-	writel(delta, timer_base + TIMER1_VAL_OFF);
+	writel(delta, local_base + TIMER0_VAL_OFF);
 
 	/*
 	 * Enable the timer.
 	 */
-	u = readl(timer_base + TIMER_CTRL_OFF);
-	u = ((u & ~TIMER1_RELOAD_EN) | TIMER1_EN |
-	     TIMER1_DIV(TIMER_DIVIDER_SHIFT));
-	writel(u, timer_base + TIMER_CTRL_OFF);
+	u = readl(local_base + TIMER_CTRL_OFF);
+	u = ((u & ~TIMER0_RELOAD_EN) | TIMER0_EN |
+	     TIMER0_DIV(TIMER_DIVIDER_SHIFT));
+	writel(u, local_base + TIMER_CTRL_OFF);
 
 	return 0;
 }
@@ -107,37 +112,38 @@ armada_370_xp_clkevt_mode(enum clock_event_mode mode,
 	u32 u;
 
 	if (mode == CLOCK_EVT_MODE_PERIODIC) {
+
 		/*
 		 * Setup timer to fire at 1/HZ intervals.
 		 */
-		writel(ticks_per_jiffy - 1, timer_base + TIMER1_RELOAD_OFF);
-		writel(ticks_per_jiffy - 1, timer_base + TIMER1_VAL_OFF);
+		writel(ticks_per_jiffy - 1, local_base + TIMER0_RELOAD_OFF);
+		writel(ticks_per_jiffy - 1, local_base + TIMER0_VAL_OFF);
 
 		/*
 		 * Enable timer.
 		 */
-		u = readl(timer_base + TIMER_CTRL_OFF);
 
-		writel((u | TIMER1_EN | TIMER1_RELOAD_EN |
-			TIMER1_DIV(TIMER_DIVIDER_SHIFT)),
-		       timer_base + TIMER_CTRL_OFF);
+		u = readl(local_base + TIMER_CTRL_OFF);
+
+		writel((u | TIMER0_EN | TIMER0_RELOAD_EN |
+			TIMER0_DIV(TIMER_DIVIDER_SHIFT)),
+			local_base + TIMER_CTRL_OFF);
 	} else {
 		/*
 		 * Disable timer.
 		 */
-		u = readl(timer_base + TIMER_CTRL_OFF);
-		writel(u & ~TIMER1_EN, timer_base + TIMER_CTRL_OFF);
+		u = readl(local_base + TIMER_CTRL_OFF);
+		writel(u & ~TIMER0_EN, local_base + TIMER_CTRL_OFF);
 
 		/*
 		 * ACK pending timer interrupt.
 		 */
-		writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS);
-
+		writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS);
 	}
 }
 
 static struct clock_event_device armada_370_xp_clkevt = {
-	.name		= "armada_370_xp_tick",
+	.name		= "armada_370_xp_per_cpu_tick",
 	.features	= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
 	.shift		= 32,
 	.rating		= 300,
@@ -150,32 +156,78 @@ static irqreturn_t armada_370_xp_timer_interrupt(int irq, void *dev_id)
 	/*
 	 * ACK timer interrupt and call event handler.
 	 */
+	struct clock_event_device *evt = *(struct clock_event_device **)dev_id;
 
-	writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS);
-	armada_370_xp_clkevt.event_handler(&armada_370_xp_clkevt);
+	writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS);
+	evt->event_handler(evt);
 
 	return IRQ_HANDLED;
 }
 
-static struct irqaction armada_370_xp_timer_irq = {
-	.name		= "armada_370_xp_tick",
-	.flags		= IRQF_DISABLED | IRQF_TIMER,
-	.handler	= armada_370_xp_timer_interrupt
+/*
+ * Setup the local clock events for a CPU.
+ */
+static int __cpuinit armada_370_xp_timer_setup(struct clock_event_device *evt)
+{
+	u32 u;
+	int cpu = smp_processor_id();
+
+	/* Use existing clock_event for cpu 0 */
+	if (!smp_processor_id())
+		return 0;
+
+	u = readl(local_base + TIMER_CTRL_OFF);
+	if (timer25Mhz)
+		writel(u | TIMER0_25MHZ, local_base + TIMER_CTRL_OFF);
+	else
+		writel(u & ~TIMER0_25MHZ, local_base + TIMER_CTRL_OFF);
+
+	evt->name		= armada_370_xp_clkevt.name;
+	evt->irq		= armada_370_xp_clkevt.irq;
+	evt->features		= armada_370_xp_clkevt.features;
+	evt->shift		= armada_370_xp_clkevt.shift;
+	evt->rating		= armada_370_xp_clkevt.rating,
+	evt->set_next_event	= armada_370_xp_clkevt_next_event,
+	evt->set_mode		= armada_370_xp_clkevt_mode,
+	evt->cpumask		= cpumask_of(cpu);
+
+	*__this_cpu_ptr(percpu_armada_370_xp_evt) = evt;
+
+	clockevents_config_and_register(evt, timer_clk, 1, 0xfffffffe);
+	enable_percpu_irq(evt->irq, 0);
+
+	return 0;
+}
+
+static void  armada_370_xp_timer_stop(struct clock_event_device *evt)
+{
+	evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt);
+	disable_percpu_irq(evt->irq);
+}
+
+static struct local_timer_ops armada_370_xp_local_timer_ops __cpuinitdata = {
+	.setup	= armada_370_xp_timer_setup,
+	.stop	=  armada_370_xp_timer_stop,
 };
 
 void __init armada_370_xp_timer_init(void)
 {
 	u32 u;
 	struct device_node *np;
-	unsigned int timer_clk;
+	int res;
+
 	np = of_find_compatible_node(NULL, NULL, "marvell,armada-370-xp-timer");
 	timer_base = of_iomap(np, 0);
 	WARN_ON(!timer_base);
+	local_base = of_iomap(np, 1);
 
 	if (of_find_property(np, "marvell,timer-25Mhz", NULL)) {
 		/* The fixed 25MHz timer is available so let's use it */
+		u = readl(local_base + TIMER_CTRL_OFF);
+		writel(u | TIMER0_25MHZ,
+		       local_base + TIMER_CTRL_OFF);
 		u = readl(timer_base + TIMER_CTRL_OFF);
-		writel(u | TIMER0_25MHZ | TIMER1_25MHZ,
+		writel(u | TIMER0_25MHZ,
 		       timer_base + TIMER_CTRL_OFF);
 		timer_clk = 25000000;
 	} else {
@@ -183,15 +235,23 @@ void __init armada_370_xp_timer_init(void)
 		struct clk *clk = of_clk_get(np, 0);
 		WARN_ON(IS_ERR(clk));
 		rate =  clk_get_rate(clk);
+		u = readl(local_base + TIMER_CTRL_OFF);
+		writel(u & ~(TIMER0_25MHZ),
+		       local_base + TIMER_CTRL_OFF);
+
 		u = readl(timer_base + TIMER_CTRL_OFF);
-		writel(u & ~(TIMER0_25MHZ | TIMER1_25MHZ),
+		writel(u & ~(TIMER0_25MHZ),
 		       timer_base + TIMER_CTRL_OFF);
+
 		timer_clk = rate / TIMER_DIVIDER;
+		timer25Mhz = false;
 	}
 
-	/* We use timer 0 as clocksource, and timer 1 for
-	   clockevents */
-	timer_irq = irq_of_parse_and_map(np, 1);
+	/*
+	 * We use timer 0 as clocksource, and private(local) timer 0
+	 * for clockevents
+	 */
+	armada_370_xp_clkevt.irq = irq_of_parse_and_map(np, 4);
 
 	ticks_per_jiffy = (timer_clk + HZ / 2) / HZ;
 
@@ -216,12 +276,26 @@ void __init armada_370_xp_timer_init(void)
 			      "armada_370_xp_clocksource",
 			      timer_clk, 300, 32, clocksource_mmio_readl_down);
 
-	/*
-	 * Setup clockevent timer (interrupt-driven).
-	 */
-	setup_irq(timer_irq, &armada_370_xp_timer_irq);
+	/* Register the clockevent on the private timer of CPU 0 */
 	armada_370_xp_clkevt.cpumask = cpumask_of(0);
 	clockevents_config_and_register(&armada_370_xp_clkevt,
 					timer_clk, 1, 0xfffffffe);
-}
 
+	percpu_armada_370_xp_evt = alloc_percpu(struct clock_event_device *);
+
+
+	/*
+	 * Setup clockevent timer (interrupt-driven).
+	 */
+	*__this_cpu_ptr(percpu_armada_370_xp_evt) = &armada_370_xp_clkevt;
+	res = request_percpu_irq(armada_370_xp_clkevt.irq,
+				armada_370_xp_timer_interrupt,
+				armada_370_xp_clkevt.name,
+				percpu_armada_370_xp_evt);
+	if (!res) {
+		enable_percpu_irq(armada_370_xp_clkevt.irq, 0);
+#ifdef CONFIG_LOCAL_TIMERS
+		local_timer_register(&armada_370_xp_local_timer_ops);
+#endif
+	}
+}
diff --git a/drivers/dca/dca-sysfs.c b/drivers/dca/dca-sysfs.c
index 591b6597c00a..126cf295b198 100644
--- a/drivers/dca/dca-sysfs.c
+++ b/drivers/dca/dca-sysfs.c
@@ -53,22 +53,19 @@ void dca_sysfs_remove_req(struct dca_provider *dca, int slot)
 int dca_sysfs_add_provider(struct dca_provider *dca, struct device *dev)
 {
 	struct device *cd;
-	int err = 0;
+	int ret;
 
-idr_try_again:
-	if (!idr_pre_get(&dca_idr, GFP_KERNEL))
-		return -ENOMEM;
+	idr_preload(GFP_KERNEL);
 	spin_lock(&dca_idr_lock);
-	err = idr_get_new(&dca_idr, dca, &dca->id);
+
+	ret = idr_alloc(&dca_idr, dca, 0, 0, GFP_NOWAIT);
+	if (ret >= 0)
+		dca->id = ret;
+
 	spin_unlock(&dca_idr_lock);
-	switch (err) {
-	case 0:
-		break;
-	case -EAGAIN:
-		goto idr_try_again;
-	default:
-		return err;
-	}
+	idr_preload_end();
+	if (ret < 0)
+		return ret;
 
 	cd = device_create(dca_class, dev, MKDEV(0, 0), NULL, "dca%d", dca->id);
 	if (IS_ERR(cd)) {
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 242b8c0a3de8..b2728d6ba2fd 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -686,18 +686,14 @@ static int get_dma_id(struct dma_device *device)
 {
 	int rc;
 
- idr_retry:
-	if (!idr_pre_get(&dma_idr, GFP_KERNEL))
-		return -ENOMEM;
 	mutex_lock(&dma_list_mutex);
-	rc = idr_get_new(&dma_idr, NULL, &device->dev_id);
-	mutex_unlock(&dma_list_mutex);
-	if (rc == -EAGAIN)
-		goto idr_retry;
-	else if (rc != 0)
-		return rc;
 
-	return 0;
+	rc = idr_alloc(&dma_idr, NULL, 0, 0, GFP_KERNEL);
+	if (rc >= 0)
+		device->dev_id = rc;
+
+	mutex_unlock(&dma_list_mutex);
+	return rc < 0 ? rc : 0;
 }
 
 /**
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 51c3ea2ed41a..c599558faeda 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -20,6 +20,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/of.h>
+#include <linux/of_dma.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
@@ -171,7 +172,13 @@ static void dwc_initialize(struct dw_dma_chan *dwc)
 	if (dwc->initialized == true)
 		return;
 
-	if (dws) {
+	if (dws && dws->cfg_hi == ~0 && dws->cfg_lo == ~0) {
+		/* autoconfigure based on request line from DT */
+		if (dwc->direction == DMA_MEM_TO_DEV)
+			cfghi = DWC_CFGH_DST_PER(dwc->request_line);
+		else if (dwc->direction == DMA_DEV_TO_MEM)
+			cfghi = DWC_CFGH_SRC_PER(dwc->request_line);
+	} else if (dws) {
 		/*
 		 * We need controller-specific data to set up slave
 		 * transfers.
@@ -1226,49 +1233,64 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 	dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
 }
 
-bool dw_dma_generic_filter(struct dma_chan *chan, void *param)
+struct dw_dma_filter_args {
+	struct dw_dma *dw;
+	unsigned int req;
+	unsigned int src;
+	unsigned int dst;
+};
+
+static bool dw_dma_generic_filter(struct dma_chan *chan, void *param)
 {
+	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
 	struct dw_dma *dw = to_dw_dma(chan->device);
-	static struct dw_dma *last_dw;
-	static char *last_bus_id;
-	int i = -1;
+	struct dw_dma_filter_args *fargs = param;
+	struct dw_dma_slave *dws = &dwc->slave;
 
-	/*
-	 * dmaengine framework calls this routine for all channels of all dma
-	 * controller, until true is returned. If 'param' bus_id is not
-	 * registered with a dma controller (dw), then there is no need of
-	 * running below function for all channels of dw.
-	 *
-	 * This block of code does this by saving the parameters of last
-	 * failure. If dw and param are same, i.e. trying on same dw with
-	 * different channel, return false.
-	 */
-	if ((last_dw == dw) && (last_bus_id == param))
-		return false;
-	/*
-	 * Return true:
-	 * - If dw_dma's platform data is not filled with slave info, then all
-	 *   dma controllers are fine for transfer.
-	 * - Or if param is NULL
-	 */
-	if (!dw->sd || !param)
-		return true;
+	/* ensure the device matches our channel */
+        if (chan->device != &fargs->dw->dma)
+                return false;
 
-	while (++i < dw->sd_count) {
-		if (!strcmp(dw->sd[i].bus_id, param)) {
-			chan->private = &dw->sd[i];
-			last_dw = NULL;
-			last_bus_id = NULL;
+	dws->dma_dev	= dw->dma.dev;
+	dws->cfg_hi	= ~0;
+	dws->cfg_lo	= ~0;
+	dws->src_master	= fargs->src;
+	dws->dst_master	= fargs->dst;
 
-			return true;
-		}
-	}
+	dwc->request_line = fargs->req;
 
-	last_dw = dw;
-	last_bus_id = param;
-	return false;
+	chan->private = dws;
+
+	return true;
+}
+
+static struct dma_chan *dw_dma_xlate(struct of_phandle_args *dma_spec,
+					 struct of_dma *ofdma)
+{
+	struct dw_dma *dw = ofdma->of_dma_data;
+	struct dw_dma_filter_args fargs = {
+		.dw = dw,
+	};
+	dma_cap_mask_t cap;
+
+	if (dma_spec->args_count != 3)
+		return NULL;
+
+	fargs.req = be32_to_cpup(dma_spec->args+0);
+	fargs.src = be32_to_cpup(dma_spec->args+1);
+	fargs.dst = be32_to_cpup(dma_spec->args+2);
+
+	if (WARN_ON(fargs.req >= DW_DMA_MAX_NR_REQUESTS ||
+		    fargs.src >= dw->nr_masters ||
+		    fargs.dst >= dw->nr_masters))
+		return NULL;
+
+	dma_cap_zero(cap);
+	dma_cap_set(DMA_SLAVE, cap);
+
+	/* TODO: there should be a simpler way to do this */
+	return dma_request_channel(cap, dw_dma_generic_filter, &fargs);
 }
-EXPORT_SYMBOL(dw_dma_generic_filter);
 
 /* --------------------- Cyclic DMA API extensions -------------------- */
 
@@ -1554,9 +1576,8 @@ static void dw_dma_off(struct dw_dma *dw)
 static struct dw_dma_platform_data *
 dw_dma_parse_dt(struct platform_device *pdev)
 {
-	struct device_node *sn, *cn, *np = pdev->dev.of_node;
+	struct device_node *np = pdev->dev.of_node;
 	struct dw_dma_platform_data *pdata;
-	struct dw_dma_slave *sd;
 	u32 tmp, arr[4];
 
 	if (!np) {
@@ -1568,7 +1589,7 @@ dw_dma_parse_dt(struct platform_device *pdev)
 	if (!pdata)
 		return NULL;
 
-	if (of_property_read_u32(np, "nr_channels", &pdata->nr_channels))
+	if (of_property_read_u32(np, "dma-channels", &pdata->nr_channels))
 		return NULL;
 
 	if (of_property_read_bool(np, "is_private"))
@@ -1583,7 +1604,7 @@ dw_dma_parse_dt(struct platform_device *pdev)
 	if (!of_property_read_u32(np, "block_size", &tmp))
 		pdata->block_size = tmp;
 
-	if (!of_property_read_u32(np, "nr_masters", &tmp)) {
+	if (!of_property_read_u32(np, "dma-masters", &tmp)) {
 		if (tmp > 4)
 			return NULL;
 
@@ -1595,36 +1616,6 @@ dw_dma_parse_dt(struct platform_device *pdev)
 		for (tmp = 0; tmp < pdata->nr_masters; tmp++)
 			pdata->data_width[tmp] = arr[tmp];
 
-	/* parse slave data */
-	sn = of_find_node_by_name(np, "slave_info");
-	if (!sn)
-		return pdata;
-
-	/* calculate number of slaves */
-	tmp = of_get_child_count(sn);
-	if (!tmp)
-		return NULL;
-
-	sd = devm_kzalloc(&pdev->dev, sizeof(*sd) * tmp, GFP_KERNEL);
-	if (!sd)
-		return NULL;
-
-	pdata->sd = sd;
-	pdata->sd_count = tmp;
-
-	for_each_child_of_node(sn, cn) {
-		sd->dma_dev = &pdev->dev;
-		of_property_read_string(cn, "bus_id", &sd->bus_id);
-		of_property_read_u32(cn, "cfg_hi", &sd->cfg_hi);
-		of_property_read_u32(cn, "cfg_lo", &sd->cfg_lo);
-		if (!of_property_read_u32(cn, "src_master", &tmp))
-			sd->src_master = tmp;
-
-		if (!of_property_read_u32(cn, "dst_master", &tmp))
-			sd->dst_master = tmp;
-		sd++;
-	}
-
 	return pdata;
 }
 #else
@@ -1705,8 +1696,6 @@ static int dw_probe(struct platform_device *pdev)
 	clk_prepare_enable(dw->clk);
 
 	dw->regs = regs;
-	dw->sd = pdata->sd;
-	dw->sd_count = pdata->sd_count;
 
 	/* get hardware configuration parameters */
 	if (autocfg) {
@@ -1837,6 +1826,14 @@ static int dw_probe(struct platform_device *pdev)
 
 	dma_async_device_register(&dw->dma);
 
+	if (pdev->dev.of_node) {
+		err = of_dma_controller_register(pdev->dev.of_node,
+						 dw_dma_xlate, dw);
+		if (err && err != -ENODEV)
+			dev_err(&pdev->dev,
+				"could not register of_dma_controller\n");
+	}
+
 	return 0;
 }
 
@@ -1845,6 +1842,8 @@ static int dw_remove(struct platform_device *pdev)
 	struct dw_dma		*dw = platform_get_drvdata(pdev);
 	struct dw_dma_chan	*dwc, *_dwc;
 
+	if (pdev->dev.of_node)
+		of_dma_controller_free(pdev->dev.of_node);
 	dw_dma_off(dw);
 	dma_async_device_unregister(&dw->dma);
 
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 88dd8eb31957..cf0ce5c77d60 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -13,6 +13,7 @@
 #include <linux/dw_dmac.h>
 
 #define DW_DMA_MAX_NR_CHANNELS	8
+#define DW_DMA_MAX_NR_REQUESTS	16
 
 /* flow controller */
 enum dw_dma_fc {
@@ -211,6 +212,8 @@ struct dw_dma_chan {
 	/* hardware configuration */
 	unsigned int		block_size;
 	bool			nollp;
+	unsigned int		request_line;
+	struct dw_dma_slave	slave;
 
 	/* configuration passed via DMA_SLAVE_CONFIG */
 	struct dma_slave_config dma_sconfig;
@@ -239,10 +242,6 @@ struct dw_dma {
 	struct tasklet_struct	tasklet;
 	struct clk		*clk;
 
-	/* slave information */
-	struct dw_dma_slave	*sd;
-	unsigned int		sd_count;
-
 	u8			all_chan_mask;
 
 	/* hardware configuration */
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index acb709bfac0f..e443f2c1dfd1 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -80,6 +80,29 @@ config EDAC_MM_EDAC
 	  occurred so that a particular failing memory module can be
 	  replaced.  If unsure, select 'Y'.
 
+config EDAC_GHES
+	bool "Output ACPI APEI/GHES BIOS detected errors via EDAC"
+	depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y)
+	default y
+	help
+	  Not all machines support hardware-driven error report. Some of those
+	  provide a BIOS-driven error report mechanism via ACPI, using the
+	  APEI/GHES driver. By enabling this option, the error reports provided
+	  by GHES are sent to userspace via the EDAC API.
+
+	  When this option is enabled, it will disable the hardware-driven
+	  mechanisms, if a GHES BIOS is detected, entering into the
+	  "Firmware First" mode.
+
+	  It should be noticed that keeping both GHES and a hardware-driven
+	  error mechanism won't work well, as BIOS will race with OS, while
+	  reading the error registers. So, if you want to not use "Firmware
+	  first" GHES error mechanism, you should disable GHES either at
+	  compilation time or by passing "ghes.disable=1" Kernel parameter
+	  at boot time.
+
+	  In doubt, say 'Y'.
+
 config EDAC_AMD64
 	tristate "AMD64 (Opteron, Athlon64) K8, F10h"
 	depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 5608a9ba61b7..4154ed6a02c6 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -16,6 +16,7 @@ ifdef CONFIG_PCI
 edac_core-y	+= edac_pci.o edac_pci_sysfs.o
 endif
 
+obj-$(CONFIG_EDAC_GHES)			+= ghes_edac.o
 obj-$(CONFIG_EDAC_MCE_INJ)		+= mce_amd_inj.o
 
 edac_mce_amd-y				:= mce_amd.o
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index 23bb99fa44f1..3c2625e7980d 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -453,6 +453,11 @@ extern struct mem_ctl_info *find_mci_by_dev(struct device *dev);
 extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
 extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
 				      unsigned long page);
+
+void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
+			      struct mem_ctl_info *mci,
+			      struct edac_raw_error_desc *e);
+
 void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			  struct mem_ctl_info *mci,
 			  const u16 error_count,
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index d1e9eb191f2b..cdb81aa73ab7 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -42,6 +42,12 @@
 static DEFINE_MUTEX(mem_ctls_mutex);
 static LIST_HEAD(mc_devices);
 
+/*
+ * Used to lock EDAC MC to just one module, avoiding two drivers e. g.
+ *	apei/ghes and i7core_edac to be used at the same time.
+ */
+static void const *edac_mc_owner;
+
 unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
 			         unsigned len)
 {
@@ -441,13 +447,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 
 	mci->op_state = OP_ALLOC;
 
-	/* at this point, the root kobj is valid, and in order to
-	 * 'free' the object, then the function:
-	 *      edac_mc_unregister_sysfs_main_kobj() must be called
-	 * which will perform kobj unregistration and the actual free
-	 * will occur during the kobject callback operation
-	 */
-
 	return mci;
 
 error:
@@ -666,9 +665,9 @@ fail1:
 	return 1;
 }
 
-static void del_mc_from_global_list(struct mem_ctl_info *mci)
+static int del_mc_from_global_list(struct mem_ctl_info *mci)
 {
-	atomic_dec(&edac_handlers);
+	int handlers = atomic_dec_return(&edac_handlers);
 	list_del_rcu(&mci->link);
 
 	/* these are for safe removal of devices from global list while
@@ -676,6 +675,8 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci)
 	 */
 	synchronize_rcu();
 	INIT_LIST_HEAD(&mci->link);
+
+	return handlers;
 }
 
 /**
@@ -719,6 +720,7 @@ EXPORT_SYMBOL(edac_mc_find);
 /* FIXME - should a warning be printed if no error detection? correction? */
 int edac_mc_add_mc(struct mem_ctl_info *mci)
 {
+	int ret = -EINVAL;
 	edac_dbg(0, "\n");
 
 #ifdef CONFIG_EDAC_DEBUG
@@ -749,6 +751,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
 #endif
 	mutex_lock(&mem_ctls_mutex);
 
+	if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
+		ret = -EPERM;
+		goto fail0;
+	}
+
 	if (add_mc_to_global_list(mci))
 		goto fail0;
 
@@ -775,6 +782,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
 	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
 		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
 
+	edac_mc_owner = mci->mod_name;
+
 	mutex_unlock(&mem_ctls_mutex);
 	return 0;
 
@@ -783,7 +792,7 @@ fail1:
 
 fail0:
 	mutex_unlock(&mem_ctls_mutex);
-	return 1;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(edac_mc_add_mc);
 
@@ -809,7 +818,8 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
 		return NULL;
 	}
 
-	del_mc_from_global_list(mci);
+	if (!del_mc_from_global_list(mci))
+		edac_mc_owner = NULL;
 	mutex_unlock(&mem_ctls_mutex);
 
 	/* flush workq processes */
@@ -907,6 +917,7 @@ const char *edac_layer_name[] = {
 	[EDAC_MC_LAYER_CHANNEL] = "channel",
 	[EDAC_MC_LAYER_SLOT] = "slot",
 	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
+	[EDAC_MC_LAYER_ALL_MEM] = "memory",
 };
 EXPORT_SYMBOL_GPL(edac_layer_name);
 
@@ -1054,7 +1065,46 @@ static void edac_ue_error(struct mem_ctl_info *mci,
 	edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
 }
 
-#define OTHER_LABEL " or "
+/**
+ * edac_raw_mc_handle_error - reports a memory event to userspace without doing
+ *			      anything to discover the error location
+ *
+ * @type:		severity of the error (CE/UE/Fatal)
+ * @mci:		a struct mem_ctl_info pointer
+ * @e:			error description
+ *
+ * This raw function is used internally by edac_mc_handle_error(). It should
+ * only be called directly when the hardware error come directly from BIOS,
+ * like in the case of APEI GHES driver.
+ */
+void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
+			      struct mem_ctl_info *mci,
+			      struct edac_raw_error_desc *e)
+{
+	char detail[80];
+	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
+
+	/* Memory type dependent details about the error */
+	if (type == HW_EVENT_ERR_CORRECTED) {
+		snprintf(detail, sizeof(detail),
+			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
+			e->page_frame_number, e->offset_in_page,
+			e->grain, e->syndrome);
+		edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
+			      detail, e->other_detail, e->enable_per_layer_report,
+			      e->page_frame_number, e->offset_in_page, e->grain);
+	} else {
+		snprintf(detail, sizeof(detail),
+			"page:0x%lx offset:0x%lx grain:%ld",
+			e->page_frame_number, e->offset_in_page, e->grain);
+
+		edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
+			      detail, e->other_detail, e->enable_per_layer_report);
+	}
+
+
+}
+EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
 
 /**
  * edac_mc_handle_error - reports a memory event to userspace
@@ -1086,19 +1136,27 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			  const char *msg,
 			  const char *other_detail)
 {
-	/* FIXME: too much for stack: move it to some pre-alocated area */
-	char detail[80], location[80];
-	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
 	char *p;
 	int row = -1, chan = -1;
 	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
-	int i;
-	long grain;
-	bool enable_per_layer_report = false;
+	int i, n_labels = 0;
 	u8 grain_bits;
+	struct edac_raw_error_desc *e = &mci->error_desc;
 
 	edac_dbg(3, "MC%d\n", mci->mc_idx);
 
+	/* Fills the error report buffer */
+	memset(e, 0, sizeof (*e));
+	e->error_count = error_count;
+	e->top_layer = top_layer;
+	e->mid_layer = mid_layer;
+	e->low_layer = low_layer;
+	e->page_frame_number = page_frame_number;
+	e->offset_in_page = offset_in_page;
+	e->syndrome = syndrome;
+	e->msg = msg;
+	e->other_detail = other_detail;
+
 	/*
 	 * Check if the event report is consistent and if the memory
 	 * location is known. If it is known, enable_per_layer_report will be
@@ -1121,7 +1179,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			pos[i] = -1;
 		}
 		if (pos[i] >= 0)
-			enable_per_layer_report = true;
+			e->enable_per_layer_report = true;
 	}
 
 	/*
@@ -1135,8 +1193,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	 * where each memory belongs to a separate channel within the same
 	 * branch.
 	 */
-	grain = 0;
-	p = label;
+	p = e->label;
 	*p = '\0';
 
 	for (i = 0; i < mci->tot_dimms; i++) {
@@ -1150,8 +1207,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			continue;
 
 		/* get the max grain, over the error match range */
-		if (dimm->grain > grain)
-			grain = dimm->grain;
+		if (dimm->grain > e->grain)
+			e->grain = dimm->grain;
 
 		/*
 		 * If the error is memory-controller wide, there's no need to
@@ -1159,8 +1216,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 		 * channel/memory controller/...  may be affected.
 		 * Also, don't show errors for empty DIMM slots.
 		 */
-		if (enable_per_layer_report && dimm->nr_pages) {
-			if (p != label) {
+		if (e->enable_per_layer_report && dimm->nr_pages) {
+			if (n_labels >= EDAC_MAX_LABELS) {
+				e->enable_per_layer_report = false;
+				break;
+			}
+			n_labels++;
+			if (p != e->label) {
 				strcpy(p, OTHER_LABEL);
 				p += strlen(OTHER_LABEL);
 			}
@@ -1187,12 +1249,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 		}
 	}
 
-	if (!enable_per_layer_report) {
-		strcpy(label, "any memory");
+	if (!e->enable_per_layer_report) {
+		strcpy(e->label, "any memory");
 	} else {
 		edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
-		if (p == label)
-			strcpy(label, "unknown memory");
+		if (p == e->label)
+			strcpy(e->label, "unknown memory");
 		if (type == HW_EVENT_ERR_CORRECTED) {
 			if (row >= 0) {
 				mci->csrows[row]->ce_count += error_count;
@@ -1205,7 +1267,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	}
 
 	/* Fill the RAM location data */
-	p = location;
+	p = e->location;
 
 	for (i = 0; i < mci->n_layers; i++) {
 		if (pos[i] < 0)
@@ -1215,32 +1277,16 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			     edac_layer_name[mci->layers[i].type],
 			     pos[i]);
 	}
-	if (p > location)
+	if (p > e->location)
 		*(p - 1) = '\0';
 
 	/* Report the error via the trace interface */
-	grain_bits = fls_long(grain) + 1;
-	trace_mc_event(type, msg, label, error_count,
-		       mci->mc_idx, top_layer, mid_layer, low_layer,
-		       PAGES_TO_MiB(page_frame_number) | offset_in_page,
-		       grain_bits, syndrome, other_detail);
+	grain_bits = fls_long(e->grain) + 1;
+	trace_mc_event(type, e->msg, e->label, e->error_count,
+		       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
+		       PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
+		       grain_bits, e->syndrome, e->other_detail);
 
-	/* Memory type dependent details about the error */
-	if (type == HW_EVENT_ERR_CORRECTED) {
-		snprintf(detail, sizeof(detail),
-			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
-			page_frame_number, offset_in_page,
-			grain, syndrome);
-		edac_ce_error(mci, error_count, pos, msg, location, label,
-			      detail, other_detail, enable_per_layer_report,
-			      page_frame_number, offset_in_page, grain);
-	} else {
-		snprintf(detail, sizeof(detail),
-			"page:0x%lx offset:0x%lx grain:%ld",
-			page_frame_number, offset_in_page, grain);
-
-		edac_ue_error(mci, error_count, pos, msg, location, label,
-			      detail, other_detail, enable_per_layer_report);
-	}
+	edac_raw_mc_handle_error(type, mci, e);
 }
 EXPORT_SYMBOL_GPL(edac_mc_handle_error);
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 0ca1ca71157f..4f4b6137d74e 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -7,7 +7,7 @@
  *
  * Written Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com
  *
- * (c) 2012 - Mauro Carvalho Chehab <mchehab@redhat.com>
+ * (c) 2012-2013 - Mauro Carvalho Chehab <mchehab@redhat.com>
  *	The entire API were re-written, and ported to use struct device
  *
  */
@@ -429,8 +429,12 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci)
 		if (!nr_pages_per_csrow(csrow))
 			continue;
 		err = edac_create_csrow_object(mci, mci->csrows[i], i);
-		if (err < 0)
+		if (err < 0) {
+			edac_dbg(1,
+				 "failure: create csrow objects for csrow %d\n",
+				 i);
 			goto error;
+		}
 	}
 	return 0;
 
@@ -677,9 +681,6 @@ static ssize_t mci_sdram_scrub_rate_store(struct device *dev,
 	unsigned long bandwidth = 0;
 	int new_bw = 0;
 
-	if (!mci->set_sdram_scrub_rate)
-		return -ENODEV;
-
 	if (strict_strtoul(data, 10, &bandwidth) < 0)
 		return -EINVAL;
 
@@ -703,9 +704,6 @@ static ssize_t mci_sdram_scrub_rate_show(struct device *dev,
 	struct mem_ctl_info *mci = to_mci(dev);
 	int bandwidth = 0;
 
-	if (!mci->get_sdram_scrub_rate)
-		return -ENODEV;
-
 	bandwidth = mci->get_sdram_scrub_rate(mci);
 	if (bandwidth < 0) {
 		edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n");
@@ -866,8 +864,7 @@ DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL);
 DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL);
 
 /* memory scrubber attribute file */
-DEVICE_ATTR(sdram_scrub_rate, S_IRUGO | S_IWUSR, mci_sdram_scrub_rate_show,
-	mci_sdram_scrub_rate_store);
+DEVICE_ATTR(sdram_scrub_rate, 0, NULL, NULL);
 
 static struct attribute *mci_attrs[] = {
 	&dev_attr_reset_counters.attr,
@@ -878,7 +875,6 @@ static struct attribute *mci_attrs[] = {
 	&dev_attr_ce_noinfo_count.attr,
 	&dev_attr_ue_count.attr,
 	&dev_attr_ce_count.attr,
-	&dev_attr_sdram_scrub_rate.attr,
 	&dev_attr_max_location.attr,
 	NULL
 };
@@ -1007,11 +1003,28 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
 	edac_dbg(0, "creating device %s\n", dev_name(&mci->dev));
 	err = device_add(&mci->dev);
 	if (err < 0) {
+		edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev));
 		bus_unregister(&mci->bus);
 		kfree(mci->bus.name);
 		return err;
 	}
 
+	if (mci->set_sdram_scrub_rate || mci->get_sdram_scrub_rate) {
+		if (mci->get_sdram_scrub_rate) {
+			dev_attr_sdram_scrub_rate.attr.mode |= S_IRUGO;
+			dev_attr_sdram_scrub_rate.show = &mci_sdram_scrub_rate_show;
+		}
+		if (mci->set_sdram_scrub_rate) {
+			dev_attr_sdram_scrub_rate.attr.mode |= S_IWUSR;
+			dev_attr_sdram_scrub_rate.store = &mci_sdram_scrub_rate_store;
+		}
+		err = device_create_file(&mci->dev,
+					 &dev_attr_sdram_scrub_rate);
+		if (err) {
+			edac_dbg(1, "failure: create sdram_scrub_rate\n");
+			goto fail2;
+		}
+	}
 	/*
 	 * Create the dimm/rank devices
 	 */
@@ -1056,6 +1069,7 @@ fail:
 			continue;
 		device_unregister(&dimm->dev);
 	}
+fail2:
 	device_unregister(&mci->dev);
 	bus_unregister(&mci->bus);
 	kfree(mci->bus.name);
diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c
index 12c951a2c33d..a66941fea5a4 100644
--- a/drivers/edac/edac_module.c
+++ b/drivers/edac/edac_module.c
@@ -146,7 +146,7 @@ static void __exit edac_exit(void)
 /*
  * Inform the kernel of our entry and exit points
  */
-module_init(edac_init);
+subsys_initcall(edac_init);
 module_exit(edac_exit);
 
 MODULE_LICENSE("GPL");
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index 0056c4dae9d5..e8658e451762 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -429,8 +429,8 @@ static void edac_pci_main_kobj_teardown(void)
 	if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) {
 		edac_dbg(0, "called kobject_put on main kobj\n");
 		kobject_put(edac_pci_top_main_kobj);
+		edac_put_sysfs_subsys();
 	}
-	edac_put_sysfs_subsys();
 }
 
 /*
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
new file mode 100644
index 000000000000..bb534670ec02
--- /dev/null
+++ b/drivers/edac/ghes_edac.c
@@ -0,0 +1,537 @@
+/*
+ * GHES/EDAC Linux driver
+ *
+ * This file may be distributed under the terms of the GNU General Public
+ * License version 2.
+ *
+ * Copyright (c) 2013 by Mauro Carvalho Chehab <mchehab@redhat.com>
+ *
+ * Red Hat Inc. http://www.redhat.com
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <acpi/ghes.h>
+#include <linux/edac.h>
+#include <linux/dmi.h>
+#include "edac_core.h"
+#include <ras/ras_event.h>
+
+#define GHES_EDAC_REVISION " Ver: 1.0.0"
+
+struct ghes_edac_pvt {
+	struct list_head list;
+	struct ghes *ghes;
+	struct mem_ctl_info *mci;
+
+	/* Buffers for the error handling routine */
+	char detail_location[240];
+	char other_detail[160];
+	char msg[80];
+};
+
+static LIST_HEAD(ghes_reglist);
+static DEFINE_MUTEX(ghes_edac_lock);
+static int ghes_edac_mc_num;
+
+
+/* Memory Device - Type 17 of SMBIOS spec */
+struct memdev_dmi_entry {
+	u8 type;
+	u8 length;
+	u16 handle;
+	u16 phys_mem_array_handle;
+	u16 mem_err_info_handle;
+	u16 total_width;
+	u16 data_width;
+	u16 size;
+	u8 form_factor;
+	u8 device_set;
+	u8 device_locator;
+	u8 bank_locator;
+	u8 memory_type;
+	u16 type_detail;
+	u16 speed;
+	u8 manufacturer;
+	u8 serial_number;
+	u8 asset_tag;
+	u8 part_number;
+	u8 attributes;
+	u32 extended_size;
+	u16 conf_mem_clk_speed;
+} __attribute__((__packed__));
+
+struct ghes_edac_dimm_fill {
+	struct mem_ctl_info *mci;
+	unsigned count;
+};
+
+char *memory_type[] = {
+	[MEM_EMPTY] = "EMPTY",
+	[MEM_RESERVED] = "RESERVED",
+	[MEM_UNKNOWN] = "UNKNOWN",
+	[MEM_FPM] = "FPM",
+	[MEM_EDO] = "EDO",
+	[MEM_BEDO] = "BEDO",
+	[MEM_SDR] = "SDR",
+	[MEM_RDR] = "RDR",
+	[MEM_DDR] = "DDR",
+	[MEM_RDDR] = "RDDR",
+	[MEM_RMBS] = "RMBS",
+	[MEM_DDR2] = "DDR2",
+	[MEM_FB_DDR2] = "FB_DDR2",
+	[MEM_RDDR2] = "RDDR2",
+	[MEM_XDR] = "XDR",
+	[MEM_DDR3] = "DDR3",
+	[MEM_RDDR3] = "RDDR3",
+};
+
+static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
+{
+	int *num_dimm = arg;
+
+	if (dh->type == DMI_ENTRY_MEM_DEVICE)
+		(*num_dimm)++;
+}
+
+static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
+{
+	struct ghes_edac_dimm_fill *dimm_fill = arg;
+	struct mem_ctl_info *mci = dimm_fill->mci;
+
+	if (dh->type == DMI_ENTRY_MEM_DEVICE) {
+		struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh;
+		struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+						       mci->n_layers,
+						       dimm_fill->count, 0, 0);
+
+		if (entry->size == 0xffff) {
+			pr_info("Can't get DIMM%i size\n",
+				dimm_fill->count);
+			dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */
+		} else if (entry->size == 0x7fff) {
+			dimm->nr_pages = MiB_TO_PAGES(entry->extended_size);
+		} else {
+			if (entry->size & 1 << 15)
+				dimm->nr_pages = MiB_TO_PAGES((entry->size &
+							       0x7fff) << 10);
+			else
+				dimm->nr_pages = MiB_TO_PAGES(entry->size);
+		}
+
+		switch (entry->memory_type) {
+		case 0x12:
+			if (entry->type_detail & 1 << 13)
+				dimm->mtype = MEM_RDDR;
+			else
+				dimm->mtype = MEM_DDR;
+			break;
+		case 0x13:
+			if (entry->type_detail & 1 << 13)
+				dimm->mtype = MEM_RDDR2;
+			else
+				dimm->mtype = MEM_DDR2;
+			break;
+		case 0x14:
+			dimm->mtype = MEM_FB_DDR2;
+			break;
+		case 0x18:
+			if (entry->type_detail & 1 << 13)
+				dimm->mtype = MEM_RDDR3;
+			else
+				dimm->mtype = MEM_DDR3;
+			break;
+		default:
+			if (entry->type_detail & 1 << 6)
+				dimm->mtype = MEM_RMBS;
+			else if ((entry->type_detail & ((1 << 7) | (1 << 13)))
+				 == ((1 << 7) | (1 << 13)))
+				dimm->mtype = MEM_RDR;
+			else if (entry->type_detail & 1 << 7)
+				dimm->mtype = MEM_SDR;
+			else if (entry->type_detail & 1 << 9)
+				dimm->mtype = MEM_EDO;
+			else
+				dimm->mtype = MEM_UNKNOWN;
+		}
+
+		/*
+		 * Actually, we can only detect if the memory has bits for
+		 * checksum or not
+		 */
+		if (entry->total_width == entry->data_width)
+			dimm->edac_mode = EDAC_NONE;
+		else
+			dimm->edac_mode = EDAC_SECDED;
+
+		dimm->dtype = DEV_UNKNOWN;
+		dimm->grain = 128;		/* Likely, worse case */
+
+		/*
+		 * FIXME: It shouldn't be hard to also fill the DIMM labels
+		 */
+
+		if (dimm->nr_pages) {
+			edac_dbg(1, "DIMM%i: %s size = %d MB%s\n",
+				dimm_fill->count, memory_type[dimm->mtype],
+				PAGES_TO_MiB(dimm->nr_pages),
+				(dimm->edac_mode != EDAC_NONE) ? "(ECC)" : "");
+			edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n",
+				entry->memory_type, entry->type_detail,
+				entry->total_width, entry->data_width);
+		}
+
+		dimm_fill->count++;
+	}
+}
+
+void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
+				struct cper_sec_mem_err *mem_err)
+{
+	enum hw_event_mc_err_type type;
+	struct edac_raw_error_desc *e;
+	struct mem_ctl_info *mci;
+	struct ghes_edac_pvt *pvt = NULL;
+	char *p;
+	u8 grain_bits;
+
+	list_for_each_entry(pvt, &ghes_reglist, list) {
+		if (ghes == pvt->ghes)
+			break;
+	}
+	if (!pvt) {
+		pr_err("Internal error: Can't find EDAC structure\n");
+		return;
+	}
+	mci = pvt->mci;
+	e = &mci->error_desc;
+
+	/* Cleans the error report buffer */
+	memset(e, 0, sizeof (*e));
+	e->error_count = 1;
+	strcpy(e->label, "unknown label");
+	e->msg = pvt->msg;
+	e->other_detail = pvt->other_detail;
+	e->top_layer = -1;
+	e->mid_layer = -1;
+	e->low_layer = -1;
+	*pvt->other_detail = '\0';
+	*pvt->msg = '\0';
+
+	switch (sev) {
+	case GHES_SEV_CORRECTED:
+		type = HW_EVENT_ERR_CORRECTED;
+		break;
+	case GHES_SEV_RECOVERABLE:
+		type = HW_EVENT_ERR_UNCORRECTED;
+		break;
+	case GHES_SEV_PANIC:
+		type = HW_EVENT_ERR_FATAL;
+		break;
+	default:
+	case GHES_SEV_NO:
+		type = HW_EVENT_ERR_INFO;
+	}
+
+	edac_dbg(1, "error validation_bits: 0x%08llx\n",
+		 (long long)mem_err->validation_bits);
+
+	/* Error type, mapped on e->msg */
+	if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
+		p = pvt->msg;
+		switch (mem_err->error_type) {
+		case 0:
+			p += sprintf(p, "Unknown");
+			break;
+		case 1:
+			p += sprintf(p, "No error");
+			break;
+		case 2:
+			p += sprintf(p, "Single-bit ECC");
+			break;
+		case 3:
+			p += sprintf(p, "Multi-bit ECC");
+			break;
+		case 4:
+			p += sprintf(p, "Single-symbol ChipKill ECC");
+			break;
+		case 5:
+			p += sprintf(p, "Multi-symbol ChipKill ECC");
+			break;
+		case 6:
+			p += sprintf(p, "Master abort");
+			break;
+		case 7:
+			p += sprintf(p, "Target abort");
+			break;
+		case 8:
+			p += sprintf(p, "Parity Error");
+			break;
+		case 9:
+			p += sprintf(p, "Watchdog timeout");
+			break;
+		case 10:
+			p += sprintf(p, "Invalid address");
+			break;
+		case 11:
+			p += sprintf(p, "Mirror Broken");
+			break;
+		case 12:
+			p += sprintf(p, "Memory Sparing");
+			break;
+		case 13:
+			p += sprintf(p, "Scrub corrected error");
+			break;
+		case 14:
+			p += sprintf(p, "Scrub uncorrected error");
+			break;
+		case 15:
+			p += sprintf(p, "Physical Memory Map-out event");
+			break;
+		default:
+			p += sprintf(p, "reserved error (%d)",
+				     mem_err->error_type);
+		}
+	} else {
+		strcpy(pvt->msg, "unknown error");
+	}
+
+	/* Error address */
+	if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
+		e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT;
+		e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK;
+	}
+
+	/* Error grain */
+	if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) {
+		e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK);
+	}
+
+	/* Memory error location, mapped on e->location */
+	p = e->location;
+	if (mem_err->validation_bits & CPER_MEM_VALID_NODE)
+		p += sprintf(p, "node:%d ", mem_err->node);
+	if (mem_err->validation_bits & CPER_MEM_VALID_CARD)
+		p += sprintf(p, "card:%d ", mem_err->card);
+	if (mem_err->validation_bits & CPER_MEM_VALID_MODULE)
+		p += sprintf(p, "module:%d ", mem_err->module);
+	if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
+		p += sprintf(p, "bank:%d ", mem_err->bank);
+	if (mem_err->validation_bits & CPER_MEM_VALID_ROW)
+		p += sprintf(p, "row:%d ", mem_err->row);
+	if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN)
+		p += sprintf(p, "col:%d ", mem_err->column);
+	if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
+		p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
+	if (p > e->location)
+		*(p - 1) = '\0';
+
+	/* All other fields are mapped on e->other_detail */
+	p = pvt->other_detail;
+	if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) {
+		u64 status = mem_err->error_status;
+
+		p += sprintf(p, "status(0x%016llx): ", (long long)status);
+		switch ((status >> 8) & 0xff) {
+		case 1:
+			p += sprintf(p, "Error detected internal to the component ");
+			break;
+		case 16:
+			p += sprintf(p, "Error detected in the bus ");
+			break;
+		case 4:
+			p += sprintf(p, "Storage error in DRAM memory ");
+			break;
+		case 5:
+			p += sprintf(p, "Storage error in TLB ");
+			break;
+		case 6:
+			p += sprintf(p, "Storage error in cache ");
+			break;
+		case 7:
+			p += sprintf(p, "Error in one or more functional units ");
+			break;
+		case 8:
+			p += sprintf(p, "component failed self test ");
+			break;
+		case 9:
+			p += sprintf(p, "Overflow or undervalue of internal queue ");
+			break;
+		case 17:
+			p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR ");
+			break;
+		case 18:
+			p += sprintf(p, "Improper access error ");
+			break;
+		case 19:
+			p += sprintf(p, "Access to a memory address which is not mapped to any component ");
+			break;
+		case 20:
+			p += sprintf(p, "Loss of Lockstep ");
+			break;
+		case 21:
+			p += sprintf(p, "Response not associated with a request ");
+			break;
+		case 22:
+			p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits ");
+			break;
+		case 23:
+			p += sprintf(p, "Detection of a PATH_ERROR ");
+			break;
+		case 25:
+			p += sprintf(p, "Bus operation timeout ");
+			break;
+		case 26:
+			p += sprintf(p, "A read was issued to data that has been poisoned ");
+			break;
+		default:
+			p += sprintf(p, "reserved ");
+			break;
+		}
+	}
+	if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
+		p += sprintf(p, "requestorID: 0x%016llx ",
+			     (long long)mem_err->requestor_id);
+	if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
+		p += sprintf(p, "responderID: 0x%016llx ",
+			     (long long)mem_err->responder_id);
+	if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID)
+		p += sprintf(p, "targetID: 0x%016llx ",
+			     (long long)mem_err->responder_id);
+	if (p > pvt->other_detail)
+		*(p - 1) = '\0';
+
+	/* Generate the trace event */
+	grain_bits = fls_long(e->grain);
+	sprintf(pvt->detail_location, "APEI location: %s %s",
+		e->location, e->other_detail);
+	trace_mc_event(type, e->msg, e->label, e->error_count,
+		       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
+		       PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
+		       grain_bits, e->syndrome, pvt->detail_location);
+
+	/* Report the error via EDAC API */
+	edac_raw_mc_handle_error(type, mci, e);
+}
+EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error);
+
+int ghes_edac_register(struct ghes *ghes, struct device *dev)
+{
+	bool fake = false;
+	int rc, num_dimm = 0;
+	struct mem_ctl_info *mci;
+	struct edac_mc_layer layers[1];
+	struct ghes_edac_pvt *pvt;
+	struct ghes_edac_dimm_fill dimm_fill;
+
+	/* Get the number of DIMMs */
+	dmi_walk(ghes_edac_count_dimms, &num_dimm);
+
+	/* Check if we've got a bogus BIOS */
+	if (num_dimm == 0) {
+		fake = true;
+		num_dimm = 1;
+	}
+
+	layers[0].type = EDAC_MC_LAYER_ALL_MEM;
+	layers[0].size = num_dimm;
+	layers[0].is_virt_csrow = true;
+
+	/*
+	 * We need to serialize edac_mc_alloc() and edac_mc_add_mc(),
+	 * to avoid duplicated memory controller numbers
+	 */
+	mutex_lock(&ghes_edac_lock);
+	mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers,
+			    sizeof(*pvt));
+	if (!mci) {
+		pr_info("Can't allocate memory for EDAC data\n");
+		mutex_unlock(&ghes_edac_lock);
+		return -ENOMEM;
+	}
+
+	pvt = mci->pvt_info;
+	memset(pvt, 0, sizeof(*pvt));
+	list_add_tail(&pvt->list, &ghes_reglist);
+	pvt->ghes = ghes;
+	pvt->mci  = mci;
+	mci->pdev = dev;
+
+	mci->mtype_cap = MEM_FLAG_EMPTY;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE;
+	mci->edac_cap = EDAC_FLAG_NONE;
+	mci->mod_name = "ghes_edac.c";
+	mci->mod_ver = GHES_EDAC_REVISION;
+	mci->ctl_name = "ghes_edac";
+	mci->dev_name = "ghes";
+
+	if (!ghes_edac_mc_num) {
+		if (!fake) {
+			pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n");
+			pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n");
+			pr_info("So, the end result of using this driver varies from vendor to vendor.\n");
+			pr_info("If you find incorrect reports, please contact your hardware vendor\n");
+			pr_info("to correct its BIOS.\n");
+			pr_info("This system has %d DIMM sockets.\n",
+				num_dimm);
+		} else {
+			pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n");
+			pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n");
+			pr_info("work on such system. Use this driver with caution\n");
+		}
+	}
+
+	if (!fake) {
+		/*
+		 * Fill DIMM info from DMI for the memory controller #0
+		 *
+		 * Keep it in blank for the other memory controllers, as
+		 * there's no reliable way to properly credit each DIMM to
+		 * the memory controller, as different BIOSes fill the
+		 * DMI bank location fields on different ways
+		 */
+		if (!ghes_edac_mc_num) {
+			dimm_fill.count = 0;
+			dimm_fill.mci = mci;
+			dmi_walk(ghes_edac_dmidecode, &dimm_fill);
+		}
+	} else {
+		struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+						       mci->n_layers, 0, 0, 0);
+
+		dimm->nr_pages = 1;
+		dimm->grain = 128;
+		dimm->mtype = MEM_UNKNOWN;
+		dimm->dtype = DEV_UNKNOWN;
+		dimm->edac_mode = EDAC_SECDED;
+	}
+
+	rc = edac_mc_add_mc(mci);
+	if (rc < 0) {
+		pr_info("Can't register at EDAC core\n");
+		edac_mc_free(mci);
+		mutex_unlock(&ghes_edac_lock);
+		return -ENODEV;
+	}
+
+	ghes_edac_mc_num++;
+	mutex_unlock(&ghes_edac_lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ghes_edac_register);
+
+void ghes_edac_unregister(struct ghes *ghes)
+{
+	struct mem_ctl_info *mci;
+	struct ghes_edac_pvt *pvt, *tmp;
+
+	list_for_each_entry_safe(pvt, tmp, &ghes_reglist, list) {
+		if (ghes == pvt->ghes) {
+			mci = pvt->mci;
+			edac_mc_del_mc(mci->pdev);
+			edac_mc_free(mci);
+			list_del(&pvt->list);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(ghes_edac_unregister);
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
index 4e8337602e78..aa44c1718f50 100644
--- a/drivers/edac/i3200_edac.c
+++ b/drivers/edac/i3200_edac.c
@@ -106,16 +106,26 @@ static int nr_channels;
 
 static int how_many_channels(struct pci_dev *pdev)
 {
+	int n_channels;
+
 	unsigned char capid0_8b; /* 8th byte of CAPID0 */
 
 	pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b);
+
 	if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */
 		edac_dbg(0, "In single channel mode\n");
-		return 1;
+		n_channels = 1;
 	} else {
 		edac_dbg(0, "In dual channel mode\n");
-		return 2;
+		n_channels = 2;
 	}
+
+	if (capid0_8b & 0x10) /* check if both channels are filled */
+		edac_dbg(0, "2 DIMMS per channel disabled\n");
+	else
+		edac_dbg(0, "2 DIMMS per channel enabled\n");
+
+	return n_channels;
 }
 
 static unsigned long eccerrlog_syndrome(u64 log)
@@ -290,6 +300,8 @@ static void i3200_get_drbs(void __iomem *window,
 	for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) {
 		drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK;
 		drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK;
+
+		edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n", i, drbs[0][i], i, drbs[1][i]);
 	}
 }
 
@@ -311,6 +323,9 @@ static unsigned long drb_to_nr_pages(
 	int n;
 
 	n = drbs[channel][rank];
+	if (!n)
+		return 0;
+
 	if (rank > 0)
 		n -= drbs[channel][rank - 1];
 	if (stacked && (channel == 1) &&
@@ -377,19 +392,19 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
 	 * cumulative; the last one will contain the total memory
 	 * contained in all ranks.
 	 */
-	for (i = 0; i < mci->nr_csrows; i++) {
+	for (i = 0; i < I3200_DIMMS; i++) {
 		unsigned long nr_pages;
-		struct csrow_info *csrow = mci->csrows[i];
 
-		nr_pages = drb_to_nr_pages(drbs, stacked,
-			i / I3200_RANKS_PER_CHANNEL,
-			i % I3200_RANKS_PER_CHANNEL);
+		for (j = 0; j < nr_channels; j++) {
+			struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+							       mci->n_layers, i, j, 0);
 
-		if (nr_pages == 0)
-			continue;
+			nr_pages = drb_to_nr_pages(drbs, stacked, j, i);
+			if (nr_pages == 0)
+				continue;
 
-		for (j = 0; j < nr_channels; j++) {
-			struct dimm_info *dimm = csrow->channels[j]->dimm;
+			edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j,
+				 stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages));
 
 			dimm->nr_pages = nr_pages;
 			dimm->grain = nr_pages << PAGE_SHIFT;
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index d6955b2cc99f..1b635178cc44 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -27,6 +27,7 @@
 #include <linux/edac.h>
 #include <linux/delay.h>
 #include <linux/mmzone.h>
+#include <linux/debugfs.h>
 
 #include "edac_core.h"
 
@@ -68,6 +69,14 @@
 			I5100_FERR_NF_MEM_M1ERR_MASK)
 #define	I5100_NERR_NF_MEM	0xa4	/* MC Next Non-Fatal Errors */
 #define I5100_EMASK_MEM		0xa8	/* MC Error Mask Register */
+#define I5100_MEM0EINJMSK0	0x200	/* Injection Mask0 Register Channel 0 */
+#define I5100_MEM1EINJMSK0	0x208	/* Injection Mask0 Register Channel 1 */
+#define		I5100_MEMXEINJMSK0_EINJEN	(1 << 27)
+#define I5100_MEM0EINJMSK1	0x204	/* Injection Mask1 Register Channel 0 */
+#define I5100_MEM1EINJMSK1	0x206	/* Injection Mask1 Register Channel 1 */
+
+/* Device 19, Function 0 */
+#define I5100_DINJ0 0x9a
 
 /* device 21 and 22, func 0 */
 #define I5100_MTR_0	0x154	/* Memory Technology Registers 0-3 */
@@ -338,13 +347,26 @@ struct i5100_priv {
 	unsigned ranksperchan;	/* number of ranks per channel */
 
 	struct pci_dev *mc;	/* device 16 func 1 */
+	struct pci_dev *einj;	/* device 19 func 0 */
 	struct pci_dev *ch0mm;	/* device 21 func 0 */
 	struct pci_dev *ch1mm;	/* device 22 func 0 */
 
 	struct delayed_work i5100_scrubbing;
 	int scrub_enable;
+
+	/* Error injection */
+	u8 inject_channel;
+	u8 inject_hlinesel;
+	u8 inject_deviceptr1;
+	u8 inject_deviceptr2;
+	u16 inject_eccmask1;
+	u16 inject_eccmask2;
+
+	struct dentry *debugfs;
 };
 
+static struct dentry *i5100_debugfs;
+
 /* map a rank/chan to a slot number on the mainboard */
 static int i5100_rank_to_slot(const struct mem_ctl_info *mci,
 			      int chan, int rank)
@@ -863,13 +885,126 @@ static void i5100_init_csrows(struct mem_ctl_info *mci)
 	}
 }
 
+/****************************************************************************
+ *                       Error injection routines
+ ****************************************************************************/
+
+static void i5100_do_inject(struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	u32 mask0;
+	u16 mask1;
+
+	/* MEM[1:0]EINJMSK0
+	 * 31    - ADDRMATCHEN
+	 * 29:28 - HLINESEL
+	 *         00 Reserved
+	 *         01 Lower half of cache line
+	 *         10 Upper half of cache line
+	 *         11 Both upper and lower parts of cache line
+	 * 27    - EINJEN
+	 * 25:19 - XORMASK1 for deviceptr1
+	 * 9:5   - SEC2RAM for deviceptr2
+	 * 4:0   - FIR2RAM for deviceptr1
+	 */
+	mask0 = ((priv->inject_hlinesel & 0x3) << 28) |
+		I5100_MEMXEINJMSK0_EINJEN |
+		((priv->inject_eccmask1 & 0xffff) << 10) |
+		((priv->inject_deviceptr2 & 0x1f) << 5) |
+		(priv->inject_deviceptr1 & 0x1f);
+
+	/* MEM[1:0]EINJMSK1
+	 * 15:0  - XORMASK2 for deviceptr2
+	 */
+	mask1 = priv->inject_eccmask2;
+
+	if (priv->inject_channel == 0) {
+		pci_write_config_dword(priv->mc, I5100_MEM0EINJMSK0, mask0);
+		pci_write_config_word(priv->mc, I5100_MEM0EINJMSK1, mask1);
+	} else {
+		pci_write_config_dword(priv->mc, I5100_MEM1EINJMSK0, mask0);
+		pci_write_config_word(priv->mc, I5100_MEM1EINJMSK1, mask1);
+	}
+
+	/* Error Injection Response Function
+	 * Intel 5100 Memory Controller Hub Chipset (318378) datasheet
+	 * hints about this register but carry no data about them. All
+	 * data regarding device 19 is based on experimentation and the
+	 * Intel 7300 Chipset Memory Controller Hub (318082) datasheet
+	 * which appears to be accurate for the i5100 in this area.
+	 *
+	 * The injection code don't work without setting this register.
+	 * The register needs to be flipped off then on else the hardware
+	 * will only preform the first injection.
+	 *
+	 * Stop condition bits 7:4
+	 * 1010 - Stop after one injection
+	 * 1011 - Never stop injecting faults
+	 *
+	 * Start condition bits 3:0
+	 * 1010 - Never start
+	 * 1011 - Start immediately
+	 */
+	pci_write_config_byte(priv->einj, I5100_DINJ0, 0xaa);
+	pci_write_config_byte(priv->einj, I5100_DINJ0, 0xab);
+}
+
+#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
+static ssize_t inject_enable_write(struct file *file, const char __user *data,
+		size_t count, loff_t *ppos)
+{
+	struct device *dev = file->private_data;
+	struct mem_ctl_info *mci = to_mci(dev);
+
+	i5100_do_inject(mci);
+
+	return count;
+}
+
+static const struct file_operations i5100_inject_enable_fops = {
+	.open = simple_open,
+	.write = inject_enable_write,
+	.llseek = generic_file_llseek,
+};
+
+static int i5100_setup_debugfs(struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+
+	if (!i5100_debugfs)
+		return -ENODEV;
+
+	priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs);
+
+	if (!priv->debugfs)
+		return -ENOMEM;
+
+	debugfs_create_x8("inject_channel", S_IRUGO | S_IWUSR, priv->debugfs,
+			&priv->inject_channel);
+	debugfs_create_x8("inject_hlinesel", S_IRUGO | S_IWUSR, priv->debugfs,
+			&priv->inject_hlinesel);
+	debugfs_create_x8("inject_deviceptr1", S_IRUGO | S_IWUSR, priv->debugfs,
+			&priv->inject_deviceptr1);
+	debugfs_create_x8("inject_deviceptr2", S_IRUGO | S_IWUSR, priv->debugfs,
+			&priv->inject_deviceptr2);
+	debugfs_create_x16("inject_eccmask1", S_IRUGO | S_IWUSR, priv->debugfs,
+			&priv->inject_eccmask1);
+	debugfs_create_x16("inject_eccmask2", S_IRUGO | S_IWUSR, priv->debugfs,
+			&priv->inject_eccmask2);
+	debugfs_create_file("inject_enable", S_IWUSR, priv->debugfs,
+			&mci->dev, &i5100_inject_enable_fops);
+
+	return 0;
+
+}
+
 static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int rc;
 	struct mem_ctl_info *mci;
 	struct edac_mc_layer layers[2];
 	struct i5100_priv *priv;
-	struct pci_dev *ch0mm, *ch1mm;
+	struct pci_dev *ch0mm, *ch1mm, *einj;
 	int ret = 0;
 	u32 dw;
 	int ranksperch;
@@ -941,6 +1076,22 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto bail_disable_ch1;
 	}
 
+
+	/* device 19, func 0, Error injection */
+	einj = pci_get_device_func(PCI_VENDOR_ID_INTEL,
+				    PCI_DEVICE_ID_INTEL_5100_19, 0);
+	if (!einj) {
+		ret = -ENODEV;
+		goto bail_einj;
+	}
+
+	rc = pci_enable_device(einj);
+	if (rc < 0) {
+		ret = rc;
+		goto bail_disable_einj;
+	}
+
+
 	mci->pdev = &pdev->dev;
 
 	priv = mci->pvt_info;
@@ -948,6 +1099,7 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	priv->mc = pdev;
 	priv->ch0mm = ch0mm;
 	priv->ch1mm = ch1mm;
+	priv->einj = einj;
 
 	INIT_DELAYED_WORK(&(priv->i5100_scrubbing), i5100_refresh_scrubbing);
 
@@ -975,6 +1127,13 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	mci->set_sdram_scrub_rate = i5100_set_scrub_rate;
 	mci->get_sdram_scrub_rate = i5100_get_scrub_rate;
 
+	priv->inject_channel = 0;
+	priv->inject_hlinesel = 0;
+	priv->inject_deviceptr1 = 0;
+	priv->inject_deviceptr2 = 0;
+	priv->inject_eccmask1 = 0;
+	priv->inject_eccmask2 = 0;
+
 	i5100_init_csrows(mci);
 
 	/* this strange construction seems to be in every driver, dunno why */
@@ -992,6 +1151,8 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto bail_scrub;
 	}
 
+	i5100_setup_debugfs(mci);
+
 	return ret;
 
 bail_scrub:
@@ -999,6 +1160,12 @@ bail_scrub:
 	cancel_delayed_work_sync(&(priv->i5100_scrubbing));
 	edac_mc_free(mci);
 
+bail_disable_einj:
+	pci_disable_device(einj);
+
+bail_einj:
+	pci_dev_put(einj);
+
 bail_disable_ch1:
 	pci_disable_device(ch1mm);
 
@@ -1030,14 +1197,18 @@ static void i5100_remove_one(struct pci_dev *pdev)
 
 	priv = mci->pvt_info;
 
+	debugfs_remove_recursive(priv->debugfs);
+
 	priv->scrub_enable = 0;
 	cancel_delayed_work_sync(&(priv->i5100_scrubbing));
 
 	pci_disable_device(pdev);
 	pci_disable_device(priv->ch0mm);
 	pci_disable_device(priv->ch1mm);
+	pci_disable_device(priv->einj);
 	pci_dev_put(priv->ch0mm);
 	pci_dev_put(priv->ch1mm);
+	pci_dev_put(priv->einj);
 
 	edac_mc_free(mci);
 }
@@ -1060,13 +1231,16 @@ static int __init i5100_init(void)
 {
 	int pci_rc;
 
-	pci_rc = pci_register_driver(&i5100_driver);
+	i5100_debugfs = debugfs_create_dir("i5100_edac", NULL);
 
+	pci_rc = pci_register_driver(&i5100_driver);
 	return (pci_rc < 0) ? pci_rc : 0;
 }
 
 static void __exit i5100_exit(void)
 {
+	debugfs_remove(i5100_debugfs);
+
 	pci_unregister_driver(&i5100_driver);
 }
 
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index e213d030b0dd..0ec3e95a12cd 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -420,21 +420,21 @@ static inline int numdimms(u32 dimms)
 
 static inline int numrank(u32 rank)
 {
-	static int ranks[4] = { 1, 2, 4, -EINVAL };
+	static const int ranks[] = { 1, 2, 4, -EINVAL };
 
 	return ranks[rank & 0x3];
 }
 
 static inline int numbank(u32 bank)
 {
-	static int banks[4] = { 4, 8, 16, -EINVAL };
+	static const int banks[] = { 4, 8, 16, -EINVAL };
 
 	return banks[bank & 0x3];
 }
 
 static inline int numrow(u32 row)
 {
-	static int rows[8] = {
+	static const int rows[] = {
 		1 << 12, 1 << 13, 1 << 14, 1 << 15,
 		1 << 16, -EINVAL, -EINVAL, -EINVAL,
 	};
@@ -444,7 +444,7 @@ static inline int numrow(u32 row)
 
 static inline int numcol(u32 col)
 {
-	static int cols[8] = {
+	static const int cols[] = {
 		1 << 10, 1 << 11, 1 << 12, -EINVAL,
 	};
 	return cols[col & 0x3];
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index da7e2986e3d5..57244f995614 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -639,7 +639,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
 	tmp_mb = (1 + pvt->tohm) >> 20;
 
 	mb = div_u64_rem(tmp_mb, 1000, &kb);
-	edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)", mb, kb, (u64)pvt->tohm);
+	edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm);
 
 	/*
 	 * Step 2) Get SAD range and SAD Interleave list
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index f8d22872d753..27ac423ab25e 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -487,27 +487,28 @@ static int ioctl_get_info(struct client *client, union ioctl_arg *arg)
 static int add_client_resource(struct client *client,
 			       struct client_resource *resource, gfp_t gfp_mask)
 {
+	bool preload = gfp_mask & __GFP_WAIT;
 	unsigned long flags;
 	int ret;
 
- retry:
-	if (idr_pre_get(&client->resource_idr, gfp_mask) == 0)
-		return -ENOMEM;
-
+	if (preload)
+		idr_preload(gfp_mask);
 	spin_lock_irqsave(&client->lock, flags);
+
 	if (client->in_shutdown)
 		ret = -ECANCELED;
 	else
-		ret = idr_get_new(&client->resource_idr, resource,
-				  &resource->handle);
+		ret = idr_alloc(&client->resource_idr, resource, 0, 0,
+				GFP_NOWAIT);
 	if (ret >= 0) {
+		resource->handle = ret;
 		client_get(client);
 		schedule_if_iso_resource(resource);
 	}
-	spin_unlock_irqrestore(&client->lock, flags);
 
-	if (ret == -EAGAIN)
-		goto retry;
+	spin_unlock_irqrestore(&client->lock, flags);
+	if (preload)
+		idr_preload_end();
 
 	return ret < 0 ? ret : 0;
 }
@@ -1779,7 +1780,6 @@ static int fw_device_op_release(struct inode *inode, struct file *file)
 	wait_event(client->tx_flush_wait, !has_outbound_transactions(client));
 
 	idr_for_each(&client->resource_idr, shutdown_resource, client);
-	idr_remove_all(&client->resource_idr);
 	idr_destroy(&client->resource_idr);
 
 	list_for_each_entry_safe(event, next_event, &client->event_list, link)
diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index 3873d535b28d..03ce7d980c6a 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -1017,12 +1017,11 @@ static void fw_device_init(struct work_struct *work)
 
 	fw_device_get(device);
 	down_write(&fw_device_rwsem);
-	ret = idr_pre_get(&fw_device_idr, GFP_KERNEL) ?
-	      idr_get_new(&fw_device_idr, device, &minor) :
-	      -ENOMEM;
+	minor = idr_alloc(&fw_device_idr, device, 0, 1 << MINORBITS,
+			GFP_KERNEL);
 	up_write(&fw_device_rwsem);
 
-	if (ret < 0)
+	if (minor < 0)
 		goto error;
 
 	device->device.bus = &fw_bus_type;
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index fed08b661711..7320bf891706 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -79,6 +79,7 @@
 #include <linux/device.h>
 #include <linux/slab.h>
 #include <linux/pstore.h>
+#include <linux/ctype.h>
 
 #include <linux/fs.h>
 #include <linux/ramfs.h>
@@ -908,6 +909,48 @@ static struct inode *efivarfs_get_inode(struct super_block *sb,
 	return inode;
 }
 
+/*
+ * Return true if 'str' is a valid efivarfs filename of the form,
+ *
+ *	VariableName-12345678-1234-1234-1234-1234567891bc
+ */
+static bool efivarfs_valid_name(const char *str, int len)
+{
+	static const char dashes[GUID_LEN] = {
+		[8] = 1, [13] = 1, [18] = 1, [23] = 1
+	};
+	const char *s = str + len - GUID_LEN;
+	int i;
+
+	/*
+	 * We need a GUID, plus at least one letter for the variable name,
+	 * plus the '-' separator
+	 */
+	if (len < GUID_LEN + 2)
+		return false;
+
+	/* GUID should be right after the first '-' */
+	if (s - 1 != strchr(str, '-'))
+		return false;
+
+	/*
+	 * Validate that 's' is of the correct format, e.g.
+	 *
+	 *	12345678-1234-1234-1234-123456789abc
+	 */
+	for (i = 0; i < GUID_LEN; i++) {
+		if (dashes[i]) {
+			if (*s++ != '-')
+				return false;
+		} else {
+			if (!isxdigit(*s++))
+				return false;
+		}
+	}
+
+	return true;
+}
+
 static void efivarfs_hex_to_guid(const char *str, efi_guid_t *guid)
 {
 	guid->b[0] = hex_to_bin(str[6]) << 4 | hex_to_bin(str[7]);
@@ -936,11 +979,7 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
 	struct efivar_entry *var;
 	int namelen, i = 0, err = 0;
 
-	/*
-	 * We need a GUID, plus at least one letter for the variable name,
-	 * plus the '-' separator
-	 */
-	if (dentry->d_name.len < GUID_LEN + 2)
+	if (!efivarfs_valid_name(dentry->d_name.name, dentry->d_name.len))
 		return -EINVAL;
 
 	inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0);
@@ -1012,6 +1051,84 @@ static int efivarfs_unlink(struct inode *dir, struct dentry *dentry)
 	return -EINVAL;
 };
 
+/*
+ * Compare two efivarfs file names.
+ *
+ * An efivarfs filename is composed of two parts,
+ *
+ *	1. A case-sensitive variable name
+ *	2. A case-insensitive GUID
+ *
+ * So we need to perform a case-sensitive match on part 1 and a
+ * case-insensitive match on part 2.
+ */
+static int efivarfs_d_compare(const struct dentry *parent, const struct inode *pinode,
+			      const struct dentry *dentry, const struct inode *inode,
+			      unsigned int len, const char *str,
+			      const struct qstr *name)
+{
+	int guid = len - GUID_LEN;
+
+	if (name->len != len)
+		return 1;
+
+	/* Case-sensitive compare for the variable name */
+	if (memcmp(str, name->name, guid))
+		return 1;
+
+	/* Case-insensitive compare for the GUID */
+	return strncasecmp(name->name + guid, str + guid, GUID_LEN);
+}
+
+static int efivarfs_d_hash(const struct dentry *dentry,
+			   const struct inode *inode, struct qstr *qstr)
+{
+	unsigned long hash = init_name_hash();
+	const unsigned char *s = qstr->name;
+	unsigned int len = qstr->len;
+
+	if (!efivarfs_valid_name(s, len))
+		return -EINVAL;
+
+	while (len-- > GUID_LEN)
+		hash = partial_name_hash(*s++, hash);
+
+	/* GUID is case-insensitive. */
+	while (len--)
+		hash = partial_name_hash(tolower(*s++), hash);
+
+	qstr->hash = end_name_hash(hash);
+	return 0;
+}
+
+/*
+ * Retaining negative dentries for an in-memory filesystem just wastes
+ * memory and lookup time: arrange for them to be deleted immediately.
+ */
+static int efivarfs_delete_dentry(const struct dentry *dentry)
+{
+	return 1;
+}
+
+static struct dentry_operations efivarfs_d_ops = {
+	.d_compare = efivarfs_d_compare,
+	.d_hash = efivarfs_d_hash,
+	.d_delete = efivarfs_delete_dentry,
+};
+
+static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name)
+{
+	struct qstr q;
+
+	q.name = name;
+	q.len = strlen(name);
+
+	if (efivarfs_d_hash(NULL, NULL, &q))
+		return NULL;
+
+	return d_alloc(parent, &q);
+}
+
 static int efivarfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct inode *inode = NULL;
@@ -1027,6 +1144,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_blocksize_bits    = PAGE_CACHE_SHIFT;
 	sb->s_magic             = EFIVARFS_MAGIC;
 	sb->s_op                = &efivarfs_ops;
+	sb->s_d_op		= &efivarfs_d_ops;
 	sb->s_time_gran         = 1;
 
 	inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0);
@@ -1067,7 +1185,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent)
 		if (!inode)
 			goto fail_name;
 
-		dentry = d_alloc_name(root, name);
+		dentry = efivarfs_alloc_dentry(root, name);
 		if (!dentry)
 			goto fail_inode;
 
@@ -1084,7 +1202,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent)
 
 		mutex_lock(&inode->i_mutex);
 		inode->i_private = entry;
-		i_size_write(inode, size+4);
+		i_size_write(inode, size + sizeof(entry->var.Attributes));
 		mutex_unlock(&inode->i_mutex);
 		d_add(dentry, inode);
 	}
@@ -1117,8 +1235,20 @@ static struct file_system_type efivarfs_type = {
 	.kill_sb = efivarfs_kill_sb,
 };
 
+/*
+ * Handle negative dentry.
+ */
+static struct dentry *efivarfs_lookup(struct inode *dir, struct dentry *dentry,
+				      unsigned int flags)
+{
+	if (dentry->d_name.len > NAME_MAX)
+		return ERR_PTR(-ENAMETOOLONG);
+	d_add(dentry, NULL);
+	return NULL;
+}
+
 static const struct inode_operations efivarfs_dir_inode_operations = {
-	.lookup = simple_lookup,
+	.lookup = efivarfs_lookup,
 	.unlink = efivarfs_unlink,
 	.create = efivarfs_create,
 };
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 4828fe7c66cb..fff9786cdc64 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -411,15 +411,10 @@ static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev,
 			goto err_out;
 		}
 
-		do {
-			ret = -ENOMEM;
-			if (idr_pre_get(&dirent_idr, GFP_KERNEL))
-				ret = idr_get_new_above(&dirent_idr, value_sd,
-							1, &id);
-		} while (ret == -EAGAIN);
-
-		if (ret)
+		ret = idr_alloc(&dirent_idr, value_sd, 1, 0, GFP_KERNEL);
+		if (ret < 0)
 			goto free_sd;
+		id = ret;
 
 		desc->flags &= GPIO_FLAGS_MASK;
 		desc->flags |= (unsigned long)id << ID_SHIFT;
diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c
index 45adf97e678f..725968d38976 100644
--- a/drivers/gpu/drm/drm_context.c
+++ b/drivers/gpu/drm/drm_context.c
@@ -74,24 +74,13 @@ void drm_ctxbitmap_free(struct drm_device * dev, int ctx_handle)
  */
 static int drm_ctxbitmap_next(struct drm_device * dev)
 {
-	int new_id;
 	int ret;
 
-again:
-	if (idr_pre_get(&dev->ctx_idr, GFP_KERNEL) == 0) {
-		DRM_ERROR("Out of memory expanding drawable idr\n");
-		return -ENOMEM;
-	}
 	mutex_lock(&dev->struct_mutex);
-	ret = idr_get_new_above(&dev->ctx_idr, NULL,
-				DRM_RESERVED_CONTEXTS, &new_id);
+	ret = idr_alloc(&dev->ctx_idr, NULL, DRM_RESERVED_CONTEXTS, 0,
+			GFP_KERNEL);
 	mutex_unlock(&dev->struct_mutex);
-	if (ret == -EAGAIN)
-		goto again;
-	else if (ret)
-		return ret;
-
-	return new_id;
+	return ret;
 }
 
 /**
@@ -118,7 +107,7 @@ int drm_ctxbitmap_init(struct drm_device * dev)
 void drm_ctxbitmap_cleanup(struct drm_device * dev)
 {
 	mutex_lock(&dev->struct_mutex);
-	idr_remove_all(&dev->ctx_idr);
+	idr_destroy(&dev->ctx_idr);
 	mutex_unlock(&dev->struct_mutex);
 }
 
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 3bdf2a650d9c..792c3e3795ca 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -266,32 +266,21 @@ char *drm_get_connector_status_name(enum drm_connector_status status)
 static int drm_mode_object_get(struct drm_device *dev,
 			       struct drm_mode_object *obj, uint32_t obj_type)
 {
-	int new_id = 0;
 	int ret;
 
-again:
-	if (idr_pre_get(&dev->mode_config.crtc_idr, GFP_KERNEL) == 0) {
-		DRM_ERROR("Ran out memory getting a mode number\n");
-		return -ENOMEM;
-	}
-
 	mutex_lock(&dev->mode_config.idr_mutex);
-	ret = idr_get_new_above(&dev->mode_config.crtc_idr, obj, 1, &new_id);
-
-	if (!ret) {
+	ret = idr_alloc(&dev->mode_config.crtc_idr, obj, 1, 0, GFP_KERNEL);
+	if (ret >= 0) {
 		/*
 		 * Set up the object linking under the protection of the idr
 		 * lock so that other users can't see inconsistent state.
 		 */
-		obj->id = new_id;
+		obj->id = ret;
 		obj->type = obj_type;
 	}
 	mutex_unlock(&dev->mode_config.idr_mutex);
 
-	if (ret == -EAGAIN)
-		goto again;
-
-	return ret;
+	return ret < 0 ? ret : 0;
 }
 
 /**
@@ -1272,7 +1261,6 @@ void drm_mode_config_cleanup(struct drm_device *dev)
 		crtc->funcs->destroy(crtc);
 	}
 
-	idr_remove_all(&dev->mode_config.crtc_idr);
 	idr_destroy(&dev->mode_config.crtc_idr);
 }
 EXPORT_SYMBOL(drm_mode_config_cleanup);
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index be174cab105a..25f91cd23e60 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -297,7 +297,6 @@ static void __exit drm_core_exit(void)
 
 	unregister_chrdev(DRM_MAJOR, "drm");
 
-	idr_remove_all(&drm_minors_idr);
 	idr_destroy(&drm_minors_idr);
 }
 
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 24efae464e2c..af779ae19ebf 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -270,21 +270,19 @@ drm_gem_handle_create(struct drm_file *file_priv,
 	int ret;
 
 	/*
-	 * Get the user-visible handle using idr.
+	 * Get the user-visible handle using idr.  Preload and perform
+	 * allocation under our spinlock.
 	 */
-again:
-	/* ensure there is space available to allocate a handle */
-	if (idr_pre_get(&file_priv->object_idr, GFP_KERNEL) == 0)
-		return -ENOMEM;
-
-	/* do the allocation under our spinlock */
+	idr_preload(GFP_KERNEL);
 	spin_lock(&file_priv->table_lock);
-	ret = idr_get_new_above(&file_priv->object_idr, obj, 1, (int *)handlep);
+
+	ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT);
+
 	spin_unlock(&file_priv->table_lock);
-	if (ret == -EAGAIN)
-		goto again;
-	else if (ret)
+	idr_preload_end();
+	if (ret < 0)
 		return ret;
+	*handlep = ret;
 
 	drm_gem_object_handle_reference(obj);
 
@@ -451,29 +449,25 @@ drm_gem_flink_ioctl(struct drm_device *dev, void *data,
 	if (obj == NULL)
 		return -ENOENT;
 
-again:
-	if (idr_pre_get(&dev->object_name_idr, GFP_KERNEL) == 0) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
+	idr_preload(GFP_KERNEL);
 	spin_lock(&dev->object_name_lock);
 	if (!obj->name) {
-		ret = idr_get_new_above(&dev->object_name_idr, obj, 1,
-					&obj->name);
+		ret = idr_alloc(&dev->object_name_idr, obj, 1, 0, GFP_NOWAIT);
+		obj->name = ret;
 		args->name = (uint64_t) obj->name;
 		spin_unlock(&dev->object_name_lock);
+		idr_preload_end();
 
-		if (ret == -EAGAIN)
-			goto again;
-		else if (ret)
+		if (ret < 0)
 			goto err;
+		ret = 0;
 
 		/* Allocate a reference for the name table.  */
 		drm_gem_object_reference(obj);
 	} else {
 		args->name = (uint64_t) obj->name;
 		spin_unlock(&dev->object_name_lock);
+		idr_preload_end();
 		ret = 0;
 	}
 
@@ -561,8 +555,6 @@ drm_gem_release(struct drm_device *dev, struct drm_file *file_private)
 {
 	idr_for_each(&file_private->object_idr,
 		     &drm_gem_object_release_handle, file_private);
-
-	idr_remove_all(&file_private->object_idr);
 	idr_destroy(&file_private->object_idr);
 }
 
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index 80254547a3f8..7e4bae760e27 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -60,14 +60,13 @@ void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key)
 {
 	struct drm_hash_item *entry;
 	struct hlist_head *h_list;
-	struct hlist_node *list;
 	unsigned int hashed_key;
 	int count = 0;
 
 	hashed_key = hash_long(key, ht->order);
 	DRM_DEBUG("Key is 0x%08lx, Hashed key is 0x%08x\n", key, hashed_key);
 	h_list = &ht->table[hashed_key];
-	hlist_for_each_entry(entry, list, h_list, head)
+	hlist_for_each_entry(entry, h_list, head)
 		DRM_DEBUG("count %d, key: 0x%08lx\n", count++, entry->key);
 }
 
@@ -76,14 +75,13 @@ static struct hlist_node *drm_ht_find_key(struct drm_open_hash *ht,
 {
 	struct drm_hash_item *entry;
 	struct hlist_head *h_list;
-	struct hlist_node *list;
 	unsigned int hashed_key;
 
 	hashed_key = hash_long(key, ht->order);
 	h_list = &ht->table[hashed_key];
-	hlist_for_each_entry(entry, list, h_list, head) {
+	hlist_for_each_entry(entry, h_list, head) {
 		if (entry->key == key)
-			return list;
+			return &entry->head;
 		if (entry->key > key)
 			break;
 	}
@@ -95,14 +93,13 @@ static struct hlist_node *drm_ht_find_key_rcu(struct drm_open_hash *ht,
 {
 	struct drm_hash_item *entry;
 	struct hlist_head *h_list;
-	struct hlist_node *list;
 	unsigned int hashed_key;
 
 	hashed_key = hash_long(key, ht->order);
 	h_list = &ht->table[hashed_key];
-	hlist_for_each_entry_rcu(entry, list, h_list, head) {
+	hlist_for_each_entry_rcu(entry, h_list, head) {
 		if (entry->key == key)
-			return list;
+			return &entry->head;
 		if (entry->key > key)
 			break;
 	}
@@ -113,19 +110,19 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item)
 {
 	struct drm_hash_item *entry;
 	struct hlist_head *h_list;
-	struct hlist_node *list, *parent;
+	struct hlist_node *parent;
 	unsigned int hashed_key;
 	unsigned long key = item->key;
 
 	hashed_key = hash_long(key, ht->order);
 	h_list = &ht->table[hashed_key];
 	parent = NULL;
-	hlist_for_each_entry(entry, list, h_list, head) {
+	hlist_for_each_entry(entry, h_list, head) {
 		if (entry->key == key)
 			return -EINVAL;
 		if (entry->key > key)
 			break;
-		parent = list;
+		parent = &entry->head;
 	}
 	if (parent) {
 		hlist_add_after_rcu(parent, &item->head);
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index 200e104f1fa0..7d30802a018f 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -109,7 +109,6 @@ EXPORT_SYMBOL(drm_ut_debug_printk);
 
 static int drm_minor_get_id(struct drm_device *dev, int type)
 {
-	int new_id;
 	int ret;
 	int base = 0, limit = 63;
 
@@ -121,25 +120,11 @@ static int drm_minor_get_id(struct drm_device *dev, int type)
                 limit = base + 255;
         }
 
-again:
-	if (idr_pre_get(&drm_minors_idr, GFP_KERNEL) == 0) {
-		DRM_ERROR("Out of memory expanding drawable idr\n");
-		return -ENOMEM;
-	}
 	mutex_lock(&dev->struct_mutex);
-	ret = idr_get_new_above(&drm_minors_idr, NULL,
-				base, &new_id);
+	ret = idr_alloc(&drm_minors_idr, NULL, base, limit, GFP_KERNEL);
 	mutex_unlock(&dev->struct_mutex);
-	if (ret == -EAGAIN)
-		goto again;
-	else if (ret)
-		return ret;
 
-	if (new_id >= limit) {
-		idr_remove(&drm_minors_idr, new_id);
-		return -EINVAL;
-	}
-	return new_id;
+	return ret == -ENOSPC ? -EINVAL : ret;
 }
 
 struct drm_master *drm_master_create(struct drm_minor *minor)
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
index 1a556354e92f..1adce07ecb5b 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
@@ -137,21 +137,15 @@ static int ipp_create_id(struct idr *id_idr, struct mutex *lock, void *obj,
 
 	DRM_DEBUG_KMS("%s\n", __func__);
 
-again:
-	/* ensure there is space available to allocate a handle */
-	if (idr_pre_get(id_idr, GFP_KERNEL) == 0) {
-		DRM_ERROR("failed to get idr.\n");
-		return -ENOMEM;
-	}
-
 	/* do the allocation under our mutexlock */
 	mutex_lock(lock);
-	ret = idr_get_new_above(id_idr, obj, 1, (int *)idp);
+	ret = idr_alloc(id_idr, obj, 1, 0, GFP_KERNEL);
 	mutex_unlock(lock);
-	if (ret == -EAGAIN)
-		goto again;
+	if (ret < 0)
+		return ret;
 
-	return ret;
+	*idp = ret;
+	return 0;
 }
 
 static void *ipp_find_obj(struct idr *id_idr, struct mutex *lock, u32 id)
@@ -1786,8 +1780,6 @@ err_iommu:
 			drm_iommu_detach_device(drm_dev, ippdrv->dev);
 
 err_idr:
-	idr_remove_all(&ctx->ipp_idr);
-	idr_remove_all(&ctx->prop_idr);
 	idr_destroy(&ctx->ipp_idr);
 	idr_destroy(&ctx->prop_idr);
 	return ret;
@@ -1965,8 +1957,6 @@ static int ipp_remove(struct platform_device *pdev)
 	exynos_drm_subdrv_unregister(&ctx->subdrv);
 
 	/* remove,destroy ipp idr */
-	idr_remove_all(&ctx->ipp_idr);
-	idr_remove_all(&ctx->prop_idr);
 	idr_destroy(&ctx->ipp_idr);
 	idr_destroy(&ctx->prop_idr);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 21177d9df423..94d873a6cffb 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -139,7 +139,7 @@ create_hw_context(struct drm_device *dev,
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_hw_context *ctx;
-	int ret, id;
+	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (ctx == NULL)
@@ -164,22 +164,11 @@ create_hw_context(struct drm_device *dev,
 
 	ctx->file_priv = file_priv;
 
-again:
-	if (idr_pre_get(&file_priv->context_idr, GFP_KERNEL) == 0) {
-		ret = -ENOMEM;
-		DRM_DEBUG_DRIVER("idr allocation failed\n");
-		goto err_out;
-	}
-
-	ret = idr_get_new_above(&file_priv->context_idr, ctx,
-				DEFAULT_CONTEXT_ID + 1, &id);
-	if (ret == 0)
-		ctx->id = id;
-
-	if (ret == -EAGAIN)
-		goto again;
-	else if (ret)
+	ret = idr_alloc(&file_priv->context_idr, ctx, DEFAULT_CONTEXT_ID + 1, 0,
+			GFP_KERNEL);
+	if (ret < 0)
 		goto err_out;
+	ctx->id = ret;
 
 	return ctx;
 
diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c
index 841065b998a1..5a5325e6b759 100644
--- a/drivers/gpu/drm/sis/sis_drv.c
+++ b/drivers/gpu/drm/sis/sis_drv.c
@@ -58,7 +58,6 @@ static int sis_driver_unload(struct drm_device *dev)
 {
 	drm_sis_private_t *dev_priv = dev->dev_private;
 
-	idr_remove_all(&dev_priv->object_idr);
 	idr_destroy(&dev_priv->object_idr);
 
 	kfree(dev_priv);
diff --git a/drivers/gpu/drm/sis/sis_mm.c b/drivers/gpu/drm/sis/sis_mm.c
index 2b2f78c428af..9a43d98e5003 100644
--- a/drivers/gpu/drm/sis/sis_mm.c
+++ b/drivers/gpu/drm/sis/sis_mm.c
@@ -128,17 +128,10 @@ static int sis_drm_alloc(struct drm_device *dev, struct drm_file *file,
 	if (retval)
 		goto fail_alloc;
 
-again:
-	if (idr_pre_get(&dev_priv->object_idr, GFP_KERNEL) == 0) {
-		retval = -ENOMEM;
-		goto fail_idr;
-	}
-
-	retval = idr_get_new_above(&dev_priv->object_idr, item, 1, &user_key);
-	if (retval == -EAGAIN)
-		goto again;
-	if (retval)
+	retval = idr_alloc(&dev_priv->object_idr, item, 1, 0, GFP_KERNEL);
+	if (retval < 0)
 		goto fail_idr;
+	user_key = retval;
 
 	list_add(&item->owner_list, &file_priv->obj_list);
 	mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/via/via_map.c b/drivers/gpu/drm/via/via_map.c
index c0f1cc7f5ca9..d0ab3fb32acd 100644
--- a/drivers/gpu/drm/via/via_map.c
+++ b/drivers/gpu/drm/via/via_map.c
@@ -120,7 +120,6 @@ int via_driver_unload(struct drm_device *dev)
 {
 	drm_via_private_t *dev_priv = dev->dev_private;
 
-	idr_remove_all(&dev_priv->object_idr);
 	idr_destroy(&dev_priv->object_idr);
 
 	kfree(dev_priv);
diff --git a/drivers/gpu/drm/via/via_mm.c b/drivers/gpu/drm/via/via_mm.c
index 0d55432e02a2..0ab93ff09873 100644
--- a/drivers/gpu/drm/via/via_mm.c
+++ b/drivers/gpu/drm/via/via_mm.c
@@ -148,17 +148,10 @@ int via_mem_alloc(struct drm_device *dev, void *data,
 	if (retval)
 		goto fail_alloc;
 
-again:
-	if (idr_pre_get(&dev_priv->object_idr, GFP_KERNEL) == 0) {
-		retval = -ENOMEM;
-		goto fail_idr;
-	}
-
-	retval = idr_get_new_above(&dev_priv->object_idr, item, 1, &user_key);
-	if (retval == -EAGAIN)
-		goto again;
-	if (retval)
+	retval = idr_alloc(&dev_priv->object_idr, item, 1, 0, GFP_KERNEL);
+	if (retval < 0)
 		goto fail_idr;
+	user_key = retval;
 
 	list_add(&item->owner_list, &file_priv->obj_list);
 	mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 16556170fb32..bc784254e78e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -177,17 +177,16 @@ int vmw_resource_alloc_id(struct vmw_resource *res)
 
 	BUG_ON(res->id != -1);
 
-	do {
-		if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0))
-			return -ENOMEM;
-
-		write_lock(&dev_priv->resource_lock);
-		ret = idr_get_new_above(idr, res, 1, &res->id);
-		write_unlock(&dev_priv->resource_lock);
+	idr_preload(GFP_KERNEL);
+	write_lock(&dev_priv->resource_lock);
 
-	} while (ret == -EAGAIN);
+	ret = idr_alloc(idr, res, 1, 0, GFP_NOWAIT);
+	if (ret >= 0)
+		res->id = ret;
 
-	return ret;
+	write_unlock(&dev_priv->resource_lock);
+	idr_preload_end();
+	return ret < 0 ? ret : 0;
 }
 
 /**
diff --git a/drivers/hsi/hsi.c b/drivers/hsi/hsi.c
index 2d58f939d27f..833dd1afbf46 100644
--- a/drivers/hsi/hsi.c
+++ b/drivers/hsi/hsi.c
@@ -420,7 +420,7 @@ static int hsi_event_notifier_call(struct notifier_block *nb,
 /**
  * hsi_register_port_event - Register a client to receive port events
  * @cl: HSI client that wants to receive port events
- * @cb: Event handler callback
+ * @handler: Event handler callback
  *
  * Clients should register a callback to be able to receive
  * events from the ports. Registration should happen after
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 53a8600162a5..ff1be167eb04 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -318,7 +318,7 @@ static u32 get_vp_index(uuid_le *type_guid)
 		return 0;
 	}
 	cur_cpu = (++next_vp % max_cpus);
-	return 0;
+	return cur_cpu;
 }
 
 /*
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 1c5481da6e4a..731158910c1e 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -272,7 +272,7 @@ u16 hv_signal_event(void *con_id)
  * retrieve the initialized message and event pages.  Otherwise, we create and
  * initialize the message and event pages.
  */
-void hv_synic_init(void *irqarg)
+void hv_synic_init(void *arg)
 {
 	u64 version;
 	union hv_synic_simp simp;
@@ -281,7 +281,6 @@ void hv_synic_init(void *irqarg)
 	union hv_synic_scontrol sctrl;
 	u64 vp_index;
 
-	u32 irq_vector = *((u32 *)(irqarg));
 	int cpu = smp_processor_id();
 
 	if (!hv_context.hypercall_page)
@@ -335,7 +334,7 @@ void hv_synic_init(void *irqarg)
 	rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
 
 	shared_sint.as_uint64 = 0;
-	shared_sint.vector = irq_vector; /* HV_SHARED_SINT_IDT_VECTOR + 0x20; */
+	shared_sint.vector = HYPERVISOR_CALLBACK_VECTOR;
 	shared_sint.masked = false;
 	shared_sint.auto_eoi = true;
 
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index cf19dfa5ead1..bf421e0efa1e 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -36,6 +36,7 @@
 #include <linux/kernel_stat.h>
 #include <asm/hyperv.h>
 #include <asm/hypervisor.h>
+#include <asm/mshyperv.h>
 #include "hyperv_vmbus.h"
 
 
@@ -528,7 +529,6 @@ static void vmbus_flow_handler(unsigned int irq, struct irq_desc *desc)
 static int vmbus_bus_init(int irq)
 {
 	int ret;
-	unsigned int vector;
 
 	/* Hypervisor initialization...setup hypercall page..etc */
 	ret = hv_init();
@@ -558,13 +558,16 @@ static int vmbus_bus_init(int irq)
 	 */
 	irq_set_handler(irq, vmbus_flow_handler);
 
-	vector = IRQ0_VECTOR + irq;
+	/*
+	 * Register our interrupt handler.
+	 */
+	hv_register_vmbus_handler(irq, vmbus_isr);
 
 	/*
-	 * Notify the hypervisor of our irq and
+	 * Initialize the per-cpu interrupt state and
 	 * connect to the host.
 	 */
-	on_each_cpu(hv_synic_init, (void *)&vector, 1);
+	on_each_cpu(hv_synic_init, NULL, 1);
 	ret = vmbus_connect();
 	if (ret)
 		goto err_irq;
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 66a30f7ac882..991d38daa87d 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -935,25 +935,17 @@ out_list:
  */
 int i2c_add_adapter(struct i2c_adapter *adapter)
 {
-	int	id, res = 0;
-
-retry:
-	if (idr_pre_get(&i2c_adapter_idr, GFP_KERNEL) == 0)
-		return -ENOMEM;
+	int id;
 
 	mutex_lock(&core_lock);
-	/* "above" here means "above or equal to", sigh */
-	res = idr_get_new_above(&i2c_adapter_idr, adapter,
-				__i2c_first_dynamic_bus_num, &id);
+	id = idr_alloc(&i2c_adapter_idr, adapter,
+		       __i2c_first_dynamic_bus_num, 0, GFP_KERNEL);
 	mutex_unlock(&core_lock);
-
-	if (res < 0) {
-		if (res == -EAGAIN)
-			goto retry;
-		return res;
-	}
+	if (id < 0)
+		return id;
 
 	adapter->nr = id;
+
 	return i2c_register_adapter(adapter);
 }
 EXPORT_SYMBOL(i2c_add_adapter);
@@ -984,33 +976,17 @@ EXPORT_SYMBOL(i2c_add_adapter);
 int i2c_add_numbered_adapter(struct i2c_adapter *adap)
 {
 	int	id;
-	int	status;
 
 	if (adap->nr == -1) /* -1 means dynamically assign bus id */
 		return i2c_add_adapter(adap);
-	if (adap->nr & ~MAX_IDR_MASK)
-		return -EINVAL;
-
-retry:
-	if (idr_pre_get(&i2c_adapter_idr, GFP_KERNEL) == 0)
-		return -ENOMEM;
 
 	mutex_lock(&core_lock);
-	/* "above" here means "above or equal to", sigh;
-	 * we need the "equal to" result to force the result
-	 */
-	status = idr_get_new_above(&i2c_adapter_idr, adap, adap->nr, &id);
-	if (status == 0 && id != adap->nr) {
-		status = -EBUSY;
-		idr_remove(&i2c_adapter_idr, id);
-	}
+	id = idr_alloc(&i2c_adapter_idr, adap, adap->nr, adap->nr + 1,
+		       GFP_KERNEL);
 	mutex_unlock(&core_lock);
-	if (status == -EAGAIN)
-		goto retry;
-
-	if (status == 0)
-		status = i2c_register_adapter(adap);
-	return status;
+	if (id < 0)
+		return id == -ENOSPC ? -EBUSY : id;
+	return i2c_register_adapter(adap);
 }
 EXPORT_SYMBOL_GPL(i2c_add_numbered_adapter);
 
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 394fea2ba1bc..784b97cb05b0 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -382,20 +382,21 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 {
 	unsigned long flags;
-	int ret, id;
+	int id;
 	static int next_id;
 
-	do {
-		spin_lock_irqsave(&cm.lock, flags);
-		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
-					next_id, &id);
-		if (!ret)
-			next_id = ((unsigned) id + 1) & MAX_IDR_MASK;
-		spin_unlock_irqrestore(&cm.lock, flags);
-	} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
+	idr_preload(GFP_KERNEL);
+	spin_lock_irqsave(&cm.lock, flags);
+
+	id = idr_alloc(&cm.local_id_table, cm_id_priv, next_id, 0, GFP_NOWAIT);
+	if (id >= 0)
+		next_id = max(id + 1, 0);
+
+	spin_unlock_irqrestore(&cm.lock, flags);
+	idr_preload_end();
 
 	cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
-	return ret;
+	return id < 0 ? id : 0;
 }
 
 static void cm_free_id(__be32 local_id)
@@ -3844,7 +3845,6 @@ static int __init ib_cm_init(void)
 	cm.remote_sidr_table = RB_ROOT;
 	idr_init(&cm.local_id_table);
 	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
-	idr_pre_get(&cm.local_id_table, GFP_KERNEL);
 	INIT_LIST_HEAD(&cm.timewait_list);
 
 	ret = class_register(&cm_class);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index d789eea32168..71c2c7116802 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2143,33 +2143,23 @@ static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
 			  unsigned short snum)
 {
 	struct rdma_bind_list *bind_list;
-	int port, ret;
+	int ret;
 
 	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
 	if (!bind_list)
 		return -ENOMEM;
 
-	do {
-		ret = idr_get_new_above(ps, bind_list, snum, &port);
-	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
-
-	if (ret)
-		goto err1;
-
-	if (port != snum) {
-		ret = -EADDRNOTAVAIL;
-		goto err2;
-	}
+	ret = idr_alloc(ps, bind_list, snum, snum + 1, GFP_KERNEL);
+	if (ret < 0)
+		goto err;
 
 	bind_list->ps = ps;
-	bind_list->port = (unsigned short) port;
+	bind_list->port = (unsigned short)ret;
 	cma_bind_port(bind_list, id_priv);
 	return 0;
-err2:
-	idr_remove(ps, port);
-err1:
+err:
 	kfree(bind_list);
-	return ret;
+	return ret == -ENOSPC ? -EADDRNOTAVAIL : ret;
 }
 
 static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
@@ -2214,10 +2204,9 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
 {
 	struct rdma_id_private *cur_id;
 	struct sockaddr *addr, *cur_addr;
-	struct hlist_node *node;
 
 	addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
-	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
+	hlist_for_each_entry(cur_id, &bind_list->owners, node) {
 		if (id_priv == cur_id)
 			continue;
 
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 176c8f90f2bb..9f5ad7cc33c8 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -118,14 +118,13 @@ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
 {
 	struct hlist_head *bucket;
 	struct ib_pool_fmr *fmr;
-	struct hlist_node *pos;
 
 	if (!pool->cache_bucket)
 		return NULL;
 
 	bucket = pool->cache_bucket + ib_fmr_hash(*page_list);
 
-	hlist_for_each_entry(fmr, pos, bucket, cache_node)
+	hlist_for_each_entry(fmr, bucket, cache_node)
 		if (io_virtual_address == fmr->io_virtual_address &&
 		    page_list_len      == fmr->page_list_len      &&
 		    !memcmp(page_list, fmr->page_list,
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index a8905abc56e4..934f45e79e5e 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -611,19 +611,21 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
 
 static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
 {
+	bool preload = gfp_mask & __GFP_WAIT;
 	unsigned long flags;
 	int ret, id;
 
-retry:
-	if (!idr_pre_get(&query_idr, gfp_mask))
-		return -ENOMEM;
+	if (preload)
+		idr_preload(gfp_mask);
 	spin_lock_irqsave(&idr_lock, flags);
-	ret = idr_get_new(&query_idr, query, &id);
+
+	id = idr_alloc(&query_idr, query, 0, 0, GFP_NOWAIT);
+
 	spin_unlock_irqrestore(&idr_lock, flags);
-	if (ret == -EAGAIN)
-		goto retry;
-	if (ret)
-		return ret;
+	if (preload)
+		idr_preload_end();
+	if (id < 0)
+		return id;
 
 	query->mad_buf->timeout_ms  = timeout_ms;
 	query->mad_buf->context[0] = query;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 49b15ac1987e..f2f63933e8a9 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -176,7 +176,6 @@ static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx)
 static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
 {
 	struct ib_ucm_context *ctx;
-	int result;
 
 	ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
 	if (!ctx)
@@ -187,17 +186,10 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
 	ctx->file = file;
 	INIT_LIST_HEAD(&ctx->events);
 
-	do {
-		result = idr_pre_get(&ctx_id_table, GFP_KERNEL);
-		if (!result)
-			goto error;
-
-		mutex_lock(&ctx_id_mutex);
-		result = idr_get_new(&ctx_id_table, ctx, &ctx->id);
-		mutex_unlock(&ctx_id_mutex);
-	} while (result == -EAGAIN);
-
-	if (result)
+	mutex_lock(&ctx_id_mutex);
+	ctx->id = idr_alloc(&ctx_id_table, ctx, 0, 0, GFP_KERNEL);
+	mutex_unlock(&ctx_id_mutex);
+	if (ctx->id < 0)
 		goto error;
 
 	list_add_tail(&ctx->file_list, &file->ctxs);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 2709ff581392..5ca44cd9b00c 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -145,7 +145,6 @@ static void ucma_put_ctx(struct ucma_context *ctx)
 static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
 {
 	struct ucma_context *ctx;
-	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -156,17 +155,10 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
 	INIT_LIST_HEAD(&ctx->mc_list);
 	ctx->file = file;
 
-	do {
-		ret = idr_pre_get(&ctx_idr, GFP_KERNEL);
-		if (!ret)
-			goto error;
-
-		mutex_lock(&mut);
-		ret = idr_get_new(&ctx_idr, ctx, &ctx->id);
-		mutex_unlock(&mut);
-	} while (ret == -EAGAIN);
-
-	if (ret)
+	mutex_lock(&mut);
+	ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL);
+	mutex_unlock(&mut);
+	if (ctx->id < 0)
 		goto error;
 
 	list_add_tail(&ctx->list, &file->ctx_list);
@@ -180,23 +172,15 @@ error:
 static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
 {
 	struct ucma_multicast *mc;
-	int ret;
 
 	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
 	if (!mc)
 		return NULL;
 
-	do {
-		ret = idr_pre_get(&multicast_idr, GFP_KERNEL);
-		if (!ret)
-			goto error;
-
-		mutex_lock(&mut);
-		ret = idr_get_new(&multicast_idr, mc, &mc->id);
-		mutex_unlock(&mut);
-	} while (ret == -EAGAIN);
-
-	if (ret)
+	mutex_lock(&mut);
+	mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL);
+	mutex_unlock(&mut);
+	if (mc->id < 0)
 		goto error;
 
 	mc->ctx = ctx;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index e71d834c922a..a7d00f6b3bc1 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -125,18 +125,17 @@ static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
 {
 	int ret;
 
-retry:
-	if (!idr_pre_get(idr, GFP_KERNEL))
-		return -ENOMEM;
-
+	idr_preload(GFP_KERNEL);
 	spin_lock(&ib_uverbs_idr_lock);
-	ret = idr_get_new(idr, uobj, &uobj->id);
-	spin_unlock(&ib_uverbs_idr_lock);
 
-	if (ret == -EAGAIN)
-		goto retry;
+	ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT);
+	if (ret >= 0)
+		uobj->id = ret;
 
-	return ret;
+	spin_unlock(&ib_uverbs_idr_lock);
+	idr_preload_end();
+
+	return ret < 0 ? ret : 0;
 }
 
 void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c
index 28cd5cb51859..0ab826b280b2 100644
--- a/drivers/infiniband/hw/amso1100/c2_qp.c
+++ b/drivers/infiniband/hw/amso1100/c2_qp.c
@@ -382,14 +382,17 @@ static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
 {
 	int ret;
 
-        do {
-		spin_lock_irq(&c2dev->qp_table.lock);
-		ret = idr_get_new_above(&c2dev->qp_table.idr, qp,
-					c2dev->qp_table.last++, &qp->qpn);
-		spin_unlock_irq(&c2dev->qp_table.lock);
-        } while ((ret == -EAGAIN) &&
-	 	 idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL));
-	return ret;
+	idr_preload(GFP_KERNEL);
+	spin_lock_irq(&c2dev->qp_table.lock);
+
+	ret = idr_alloc(&c2dev->qp_table.idr, qp, c2dev->qp_table.last++, 0,
+			GFP_NOWAIT);
+	if (ret >= 0)
+		qp->qpn = ret;
+
+	spin_unlock_irq(&c2dev->qp_table.lock);
+	idr_preload_end();
+	return ret < 0 ? ret : 0;
 }
 
 static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index a1c44578e039..837862287a29 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -153,19 +153,17 @@ static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr,
 				void *handle, u32 id)
 {
 	int ret;
-	int newid;
-
-	do {
-		if (!idr_pre_get(idr, GFP_KERNEL)) {
-			return -ENOMEM;
-		}
-		spin_lock_irq(&rhp->lock);
-		ret = idr_get_new_above(idr, handle, id, &newid);
-		BUG_ON(newid != id);
-		spin_unlock_irq(&rhp->lock);
-	} while (ret == -EAGAIN);
-
-	return ret;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock_irq(&rhp->lock);
+
+	ret = idr_alloc(idr, handle, id, id + 1, GFP_NOWAIT);
+
+	spin_unlock_irq(&rhp->lock);
+	idr_preload_end();
+
+	BUG_ON(ret == -ENOSPC);
+	return ret < 0 ? ret : 0;
 }
 
 static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id)
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 4c07fc069766..7eec5e13fa8c 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -260,20 +260,21 @@ static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr,
 				 void *handle, u32 id, int lock)
 {
 	int ret;
-	int newid;
 
-	do {
-		if (!idr_pre_get(idr, lock ? GFP_KERNEL : GFP_ATOMIC))
-			return -ENOMEM;
-		if (lock)
-			spin_lock_irq(&rhp->lock);
-		ret = idr_get_new_above(idr, handle, id, &newid);
-		BUG_ON(!ret && newid != id);
-		if (lock)
-			spin_unlock_irq(&rhp->lock);
-	} while (ret == -EAGAIN);
-
-	return ret;
+	if (lock) {
+		idr_preload(GFP_KERNEL);
+		spin_lock_irq(&rhp->lock);
+	}
+
+	ret = idr_alloc(idr, handle, id, id + 1, GFP_ATOMIC);
+
+	if (lock) {
+		spin_unlock_irq(&rhp->lock);
+		idr_preload_end();
+	}
+
+	BUG_ON(ret == -ENOSPC);
+	return ret < 0 ? ret : 0;
 }
 
 static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 8f5290147e8a..212150c25ea0 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -128,7 +128,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
 	void *vpage;
 	u32 counter;
 	u64 rpage, cqx_fec, h_ret;
-	int ipz_rc, ret, i;
+	int ipz_rc, i;
 	unsigned long flags;
 
 	if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
@@ -163,32 +163,19 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
 	adapter_handle = shca->ipz_hca_handle;
 	param.eq_handle = shca->eq.ipz_eq_handle;
 
-	do {
-		if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) {
-			cq = ERR_PTR(-ENOMEM);
-			ehca_err(device, "Can't reserve idr nr. device=%p",
-				 device);
-			goto create_cq_exit1;
-		}
-
-		write_lock_irqsave(&ehca_cq_idr_lock, flags);
-		ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
-		write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-	} while (ret == -EAGAIN);
+	idr_preload(GFP_KERNEL);
+	write_lock_irqsave(&ehca_cq_idr_lock, flags);
+	my_cq->token = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT);
+	write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+	idr_preload_end();
 
-	if (ret) {
+	if (my_cq->token < 0) {
 		cq = ERR_PTR(-ENOMEM);
 		ehca_err(device, "Can't allocate new idr entry. device=%p",
 			 device);
 		goto create_cq_exit1;
 	}
 
-	if (my_cq->token > 0x1FFFFFF) {
-		cq = ERR_PTR(-ENOMEM);
-		ehca_err(device, "Invalid number of cq. device=%p", device);
-		goto create_cq_exit2;
-	}
-
 	/*
 	 * CQs maximum depth is 4GB-64, but we need additional 20 as buffer
 	 * for receiving errors CQEs.
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 149393915ae5..00d6861a6a18 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -636,30 +636,26 @@ static struct ehca_qp *internal_create_qp(
 		my_qp->send_cq =
 			container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
 
-	do {
-		if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) {
-			ret = -ENOMEM;
-			ehca_err(pd->device, "Can't reserve idr resources.");
-			goto create_qp_exit0;
-		}
+	idr_preload(GFP_KERNEL);
+	write_lock_irqsave(&ehca_qp_idr_lock, flags);
 
-		write_lock_irqsave(&ehca_qp_idr_lock, flags);
-		ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token);
-		write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-	} while (ret == -EAGAIN);
+	ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT);
+	if (ret >= 0)
+		my_qp->token = ret;
 
-	if (ret) {
-		ret = -ENOMEM;
-		ehca_err(pd->device, "Can't allocate new idr entry.");
+	write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+	idr_preload_end();
+	if (ret < 0) {
+		if (ret == -ENOSPC) {
+			ret = -EINVAL;
+			ehca_err(pd->device, "Invalid number of qp");
+		} else {
+			ret = -ENOMEM;
+			ehca_err(pd->device, "Can't allocate new idr entry.");
+		}
 		goto create_qp_exit0;
 	}
 
-	if (my_qp->token > 0x1FFFFFF) {
-		ret = -EINVAL;
-		ehca_err(pd->device, "Invalid number of qp");
-		goto create_qp_exit1;
-	}
-
 	if (has_srq)
 		parms.srq_token = my_qp->token;
 
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 7b371f545ece..bd0caedafe99 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -194,11 +194,6 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
 	struct ipath_devdata *dd;
 	int ret;
 
-	if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
-		dd = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
 	dd = vzalloc(sizeof(*dd));
 	if (!dd) {
 		dd = ERR_PTR(-ENOMEM);
@@ -206,9 +201,10 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
 	}
 	dd->ipath_unit = -1;
 
+	idr_preload(GFP_KERNEL);
 	spin_lock_irqsave(&ipath_devs_lock, flags);
 
-	ret = idr_get_new(&unit_table, dd, &dd->ipath_unit);
+	ret = idr_alloc(&unit_table, dd, 0, 0, GFP_NOWAIT);
 	if (ret < 0) {
 		printk(KERN_ERR IPATH_DRV_NAME
 		       ": Could not allocate unit ID: error %d\n", -ret);
@@ -216,6 +212,7 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
 		dd = ERR_PTR(ret);
 		goto bail_unlock;
 	}
+	dd->ipath_unit = ret;
 
 	dd->pcidev = pdev;
 	pci_set_drvdata(pdev, dd);
@@ -224,7 +221,7 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
 
 bail_unlock:
 	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
+	idr_preload_end();
 bail:
 	return dd;
 }
@@ -2503,11 +2500,6 @@ static int __init infinipath_init(void)
 	 * the PCI subsystem.
 	 */
 	idr_init(&unit_table);
-	if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
-		printk(KERN_ERR IPATH_DRV_NAME ": idr_pre_get() failed\n");
-		ret = -ENOMEM;
-		goto bail;
-	}
 
 	ret = pci_register_driver(&ipath_driver);
 	if (ret < 0) {
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index dbc99d41605c..e0d79b2395e4 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -203,7 +203,7 @@ static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
 static struct id_map_entry *
 id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
 {
-	int ret, id;
+	int ret;
 	static int next_id;
 	struct id_map_entry *ent;
 	struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
@@ -220,25 +220,23 @@ id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
 	ent->dev = to_mdev(ibdev);
 	INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout);
 
-	do {
-		spin_lock(&to_mdev(ibdev)->sriov.id_map_lock);
-		ret = idr_get_new_above(&sriov->pv_id_table, ent,
-					next_id, &id);
-		if (!ret) {
-			next_id = ((unsigned) id + 1) & MAX_IDR_MASK;
-			ent->pv_cm_id = (u32)id;
-			sl_id_map_add(ibdev, ent);
-		}
+	idr_preload(GFP_KERNEL);
+	spin_lock(&to_mdev(ibdev)->sriov.id_map_lock);
 
-		spin_unlock(&sriov->id_map_lock);
-	} while (ret == -EAGAIN && idr_pre_get(&sriov->pv_id_table, GFP_KERNEL));
-	/*the function idr_get_new_above can return -ENOSPC, so don't insert in that case.*/
-	if (!ret) {
-		spin_lock(&sriov->id_map_lock);
+	ret = idr_alloc(&sriov->pv_id_table, ent, next_id, 0, GFP_NOWAIT);
+	if (ret >= 0) {
+		next_id = max(ret + 1, 0);
+		ent->pv_cm_id = (u32)ret;
+		sl_id_map_add(ibdev, ent);
 		list_add_tail(&ent->list, &sriov->cm_list);
-		spin_unlock(&sriov->id_map_lock);
-		return ent;
 	}
+
+	spin_unlock(&sriov->id_map_lock);
+	idr_preload_end();
+
+	if (ret >= 0)
+		return ent;
+
 	/*error flow*/
 	kfree(ent);
 	mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index c4e0131f1b57..48928c8e7774 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -51,18 +51,6 @@ static DEFINE_IDR(ocrdma_dev_id);
 
 static union ib_gid ocrdma_zero_sgid;
 
-static int ocrdma_get_instance(void)
-{
-	int instance = 0;
-
-	/* Assign an unused number */
-	if (!idr_pre_get(&ocrdma_dev_id, GFP_KERNEL))
-		return -1;
-	if (idr_get_new(&ocrdma_dev_id, NULL, &instance))
-		return -1;
-	return instance;
-}
-
 void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid)
 {
 	u8 mac_addr[6];
@@ -416,7 +404,7 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
 		goto idr_err;
 
 	memcpy(&dev->nic_info, dev_info, sizeof(*dev_info));
-	dev->id = ocrdma_get_instance();
+	dev->id = idr_alloc(&ocrdma_dev_id, NULL, 0, 0, GFP_KERNEL);
 	if (dev->id < 0)
 		goto idr_err;
 
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index ddf066d9abb6..50e33aa0b4e3 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1060,22 +1060,23 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
 	struct qib_devdata *dd;
 	int ret;
 
-	if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) {
-		dd = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
 	dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
 	if (!dd) {
 		dd = ERR_PTR(-ENOMEM);
 		goto bail;
 	}
 
+	idr_preload(GFP_KERNEL);
 	spin_lock_irqsave(&qib_devs_lock, flags);
-	ret = idr_get_new(&qib_unit_table, dd, &dd->unit);
-	if (ret >= 0)
+
+	ret = idr_alloc(&qib_unit_table, dd, 0, 0, GFP_NOWAIT);
+	if (ret >= 0) {
+		dd->unit = ret;
 		list_add(&dd->list, &qib_dev_list);
+	}
+
 	spin_unlock_irqrestore(&qib_devs_lock, flags);
+	idr_preload_end();
 
 	if (ret < 0) {
 		qib_early_err(&pdev->dev,
@@ -1180,11 +1181,6 @@ static int __init qlogic_ib_init(void)
 	 * the PCI subsystem.
 	 */
 	idr_init(&qib_unit_table);
-	if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) {
-		pr_err("idr_pre_get() failed\n");
-		ret = -ENOMEM;
-		goto bail_cq_wq;
-	}
 
 	ret = pci_register_driver(&qib_driver);
 	if (ret < 0) {
@@ -1199,7 +1195,6 @@ static int __init qlogic_ib_init(void)
 
 bail_unit:
 	idr_destroy(&qib_unit_table);
-bail_cq_wq:
 	destroy_workqueue(qib_cq_wq);
 bail_dev:
 	qib_dev_cleanup();
diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c
index 0b4f54265f62..2e3334b8f82d 100644
--- a/drivers/input/misc/hp_sdc_rtc.c
+++ b/drivers/input/misc/hp_sdc_rtc.c
@@ -109,7 +109,9 @@ static int hp_sdc_rtc_do_read_bbrtc (struct rtc_time *rtctm)
 	
 	if (hp_sdc_enqueue_transaction(&t)) return -1;
 	
-	down_interruptible(&tsem);  /* Put ourselves to sleep for results. */
+	/* Put ourselves to sleep for results. */
+	if (WARN_ON(down_interruptible(&tsem)))
+		return -1;
 	
 	/* Check for nonpresence of BBRTC */
 	if (!((tseq[83] | tseq[90] | tseq[69] | tseq[76] |
@@ -176,11 +178,16 @@ static int64_t hp_sdc_rtc_read_i8042timer (uint8_t loadcmd, int numreg)
 	t.seq =			tseq;
 	t.act.semaphore =	&i8042tregs;
 
-	down_interruptible(&i8042tregs);  /* Sleep if output regs in use. */
+	/* Sleep if output regs in use. */
+	if (WARN_ON(down_interruptible(&i8042tregs)))
+		return -1;
 
 	if (hp_sdc_enqueue_transaction(&t)) return -1;
 	
-	down_interruptible(&i8042tregs);  /* Sleep until results come back. */
+	/* Sleep until results come back. */
+	if (WARN_ON(down_interruptible(&i8042tregs)))
+		return -1;
+
 	up(&i8042tregs);
 
 	return (tseq[5] | 
@@ -276,6 +283,7 @@ static inline int hp_sdc_rtc_read_ct(struct timeval *res) {
 }
 
 
+#if 0 /* not used yet */
 /* Set the i8042 real-time clock */
 static int hp_sdc_rtc_set_rt (struct timeval *setto)
 {
@@ -386,6 +394,7 @@ static int hp_sdc_rtc_set_i8042timer (struct timeval *setto, uint8_t setcmd)
 	}
 	return 0;
 }
+#endif
 
 static ssize_t hp_sdc_rtc_read(struct file *file, char __user *buf,
 			       size_t count, loff_t *ppos) {
diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig
index 6e9cc765e0dc..3ec5ef2dd443 100644
--- a/drivers/input/serio/Kconfig
+++ b/drivers/input/serio/Kconfig
@@ -22,7 +22,7 @@ config SERIO_I8042
 	tristate "i8042 PC Keyboard controller" if EXPERT || !X86
 	default y
 	depends on !PARISC && (!ARM || ARCH_SHARK || FOOTBRIDGE_HOST) && \
-		   (!SUPERH || SH_CAYMAN) && !M68K && !BLACKFIN
+		   (!SUPERH || SH_CAYMAN) && !M68K && !BLACKFIN && !S390
 	help
 	  i8042 is the chip over which the standard AT keyboard and PS/2
 	  mouse are connected to the computer. If you use these devices,
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index e65fbf2cdf71..98e3b87bdf1b 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -2,6 +2,8 @@ obj-$(CONFIG_IRQCHIP)			+= irqchip.o
 
 obj-$(CONFIG_ARCH_BCM2835)		+= irq-bcm2835.o
 obj-$(CONFIG_ARCH_EXYNOS)		+= exynos-combiner.o
+obj-$(CONFIG_METAG)			+= irq-metag-ext.o
+obj-$(CONFIG_METAG_PERFCOUNTER_IRQS)	+= irq-metag.o
 obj-$(CONFIG_ARCH_SUNXI)		+= irq-sunxi.o
 obj-$(CONFIG_ARCH_SPEAR3XX)		+= spear-shirq.o
 obj-$(CONFIG_ARM_GIC)			+= irq-gic.o
diff --git a/drivers/irqchip/irq-metag-ext.c b/drivers/irqchip/irq-metag-ext.c
new file mode 100644
index 000000000000..92c41ab4dbfd
--- /dev/null
+++ b/drivers/irqchip/irq-metag-ext.c
@@ -0,0 +1,868 @@
+/*
+ * Meta External interrupt code.
+ *
+ * Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ *
+ * External interrupts on Meta are configured at two-levels, in the CPU core and
+ * in the external trigger block. Interrupts from SoC peripherals are
+ * multiplexed onto a single Meta CPU "trigger" - traditionally it has always
+ * been trigger 2 (TR2). For info on how de-multiplexing happens check out
+ * meta_intc_irq_demux().
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqchip/metag-ext.h>
+#include <linux/irqdomain.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/irq.h>
+#include <asm/hwthread.h>
+
+#define HWSTAT_STRIDE 8
+#define HWVEC_BLK_STRIDE 0x1000
+
+/**
+ * struct meta_intc_priv - private meta external interrupt data
+ * @nr_banks:		Number of interrupt banks
+ * @domain:		IRQ domain for all banks of external IRQs
+ * @unmasked:		Record of unmasked IRQs
+ * @levels_altered:	Record of altered level bits
+ */
+struct meta_intc_priv {
+	unsigned int		nr_banks;
+	struct irq_domain	*domain;
+
+	unsigned long		unmasked[4];
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+	unsigned long		levels_altered[4];
+#endif
+};
+
+/* Private data for the one and only external interrupt controller */
+static struct meta_intc_priv meta_intc_priv;
+
+/**
+ * meta_intc_offset() - Get the offset into the bank of a hardware IRQ number
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Bit offset into the IRQ's bank registers
+ */
+static unsigned int meta_intc_offset(irq_hw_number_t hw)
+{
+	return hw & 0x1f;
+}
+
+/**
+ * meta_intc_bank() - Get the bank number of a hardware IRQ number
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Bank number indicating which register the IRQ's bits are
+ */
+static unsigned int meta_intc_bank(irq_hw_number_t hw)
+{
+	return hw >> 5;
+}
+
+/**
+ * meta_intc_stat_addr() - Get the address of a HWSTATEXT register
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Address of a HWSTATEXT register containing the status bit for
+ *		the specified hardware IRQ number
+ */
+static void __iomem *meta_intc_stat_addr(irq_hw_number_t hw)
+{
+	return (void __iomem *)(HWSTATEXT +
+				HWSTAT_STRIDE * meta_intc_bank(hw));
+}
+
+/**
+ * meta_intc_level_addr() - Get the address of a HWLEVELEXT register
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Address of a HWLEVELEXT register containing the sense bit for
+ *		the specified hardware IRQ number
+ */
+static void __iomem *meta_intc_level_addr(irq_hw_number_t hw)
+{
+	return (void __iomem *)(HWLEVELEXT +
+				HWSTAT_STRIDE * meta_intc_bank(hw));
+}
+
+/**
+ * meta_intc_mask_addr() - Get the address of a HWMASKEXT register
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Address of a HWMASKEXT register containing the mask bit for the
+ *		specified hardware IRQ number
+ */
+static void __iomem *meta_intc_mask_addr(irq_hw_number_t hw)
+{
+	return (void __iomem *)(HWMASKEXT +
+				HWSTAT_STRIDE * meta_intc_bank(hw));
+}
+
+/**
+ * meta_intc_vec_addr() - Get the vector address of a hardware interrupt
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Address of a HWVECEXT register controlling the core trigger to
+ *		vector the IRQ onto
+ */
+static inline void __iomem *meta_intc_vec_addr(irq_hw_number_t hw)
+{
+	return (void __iomem *)(HWVEC0EXT +
+				HWVEC_BLK_STRIDE * meta_intc_bank(hw) +
+				HWVECnEXT_STRIDE * meta_intc_offset(hw));
+}
+
+/**
+ * meta_intc_startup_irq() - set up an external irq
+ * @data:	data for the external irq to start up
+ *
+ * Multiplex interrupts for irq onto TR2. Clear any pending interrupts and
+ * unmask irq, both using the appropriate callbacks.
+ */
+static unsigned int meta_intc_startup_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+	int thread = hard_processor_id();
+
+	/* Perform any necessary acking. */
+	if (data->chip->irq_ack)
+		data->chip->irq_ack(data);
+
+	/* Wire up this interrupt to the core with HWVECxEXT. */
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+	/* Perform any necessary unmasking. */
+	data->chip->irq_unmask(data);
+
+	return 0;
+}
+
+/**
+ * meta_intc_shutdown_irq() - turn off an external irq
+ * @data:	data for the external irq to turn off
+ *
+ * Mask irq using the appropriate callback and stop muxing it onto TR2.
+ */
+static void meta_intc_shutdown_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+
+	/* Mask the IRQ */
+	data->chip->irq_mask(data);
+
+	/*
+	 * Disable the IRQ at the core by removing the interrupt from
+	 * the HW vector mapping.
+	 */
+	metag_out32(0, vec_addr);
+}
+
+/**
+ * meta_intc_ack_irq() - acknowledge an external irq
+ * @data:	data for the external irq to ack
+ *
+ * Clear down an edge interrupt in the status register.
+ */
+static void meta_intc_ack_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *stat_addr = meta_intc_stat_addr(hw);
+
+	/* Ack the int, if it is still 'on'.
+	 * NOTE - this only works for edge triggered interrupts.
+	 */
+	if (metag_in32(stat_addr) & bit)
+		metag_out32(bit, stat_addr);
+}
+
+/**
+ * record_irq_is_masked() - record the IRQ masked so it doesn't get handled
+ * @data:	data for the external irq to record
+ *
+ * This should get called whenever an external IRQ is masked (by whichever
+ * callback is used). It records the IRQ masked so that it doesn't get handled
+ * if it still shows up in the status register.
+ */
+static void record_irq_is_masked(struct irq_data *data)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	irq_hw_number_t hw = data->hwirq;
+
+	clear_bit(meta_intc_offset(hw), &priv->unmasked[meta_intc_bank(hw)]);
+}
+
+/**
+ * record_irq_is_unmasked() - record the IRQ unmasked so it can be handled
+ * @data:	data for the external irq to record
+ *
+ * This should get called whenever an external IRQ is unmasked (by whichever
+ * callback is used). It records the IRQ unmasked so that it gets handled if it
+ * shows up in the status register.
+ */
+static void record_irq_is_unmasked(struct irq_data *data)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	irq_hw_number_t hw = data->hwirq;
+
+	set_bit(meta_intc_offset(hw), &priv->unmasked[meta_intc_bank(hw)]);
+}
+
+/*
+ * For use by wrapper IRQ drivers
+ */
+
+/**
+ * meta_intc_mask_irq_simple() - minimal mask used by wrapper IRQ drivers
+ * @data:	data for the external irq being masked
+ *
+ * This should be called by any wrapper IRQ driver mask functions. it doesn't do
+ * any masking but records the IRQ as masked so that the core code knows the
+ * mask has taken place. It is the callers responsibility to ensure that the IRQ
+ * won't trigger an interrupt to the core.
+ */
+void meta_intc_mask_irq_simple(struct irq_data *data)
+{
+	record_irq_is_masked(data);
+}
+
+/**
+ * meta_intc_unmask_irq_simple() - minimal unmask used by wrapper IRQ drivers
+ * @data:	data for the external irq being unmasked
+ *
+ * This should be called by any wrapper IRQ driver unmask functions. it doesn't
+ * do any unmasking but records the IRQ as unmasked so that the core code knows
+ * the unmask has taken place. It is the callers responsibility to ensure that
+ * the IRQ can now trigger an interrupt to the core.
+ */
+void meta_intc_unmask_irq_simple(struct irq_data *data)
+{
+	record_irq_is_unmasked(data);
+}
+
+
+/**
+ * meta_intc_mask_irq() - mask an external irq using HWMASKEXT
+ * @data:	data for the external irq to mask
+ *
+ * This is a default implementation of a mask function which makes use of the
+ * HWMASKEXT registers available in newer versions.
+ *
+ * Earlier versions without these registers should use SoC level IRQ masking
+ * which call the meta_intc_*_simple() functions above, or if that isn't
+ * available should use the fallback meta_intc_*_nomask() functions below.
+ */
+static void meta_intc_mask_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *mask_addr = meta_intc_mask_addr(hw);
+	unsigned long flags;
+
+	record_irq_is_masked(data);
+
+	/* update the interrupt mask */
+	__global_lock2(flags);
+	metag_out32(metag_in32(mask_addr) & ~bit, mask_addr);
+	__global_unlock2(flags);
+}
+
+/**
+ * meta_intc_unmask_irq() - unmask an external irq using HWMASKEXT
+ * @data:	data for the external irq to unmask
+ *
+ * This is a default implementation of an unmask function which makes use of the
+ * HWMASKEXT registers available on new versions. It should be paired with
+ * meta_intc_mask_irq() above.
+ */
+static void meta_intc_unmask_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *mask_addr = meta_intc_mask_addr(hw);
+	unsigned long flags;
+
+	record_irq_is_unmasked(data);
+
+	/* update the interrupt mask */
+	__global_lock2(flags);
+	metag_out32(metag_in32(mask_addr) | bit, mask_addr);
+	__global_unlock2(flags);
+}
+
+/**
+ * meta_intc_mask_irq_nomask() - mask an external irq by unvectoring
+ * @data:	data for the external irq to mask
+ *
+ * This is the version of the mask function for older versions which don't have
+ * HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the IRQ is
+ * unvectored from the core and retriggered if necessary later.
+ */
+static void meta_intc_mask_irq_nomask(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+
+	record_irq_is_masked(data);
+
+	/* there is no interrupt mask, so unvector the interrupt */
+	metag_out32(0, vec_addr);
+}
+
+/**
+ * meta_intc_unmask_edge_irq_nomask() - unmask an edge irq by revectoring
+ * @data:	data for the external irq to unmask
+ *
+ * This is the version of the unmask function for older versions which don't
+ * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the
+ * IRQ is revectored back to the core and retriggered if necessary.
+ *
+ * The retriggering done by this function is specific to edge interrupts.
+ */
+static void meta_intc_unmask_edge_irq_nomask(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *stat_addr = meta_intc_stat_addr(hw);
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+	unsigned int thread = hard_processor_id();
+
+	record_irq_is_unmasked(data);
+
+	/* there is no interrupt mask, so revector the interrupt */
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+	/*
+	 * Re-trigger interrupt
+	 *
+	 * Writing a 1 toggles, and a 0->1 transition triggers. We only
+	 * retrigger if the status bit is already set, which means we
+	 * need to clear it first. Retriggering is fundamentally racy
+	 * because if the interrupt fires again after we clear it we
+	 * could end up clearing it again and the interrupt handler
+	 * thinking it hasn't fired. Therefore we need to keep trying to
+	 * retrigger until the bit is set.
+	 */
+	if (metag_in32(stat_addr) & bit) {
+		metag_out32(bit, stat_addr);
+		while (!(metag_in32(stat_addr) & bit))
+			metag_out32(bit, stat_addr);
+	}
+}
+
+/**
+ * meta_intc_unmask_level_irq_nomask() - unmask a level irq by revectoring
+ * @data:	data for the external irq to unmask
+ *
+ * This is the version of the unmask function for older versions which don't
+ * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the
+ * IRQ is revectored back to the core and retriggered if necessary.
+ *
+ * The retriggering done by this function is specific to level interrupts.
+ */
+static void meta_intc_unmask_level_irq_nomask(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *stat_addr = meta_intc_stat_addr(hw);
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+	unsigned int thread = hard_processor_id();
+
+	record_irq_is_unmasked(data);
+
+	/* there is no interrupt mask, so revector the interrupt */
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+	/* Re-trigger interrupt */
+	/* Writing a 1 triggers interrupt */
+	if (metag_in32(stat_addr) & bit)
+		metag_out32(bit, stat_addr);
+}
+
+/**
+ * meta_intc_irq_set_type() - set the type of an external irq
+ * @data:	data for the external irq to set the type of
+ * @flow_type:	new irq flow type
+ *
+ * Set the flow type of an external interrupt. This updates the irq chip and irq
+ * handler depending on whether the irq is edge or level sensitive (the polarity
+ * is ignored), and also sets up the bit in HWLEVELEXT so the hardware knows
+ * when to trigger.
+ */
+static int meta_intc_irq_set_type(struct irq_data *data, unsigned int flow_type)
+{
+#ifdef CONFIG_METAG_SUSPEND_MEM
+	struct meta_intc_priv *priv = &meta_intc_priv;
+#endif
+	unsigned int irq = data->irq;
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *level_addr = meta_intc_level_addr(hw);
+	unsigned long flags;
+	unsigned int level;
+
+	/* update the chip/handler */
+	if (flow_type & IRQ_TYPE_LEVEL_MASK)
+		__irq_set_chip_handler_name_locked(irq, &meta_intc_level_chip,
+						   handle_level_irq, NULL);
+	else
+		__irq_set_chip_handler_name_locked(irq, &meta_intc_edge_chip,
+						   handle_edge_irq, NULL);
+
+	/* and clear/set the bit in HWLEVELEXT */
+	__global_lock2(flags);
+	level = metag_in32(level_addr);
+	if (flow_type & IRQ_TYPE_LEVEL_MASK)
+		level |= bit;
+	else
+		level &= ~bit;
+	metag_out32(level, level_addr);
+#ifdef CONFIG_METAG_SUSPEND_MEM
+	priv->levels_altered[meta_intc_bank(hw)] |= bit;
+#endif
+	__global_unlock2(flags);
+
+	return 0;
+}
+
+/**
+ * meta_intc_irq_demux() - external irq de-multiplexer
+ * @irq:	the virtual interrupt number
+ * @desc:	the interrupt description structure for this irq
+ *
+ * The cpu receives an interrupt on TR2 when a SoC interrupt has occurred. It is
+ * this function's job to demux this irq and figure out exactly which external
+ * irq needs servicing.
+ *
+ * Whilst using TR2 to detect external interrupts is a software convention it is
+ * (hopefully) unlikely to change.
+ */
+static void meta_intc_irq_demux(unsigned int irq, struct irq_desc *desc)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	irq_hw_number_t hw;
+	unsigned int bank, irq_no, status;
+	void __iomem *stat_addr = meta_intc_stat_addr(0);
+
+	/*
+	 * Locate which interrupt has caused our handler to run.
+	 */
+	for (bank = 0; bank < priv->nr_banks; ++bank) {
+		/* Which interrupts are currently pending in this bank? */
+recalculate:
+		status = metag_in32(stat_addr) & priv->unmasked[bank];
+
+		for (hw = bank*32; status; status >>= 1, ++hw) {
+			if (status & 0x1) {
+				/*
+				 * Map the hardware IRQ number to a virtual
+				 * Linux IRQ number.
+				 */
+				irq_no = irq_linear_revmap(priv->domain, hw);
+
+				/*
+				 * Only fire off external interrupts that are
+				 * registered to be handled by the kernel.
+				 * Other external interrupts are probably being
+				 * handled by other Meta hardware threads.
+				 */
+				generic_handle_irq(irq_no);
+
+				/*
+				 * The handler may have re-enabled interrupts
+				 * which could have caused a nested invocation
+				 * of this code and make the copy of the
+				 * status register we are using invalid.
+				 */
+				goto recalculate;
+			}
+		}
+		stat_addr += HWSTAT_STRIDE;
+	}
+}
+
+#ifdef CONFIG_SMP
+/**
+ * meta_intc_set_affinity() - set the affinity for an interrupt
+ * @data:	data for the external irq to set the affinity of
+ * @cpumask:	cpu mask representing cpus which can handle the interrupt
+ * @force:	whether to force (ignored)
+ *
+ * Revector the specified external irq onto a specific cpu's TR2 trigger, so
+ * that that cpu tends to be the one who handles it.
+ */
+static int meta_intc_set_affinity(struct irq_data *data,
+				  const struct cpumask *cpumask, bool force)
+{
+	irq_hw_number_t hw = data->hwirq;
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+	unsigned int cpu, thread;
+
+	/*
+	 * Wire up this interrupt from HWVECxEXT to the Meta core.
+	 *
+	 * Note that we can't wire up HWVECxEXT to interrupt more than
+	 * one cpu (the interrupt code doesn't support it), so we just
+	 * pick the first cpu we find in 'cpumask'.
+	 */
+	cpu = cpumask_any(cpumask);
+	thread = cpu_2_hwthread_id[cpu];
+
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+	return 0;
+}
+#else
+#define meta_intc_set_affinity	NULL
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+#define META_INTC_CHIP_FLAGS	(IRQCHIP_MASK_ON_SUSPEND \
+				| IRQCHIP_SKIP_SET_WAKE)
+#else
+#define META_INTC_CHIP_FLAGS	0
+#endif
+
+/* public edge/level irq chips which SoCs can override */
+
+struct irq_chip meta_intc_edge_chip = {
+	.irq_startup		= meta_intc_startup_irq,
+	.irq_shutdown		= meta_intc_shutdown_irq,
+	.irq_ack		= meta_intc_ack_irq,
+	.irq_mask		= meta_intc_mask_irq,
+	.irq_unmask		= meta_intc_unmask_irq,
+	.irq_set_type		= meta_intc_irq_set_type,
+	.irq_set_affinity	= meta_intc_set_affinity,
+	.flags			= META_INTC_CHIP_FLAGS,
+};
+
+struct irq_chip meta_intc_level_chip = {
+	.irq_startup		= meta_intc_startup_irq,
+	.irq_shutdown		= meta_intc_shutdown_irq,
+	.irq_set_type		= meta_intc_irq_set_type,
+	.irq_mask		= meta_intc_mask_irq,
+	.irq_unmask		= meta_intc_unmask_irq,
+	.irq_set_affinity	= meta_intc_set_affinity,
+	.flags			= META_INTC_CHIP_FLAGS,
+};
+
+/**
+ * meta_intc_map() - map an external irq
+ * @d:		irq domain of external trigger block
+ * @irq:	virtual irq number
+ * @hw:		hardware irq number within external trigger block
+ *
+ * This sets up a virtual irq for a specified hardware interrupt. The irq chip
+ * and handler is configured, using the HWLEVELEXT registers to determine
+ * edge/level flow type. These registers will have been set when the irq type is
+ * set (or set to a default at init time).
+ */
+static int meta_intc_map(struct irq_domain *d, unsigned int irq,
+			 irq_hw_number_t hw)
+{
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *level_addr = meta_intc_level_addr(hw);
+
+	/* Go by the current sense in the HWLEVELEXT register */
+	if (metag_in32(level_addr) & bit)
+		irq_set_chip_and_handler(irq, &meta_intc_level_chip,
+					 handle_level_irq);
+	else
+		irq_set_chip_and_handler(irq, &meta_intc_edge_chip,
+					 handle_edge_irq);
+	return 0;
+}
+
+static const struct irq_domain_ops meta_intc_domain_ops = {
+	.map = meta_intc_map,
+	.xlate = irq_domain_xlate_twocell,
+};
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+
+/**
+ * struct meta_intc_context - suspend context
+ * @levels:	State of HWLEVELEXT registers
+ * @masks:	State of HWMASKEXT registers
+ * @vectors:	State of HWVECEXT registers
+ * @txvecint:	State of TxVECINT registers
+ *
+ * This structure stores the IRQ state across suspend.
+ */
+struct meta_intc_context {
+	u32 levels[4];
+	u32 masks[4];
+	u8 vectors[4*32];
+
+	u8 txvecint[4][4];
+};
+
+/* suspend context */
+static struct meta_intc_context *meta_intc_context;
+
+/**
+ * meta_intc_suspend() - store irq state
+ *
+ * To avoid interfering with other threads we only save the IRQ state of IRQs in
+ * use by Linux.
+ */
+static int meta_intc_suspend(void)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	int i, j;
+	irq_hw_number_t hw;
+	unsigned int bank;
+	unsigned long flags;
+	struct meta_intc_context *context;
+	void __iomem *level_addr, *mask_addr, *vec_addr;
+	u32 mask, bit;
+
+	context = kzalloc(sizeof(*context), GFP_ATOMIC);
+	if (!context)
+		return -ENOMEM;
+
+	hw = 0;
+	level_addr = meta_intc_level_addr(0);
+	mask_addr = meta_intc_mask_addr(0);
+	for (bank = 0; bank < priv->nr_banks; ++bank) {
+		vec_addr = meta_intc_vec_addr(hw);
+
+		/* create mask of interrupts in use */
+		mask = 0;
+		for (bit = 1; bit; bit <<= 1) {
+			i = irq_linear_revmap(priv->domain, hw);
+			/* save mapped irqs which are enabled or have actions */
+			if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) ||
+				  irq_has_action(i))) {
+				mask |= bit;
+
+				/* save trigger vector */
+				context->vectors[hw] = metag_in32(vec_addr);
+			}
+
+			++hw;
+			vec_addr += HWVECnEXT_STRIDE;
+		}
+
+		/* save level state if any IRQ levels altered */
+		if (priv->levels_altered[bank])
+			context->levels[bank] = metag_in32(level_addr);
+		/* save mask state if any IRQs in use */
+		if (mask)
+			context->masks[bank] = metag_in32(mask_addr);
+
+		level_addr += HWSTAT_STRIDE;
+		mask_addr += HWSTAT_STRIDE;
+	}
+
+	/* save trigger matrixing */
+	__global_lock2(flags);
+	for (i = 0; i < 4; ++i)
+		for (j = 0; j < 4; ++j)
+			context->txvecint[i][j] = metag_in32(T0VECINT_BHALT +
+							     TnVECINT_STRIDE*i +
+							     8*j);
+	__global_unlock2(flags);
+
+	meta_intc_context = context;
+	return 0;
+}
+
+/**
+ * meta_intc_resume() - restore saved irq state
+ *
+ * Restore the saved IRQ state and drop it.
+ */
+static void meta_intc_resume(void)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	int i, j;
+	irq_hw_number_t hw;
+	unsigned int bank;
+	unsigned long flags;
+	struct meta_intc_context *context = meta_intc_context;
+	void __iomem *level_addr, *mask_addr, *vec_addr;
+	u32 mask, bit, tmp;
+
+	meta_intc_context = NULL;
+
+	hw = 0;
+	level_addr = meta_intc_level_addr(0);
+	mask_addr = meta_intc_mask_addr(0);
+	for (bank = 0; bank < priv->nr_banks; ++bank) {
+		vec_addr = meta_intc_vec_addr(hw);
+
+		/* create mask of interrupts in use */
+		mask = 0;
+		for (bit = 1; bit; bit <<= 1) {
+			i = irq_linear_revmap(priv->domain, hw);
+			/* restore mapped irqs, enabled or with actions */
+			if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) ||
+				  irq_has_action(i))) {
+				mask |= bit;
+
+				/* restore trigger vector */
+				metag_out32(context->vectors[hw], vec_addr);
+			}
+
+			++hw;
+			vec_addr += HWVECnEXT_STRIDE;
+		}
+
+		if (mask) {
+			/* restore mask state */
+			__global_lock2(flags);
+			tmp = metag_in32(mask_addr);
+			tmp = (tmp & ~mask) | (context->masks[bank] & mask);
+			metag_out32(tmp, mask_addr);
+			__global_unlock2(flags);
+		}
+
+		mask = priv->levels_altered[bank];
+		if (mask) {
+			/* restore level state */
+			__global_lock2(flags);
+			tmp = metag_in32(level_addr);
+			tmp = (tmp & ~mask) | (context->levels[bank] & mask);
+			metag_out32(tmp, level_addr);
+			__global_unlock2(flags);
+		}
+
+		level_addr += HWSTAT_STRIDE;
+		mask_addr += HWSTAT_STRIDE;
+	}
+
+	/* restore trigger matrixing */
+	__global_lock2(flags);
+	for (i = 0; i < 4; ++i) {
+		for (j = 0; j < 4; ++j) {
+			metag_out32(context->txvecint[i][j],
+				    T0VECINT_BHALT +
+				    TnVECINT_STRIDE*i +
+				    8*j);
+		}
+	}
+	__global_unlock2(flags);
+
+	kfree(context);
+}
+
+static struct syscore_ops meta_intc_syscore_ops = {
+	.suspend = meta_intc_suspend,
+	.resume = meta_intc_resume,
+};
+
+static void __init meta_intc_init_syscore_ops(struct meta_intc_priv *priv)
+{
+	register_syscore_ops(&meta_intc_syscore_ops);
+}
+#else
+#define meta_intc_init_syscore_ops(priv) do {} while (0)
+#endif
+
+/**
+ * meta_intc_init_cpu() - register with a Meta cpu
+ * @priv:	private interrupt controller data
+ * @cpu:	the CPU to register on
+ *
+ * Configure @cpu's TR2 irq so that we can demux external irqs.
+ */
+static void __init meta_intc_init_cpu(struct meta_intc_priv *priv, int cpu)
+{
+	unsigned int thread = cpu_2_hwthread_id[cpu];
+	unsigned int signum = TBID_SIGNUM_TR2(thread);
+	int irq = tbisig_map(signum);
+
+	/* Register the multiplexed IRQ handler */
+	irq_set_chained_handler(irq, meta_intc_irq_demux);
+	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
+}
+
+/**
+ * meta_intc_no_mask() - indicate lack of HWMASKEXT registers
+ *
+ * Called from SoC code (or init code below) to dynamically indicate the lack of
+ * HWMASKEXT registers (for example depending on some SoC revision register).
+ * This alters the irq mask and unmask callbacks to use the fallback
+ * unvectoring/retriggering technique instead of using HWMASKEXT registers.
+ */
+void __init meta_intc_no_mask(void)
+{
+	meta_intc_edge_chip.irq_mask	= meta_intc_mask_irq_nomask;
+	meta_intc_edge_chip.irq_unmask	= meta_intc_unmask_edge_irq_nomask;
+	meta_intc_level_chip.irq_mask	= meta_intc_mask_irq_nomask;
+	meta_intc_level_chip.irq_unmask	= meta_intc_unmask_level_irq_nomask;
+}
+
+/**
+ * init_external_IRQ() - initialise the external irq controller
+ *
+ * Set up the external irq controller using device tree properties. This is
+ * called from init_IRQ().
+ */
+int __init init_external_IRQ(void)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	struct device_node *node;
+	int ret, cpu;
+	u32 val;
+	bool no_masks = false;
+
+	node = of_find_compatible_node(NULL, NULL, "img,meta-intc");
+	if (!node)
+		return -ENOENT;
+
+	/* Get number of banks */
+	ret = of_property_read_u32(node, "num-banks", &val);
+	if (ret) {
+		pr_err("meta-intc: No num-banks property found\n");
+		return ret;
+	}
+	if (val < 1 || val > 4) {
+		pr_err("meta-intc: num-banks (%u) out of range\n", val);
+		return -EINVAL;
+	}
+	priv->nr_banks = val;
+
+	/* Are any mask registers present? */
+	if (of_get_property(node, "no-mask", NULL))
+		no_masks = true;
+
+	/* No HWMASKEXT registers present? */
+	if (no_masks)
+		meta_intc_no_mask();
+
+	/* Set up an IRQ domain */
+	/*
+	 * This is a legacy IRQ domain for now until all the platform setup code
+	 * has been converted to devicetree.
+	 */
+	priv->domain = irq_domain_add_linear(node, priv->nr_banks*32,
+					     &meta_intc_domain_ops, priv);
+	if (unlikely(!priv->domain)) {
+		pr_err("meta-intc: cannot add IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	/* Setup TR2 for all cpus. */
+	for_each_possible_cpu(cpu)
+		meta_intc_init_cpu(priv, cpu);
+
+	/* Set up system suspend/resume callbacks */
+	meta_intc_init_syscore_ops(priv);
+
+	pr_info("meta-intc: External IRQ controller initialised (%u IRQs)\n",
+		priv->nr_banks*32);
+
+	return 0;
+}
diff --git a/drivers/irqchip/irq-metag.c b/drivers/irqchip/irq-metag.c
new file mode 100644
index 000000000000..8e94d7a3b20d
--- /dev/null
+++ b/drivers/irqchip/irq-metag.c
@@ -0,0 +1,343 @@
+/*
+ * Meta internal (HWSTATMETA) interrupt code.
+ *
+ * Copyright (C) 2011-2012 Imagination Technologies Ltd.
+ *
+ * This code is based on the code in SoC/common/irq.c and SoC/comet/irq.c
+ * The code base could be generalised/merged as a lot of the functionality is
+ * similar. Until this is done, we try to keep the code simple here.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irqdomain.h>
+
+#include <asm/irq.h>
+#include <asm/hwthread.h>
+
+#define PERF0VECINT		0x04820580
+#define PERF1VECINT		0x04820588
+#define PERF0TRIG_OFFSET	16
+#define PERF1TRIG_OFFSET	17
+
+/**
+ * struct metag_internal_irq_priv - private meta internal interrupt data
+ * @domain:		IRQ domain for all internal Meta IRQs (HWSTATMETA)
+ * @unmasked:		Record of unmasked IRQs
+ */
+struct metag_internal_irq_priv {
+	struct irq_domain	*domain;
+
+	unsigned long		unmasked;
+};
+
+/* Private data for the one and only internal interrupt controller */
+static struct metag_internal_irq_priv metag_internal_irq_priv;
+
+static unsigned int metag_internal_irq_startup(struct irq_data *data);
+static void metag_internal_irq_shutdown(struct irq_data *data);
+static void metag_internal_irq_ack(struct irq_data *data);
+static void metag_internal_irq_mask(struct irq_data *data);
+static void metag_internal_irq_unmask(struct irq_data *data);
+#ifdef CONFIG_SMP
+static int metag_internal_irq_set_affinity(struct irq_data *data,
+			const struct cpumask *cpumask, bool force);
+#endif
+
+static struct irq_chip internal_irq_edge_chip = {
+	.name = "HWSTATMETA-IRQ",
+	.irq_startup = metag_internal_irq_startup,
+	.irq_shutdown = metag_internal_irq_shutdown,
+	.irq_ack = metag_internal_irq_ack,
+	.irq_mask = metag_internal_irq_mask,
+	.irq_unmask = metag_internal_irq_unmask,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = metag_internal_irq_set_affinity,
+#endif
+};
+
+/*
+ *	metag_hwvec_addr - get the address of *VECINT regs of irq
+ *
+ *	This function is a table of supported triggers on HWSTATMETA
+ *	Could do with a structure, but better keep it simple. Changes
+ *	in this code should be rare.
+ */
+static inline void __iomem *metag_hwvec_addr(irq_hw_number_t hw)
+{
+	void __iomem *addr;
+
+	switch (hw) {
+	case PERF0TRIG_OFFSET:
+		addr = (void __iomem *)PERF0VECINT;
+		break;
+	case PERF1TRIG_OFFSET:
+		addr = (void __iomem *)PERF1VECINT;
+		break;
+	default:
+		addr = NULL;
+		break;
+	}
+	return addr;
+}
+
+/*
+ *	metag_internal_startup - setup an internal irq
+ *	@irq:	the irq to startup
+ *
+ *	Multiplex interrupts for @irq onto TR1. Clear any pending
+ *	interrupts.
+ */
+static unsigned int metag_internal_irq_startup(struct irq_data *data)
+{
+	/* Clear (toggle) the bit in HWSTATMETA for our interrupt. */
+	metag_internal_irq_ack(data);
+
+	/* Enable the interrupt by unmasking it */
+	metag_internal_irq_unmask(data);
+
+	return 0;
+}
+
+/*
+ *	metag_internal_irq_shutdown - turn off the irq
+ *	@irq:	the irq number to turn off
+ *
+ *	Mask @irq and clear any pending interrupts.
+ *	Stop muxing @irq onto TR1.
+ */
+static void metag_internal_irq_shutdown(struct irq_data *data)
+{
+	/* Disable the IRQ at the core by masking it. */
+	metag_internal_irq_mask(data);
+
+	/* Clear (toggle) the bit in HWSTATMETA for our interrupt. */
+	metag_internal_irq_ack(data);
+}
+
+/*
+ *	metag_internal_irq_ack - acknowledge irq
+ *	@irq:	the irq to ack
+ */
+static void metag_internal_irq_ack(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << hw;
+
+	if (metag_in32(HWSTATMETA) & bit)
+		metag_out32(bit, HWSTATMETA);
+}
+
+/**
+ * metag_internal_irq_mask() - mask an internal irq by unvectoring
+ * @data:	data for the internal irq to mask
+ *
+ * HWSTATMETA has no mask register. Instead the IRQ is unvectored from the core
+ * and retriggered if necessary later.
+ */
+static void metag_internal_irq_mask(struct irq_data *data)
+{
+	struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+	irq_hw_number_t hw = data->hwirq;
+	void __iomem *vec_addr = metag_hwvec_addr(hw);
+
+	clear_bit(hw, &priv->unmasked);
+
+	/* there is no interrupt mask, so unvector the interrupt */
+	metag_out32(0, vec_addr);
+}
+
+/**
+ * meta_intc_unmask_edge_irq_nomask() - unmask an edge irq by revectoring
+ * @data:	data for the internal irq to unmask
+ *
+ * HWSTATMETA has no mask register. Instead the IRQ is revectored back to the
+ * core and retriggered if necessary.
+ */
+static void metag_internal_irq_unmask(struct irq_data *data)
+{
+	struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << hw;
+	void __iomem *vec_addr = metag_hwvec_addr(hw);
+	unsigned int thread = hard_processor_id();
+
+	set_bit(hw, &priv->unmasked);
+
+	/* there is no interrupt mask, so revector the interrupt */
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)), vec_addr);
+
+	/*
+	 * Re-trigger interrupt
+	 *
+	 * Writing a 1 toggles, and a 0->1 transition triggers. We only
+	 * retrigger if the status bit is already set, which means we
+	 * need to clear it first. Retriggering is fundamentally racy
+	 * because if the interrupt fires again after we clear it we
+	 * could end up clearing it again and the interrupt handler
+	 * thinking it hasn't fired. Therefore we need to keep trying to
+	 * retrigger until the bit is set.
+	 */
+	if (metag_in32(HWSTATMETA) & bit) {
+		metag_out32(bit, HWSTATMETA);
+		while (!(metag_in32(HWSTATMETA) & bit))
+			metag_out32(bit, HWSTATMETA);
+	}
+}
+
+#ifdef CONFIG_SMP
+/*
+ *	metag_internal_irq_set_affinity - set the affinity for an interrupt
+ */
+static int metag_internal_irq_set_affinity(struct irq_data *data,
+			const struct cpumask *cpumask, bool force)
+{
+	unsigned int cpu, thread;
+	irq_hw_number_t hw = data->hwirq;
+	/*
+	 * Wire up this interrupt from *VECINT to the Meta core.
+	 *
+	 * Note that we can't wire up *VECINT to interrupt more than
+	 * one cpu (the interrupt code doesn't support it), so we just
+	 * pick the first cpu we find in 'cpumask'.
+	 */
+	cpu = cpumask_any(cpumask);
+	thread = cpu_2_hwthread_id[cpu];
+
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)),
+		    metag_hwvec_addr(hw));
+
+	return 0;
+}
+#endif
+
+/*
+ *	metag_internal_irq_demux - irq de-multiplexer
+ *	@irq:	the interrupt number
+ *	@desc:	the interrupt description structure for this irq
+ *
+ *	The cpu receives an interrupt on TR1 when an interrupt has
+ *	occurred. It is this function's job to demux this irq and
+ *	figure out exactly which trigger needs servicing.
+ */
+static void metag_internal_irq_demux(unsigned int irq, struct irq_desc *desc)
+{
+	struct metag_internal_irq_priv *priv = irq_desc_get_handler_data(desc);
+	irq_hw_number_t hw;
+	unsigned int irq_no;
+	u32 status;
+
+recalculate:
+	status = metag_in32(HWSTATMETA) & priv->unmasked;
+
+	for (hw = 0; status != 0; status >>= 1, ++hw) {
+		if (status & 0x1) {
+			/*
+			 * Map the hardware IRQ number to a virtual Linux IRQ
+			 * number.
+			 */
+			irq_no = irq_linear_revmap(priv->domain, hw);
+
+			/*
+			 * Only fire off interrupts that are
+			 * registered to be handled by the kernel.
+			 * Other interrupts are probably being
+			 * handled by other Meta hardware threads.
+			 */
+			generic_handle_irq(irq_no);
+
+			/*
+			 * The handler may have re-enabled interrupts
+			 * which could have caused a nested invocation
+			 * of this code and make the copy of the
+			 * status register we are using invalid.
+			 */
+			goto recalculate;
+		}
+	}
+}
+
+/**
+ * internal_irq_map() - Map an internal meta IRQ to a virtual IRQ number.
+ * @hw:		Number of the internal IRQ. Must be in range.
+ *
+ * Returns:	The virtual IRQ number of the Meta internal IRQ specified by
+ *		@hw.
+ */
+int internal_irq_map(unsigned int hw)
+{
+	struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+	if (!priv->domain)
+		return -ENODEV;
+	return irq_create_mapping(priv->domain, hw);
+}
+
+/**
+ *	metag_internal_irq_init_cpu - regsister with the Meta cpu
+ *	@cpu:	the CPU to register on
+ *
+ *	Configure @cpu's TR1 irq so that we can demux irqs.
+ */
+static void metag_internal_irq_init_cpu(struct metag_internal_irq_priv *priv,
+					int cpu)
+{
+	unsigned int thread = cpu_2_hwthread_id[cpu];
+	unsigned int signum = TBID_SIGNUM_TR1(thread);
+	int irq = tbisig_map(signum);
+
+	/* Register the multiplexed IRQ handler */
+	irq_set_handler_data(irq, priv);
+	irq_set_chained_handler(irq, metag_internal_irq_demux);
+	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
+}
+
+/**
+ * metag_internal_intc_map() - map an internal irq
+ * @d:		irq domain of internal trigger block
+ * @irq:	virtual irq number
+ * @hw:		hardware irq number within internal trigger block
+ *
+ * This sets up a virtual irq for a specified hardware interrupt. The irq chip
+ * and handler is configured.
+ */
+static int metag_internal_intc_map(struct irq_domain *d, unsigned int irq,
+				   irq_hw_number_t hw)
+{
+	/* only register interrupt if it is mapped */
+	if (!metag_hwvec_addr(hw))
+		return -EINVAL;
+
+	irq_set_chip_and_handler(irq, &internal_irq_edge_chip,
+				 handle_edge_irq);
+	return 0;
+}
+
+static const struct irq_domain_ops metag_internal_intc_domain_ops = {
+	.map	= metag_internal_intc_map,
+};
+
+/**
+ *	metag_internal_irq_register - register internal IRQs
+ *
+ *	Register the irq chip and handler function for all internal IRQs
+ */
+int __init init_internal_IRQ(void)
+{
+	struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+	unsigned int cpu;
+
+	/* Set up an IRQ domain */
+	priv->domain = irq_domain_add_linear(NULL, 32,
+					     &metag_internal_intc_domain_ops,
+					     priv);
+	if (unlikely(!priv->domain)) {
+		pr_err("meta-internal-intc: cannot add IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	/* Setup TR1 for all cpus. */
+	for_each_possible_cpu(cpu)
+		metag_internal_irq_init_cpu(priv, cpu);
+
+	return 0;
+};
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index abe2d699b6f3..8b07f83d48ad 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -483,7 +483,6 @@ data_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 {
 	struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr;
 	struct sock *sk = sock->sk;
-	struct hlist_node *node;
 	struct sock *csk;
 	int err = 0;
 
@@ -508,7 +507,7 @@ data_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 
 	if (sk->sk_protocol < ISDN_P_B_START) {
 		read_lock_bh(&data_sockets.lock);
-		sk_for_each(csk, node, &data_sockets.head) {
+		sk_for_each(csk, &data_sockets.head) {
 			if (sk == csk)
 				continue;
 			if (_pms(csk)->dev != _pms(sk)->dev)
diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c
index deda591f70b9..9cb4b621fbc3 100644
--- a/drivers/isdn/mISDN/stack.c
+++ b/drivers/isdn/mISDN/stack.c
@@ -64,12 +64,11 @@ unlock:
 static void
 send_socklist(struct mISDN_sock_list *sl, struct sk_buff *skb)
 {
-	struct hlist_node	*node;
 	struct sock		*sk;
 	struct sk_buff		*cskb = NULL;
 
 	read_lock(&sl->lock);
-	sk_for_each(sk, node, &sl->head) {
+	sk_for_each(sk, &sl->head) {
 		if (sk->sk_state != MISDN_BOUND)
 			continue;
 		if (!cskb)
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 91a02eeeb319..e30b490055aa 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -210,7 +210,7 @@ config DM_DEBUG
 
 config DM_BUFIO
        tristate
-       depends on BLK_DEV_DM && EXPERIMENTAL
+       depends on BLK_DEV_DM
        ---help---
 	 This interface allows you to do buffered I/O on a device and acts
 	 as a cache, holding recently-read blocks in memory and performing
@@ -218,7 +218,7 @@ config DM_BUFIO
 
 config DM_BIO_PRISON
        tristate
-       depends on BLK_DEV_DM && EXPERIMENTAL
+       depends on BLK_DEV_DM
        ---help---
 	 Some bio locking schemes used by other device-mapper targets
 	 including thin provisioning.
@@ -251,8 +251,8 @@ config DM_SNAPSHOT
          Allow volume managers to take writable snapshots of a device.
 
 config DM_THIN_PROVISIONING
-       tristate "Thin provisioning target (EXPERIMENTAL)"
-       depends on BLK_DEV_DM && EXPERIMENTAL
+       tristate "Thin provisioning target"
+       depends on BLK_DEV_DM
        select DM_PERSISTENT_DATA
        select DM_BIO_PRISON
        ---help---
@@ -268,6 +268,37 @@ config DM_DEBUG_BLOCK_STACK_TRACING
 
 	  If unsure, say N.
 
+config DM_CACHE
+       tristate "Cache target (EXPERIMENTAL)"
+       depends on BLK_DEV_DM
+       default n
+       select DM_PERSISTENT_DATA
+       select DM_BIO_PRISON
+       ---help---
+         dm-cache attempts to improve performance of a block device by
+         moving frequently used data to a smaller, higher performance
+         device.  Different 'policy' plugins can be used to change the
+         algorithms used to select which blocks are promoted, demoted,
+         cleaned etc.  It supports writeback and writethrough modes.
+
+config DM_CACHE_MQ
+       tristate "MQ Cache Policy (EXPERIMENTAL)"
+       depends on DM_CACHE
+       default y
+       ---help---
+         A cache policy that uses a multiqueue ordered by recent hit
+         count to select which blocks should be promoted and demoted.
+         This is meant to be a general purpose policy.  It prioritises
+         reads over writes.
+
+config DM_CACHE_CLEANER
+       tristate "Cleaner Cache Policy (EXPERIMENTAL)"
+       depends on DM_CACHE
+       default y
+       ---help---
+         A simple cache policy that writes back all data to the
+         origin.  Used when decommissioning a dm-cache.
+
 config DM_MIRROR
        tristate "Mirror target"
        depends on BLK_DEV_DM
@@ -302,8 +333,8 @@ config DM_RAID
 	 in one of the available parity distribution methods.
 
 config DM_LOG_USERSPACE
-	tristate "Mirror userspace logging (EXPERIMENTAL)"
-	depends on DM_MIRROR && EXPERIMENTAL && NET
+	tristate "Mirror userspace logging"
+	depends on DM_MIRROR && NET
 	select CONNECTOR
 	---help---
 	  The userspace logging module provides a mechanism for
@@ -350,8 +381,8 @@ config DM_MULTIPATH_ST
 	  If unsure, say N.
 
 config DM_DELAY
-	tristate "I/O delaying target (EXPERIMENTAL)"
-	depends on BLK_DEV_DM && EXPERIMENTAL
+	tristate "I/O delaying target"
+	depends on BLK_DEV_DM
 	---help---
 	A target that delays reads and/or writes and can send
 	them to different devices.  Useful for testing.
@@ -365,14 +396,14 @@ config DM_UEVENT
 	Generate udev events for DM events.
 
 config DM_FLAKEY
-       tristate "Flakey target (EXPERIMENTAL)"
-       depends on BLK_DEV_DM && EXPERIMENTAL
+       tristate "Flakey target"
+       depends on BLK_DEV_DM
        ---help---
          A target that intermittently fails I/O for debugging purposes.
 
 config DM_VERITY
-	tristate "Verity target support (EXPERIMENTAL)"
-	depends on BLK_DEV_DM && EXPERIMENTAL
+	tristate "Verity target support"
+	depends on BLK_DEV_DM
 	select CRYPTO
 	select CRYPTO_HASH
 	select DM_BUFIO
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 94dce8b49324..7ceeaefc0e95 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -11,6 +11,9 @@ dm-mirror-y	+= dm-raid1.o
 dm-log-userspace-y \
 		+= dm-log-userspace-base.o dm-log-userspace-transfer.o
 dm-thin-pool-y	+= dm-thin.o dm-thin-metadata.o
+dm-cache-y	+= dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o
+dm-cache-mq-y   += dm-cache-policy-mq.o
+dm-cache-cleaner-y += dm-cache-policy-cleaner.o
 md-mod-y	+= md.o bitmap.o
 raid456-y	+= raid5.o
 
@@ -44,6 +47,9 @@ obj-$(CONFIG_DM_ZERO)		+= dm-zero.o
 obj-$(CONFIG_DM_RAID)	+= dm-raid.o
 obj-$(CONFIG_DM_THIN_PROVISIONING)	+= dm-thin-pool.o
 obj-$(CONFIG_DM_VERITY)		+= dm-verity.o
+obj-$(CONFIG_DM_CACHE)		+= dm-cache.o
+obj-$(CONFIG_DM_CACHE_MQ)	+= dm-cache-mq.o
+obj-$(CONFIG_DM_CACHE_CLEANER)	+= dm-cache-cleaner.o
 
 ifeq ($(CONFIG_DM_UEVENT),y)
 dm-mod-objs			+= dm-uevent.o
diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c
index aefb78e3cbf9..85f0b7074257 100644
--- a/drivers/md/dm-bio-prison.c
+++ b/drivers/md/dm-bio-prison.c
@@ -14,14 +14,6 @@
 
 /*----------------------------------------------------------------*/
 
-struct dm_bio_prison_cell {
-	struct hlist_node list;
-	struct dm_bio_prison *prison;
-	struct dm_cell_key key;
-	struct bio *holder;
-	struct bio_list bios;
-};
-
 struct dm_bio_prison {
 	spinlock_t lock;
 	mempool_t *cell_pool;
@@ -87,6 +79,19 @@ void dm_bio_prison_destroy(struct dm_bio_prison *prison)
 }
 EXPORT_SYMBOL_GPL(dm_bio_prison_destroy);
 
+struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, gfp_t gfp)
+{
+	return mempool_alloc(prison->cell_pool, gfp);
+}
+EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell);
+
+void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
+			     struct dm_bio_prison_cell *cell)
+{
+	mempool_free(cell, prison->cell_pool);
+}
+EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell);
+
 static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key)
 {
 	const unsigned long BIG_PRIME = 4294967291UL;
@@ -106,100 +111,103 @@ static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket,
 						  struct dm_cell_key *key)
 {
 	struct dm_bio_prison_cell *cell;
-	struct hlist_node *tmp;
 
-	hlist_for_each_entry(cell, tmp, bucket, list)
+	hlist_for_each_entry(cell, bucket, list)
 		if (keys_equal(&cell->key, key))
 			return cell;
 
 	return NULL;
 }
 
-/*
- * This may block if a new cell needs allocating.  You must ensure that
- * cells will be unlocked even if the calling thread is blocked.
- *
- * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
- */
-int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key,
-		  struct bio *inmate, struct dm_bio_prison_cell **ref)
+static void __setup_new_cell(struct dm_bio_prison *prison,
+			     struct dm_cell_key *key,
+			     struct bio *holder,
+			     uint32_t hash,
+			     struct dm_bio_prison_cell *cell)
 {
-	int r = 1;
-	unsigned long flags;
-	uint32_t hash = hash_key(prison, key);
-	struct dm_bio_prison_cell *cell, *cell2;
-
-	BUG_ON(hash > prison->nr_buckets);
-
-	spin_lock_irqsave(&prison->lock, flags);
-
-	cell = __search_bucket(prison->cells + hash, key);
-	if (cell) {
-		bio_list_add(&cell->bios, inmate);
-		goto out;
-	}
+	memcpy(&cell->key, key, sizeof(cell->key));
+	cell->holder = holder;
+	bio_list_init(&cell->bios);
+	hlist_add_head(&cell->list, prison->cells + hash);
+}
 
-	/*
-	 * Allocate a new cell
-	 */
-	spin_unlock_irqrestore(&prison->lock, flags);
-	cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO);
-	spin_lock_irqsave(&prison->lock, flags);
+static int __bio_detain(struct dm_bio_prison *prison,
+			struct dm_cell_key *key,
+			struct bio *inmate,
+			struct dm_bio_prison_cell *cell_prealloc,
+			struct dm_bio_prison_cell **cell_result)
+{
+	uint32_t hash = hash_key(prison, key);
+	struct dm_bio_prison_cell *cell;
 
-	/*
-	 * We've been unlocked, so we have to double check that
-	 * nobody else has inserted this cell in the meantime.
-	 */
 	cell = __search_bucket(prison->cells + hash, key);
 	if (cell) {
-		mempool_free(cell2, prison->cell_pool);
-		bio_list_add(&cell->bios, inmate);
-		goto out;
+		if (inmate)
+			bio_list_add(&cell->bios, inmate);
+		*cell_result = cell;
+		return 1;
 	}
 
-	/*
-	 * Use new cell.
-	 */
-	cell = cell2;
-
-	cell->prison = prison;
-	memcpy(&cell->key, key, sizeof(cell->key));
-	cell->holder = inmate;
-	bio_list_init(&cell->bios);
-	hlist_add_head(&cell->list, prison->cells + hash);
+	__setup_new_cell(prison, key, inmate, hash, cell_prealloc);
+	*cell_result = cell_prealloc;
+	return 0;
+}
 
-	r = 0;
+static int bio_detain(struct dm_bio_prison *prison,
+		      struct dm_cell_key *key,
+		      struct bio *inmate,
+		      struct dm_bio_prison_cell *cell_prealloc,
+		      struct dm_bio_prison_cell **cell_result)
+{
+	int r;
+	unsigned long flags;
 
-out:
+	spin_lock_irqsave(&prison->lock, flags);
+	r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result);
 	spin_unlock_irqrestore(&prison->lock, flags);
 
-	*ref = cell;
-
 	return r;
 }
+
+int dm_bio_detain(struct dm_bio_prison *prison,
+		  struct dm_cell_key *key,
+		  struct bio *inmate,
+		  struct dm_bio_prison_cell *cell_prealloc,
+		  struct dm_bio_prison_cell **cell_result)
+{
+	return bio_detain(prison, key, inmate, cell_prealloc, cell_result);
+}
 EXPORT_SYMBOL_GPL(dm_bio_detain);
 
+int dm_get_cell(struct dm_bio_prison *prison,
+		struct dm_cell_key *key,
+		struct dm_bio_prison_cell *cell_prealloc,
+		struct dm_bio_prison_cell **cell_result)
+{
+	return bio_detain(prison, key, NULL, cell_prealloc, cell_result);
+}
+EXPORT_SYMBOL_GPL(dm_get_cell);
+
 /*
  * @inmates must have been initialised prior to this call
  */
-static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
+static void __cell_release(struct dm_bio_prison_cell *cell,
+			   struct bio_list *inmates)
 {
-	struct dm_bio_prison *prison = cell->prison;
-
 	hlist_del(&cell->list);
 
 	if (inmates) {
-		bio_list_add(inmates, cell->holder);
+		if (cell->holder)
+			bio_list_add(inmates, cell->holder);
 		bio_list_merge(inmates, &cell->bios);
 	}
-
-	mempool_free(cell, prison->cell_pool);
 }
 
-void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios)
+void dm_cell_release(struct dm_bio_prison *prison,
+		     struct dm_bio_prison_cell *cell,
+		     struct bio_list *bios)
 {
 	unsigned long flags;
-	struct dm_bio_prison *prison = cell->prison;
 
 	spin_lock_irqsave(&prison->lock, flags);
 	__cell_release(cell, bios);
@@ -210,20 +218,18 @@ EXPORT_SYMBOL_GPL(dm_cell_release);
 /*
  * Sometimes we don't want the holder, just the additional bios.
  */
-static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
+static void __cell_release_no_holder(struct dm_bio_prison_cell *cell,
+				     struct bio_list *inmates)
 {
-	struct dm_bio_prison *prison = cell->prison;
-
 	hlist_del(&cell->list);
 	bio_list_merge(inmates, &cell->bios);
-
-	mempool_free(cell, prison->cell_pool);
 }
 
-void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
+void dm_cell_release_no_holder(struct dm_bio_prison *prison,
+			       struct dm_bio_prison_cell *cell,
+			       struct bio_list *inmates)
 {
 	unsigned long flags;
-	struct dm_bio_prison *prison = cell->prison;
 
 	spin_lock_irqsave(&prison->lock, flags);
 	__cell_release_no_holder(cell, inmates);
@@ -231,9 +237,9 @@ void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list
 }
 EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
 
-void dm_cell_error(struct dm_bio_prison_cell *cell)
+void dm_cell_error(struct dm_bio_prison *prison,
+		   struct dm_bio_prison_cell *cell)
 {
-	struct dm_bio_prison *prison = cell->prison;
 	struct bio_list bios;
 	struct bio *bio;
 	unsigned long flags;
diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h
index 53d1a7a84e2f..3f833190eadf 100644
--- a/drivers/md/dm-bio-prison.h
+++ b/drivers/md/dm-bio-prison.h
@@ -22,7 +22,6 @@
  * subsequently unlocked the bios become available.
  */
 struct dm_bio_prison;
-struct dm_bio_prison_cell;
 
 /* FIXME: this needs to be more abstract */
 struct dm_cell_key {
@@ -31,21 +30,62 @@ struct dm_cell_key {
 	dm_block_t block;
 };
 
+/*
+ * Treat this as opaque, only in header so callers can manage allocation
+ * themselves.
+ */
+struct dm_bio_prison_cell {
+	struct hlist_node list;
+	struct dm_cell_key key;
+	struct bio *holder;
+	struct bio_list bios;
+};
+
 struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells);
 void dm_bio_prison_destroy(struct dm_bio_prison *prison);
 
 /*
- * This may block if a new cell needs allocating.  You must ensure that
- * cells will be unlocked even if the calling thread is blocked.
+ * These two functions just wrap a mempool.  This is a transitory step:
+ * Eventually all bio prison clients should manage their own cell memory.
  *
- * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
+ * Like mempool_alloc(), dm_bio_prison_alloc_cell() can only fail if called
+ * in interrupt context or passed GFP_NOWAIT.
  */
-int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key,
-		  struct bio *inmate, struct dm_bio_prison_cell **ref);
+struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison,
+						    gfp_t gfp);
+void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
+			     struct dm_bio_prison_cell *cell);
 
-void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios);
-void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates);
-void dm_cell_error(struct dm_bio_prison_cell *cell);
+/*
+ * Creates, or retrieves a cell for the given key.
+ *
+ * Returns 1 if pre-existing cell returned, zero if new cell created using
+ * @cell_prealloc.
+ */
+int dm_get_cell(struct dm_bio_prison *prison,
+		struct dm_cell_key *key,
+		struct dm_bio_prison_cell *cell_prealloc,
+		struct dm_bio_prison_cell **cell_result);
+
+/*
+ * An atomic op that combines retrieving a cell, and adding a bio to it.
+ *
+ * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
+ */
+int dm_bio_detain(struct dm_bio_prison *prison,
+		  struct dm_cell_key *key,
+		  struct bio *inmate,
+		  struct dm_bio_prison_cell *cell_prealloc,
+		  struct dm_bio_prison_cell **cell_result);
+
+void dm_cell_release(struct dm_bio_prison *prison,
+		     struct dm_bio_prison_cell *cell,
+		     struct bio_list *bios);
+void dm_cell_release_no_holder(struct dm_bio_prison *prison,
+			       struct dm_bio_prison_cell *cell,
+			       struct bio_list *inmates);
+void dm_cell_error(struct dm_bio_prison *prison,
+		   struct dm_bio_prison_cell *cell);
 
 /*----------------------------------------------------------------*/
 
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 651ca79881dd..3c955e10a618 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -859,9 +859,8 @@ static void __check_watermark(struct dm_bufio_client *c)
 static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block)
 {
 	struct dm_buffer *b;
-	struct hlist_node *hn;
 
-	hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)],
+	hlist_for_each_entry(b, &c->cache_hash[DM_BUFIO_HASH(block)],
 			     hash_list) {
 		dm_bufio_cond_resched();
 		if (b->block == block)
@@ -1193,7 +1192,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
 int dm_bufio_issue_flush(struct dm_bufio_client *c)
 {
 	struct dm_io_request io_req = {
-		.bi_rw = REQ_FLUSH,
+		.bi_rw = WRITE_FLUSH,
 		.mem.type = DM_IO_KMEM,
 		.mem.ptr.addr = NULL,
 		.client = c->dm_io,
diff --git a/drivers/md/dm-cache-block-types.h b/drivers/md/dm-cache-block-types.h
new file mode 100644
index 000000000000..bed4ad4e1b7c
--- /dev/null
+++ b/drivers/md/dm-cache-block-types.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_CACHE_BLOCK_TYPES_H
+#define DM_CACHE_BLOCK_TYPES_H
+
+#include "persistent-data/dm-block-manager.h"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * It's helpful to get sparse to differentiate between indexes into the
+ * origin device, indexes into the cache device, and indexes into the
+ * discard bitset.
+ */
+
+typedef dm_block_t __bitwise__ dm_oblock_t;
+typedef uint32_t __bitwise__ dm_cblock_t;
+typedef dm_block_t __bitwise__ dm_dblock_t;
+
+static inline dm_oblock_t to_oblock(dm_block_t b)
+{
+	return (__force dm_oblock_t) b;
+}
+
+static inline dm_block_t from_oblock(dm_oblock_t b)
+{
+	return (__force dm_block_t) b;
+}
+
+static inline dm_cblock_t to_cblock(uint32_t b)
+{
+	return (__force dm_cblock_t) b;
+}
+
+static inline uint32_t from_cblock(dm_cblock_t b)
+{
+	return (__force uint32_t) b;
+}
+
+static inline dm_dblock_t to_dblock(dm_block_t b)
+{
+	return (__force dm_dblock_t) b;
+}
+
+static inline dm_block_t from_dblock(dm_dblock_t b)
+{
+	return (__force dm_block_t) b;
+}
+
+#endif /* DM_CACHE_BLOCK_TYPES_H */
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
new file mode 100644
index 000000000000..fbd3625f2748
--- /dev/null
+++ b/drivers/md/dm-cache-metadata.c
@@ -0,0 +1,1146 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-cache-metadata.h"
+
+#include "persistent-data/dm-array.h"
+#include "persistent-data/dm-bitset.h"
+#include "persistent-data/dm-space-map.h"
+#include "persistent-data/dm-space-map-disk.h"
+#include "persistent-data/dm-transaction-manager.h"
+
+#include <linux/device-mapper.h>
+
+/*----------------------------------------------------------------*/
+
+#define DM_MSG_PREFIX   "cache metadata"
+
+#define CACHE_SUPERBLOCK_MAGIC 06142003
+#define CACHE_SUPERBLOCK_LOCATION 0
+#define CACHE_VERSION 1
+#define CACHE_METADATA_CACHE_SIZE 64
+
+/*
+ *  3 for btree insert +
+ *  2 for btree lookup used within space map
+ */
+#define CACHE_MAX_CONCURRENT_LOCKS 5
+#define SPACE_MAP_ROOT_SIZE 128
+
+enum superblock_flag_bits {
+	/* for spotting crashes that would invalidate the dirty bitset */
+	CLEAN_SHUTDOWN,
+};
+
+/*
+ * Each mapping from cache block -> origin block carries a set of flags.
+ */
+enum mapping_bits {
+	/*
+	 * A valid mapping.  Because we're using an array we clear this
+	 * flag for an non existant mapping.
+	 */
+	M_VALID = 1,
+
+	/*
+	 * The data on the cache is different from that on the origin.
+	 */
+	M_DIRTY = 2
+};
+
+struct cache_disk_superblock {
+	__le32 csum;
+	__le32 flags;
+	__le64 blocknr;
+
+	__u8 uuid[16];
+	__le64 magic;
+	__le32 version;
+
+	__u8 policy_name[CACHE_POLICY_NAME_SIZE];
+	__le32 policy_hint_size;
+
+	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
+	__le64 mapping_root;
+	__le64 hint_root;
+
+	__le64 discard_root;
+	__le64 discard_block_size;
+	__le64 discard_nr_blocks;
+
+	__le32 data_block_size;
+	__le32 metadata_block_size;
+	__le32 cache_blocks;
+
+	__le32 compat_flags;
+	__le32 compat_ro_flags;
+	__le32 incompat_flags;
+
+	__le32 read_hits;
+	__le32 read_misses;
+	__le32 write_hits;
+	__le32 write_misses;
+} __packed;
+
+struct dm_cache_metadata {
+	struct block_device *bdev;
+	struct dm_block_manager *bm;
+	struct dm_space_map *metadata_sm;
+	struct dm_transaction_manager *tm;
+
+	struct dm_array_info info;
+	struct dm_array_info hint_info;
+	struct dm_disk_bitset discard_info;
+
+	struct rw_semaphore root_lock;
+	dm_block_t root;
+	dm_block_t hint_root;
+	dm_block_t discard_root;
+
+	sector_t discard_block_size;
+	dm_dblock_t discard_nr_blocks;
+
+	sector_t data_block_size;
+	dm_cblock_t cache_blocks;
+	bool changed:1;
+	bool clean_when_opened:1;
+
+	char policy_name[CACHE_POLICY_NAME_SIZE];
+	size_t policy_hint_size;
+	struct dm_cache_statistics stats;
+};
+
+/*-------------------------------------------------------------------
+ * superblock validator
+ *-----------------------------------------------------------------*/
+
+#define SUPERBLOCK_CSUM_XOR 9031977
+
+static void sb_prepare_for_write(struct dm_block_validator *v,
+				 struct dm_block *b,
+				 size_t sb_block_size)
+{
+	struct cache_disk_superblock *disk_super = dm_block_data(b);
+
+	disk_super->blocknr = cpu_to_le64(dm_block_location(b));
+	disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
+						      sb_block_size - sizeof(__le32),
+						      SUPERBLOCK_CSUM_XOR));
+}
+
+static int sb_check(struct dm_block_validator *v,
+		    struct dm_block *b,
+		    size_t sb_block_size)
+{
+	struct cache_disk_superblock *disk_super = dm_block_data(b);
+	__le32 csum_le;
+
+	if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
+		DMERR("sb_check failed: blocknr %llu: wanted %llu",
+		      le64_to_cpu(disk_super->blocknr),
+		      (unsigned long long)dm_block_location(b));
+		return -ENOTBLK;
+	}
+
+	if (le64_to_cpu(disk_super->magic) != CACHE_SUPERBLOCK_MAGIC) {
+		DMERR("sb_check failed: magic %llu: wanted %llu",
+		      le64_to_cpu(disk_super->magic),
+		      (unsigned long long)CACHE_SUPERBLOCK_MAGIC);
+		return -EILSEQ;
+	}
+
+	csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
+					     sb_block_size - sizeof(__le32),
+					     SUPERBLOCK_CSUM_XOR));
+	if (csum_le != disk_super->csum) {
+		DMERR("sb_check failed: csum %u: wanted %u",
+		      le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
+		return -EILSEQ;
+	}
+
+	return 0;
+}
+
+static struct dm_block_validator sb_validator = {
+	.name = "superblock",
+	.prepare_for_write = sb_prepare_for_write,
+	.check = sb_check
+};
+
+/*----------------------------------------------------------------*/
+
+static int superblock_read_lock(struct dm_cache_metadata *cmd,
+				struct dm_block **sblock)
+{
+	return dm_bm_read_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
+			       &sb_validator, sblock);
+}
+
+static int superblock_lock_zero(struct dm_cache_metadata *cmd,
+				struct dm_block **sblock)
+{
+	return dm_bm_write_lock_zero(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
+				     &sb_validator, sblock);
+}
+
+static int superblock_lock(struct dm_cache_metadata *cmd,
+			   struct dm_block **sblock)
+{
+	return dm_bm_write_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
+				&sb_validator, sblock);
+}
+
+/*----------------------------------------------------------------*/
+
+static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result)
+{
+	int r;
+	unsigned i;
+	struct dm_block *b;
+	__le64 *data_le, zero = cpu_to_le64(0);
+	unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64);
+
+	/*
+	 * We can't use a validator here - it may be all zeroes.
+	 */
+	r = dm_bm_read_lock(bm, CACHE_SUPERBLOCK_LOCATION, NULL, &b);
+	if (r)
+		return r;
+
+	data_le = dm_block_data(b);
+	*result = 1;
+	for (i = 0; i < sb_block_size; i++) {
+		if (data_le[i] != zero) {
+			*result = 0;
+			break;
+		}
+	}
+
+	return dm_bm_unlock(b);
+}
+
+static void __setup_mapping_info(struct dm_cache_metadata *cmd)
+{
+	struct dm_btree_value_type vt;
+
+	vt.context = NULL;
+	vt.size = sizeof(__le64);
+	vt.inc = NULL;
+	vt.dec = NULL;
+	vt.equal = NULL;
+	dm_array_info_init(&cmd->info, cmd->tm, &vt);
+
+	if (cmd->policy_hint_size) {
+		vt.size = sizeof(__le32);
+		dm_array_info_init(&cmd->hint_info, cmd->tm, &vt);
+	}
+}
+
+static int __write_initial_superblock(struct dm_cache_metadata *cmd)
+{
+	int r;
+	struct dm_block *sblock;
+	size_t metadata_len;
+	struct cache_disk_superblock *disk_super;
+	sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT;
+
+	/* FIXME: see if we can lose the max sectors limit */
+	if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS)
+		bdev_size = DM_CACHE_METADATA_MAX_SECTORS;
+
+	r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
+	if (r < 0)
+		return r;
+
+	r = dm_tm_pre_commit(cmd->tm);
+	if (r < 0)
+		return r;
+
+	r = superblock_lock_zero(cmd, &sblock);
+	if (r)
+		return r;
+
+	disk_super = dm_block_data(sblock);
+	disk_super->flags = 0;
+	memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
+	disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
+	disk_super->version = cpu_to_le32(CACHE_VERSION);
+	memset(disk_super->policy_name, 0, CACHE_POLICY_NAME_SIZE);
+	disk_super->policy_hint_size = 0;
+
+	r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root,
+			    metadata_len);
+	if (r < 0)
+		goto bad_locked;
+
+	disk_super->mapping_root = cpu_to_le64(cmd->root);
+	disk_super->hint_root = cpu_to_le64(cmd->hint_root);
+	disk_super->discard_root = cpu_to_le64(cmd->discard_root);
+	disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
+	disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
+	disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
+	disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
+	disk_super->cache_blocks = cpu_to_le32(0);
+	memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
+
+	disk_super->read_hits = cpu_to_le32(0);
+	disk_super->read_misses = cpu_to_le32(0);
+	disk_super->write_hits = cpu_to_le32(0);
+	disk_super->write_misses = cpu_to_le32(0);
+
+	return dm_tm_commit(cmd->tm, sblock);
+
+bad_locked:
+	dm_bm_unlock(sblock);
+	return r;
+}
+
+static int __format_metadata(struct dm_cache_metadata *cmd)
+{
+	int r;
+
+	r = dm_tm_create_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
+				 &cmd->tm, &cmd->metadata_sm);
+	if (r < 0) {
+		DMERR("tm_create_with_sm failed");
+		return r;
+	}
+
+	__setup_mapping_info(cmd);
+
+	r = dm_array_empty(&cmd->info, &cmd->root);
+	if (r < 0)
+		goto bad;
+
+	dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
+
+	r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
+	if (r < 0)
+		goto bad;
+
+	cmd->discard_block_size = 0;
+	cmd->discard_nr_blocks = 0;
+
+	r = __write_initial_superblock(cmd);
+	if (r)
+		goto bad;
+
+	cmd->clean_when_opened = true;
+	return 0;
+
+bad:
+	dm_tm_destroy(cmd->tm);
+	dm_sm_destroy(cmd->metadata_sm);
+
+	return r;
+}
+
+static int __check_incompat_features(struct cache_disk_superblock *disk_super,
+				     struct dm_cache_metadata *cmd)
+{
+	uint32_t features;
+
+	features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
+	if (features) {
+		DMERR("could not access metadata due to unsupported optional features (%lx).",
+		      (unsigned long)features);
+		return -EINVAL;
+	}
+
+	/*
+	 * Check for read-only metadata to skip the following RDWR checks.
+	 */
+	if (get_disk_ro(cmd->bdev->bd_disk))
+		return 0;
+
+	features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP;
+	if (features) {
+		DMERR("could not access metadata RDWR due to unsupported optional features (%lx).",
+		      (unsigned long)features);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __open_metadata(struct dm_cache_metadata *cmd)
+{
+	int r;
+	struct dm_block *sblock;
+	struct cache_disk_superblock *disk_super;
+	unsigned long sb_flags;
+
+	r = superblock_read_lock(cmd, &sblock);
+	if (r < 0) {
+		DMERR("couldn't read lock superblock");
+		return r;
+	}
+
+	disk_super = dm_block_data(sblock);
+
+	r = __check_incompat_features(disk_super, cmd);
+	if (r < 0)
+		goto bad;
+
+	r = dm_tm_open_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
+			       disk_super->metadata_space_map_root,
+			       sizeof(disk_super->metadata_space_map_root),
+			       &cmd->tm, &cmd->metadata_sm);
+	if (r < 0) {
+		DMERR("tm_open_with_sm failed");
+		goto bad;
+	}
+
+	__setup_mapping_info(cmd);
+	dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
+	sb_flags = le32_to_cpu(disk_super->flags);
+	cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags);
+	return dm_bm_unlock(sblock);
+
+bad:
+	dm_bm_unlock(sblock);
+	return r;
+}
+
+static int __open_or_format_metadata(struct dm_cache_metadata *cmd,
+				     bool format_device)
+{
+	int r, unformatted;
+
+	r = __superblock_all_zeroes(cmd->bm, &unformatted);
+	if (r)
+		return r;
+
+	if (unformatted)
+		return format_device ? __format_metadata(cmd) : -EPERM;
+
+	return __open_metadata(cmd);
+}
+
+static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
+					    bool may_format_device)
+{
+	int r;
+	cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE,
+					  CACHE_METADATA_CACHE_SIZE,
+					  CACHE_MAX_CONCURRENT_LOCKS);
+	if (IS_ERR(cmd->bm)) {
+		DMERR("could not create block manager");
+		return PTR_ERR(cmd->bm);
+	}
+
+	r = __open_or_format_metadata(cmd, may_format_device);
+	if (r)
+		dm_block_manager_destroy(cmd->bm);
+
+	return r;
+}
+
+static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd)
+{
+	dm_sm_destroy(cmd->metadata_sm);
+	dm_tm_destroy(cmd->tm);
+	dm_block_manager_destroy(cmd->bm);
+}
+
+typedef unsigned long (*flags_mutator)(unsigned long);
+
+static void update_flags(struct cache_disk_superblock *disk_super,
+			 flags_mutator mutator)
+{
+	uint32_t sb_flags = mutator(le32_to_cpu(disk_super->flags));
+	disk_super->flags = cpu_to_le32(sb_flags);
+}
+
+static unsigned long set_clean_shutdown(unsigned long flags)
+{
+	set_bit(CLEAN_SHUTDOWN, &flags);
+	return flags;
+}
+
+static unsigned long clear_clean_shutdown(unsigned long flags)
+{
+	clear_bit(CLEAN_SHUTDOWN, &flags);
+	return flags;
+}
+
+static void read_superblock_fields(struct dm_cache_metadata *cmd,
+				   struct cache_disk_superblock *disk_super)
+{
+	cmd->root = le64_to_cpu(disk_super->mapping_root);
+	cmd->hint_root = le64_to_cpu(disk_super->hint_root);
+	cmd->discard_root = le64_to_cpu(disk_super->discard_root);
+	cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size);
+	cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks));
+	cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
+	cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
+	strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
+	cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size);
+
+	cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits);
+	cmd->stats.read_misses = le32_to_cpu(disk_super->read_misses);
+	cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits);
+	cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses);
+
+	cmd->changed = false;
+}
+
+/*
+ * The mutator updates the superblock flags.
+ */
+static int __begin_transaction_flags(struct dm_cache_metadata *cmd,
+				     flags_mutator mutator)
+{
+	int r;
+	struct cache_disk_superblock *disk_super;
+	struct dm_block *sblock;
+
+	r = superblock_lock(cmd, &sblock);
+	if (r)
+		return r;
+
+	disk_super = dm_block_data(sblock);
+	update_flags(disk_super, mutator);
+	read_superblock_fields(cmd, disk_super);
+
+	return dm_bm_flush_and_unlock(cmd->bm, sblock);
+}
+
+static int __begin_transaction(struct dm_cache_metadata *cmd)
+{
+	int r;
+	struct cache_disk_superblock *disk_super;
+	struct dm_block *sblock;
+
+	/*
+	 * We re-read the superblock every time.  Shouldn't need to do this
+	 * really.
+	 */
+	r = superblock_read_lock(cmd, &sblock);
+	if (r)
+		return r;
+
+	disk_super = dm_block_data(sblock);
+	read_superblock_fields(cmd, disk_super);
+	dm_bm_unlock(sblock);
+
+	return 0;
+}
+
+static int __commit_transaction(struct dm_cache_metadata *cmd,
+				flags_mutator mutator)
+{
+	int r;
+	size_t metadata_len;
+	struct cache_disk_superblock *disk_super;
+	struct dm_block *sblock;
+
+	/*
+	 * We need to know if the cache_disk_superblock exceeds a 512-byte sector.
+	 */
+	BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512);
+
+	r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root,
+			    &cmd->discard_root);
+	if (r)
+		return r;
+
+	r = dm_tm_pre_commit(cmd->tm);
+	if (r < 0)
+		return r;
+
+	r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
+	if (r < 0)
+		return r;
+
+	r = superblock_lock(cmd, &sblock);
+	if (r)
+		return r;
+
+	disk_super = dm_block_data(sblock);
+
+	if (mutator)
+		update_flags(disk_super, mutator);
+
+	disk_super->mapping_root = cpu_to_le64(cmd->root);
+	disk_super->hint_root = cpu_to_le64(cmd->hint_root);
+	disk_super->discard_root = cpu_to_le64(cmd->discard_root);
+	disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
+	disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
+	disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
+	strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
+
+	disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits);
+	disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
+	disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits);
+	disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses);
+
+	r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root,
+			    metadata_len);
+	if (r < 0) {
+		dm_bm_unlock(sblock);
+		return r;
+	}
+
+	return dm_tm_commit(cmd->tm, sblock);
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * The mappings are held in a dm-array that has 64-bit values stored in
+ * little-endian format.  The index is the cblock, the high 48bits of the
+ * value are the oblock and the low 16 bit the flags.
+ */
+#define FLAGS_MASK ((1 << 16) - 1)
+
+static __le64 pack_value(dm_oblock_t block, unsigned flags)
+{
+	uint64_t value = from_oblock(block);
+	value <<= 16;
+	value = value | (flags & FLAGS_MASK);
+	return cpu_to_le64(value);
+}
+
+static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)
+{
+	uint64_t value = le64_to_cpu(value_le);
+	uint64_t b = value >> 16;
+	*block = to_oblock(b);
+	*flags = value & FLAGS_MASK;
+}
+
+/*----------------------------------------------------------------*/
+
+struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
+						 sector_t data_block_size,
+						 bool may_format_device,
+						 size_t policy_hint_size)
+{
+	int r;
+	struct dm_cache_metadata *cmd;
+
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (!cmd) {
+		DMERR("could not allocate metadata struct");
+		return NULL;
+	}
+
+	init_rwsem(&cmd->root_lock);
+	cmd->bdev = bdev;
+	cmd->data_block_size = data_block_size;
+	cmd->cache_blocks = 0;
+	cmd->policy_hint_size = policy_hint_size;
+	cmd->changed = true;
+
+	r = __create_persistent_data_objects(cmd, may_format_device);
+	if (r) {
+		kfree(cmd);
+		return ERR_PTR(r);
+	}
+
+	r = __begin_transaction_flags(cmd, clear_clean_shutdown);
+	if (r < 0) {
+		dm_cache_metadata_close(cmd);
+		return ERR_PTR(r);
+	}
+
+	return cmd;
+}
+
+void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
+{
+	__destroy_persistent_data_objects(cmd);
+	kfree(cmd);
+}
+
+int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
+{
+	int r;
+	__le64 null_mapping = pack_value(0, 0);
+
+	down_write(&cmd->root_lock);
+	__dm_bless_for_disk(&null_mapping);
+	r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks),
+			    from_cblock(new_cache_size),
+			    &null_mapping, &cmd->root);
+	if (!r)
+		cmd->cache_blocks = new_cache_size;
+	cmd->changed = true;
+	up_write(&cmd->root_lock);
+
+	return r;
+}
+
+int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
+				   sector_t discard_block_size,
+				   dm_dblock_t new_nr_entries)
+{
+	int r;
+
+	down_write(&cmd->root_lock);
+	r = dm_bitset_resize(&cmd->discard_info,
+			     cmd->discard_root,
+			     from_dblock(cmd->discard_nr_blocks),
+			     from_dblock(new_nr_entries),
+			     false, &cmd->discard_root);
+	if (!r) {
+		cmd->discard_block_size = discard_block_size;
+		cmd->discard_nr_blocks = new_nr_entries;
+	}
+
+	cmd->changed = true;
+	up_write(&cmd->root_lock);
+
+	return r;
+}
+
+static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
+{
+	return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root,
+				 from_dblock(b), &cmd->discard_root);
+}
+
+static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
+{
+	return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root,
+				   from_dblock(b), &cmd->discard_root);
+}
+
+static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b,
+			  bool *is_discarded)
+{
+	return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root,
+				  from_dblock(b), &cmd->discard_root,
+				  is_discarded);
+}
+
+static int __discard(struct dm_cache_metadata *cmd,
+		     dm_dblock_t dblock, bool discard)
+{
+	int r;
+
+	r = (discard ? __set_discard : __clear_discard)(cmd, dblock);
+	if (r)
+		return r;
+
+	cmd->changed = true;
+	return 0;
+}
+
+int dm_cache_set_discard(struct dm_cache_metadata *cmd,
+			 dm_dblock_t dblock, bool discard)
+{
+	int r;
+
+	down_write(&cmd->root_lock);
+	r = __discard(cmd, dblock, discard);
+	up_write(&cmd->root_lock);
+
+	return r;
+}
+
+static int __load_discards(struct dm_cache_metadata *cmd,
+			   load_discard_fn fn, void *context)
+{
+	int r = 0;
+	dm_block_t b;
+	bool discard;
+
+	for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
+		dm_dblock_t dblock = to_dblock(b);
+
+		if (cmd->clean_when_opened) {
+			r = __is_discarded(cmd, dblock, &discard);
+			if (r)
+				return r;
+		} else
+			discard = false;
+
+		r = fn(context, cmd->discard_block_size, dblock, discard);
+		if (r)
+			break;
+	}
+
+	return r;
+}
+
+int dm_cache_load_discards(struct dm_cache_metadata *cmd,
+			   load_discard_fn fn, void *context)
+{
+	int r;
+
+	down_read(&cmd->root_lock);
+	r = __load_discards(cmd, fn, context);
+	up_read(&cmd->root_lock);
+
+	return r;
+}
+
+dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd)
+{
+	dm_cblock_t r;
+
+	down_read(&cmd->root_lock);
+	r = cmd->cache_blocks;
+	up_read(&cmd->root_lock);
+
+	return r;
+}
+
+static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
+{
+	int r;
+	__le64 value = pack_value(0, 0);
+
+	__dm_bless_for_disk(&value);
+	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
+			       &value, &cmd->root);
+	if (r)
+		return r;
+
+	cmd->changed = true;
+	return 0;
+}
+
+int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock)
+{
+	int r;
+
+	down_write(&cmd->root_lock);
+	r = __remove(cmd, cblock);
+	up_write(&cmd->root_lock);
+
+	return r;
+}
+
+static int __insert(struct dm_cache_metadata *cmd,
+		    dm_cblock_t cblock, dm_oblock_t oblock)
+{
+	int r;
+	__le64 value = pack_value(oblock, M_VALID);
+	__dm_bless_for_disk(&value);
+
+	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
+			       &value, &cmd->root);
+	if (r)
+		return r;
+
+	cmd->changed = true;
+	return 0;
+}
+
+int dm_cache_insert_mapping(struct dm_cache_metadata *cmd,
+			    dm_cblock_t cblock, dm_oblock_t oblock)
+{
+	int r;
+
+	down_write(&cmd->root_lock);
+	r = __insert(cmd, cblock, oblock);
+	up_write(&cmd->root_lock);
+
+	return r;
+}
+
+struct thunk {
+	load_mapping_fn fn;
+	void *context;
+
+	struct dm_cache_metadata *cmd;
+	bool respect_dirty_flags;
+	bool hints_valid;
+};
+
+static bool hints_array_initialized(struct dm_cache_metadata *cmd)
+{
+	return cmd->hint_root && cmd->policy_hint_size;
+}
+
+static bool hints_array_available(struct dm_cache_metadata *cmd,
+				  const char *policy_name)
+{
+	bool policy_names_match = !strncmp(cmd->policy_name, policy_name,
+					   sizeof(cmd->policy_name));
+
+	return cmd->clean_when_opened && policy_names_match &&
+		hints_array_initialized(cmd);
+}
+
+static int __load_mapping(void *context, uint64_t cblock, void *leaf)
+{
+	int r = 0;
+	bool dirty;
+	__le64 value;
+	__le32 hint_value = 0;
+	dm_oblock_t oblock;
+	unsigned flags;
+	struct thunk *thunk = context;
+	struct dm_cache_metadata *cmd = thunk->cmd;
+
+	memcpy(&value, leaf, sizeof(value));
+	unpack_value(value, &oblock, &flags);
+
+	if (flags & M_VALID) {
+		if (thunk->hints_valid) {
+			r = dm_array_get_value(&cmd->hint_info, cmd->hint_root,
+					       cblock, &hint_value);
+			if (r && r != -ENODATA)
+				return r;
+		}
+
+		dirty = thunk->respect_dirty_flags ? (flags & M_DIRTY) : true;
+		r = thunk->fn(thunk->context, oblock, to_cblock(cblock),
+			      dirty, le32_to_cpu(hint_value), thunk->hints_valid);
+	}
+
+	return r;
+}
+
+static int __load_mappings(struct dm_cache_metadata *cmd, const char *policy_name,
+			   load_mapping_fn fn, void *context)
+{
+	struct thunk thunk;
+
+	thunk.fn = fn;
+	thunk.context = context;
+
+	thunk.cmd = cmd;
+	thunk.respect_dirty_flags = cmd->clean_when_opened;
+	thunk.hints_valid = hints_array_available(cmd, policy_name);
+
+	return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk);
+}
+
+int dm_cache_load_mappings(struct dm_cache_metadata *cmd, const char *policy_name,
+			   load_mapping_fn fn, void *context)
+{
+	int r;
+
+	down_read(&cmd->root_lock);
+	r = __load_mappings(cmd, policy_name, fn, context);
+	up_read(&cmd->root_lock);
+
+	return r;
+}
+
+static int __dump_mapping(void *context, uint64_t cblock, void *leaf)
+{
+	int r = 0;
+	__le64 value;
+	dm_oblock_t oblock;
+	unsigned flags;
+
+	memcpy(&value, leaf, sizeof(value));
+	unpack_value(value, &oblock, &flags);
+
+	return r;
+}
+
+static int __dump_mappings(struct dm_cache_metadata *cmd)
+{
+	return dm_array_walk(&cmd->info, cmd->root, __dump_mapping, NULL);
+}
+
+void dm_cache_dump(struct dm_cache_metadata *cmd)
+{
+	down_read(&cmd->root_lock);
+	__dump_mappings(cmd);
+	up_read(&cmd->root_lock);
+}
+
+int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd)
+{
+	int r;
+
+	down_read(&cmd->root_lock);
+	r = cmd->changed;
+	up_read(&cmd->root_lock);
+
+	return r;
+}
+
+static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty)
+{
+	int r;
+	unsigned flags;
+	dm_oblock_t oblock;
+	__le64 value;
+
+	r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(cblock), &value);
+	if (r)
+		return r;
+
+	unpack_value(value, &oblock, &flags);
+
+	if (((flags & M_DIRTY) && dirty) || (!(flags & M_DIRTY) && !dirty))
+		/* nothing to be done */
+		return 0;
+
+	value = pack_value(oblock, flags | (dirty ? M_DIRTY : 0));
+	__dm_bless_for_disk(&value);
+
+	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
+			       &value, &cmd->root);
+	if (r)
+		return r;
+
+	cmd->changed = true;
+	return 0;
+
+}
+
+int dm_cache_set_dirty(struct dm_cache_metadata *cmd,
+		       dm_cblock_t cblock, bool dirty)
+{
+	int r;
+
+	down_write(&cmd->root_lock);
+	r = __dirty(cmd, cblock, dirty);
+	up_write(&cmd->root_lock);
+
+	return r;
+}
+
+void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
+				 struct dm_cache_statistics *stats)
+{
+	down_read(&cmd->root_lock);
+	memcpy(stats, &cmd->stats, sizeof(*stats));
+	up_read(&cmd->root_lock);
+}
+
+void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
+				 struct dm_cache_statistics *stats)
+{
+	down_write(&cmd->root_lock);
+	memcpy(&cmd->stats, stats, sizeof(*stats));
+	up_write(&cmd->root_lock);
+}
+
+int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown)
+{
+	int r;
+	flags_mutator mutator = (clean_shutdown ? set_clean_shutdown :
+				 clear_clean_shutdown);
+
+	down_write(&cmd->root_lock);
+	r = __commit_transaction(cmd, mutator);
+	if (r)
+		goto out;
+
+	r = __begin_transaction(cmd);
+
+out:
+	up_write(&cmd->root_lock);
+	return r;
+}
+
+int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd,
+					   dm_block_t *result)
+{
+	int r = -EINVAL;
+
+	down_read(&cmd->root_lock);
+	r = dm_sm_get_nr_free(cmd->metadata_sm, result);
+	up_read(&cmd->root_lock);
+
+	return r;
+}
+
+int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,
+				   dm_block_t *result)
+{
+	int r = -EINVAL;
+
+	down_read(&cmd->root_lock);
+	r = dm_sm_get_nr_blocks(cmd->metadata_sm, result);
+	up_read(&cmd->root_lock);
+
+	return r;
+}
+
+/*----------------------------------------------------------------*/
+
+static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
+{
+	int r;
+	__le32 value;
+	size_t hint_size;
+	const char *policy_name = dm_cache_policy_get_name(policy);
+
+	if (!policy_name[0] ||
+	    (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
+		return -EINVAL;
+
+	if (strcmp(cmd->policy_name, policy_name)) {
+		strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
+
+		hint_size = dm_cache_policy_get_hint_size(policy);
+		if (!hint_size)
+			return 0; /* short-circuit hints initialization */
+		cmd->policy_hint_size = hint_size;
+
+		if (cmd->hint_root) {
+			r = dm_array_del(&cmd->hint_info, cmd->hint_root);
+			if (r)
+				return r;
+		}
+
+		r = dm_array_empty(&cmd->hint_info, &cmd->hint_root);
+		if (r)
+			return r;
+
+		value = cpu_to_le32(0);
+		__dm_bless_for_disk(&value);
+		r = dm_array_resize(&cmd->hint_info, cmd->hint_root, 0,
+				    from_cblock(cmd->cache_blocks),
+				    &value, &cmd->hint_root);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
+{
+	int r;
+
+	down_write(&cmd->root_lock);
+	r = begin_hints(cmd, policy);
+	up_write(&cmd->root_lock);
+
+	return r;
+}
+
+static int save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock,
+		     uint32_t hint)
+{
+	int r;
+	__le32 value = cpu_to_le32(hint);
+	__dm_bless_for_disk(&value);
+
+	r = dm_array_set_value(&cmd->hint_info, cmd->hint_root,
+			       from_cblock(cblock), &value, &cmd->hint_root);
+	cmd->changed = true;
+
+	return r;
+}
+
+int dm_cache_save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock,
+		       uint32_t hint)
+{
+	int r;
+
+	if (!hints_array_initialized(cmd))
+		return 0;
+
+	down_write(&cmd->root_lock);
+	r = save_hint(cmd, cblock, hint);
+	up_write(&cmd->root_lock);
+
+	return r;
+}
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
new file mode 100644
index 000000000000..135864ea0eee
--- /dev/null
+++ b/drivers/md/dm-cache-metadata.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_CACHE_METADATA_H
+#define DM_CACHE_METADATA_H
+
+#include "dm-cache-block-types.h"
+#include "dm-cache-policy-internal.h"
+
+/*----------------------------------------------------------------*/
+
+#define DM_CACHE_METADATA_BLOCK_SIZE 4096
+
+/* FIXME: remove this restriction */
+/*
+ * The metadata device is currently limited in size.
+ *
+ * We have one block of index, which can hold 255 index entries.  Each
+ * index entry contains allocation info about 16k metadata blocks.
+ */
+#define DM_CACHE_METADATA_MAX_SECTORS (255 * (1 << 14) * (DM_CACHE_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT)))
+
+/*
+ * A metadata device larger than 16GB triggers a warning.
+ */
+#define DM_CACHE_METADATA_MAX_SECTORS_WARNING (16 * (1024 * 1024 * 1024 >> SECTOR_SHIFT))
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Ext[234]-style compat feature flags.
+ *
+ * A new feature which old metadata will still be compatible with should
+ * define a DM_CACHE_FEATURE_COMPAT_* flag (rarely useful).
+ *
+ * A new feature that is not compatible with old code should define a
+ * DM_CACHE_FEATURE_INCOMPAT_* flag and guard the relevant code with
+ * that flag.
+ *
+ * A new feature that is not compatible with old code accessing the
+ * metadata RDWR should define a DM_CACHE_FEATURE_RO_COMPAT_* flag and
+ * guard the relevant code with that flag.
+ *
+ * As these various flags are defined they should be added to the
+ * following masks.
+ */
+#define DM_CACHE_FEATURE_COMPAT_SUPP	  0UL
+#define DM_CACHE_FEATURE_COMPAT_RO_SUPP	  0UL
+#define DM_CACHE_FEATURE_INCOMPAT_SUPP	  0UL
+
+/*
+ * Reopens or creates a new, empty metadata volume.
+ * Returns an ERR_PTR on failure.
+ */
+struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
+						 sector_t data_block_size,
+						 bool may_format_device,
+						 size_t policy_hint_size);
+
+void dm_cache_metadata_close(struct dm_cache_metadata *cmd);
+
+/*
+ * The metadata needs to know how many cache blocks there are.  We don't
+ * care about the origin, assuming the core target is giving us valid
+ * origin blocks to map to.
+ */
+int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size);
+dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd);
+
+int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
+				   sector_t discard_block_size,
+				   dm_dblock_t new_nr_entries);
+
+typedef int (*load_discard_fn)(void *context, sector_t discard_block_size,
+			       dm_dblock_t dblock, bool discarded);
+int dm_cache_load_discards(struct dm_cache_metadata *cmd,
+			   load_discard_fn fn, void *context);
+
+int dm_cache_set_discard(struct dm_cache_metadata *cmd, dm_dblock_t dblock, bool discard);
+
+int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock);
+int dm_cache_insert_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock, dm_oblock_t oblock);
+int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd);
+
+typedef int (*load_mapping_fn)(void *context, dm_oblock_t oblock,
+			       dm_cblock_t cblock, bool dirty,
+			       uint32_t hint, bool hint_valid);
+int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
+			   const char *policy_name,
+			   load_mapping_fn fn,
+			   void *context);
+
+int dm_cache_set_dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty);
+
+struct dm_cache_statistics {
+	uint32_t read_hits;
+	uint32_t read_misses;
+	uint32_t write_hits;
+	uint32_t write_misses;
+};
+
+void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
+				 struct dm_cache_statistics *stats);
+void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
+				 struct dm_cache_statistics *stats);
+
+int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown);
+
+int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd,
+					   dm_block_t *result);
+
+int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,
+				   dm_block_t *result);
+
+void dm_cache_dump(struct dm_cache_metadata *cmd);
+
+/*
+ * The policy is invited to save a 32bit hint value for every cblock (eg,
+ * for a hit count).  These are stored against the policy name.  If
+ * policies are changed, then hints will be lost.  If the machine crashes,
+ * hints will be lost.
+ *
+ * The hints are indexed by the cblock, but many policies will not
+ * neccessarily have a fast way of accessing efficiently via cblock.  So
+ * rather than querying the policy for each cblock, we let it walk its data
+ * structures and fill in the hints in whatever order it wishes.
+ */
+
+int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *p);
+
+/*
+ * requests hints for every cblock and stores in the metadata device.
+ */
+int dm_cache_save_hint(struct dm_cache_metadata *cmd,
+		       dm_cblock_t cblock, uint32_t hint);
+
+/*----------------------------------------------------------------*/
+
+#endif /* DM_CACHE_METADATA_H */
diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c
new file mode 100644
index 000000000000..cc05d70b3cb8
--- /dev/null
+++ b/drivers/md/dm-cache-policy-cleaner.c
@@ -0,0 +1,464 @@
+/*
+ * Copyright (C) 2012 Red Hat. All rights reserved.
+ *
+ * writeback cache policy supporting flushing out dirty cache blocks.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-cache-policy.h"
+#include "dm.h"
+
+#include <linux/hash.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+/*----------------------------------------------------------------*/
+
+#define DM_MSG_PREFIX "cache cleaner"
+#define CLEANER_VERSION "1.0.0"
+
+/* Cache entry struct. */
+struct wb_cache_entry {
+	struct list_head list;
+	struct hlist_node hlist;
+
+	dm_oblock_t oblock;
+	dm_cblock_t cblock;
+	bool dirty:1;
+	bool pending:1;
+};
+
+struct hash {
+	struct hlist_head *table;
+	dm_block_t hash_bits;
+	unsigned nr_buckets;
+};
+
+struct policy {
+	struct dm_cache_policy policy;
+	spinlock_t lock;
+
+	struct list_head free;
+	struct list_head clean;
+	struct list_head clean_pending;
+	struct list_head dirty;
+
+	/*
+	 * We know exactly how many cblocks will be needed,
+	 * so we can allocate them up front.
+	 */
+	dm_cblock_t cache_size, nr_cblocks_allocated;
+	struct wb_cache_entry *cblocks;
+	struct hash chash;
+};
+
+/*----------------------------------------------------------------------------*/
+
+/*
+ * Low-level functions.
+ */
+static unsigned next_power(unsigned n, unsigned min)
+{
+	return roundup_pow_of_two(max(n, min));
+}
+
+static struct policy *to_policy(struct dm_cache_policy *p)
+{
+	return container_of(p, struct policy, policy);
+}
+
+static struct list_head *list_pop(struct list_head *q)
+{
+	struct list_head *r = q->next;
+
+	list_del(r);
+
+	return r;
+}
+
+/*----------------------------------------------------------------------------*/
+
+/* Allocate/free various resources. */
+static int alloc_hash(struct hash *hash, unsigned elts)
+{
+	hash->nr_buckets = next_power(elts >> 4, 16);
+	hash->hash_bits = ffs(hash->nr_buckets) - 1;
+	hash->table = vzalloc(sizeof(*hash->table) * hash->nr_buckets);
+
+	return hash->table ? 0 : -ENOMEM;
+}
+
+static void free_hash(struct hash *hash)
+{
+	vfree(hash->table);
+}
+
+static int alloc_cache_blocks_with_hash(struct policy *p, dm_cblock_t cache_size)
+{
+	int r = -ENOMEM;
+
+	p->cblocks = vzalloc(sizeof(*p->cblocks) * from_cblock(cache_size));
+	if (p->cblocks) {
+		unsigned u = from_cblock(cache_size);
+
+		while (u--)
+			list_add(&p->cblocks[u].list, &p->free);
+
+		p->nr_cblocks_allocated = 0;
+
+		/* Cache entries hash. */
+		r = alloc_hash(&p->chash, from_cblock(cache_size));
+		if (r)
+			vfree(p->cblocks);
+	}
+
+	return r;
+}
+
+static void free_cache_blocks_and_hash(struct policy *p)
+{
+	free_hash(&p->chash);
+	vfree(p->cblocks);
+}
+
+static struct wb_cache_entry *alloc_cache_entry(struct policy *p)
+{
+	struct wb_cache_entry *e;
+
+	BUG_ON(from_cblock(p->nr_cblocks_allocated) >= from_cblock(p->cache_size));
+
+	e = list_entry(list_pop(&p->free), struct wb_cache_entry, list);
+	p->nr_cblocks_allocated = to_cblock(from_cblock(p->nr_cblocks_allocated) + 1);
+
+	return e;
+}
+
+/*----------------------------------------------------------------------------*/
+
+/* Hash functions (lookup, insert, remove). */
+static struct wb_cache_entry *lookup_cache_entry(struct policy *p, dm_oblock_t oblock)
+{
+	struct hash *hash = &p->chash;
+	unsigned h = hash_64(from_oblock(oblock), hash->hash_bits);
+	struct wb_cache_entry *cur;
+	struct hlist_head *bucket = &hash->table[h];
+
+	hlist_for_each_entry(cur, bucket, hlist) {
+		if (cur->oblock == oblock) {
+			/* Move upfront bucket for faster access. */
+			hlist_del(&cur->hlist);
+			hlist_add_head(&cur->hlist, bucket);
+			return cur;
+		}
+	}
+
+	return NULL;
+}
+
+static void insert_cache_hash_entry(struct policy *p, struct wb_cache_entry *e)
+{
+	unsigned h = hash_64(from_oblock(e->oblock), p->chash.hash_bits);
+
+	hlist_add_head(&e->hlist, &p->chash.table[h]);
+}
+
+static void remove_cache_hash_entry(struct wb_cache_entry *e)
+{
+	hlist_del(&e->hlist);
+}
+
+/* Public interface (see dm-cache-policy.h */
+static int wb_map(struct dm_cache_policy *pe, dm_oblock_t oblock,
+		  bool can_block, bool can_migrate, bool discarded_oblock,
+		  struct bio *bio, struct policy_result *result)
+{
+	struct policy *p = to_policy(pe);
+	struct wb_cache_entry *e;
+	unsigned long flags;
+
+	result->op = POLICY_MISS;
+
+	if (can_block)
+		spin_lock_irqsave(&p->lock, flags);
+
+	else if (!spin_trylock_irqsave(&p->lock, flags))
+		return -EWOULDBLOCK;
+
+	e = lookup_cache_entry(p, oblock);
+	if (e) {
+		result->op = POLICY_HIT;
+		result->cblock = e->cblock;
+
+	}
+
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	return 0;
+}
+
+static int wb_lookup(struct dm_cache_policy *pe, dm_oblock_t oblock, dm_cblock_t *cblock)
+{
+	int r;
+	struct policy *p = to_policy(pe);
+	struct wb_cache_entry *e;
+	unsigned long flags;
+
+	if (!spin_trylock_irqsave(&p->lock, flags))
+		return -EWOULDBLOCK;
+
+	e = lookup_cache_entry(p, oblock);
+	if (e) {
+		*cblock = e->cblock;
+		r = 0;
+
+	} else
+		r = -ENOENT;
+
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	return r;
+}
+
+static void __set_clear_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock, bool set)
+{
+	struct policy *p = to_policy(pe);
+	struct wb_cache_entry *e;
+
+	e = lookup_cache_entry(p, oblock);
+	BUG_ON(!e);
+
+	if (set) {
+		if (!e->dirty) {
+			e->dirty = true;
+			list_move(&e->list, &p->dirty);
+		}
+
+	} else {
+		if (e->dirty) {
+			e->pending = false;
+			e->dirty = false;
+			list_move(&e->list, &p->clean);
+		}
+	}
+}
+
+static void wb_set_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock)
+{
+	struct policy *p = to_policy(pe);
+	unsigned long flags;
+
+	spin_lock_irqsave(&p->lock, flags);
+	__set_clear_dirty(pe, oblock, true);
+	spin_unlock_irqrestore(&p->lock, flags);
+}
+
+static void wb_clear_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock)
+{
+	struct policy *p = to_policy(pe);
+	unsigned long flags;
+
+	spin_lock_irqsave(&p->lock, flags);
+	__set_clear_dirty(pe, oblock, false);
+	spin_unlock_irqrestore(&p->lock, flags);
+}
+
+static void add_cache_entry(struct policy *p, struct wb_cache_entry *e)
+{
+	insert_cache_hash_entry(p, e);
+	if (e->dirty)
+		list_add(&e->list, &p->dirty);
+	else
+		list_add(&e->list, &p->clean);
+}
+
+static int wb_load_mapping(struct dm_cache_policy *pe,
+			   dm_oblock_t oblock, dm_cblock_t cblock,
+			   uint32_t hint, bool hint_valid)
+{
+	int r;
+	struct policy *p = to_policy(pe);
+	struct wb_cache_entry *e = alloc_cache_entry(p);
+
+	if (e) {
+		e->cblock = cblock;
+		e->oblock = oblock;
+		e->dirty = false; /* blocks default to clean */
+		add_cache_entry(p, e);
+		r = 0;
+
+	} else
+		r = -ENOMEM;
+
+	return r;
+}
+
+static void wb_destroy(struct dm_cache_policy *pe)
+{
+	struct policy *p = to_policy(pe);
+
+	free_cache_blocks_and_hash(p);
+	kfree(p);
+}
+
+static struct wb_cache_entry *__wb_force_remove_mapping(struct policy *p, dm_oblock_t oblock)
+{
+	struct wb_cache_entry *r = lookup_cache_entry(p, oblock);
+
+	BUG_ON(!r);
+
+	remove_cache_hash_entry(r);
+	list_del(&r->list);
+
+	return r;
+}
+
+static void wb_remove_mapping(struct dm_cache_policy *pe, dm_oblock_t oblock)
+{
+	struct policy *p = to_policy(pe);
+	struct wb_cache_entry *e;
+	unsigned long flags;
+
+	spin_lock_irqsave(&p->lock, flags);
+	e = __wb_force_remove_mapping(p, oblock);
+	list_add_tail(&e->list, &p->free);
+	BUG_ON(!from_cblock(p->nr_cblocks_allocated));
+	p->nr_cblocks_allocated = to_cblock(from_cblock(p->nr_cblocks_allocated) - 1);
+	spin_unlock_irqrestore(&p->lock, flags);
+}
+
+static void wb_force_mapping(struct dm_cache_policy *pe,
+				dm_oblock_t current_oblock, dm_oblock_t oblock)
+{
+	struct policy *p = to_policy(pe);
+	struct wb_cache_entry *e;
+	unsigned long flags;
+
+	spin_lock_irqsave(&p->lock, flags);
+	e = __wb_force_remove_mapping(p, current_oblock);
+	e->oblock = oblock;
+	add_cache_entry(p, e);
+	spin_unlock_irqrestore(&p->lock, flags);
+}
+
+static struct wb_cache_entry *get_next_dirty_entry(struct policy *p)
+{
+	struct list_head *l;
+	struct wb_cache_entry *r;
+
+	if (list_empty(&p->dirty))
+		return NULL;
+
+	l = list_pop(&p->dirty);
+	r = container_of(l, struct wb_cache_entry, list);
+	list_add(l, &p->clean_pending);
+
+	return r;
+}
+
+static int wb_writeback_work(struct dm_cache_policy *pe,
+			     dm_oblock_t *oblock,
+			     dm_cblock_t *cblock)
+{
+	int r = -ENOENT;
+	struct policy *p = to_policy(pe);
+	struct wb_cache_entry *e;
+	unsigned long flags;
+
+	spin_lock_irqsave(&p->lock, flags);
+
+	e = get_next_dirty_entry(p);
+	if (e) {
+		*oblock = e->oblock;
+		*cblock = e->cblock;
+		r = 0;
+	}
+
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	return r;
+}
+
+static dm_cblock_t wb_residency(struct dm_cache_policy *pe)
+{
+	return to_policy(pe)->nr_cblocks_allocated;
+}
+
+/* Init the policy plugin interface function pointers. */
+static void init_policy_functions(struct policy *p)
+{
+	p->policy.destroy = wb_destroy;
+	p->policy.map = wb_map;
+	p->policy.lookup = wb_lookup;
+	p->policy.set_dirty = wb_set_dirty;
+	p->policy.clear_dirty = wb_clear_dirty;
+	p->policy.load_mapping = wb_load_mapping;
+	p->policy.walk_mappings = NULL;
+	p->policy.remove_mapping = wb_remove_mapping;
+	p->policy.writeback_work = wb_writeback_work;
+	p->policy.force_mapping = wb_force_mapping;
+	p->policy.residency = wb_residency;
+	p->policy.tick = NULL;
+}
+
+static struct dm_cache_policy *wb_create(dm_cblock_t cache_size,
+					 sector_t origin_size,
+					 sector_t cache_block_size)
+{
+	int r;
+	struct policy *p = kzalloc(sizeof(*p), GFP_KERNEL);
+
+	if (!p)
+		return NULL;
+
+	init_policy_functions(p);
+	INIT_LIST_HEAD(&p->free);
+	INIT_LIST_HEAD(&p->clean);
+	INIT_LIST_HEAD(&p->clean_pending);
+	INIT_LIST_HEAD(&p->dirty);
+
+	p->cache_size = cache_size;
+	spin_lock_init(&p->lock);
+
+	/* Allocate cache entry structs and add them to free list. */
+	r = alloc_cache_blocks_with_hash(p, cache_size);
+	if (!r)
+		return &p->policy;
+
+	kfree(p);
+
+	return NULL;
+}
+/*----------------------------------------------------------------------------*/
+
+static struct dm_cache_policy_type wb_policy_type = {
+	.name = "cleaner",
+	.hint_size = 0,
+	.owner = THIS_MODULE,
+	.create = wb_create
+};
+
+static int __init wb_init(void)
+{
+	int r = dm_cache_policy_register(&wb_policy_type);
+
+	if (r < 0)
+		DMERR("register failed %d", r);
+	else
+		DMINFO("version " CLEANER_VERSION " loaded");
+
+	return r;
+}
+
+static void __exit wb_exit(void)
+{
+	dm_cache_policy_unregister(&wb_policy_type);
+}
+
+module_init(wb_init);
+module_exit(wb_exit);
+
+MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("cleaner cache policy");
diff --git a/drivers/md/dm-cache-policy-internal.h b/drivers/md/dm-cache-policy-internal.h
new file mode 100644
index 000000000000..52a75beeced5
--- /dev/null
+++ b/drivers/md/dm-cache-policy-internal.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2012 Red Hat. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_CACHE_POLICY_INTERNAL_H
+#define DM_CACHE_POLICY_INTERNAL_H
+
+#include "dm-cache-policy.h"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Little inline functions that simplify calling the policy methods.
+ */
+static inline int policy_map(struct dm_cache_policy *p, dm_oblock_t oblock,
+			     bool can_block, bool can_migrate, bool discarded_oblock,
+			     struct bio *bio, struct policy_result *result)
+{
+	return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, result);
+}
+
+static inline int policy_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
+{
+	BUG_ON(!p->lookup);
+	return p->lookup(p, oblock, cblock);
+}
+
+static inline void policy_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
+{
+	if (p->set_dirty)
+		p->set_dirty(p, oblock);
+}
+
+static inline void policy_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
+{
+	if (p->clear_dirty)
+		p->clear_dirty(p, oblock);
+}
+
+static inline int policy_load_mapping(struct dm_cache_policy *p,
+				      dm_oblock_t oblock, dm_cblock_t cblock,
+				      uint32_t hint, bool hint_valid)
+{
+	return p->load_mapping(p, oblock, cblock, hint, hint_valid);
+}
+
+static inline int policy_walk_mappings(struct dm_cache_policy *p,
+				      policy_walk_fn fn, void *context)
+{
+	return p->walk_mappings ? p->walk_mappings(p, fn, context) : 0;
+}
+
+static inline int policy_writeback_work(struct dm_cache_policy *p,
+					dm_oblock_t *oblock,
+					dm_cblock_t *cblock)
+{
+	return p->writeback_work ? p->writeback_work(p, oblock, cblock) : -ENOENT;
+}
+
+static inline void policy_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
+{
+	return p->remove_mapping(p, oblock);
+}
+
+static inline void policy_force_mapping(struct dm_cache_policy *p,
+					dm_oblock_t current_oblock, dm_oblock_t new_oblock)
+{
+	return p->force_mapping(p, current_oblock, new_oblock);
+}
+
+static inline dm_cblock_t policy_residency(struct dm_cache_policy *p)
+{
+	return p->residency(p);
+}
+
+static inline void policy_tick(struct dm_cache_policy *p)
+{
+	if (p->tick)
+		return p->tick(p);
+}
+
+static inline int policy_emit_config_values(struct dm_cache_policy *p, char *result, unsigned maxlen)
+{
+	ssize_t sz = 0;
+	if (p->emit_config_values)
+		return p->emit_config_values(p, result, maxlen);
+
+	DMEMIT("0");
+	return 0;
+}
+
+static inline int policy_set_config_value(struct dm_cache_policy *p,
+					  const char *key, const char *value)
+{
+	return p->set_config_value ? p->set_config_value(p, key, value) : -EINVAL;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Creates a new cache policy given a policy name, a cache size, an origin size and the block size.
+ */
+struct dm_cache_policy *dm_cache_policy_create(const char *name, dm_cblock_t cache_size,
+					       sector_t origin_size, sector_t block_size);
+
+/*
+ * Destroys the policy.  This drops references to the policy module as well
+ * as calling it's destroy method.  So always use this rather than calling
+ * the policy->destroy method directly.
+ */
+void dm_cache_policy_destroy(struct dm_cache_policy *p);
+
+/*
+ * In case we've forgotten.
+ */
+const char *dm_cache_policy_get_name(struct dm_cache_policy *p);
+
+size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p);
+
+/*----------------------------------------------------------------*/
+
+#endif /* DM_CACHE_POLICY_INTERNAL_H */
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
new file mode 100644
index 000000000000..964153255076
--- /dev/null
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -0,0 +1,1195 @@
+/*
+ * Copyright (C) 2012 Red Hat. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-cache-policy.h"
+#include "dm.h"
+
+#include <linux/hash.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#define DM_MSG_PREFIX "cache-policy-mq"
+#define MQ_VERSION	"1.0.0"
+
+static struct kmem_cache *mq_entry_cache;
+
+/*----------------------------------------------------------------*/
+
+static unsigned next_power(unsigned n, unsigned min)
+{
+	return roundup_pow_of_two(max(n, min));
+}
+
+/*----------------------------------------------------------------*/
+
+static unsigned long *alloc_bitset(unsigned nr_entries)
+{
+	size_t s = sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
+	return vzalloc(s);
+}
+
+static void free_bitset(unsigned long *bits)
+{
+	vfree(bits);
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Large, sequential ios are probably better left on the origin device since
+ * spindles tend to have good bandwidth.
+ *
+ * The io_tracker tries to spot when the io is in one of these sequential
+ * modes.
+ *
+ * Two thresholds to switch between random and sequential io mode are defaulting
+ * as follows and can be adjusted via the constructor and message interfaces.
+ */
+#define RANDOM_THRESHOLD_DEFAULT 4
+#define SEQUENTIAL_THRESHOLD_DEFAULT 512
+
+enum io_pattern {
+	PATTERN_SEQUENTIAL,
+	PATTERN_RANDOM
+};
+
+struct io_tracker {
+	enum io_pattern pattern;
+
+	unsigned nr_seq_samples;
+	unsigned nr_rand_samples;
+	unsigned thresholds[2];
+
+	dm_oblock_t last_end_oblock;
+};
+
+static void iot_init(struct io_tracker *t,
+		     int sequential_threshold, int random_threshold)
+{
+	t->pattern = PATTERN_RANDOM;
+	t->nr_seq_samples = 0;
+	t->nr_rand_samples = 0;
+	t->last_end_oblock = 0;
+	t->thresholds[PATTERN_RANDOM] = random_threshold;
+	t->thresholds[PATTERN_SEQUENTIAL] = sequential_threshold;
+}
+
+static enum io_pattern iot_pattern(struct io_tracker *t)
+{
+	return t->pattern;
+}
+
+static void iot_update_stats(struct io_tracker *t, struct bio *bio)
+{
+	if (bio->bi_sector == from_oblock(t->last_end_oblock) + 1)
+		t->nr_seq_samples++;
+	else {
+		/*
+		 * Just one non-sequential IO is enough to reset the
+		 * counters.
+		 */
+		if (t->nr_seq_samples) {
+			t->nr_seq_samples = 0;
+			t->nr_rand_samples = 0;
+		}
+
+		t->nr_rand_samples++;
+	}
+
+	t->last_end_oblock = to_oblock(bio->bi_sector + bio_sectors(bio) - 1);
+}
+
+static void iot_check_for_pattern_switch(struct io_tracker *t)
+{
+	switch (t->pattern) {
+	case PATTERN_SEQUENTIAL:
+		if (t->nr_rand_samples >= t->thresholds[PATTERN_RANDOM]) {
+			t->pattern = PATTERN_RANDOM;
+			t->nr_seq_samples = t->nr_rand_samples = 0;
+		}
+		break;
+
+	case PATTERN_RANDOM:
+		if (t->nr_seq_samples >= t->thresholds[PATTERN_SEQUENTIAL]) {
+			t->pattern = PATTERN_SEQUENTIAL;
+			t->nr_seq_samples = t->nr_rand_samples = 0;
+		}
+		break;
+	}
+}
+
+static void iot_examine_bio(struct io_tracker *t, struct bio *bio)
+{
+	iot_update_stats(t, bio);
+	iot_check_for_pattern_switch(t);
+}
+
+/*----------------------------------------------------------------*/
+
+
+/*
+ * This queue is divided up into different levels.  Allowing us to push
+ * entries to the back of any of the levels.  Think of it as a partially
+ * sorted queue.
+ */
+#define NR_QUEUE_LEVELS 16u
+
+struct queue {
+	struct list_head qs[NR_QUEUE_LEVELS];
+};
+
+static void queue_init(struct queue *q)
+{
+	unsigned i;
+
+	for (i = 0; i < NR_QUEUE_LEVELS; i++)
+		INIT_LIST_HEAD(q->qs + i);
+}
+
+/*
+ * Insert an entry to the back of the given level.
+ */
+static void queue_push(struct queue *q, unsigned level, struct list_head *elt)
+{
+	list_add_tail(elt, q->qs + level);
+}
+
+static void queue_remove(struct list_head *elt)
+{
+	list_del(elt);
+}
+
+/*
+ * Shifts all regions down one level.  This has no effect on the order of
+ * the queue.
+ */
+static void queue_shift_down(struct queue *q)
+{
+	unsigned level;
+
+	for (level = 1; level < NR_QUEUE_LEVELS; level++)
+		list_splice_init(q->qs + level, q->qs + level - 1);
+}
+
+/*
+ * Gives us the oldest entry of the lowest popoulated level.  If the first
+ * level is emptied then we shift down one level.
+ */
+static struct list_head *queue_pop(struct queue *q)
+{
+	unsigned level;
+	struct list_head *r;
+
+	for (level = 0; level < NR_QUEUE_LEVELS; level++)
+		if (!list_empty(q->qs + level)) {
+			r = q->qs[level].next;
+			list_del(r);
+
+			/* have we just emptied the bottom level? */
+			if (level == 0 && list_empty(q->qs))
+				queue_shift_down(q);
+
+			return r;
+		}
+
+	return NULL;
+}
+
+static struct list_head *list_pop(struct list_head *lh)
+{
+	struct list_head *r = lh->next;
+
+	BUG_ON(!r);
+	list_del_init(r);
+
+	return r;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Describes a cache entry.  Used in both the cache and the pre_cache.
+ */
+struct entry {
+	struct hlist_node hlist;
+	struct list_head list;
+	dm_oblock_t oblock;
+	dm_cblock_t cblock;	/* valid iff in_cache */
+
+	/*
+	 * FIXME: pack these better
+	 */
+	bool in_cache:1;
+	unsigned hit_count;
+	unsigned generation;
+	unsigned tick;
+};
+
+struct mq_policy {
+	struct dm_cache_policy policy;
+
+	/* protects everything */
+	struct mutex lock;
+	dm_cblock_t cache_size;
+	struct io_tracker tracker;
+
+	/*
+	 * We maintain two queues of entries.  The cache proper contains
+	 * the currently active mappings.  Whereas the pre_cache tracks
+	 * blocks that are being hit frequently and potential candidates
+	 * for promotion to the cache.
+	 */
+	struct queue pre_cache;
+	struct queue cache;
+
+	/*
+	 * Keeps track of time, incremented by the core.  We use this to
+	 * avoid attributing multiple hits within the same tick.
+	 *
+	 * Access to tick_protected should be done with the spin lock held.
+	 * It's copied to tick at the start of the map function (within the
+	 * mutex).
+	 */
+	spinlock_t tick_lock;
+	unsigned tick_protected;
+	unsigned tick;
+
+	/*
+	 * A count of the number of times the map function has been called
+	 * and found an entry in the pre_cache or cache.  Currently used to
+	 * calculate the generation.
+	 */
+	unsigned hit_count;
+
+	/*
+	 * A generation is a longish period that is used to trigger some
+	 * book keeping effects.  eg, decrementing hit counts on entries.
+	 * This is needed to allow the cache to evolve as io patterns
+	 * change.
+	 */
+	unsigned generation;
+	unsigned generation_period; /* in lookups (will probably change) */
+
+	/*
+	 * Entries in the pre_cache whose hit count passes the promotion
+	 * threshold move to the cache proper.  Working out the correct
+	 * value for the promotion_threshold is crucial to this policy.
+	 */
+	unsigned promote_threshold;
+
+	/*
+	 * We need cache_size entries for the cache, and choose to have
+	 * cache_size entries for the pre_cache too.  One motivation for
+	 * using the same size is to make the hit counts directly
+	 * comparable between pre_cache and cache.
+	 */
+	unsigned nr_entries;
+	unsigned nr_entries_allocated;
+	struct list_head free;
+
+	/*
+	 * Cache blocks may be unallocated.  We store this info in a
+	 * bitset.
+	 */
+	unsigned long *allocation_bitset;
+	unsigned nr_cblocks_allocated;
+	unsigned find_free_nr_words;
+	unsigned find_free_last_word;
+
+	/*
+	 * The hash table allows us to quickly find an entry by origin
+	 * block.  Both pre_cache and cache entries are in here.
+	 */
+	unsigned nr_buckets;
+	dm_block_t hash_bits;
+	struct hlist_head *table;
+};
+
+/*----------------------------------------------------------------*/
+/* Free/alloc mq cache entry structures. */
+static void takeout_queue(struct list_head *lh, struct queue *q)
+{
+	unsigned level;
+
+	for (level = 0; level < NR_QUEUE_LEVELS; level++)
+		list_splice(q->qs + level, lh);
+}
+
+static void free_entries(struct mq_policy *mq)
+{
+	struct entry *e, *tmp;
+
+	takeout_queue(&mq->free, &mq->pre_cache);
+	takeout_queue(&mq->free, &mq->cache);
+
+	list_for_each_entry_safe(e, tmp, &mq->free, list)
+		kmem_cache_free(mq_entry_cache, e);
+}
+
+static int alloc_entries(struct mq_policy *mq, unsigned elts)
+{
+	unsigned u = mq->nr_entries;
+
+	INIT_LIST_HEAD(&mq->free);
+	mq->nr_entries_allocated = 0;
+
+	while (u--) {
+		struct entry *e = kmem_cache_zalloc(mq_entry_cache, GFP_KERNEL);
+
+		if (!e) {
+			free_entries(mq);
+			return -ENOMEM;
+		}
+
+
+		list_add(&e->list, &mq->free);
+	}
+
+	return 0;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Simple hash table implementation.  Should replace with the standard hash
+ * table that's making its way upstream.
+ */
+static void hash_insert(struct mq_policy *mq, struct entry *e)
+{
+	unsigned h = hash_64(from_oblock(e->oblock), mq->hash_bits);
+
+	hlist_add_head(&e->hlist, mq->table + h);
+}
+
+static struct entry *hash_lookup(struct mq_policy *mq, dm_oblock_t oblock)
+{
+	unsigned h = hash_64(from_oblock(oblock), mq->hash_bits);
+	struct hlist_head *bucket = mq->table + h;
+	struct entry *e;
+
+	hlist_for_each_entry(e, bucket, hlist)
+		if (e->oblock == oblock) {
+			hlist_del(&e->hlist);
+			hlist_add_head(&e->hlist, bucket);
+			return e;
+		}
+
+	return NULL;
+}
+
+static void hash_remove(struct entry *e)
+{
+	hlist_del(&e->hlist);
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Allocates a new entry structure.  The memory is allocated in one lump,
+ * so we just handing it out here.  Returns NULL if all entries have
+ * already been allocated.  Cannot fail otherwise.
+ */
+static struct entry *alloc_entry(struct mq_policy *mq)
+{
+	struct entry *e;
+
+	if (mq->nr_entries_allocated >= mq->nr_entries) {
+		BUG_ON(!list_empty(&mq->free));
+		return NULL;
+	}
+
+	e = list_entry(list_pop(&mq->free), struct entry, list);
+	INIT_LIST_HEAD(&e->list);
+	INIT_HLIST_NODE(&e->hlist);
+
+	mq->nr_entries_allocated++;
+	return e;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Mark cache blocks allocated or not in the bitset.
+ */
+static void alloc_cblock(struct mq_policy *mq, dm_cblock_t cblock)
+{
+	BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size));
+	BUG_ON(test_bit(from_cblock(cblock), mq->allocation_bitset));
+
+	set_bit(from_cblock(cblock), mq->allocation_bitset);
+	mq->nr_cblocks_allocated++;
+}
+
+static void free_cblock(struct mq_policy *mq, dm_cblock_t cblock)
+{
+	BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size));
+	BUG_ON(!test_bit(from_cblock(cblock), mq->allocation_bitset));
+
+	clear_bit(from_cblock(cblock), mq->allocation_bitset);
+	mq->nr_cblocks_allocated--;
+}
+
+static bool any_free_cblocks(struct mq_policy *mq)
+{
+	return mq->nr_cblocks_allocated < from_cblock(mq->cache_size);
+}
+
+/*
+ * Fills result out with a cache block that isn't in use, or return
+ * -ENOSPC.  This does _not_ mark the cblock as allocated, the caller is
+ * reponsible for that.
+ */
+static int __find_free_cblock(struct mq_policy *mq, unsigned begin, unsigned end,
+			      dm_cblock_t *result, unsigned *last_word)
+{
+	int r = -ENOSPC;
+	unsigned w;
+
+	for (w = begin; w < end; w++) {
+		/*
+		 * ffz is undefined if no zero exists
+		 */
+		if (mq->allocation_bitset[w] != ~0UL) {
+			*last_word = w;
+			*result = to_cblock((w * BITS_PER_LONG) + ffz(mq->allocation_bitset[w]));
+			if (from_cblock(*result) < from_cblock(mq->cache_size))
+				r = 0;
+
+			break;
+		}
+	}
+
+	return r;
+}
+
+static int find_free_cblock(struct mq_policy *mq, dm_cblock_t *result)
+{
+	int r;
+
+	if (!any_free_cblocks(mq))
+		return -ENOSPC;
+
+	r = __find_free_cblock(mq, mq->find_free_last_word, mq->find_free_nr_words, result, &mq->find_free_last_word);
+	if (r == -ENOSPC && mq->find_free_last_word)
+		r = __find_free_cblock(mq, 0, mq->find_free_last_word, result, &mq->find_free_last_word);
+
+	return r;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Now we get to the meat of the policy.  This section deals with deciding
+ * when to to add entries to the pre_cache and cache, and move between
+ * them.
+ */
+
+/*
+ * The queue level is based on the log2 of the hit count.
+ */
+static unsigned queue_level(struct entry *e)
+{
+	return min((unsigned) ilog2(e->hit_count), NR_QUEUE_LEVELS - 1u);
+}
+
+/*
+ * Inserts the entry into the pre_cache or the cache.  Ensures the cache
+ * block is marked as allocated if necc.  Inserts into the hash table.  Sets the
+ * tick which records when the entry was last moved about.
+ */
+static void push(struct mq_policy *mq, struct entry *e)
+{
+	e->tick = mq->tick;
+	hash_insert(mq, e);
+
+	if (e->in_cache) {
+		alloc_cblock(mq, e->cblock);
+		queue_push(&mq->cache, queue_level(e), &e->list);
+	} else
+		queue_push(&mq->pre_cache, queue_level(e), &e->list);
+}
+
+/*
+ * Removes an entry from pre_cache or cache.  Removes from the hash table.
+ * Frees off the cache block if necc.
+ */
+static void del(struct mq_policy *mq, struct entry *e)
+{
+	queue_remove(&e->list);
+	hash_remove(e);
+	if (e->in_cache)
+		free_cblock(mq, e->cblock);
+}
+
+/*
+ * Like del, except it removes the first entry in the queue (ie. the least
+ * recently used).
+ */
+static struct entry *pop(struct mq_policy *mq, struct queue *q)
+{
+	struct entry *e = container_of(queue_pop(q), struct entry, list);
+
+	if (e) {
+		hash_remove(e);
+
+		if (e->in_cache)
+			free_cblock(mq, e->cblock);
+	}
+
+	return e;
+}
+
+/*
+ * Has this entry already been updated?
+ */
+static bool updated_this_tick(struct mq_policy *mq, struct entry *e)
+{
+	return mq->tick == e->tick;
+}
+
+/*
+ * The promotion threshold is adjusted every generation.  As are the counts
+ * of the entries.
+ *
+ * At the moment the threshold is taken by averaging the hit counts of some
+ * of the entries in the cache (the first 20 entries of the first level).
+ *
+ * We can be much cleverer than this though.  For example, each promotion
+ * could bump up the threshold helping to prevent churn.  Much more to do
+ * here.
+ */
+
+#define MAX_TO_AVERAGE 20
+
+static void check_generation(struct mq_policy *mq)
+{
+	unsigned total = 0, nr = 0, count = 0, level;
+	struct list_head *head;
+	struct entry *e;
+
+	if ((mq->hit_count >= mq->generation_period) &&
+	    (mq->nr_cblocks_allocated == from_cblock(mq->cache_size))) {
+
+		mq->hit_count = 0;
+		mq->generation++;
+
+		for (level = 0; level < NR_QUEUE_LEVELS && count < MAX_TO_AVERAGE; level++) {
+			head = mq->cache.qs + level;
+			list_for_each_entry(e, head, list) {
+				nr++;
+				total += e->hit_count;
+
+				if (++count >= MAX_TO_AVERAGE)
+					break;
+			}
+		}
+
+		mq->promote_threshold = nr ? total / nr : 1;
+		if (mq->promote_threshold * nr < total)
+			mq->promote_threshold++;
+	}
+}
+
+/*
+ * Whenever we use an entry we bump up it's hit counter, and push it to the
+ * back to it's current level.
+ */
+static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e)
+{
+	if (updated_this_tick(mq, e))
+		return;
+
+	e->hit_count++;
+	mq->hit_count++;
+	check_generation(mq);
+
+	/* generation adjustment, to stop the counts increasing forever. */
+	/* FIXME: divide? */
+	/* e->hit_count -= min(e->hit_count - 1, mq->generation - e->generation); */
+	e->generation = mq->generation;
+
+	del(mq, e);
+	push(mq, e);
+}
+
+/*
+ * Demote the least recently used entry from the cache to the pre_cache.
+ * Returns the new cache entry to use, and the old origin block it was
+ * mapped to.
+ *
+ * We drop the hit count on the demoted entry back to 1 to stop it bouncing
+ * straight back into the cache if it's subsequently hit.  There are
+ * various options here, and more experimentation would be good:
+ *
+ * - just forget about the demoted entry completely (ie. don't insert it
+     into the pre_cache).
+ * - divide the hit count rather that setting to some hard coded value.
+ * - set the hit count to a hard coded value other than 1, eg, is it better
+ *   if it goes in at level 2?
+ */
+static dm_cblock_t demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
+{
+	dm_cblock_t result;
+	struct entry *demoted = pop(mq, &mq->cache);
+
+	BUG_ON(!demoted);
+	result = demoted->cblock;
+	*oblock = demoted->oblock;
+	demoted->in_cache = false;
+	demoted->hit_count = 1;
+	push(mq, demoted);
+
+	return result;
+}
+
+/*
+ * We modify the basic promotion_threshold depending on the specific io.
+ *
+ * If the origin block has been discarded then there's no cost to copy it
+ * to the cache.
+ *
+ * We bias towards reads, since they can be demoted at no cost if they
+ * haven't been dirtied.
+ */
+#define DISCARDED_PROMOTE_THRESHOLD 1
+#define READ_PROMOTE_THRESHOLD 4
+#define WRITE_PROMOTE_THRESHOLD 8
+
+static unsigned adjusted_promote_threshold(struct mq_policy *mq,
+					   bool discarded_oblock, int data_dir)
+{
+	if (discarded_oblock && any_free_cblocks(mq) && data_dir == WRITE)
+		/*
+		 * We don't need to do any copying at all, so give this a
+		 * very low threshold.  In practice this only triggers
+		 * during initial population after a format.
+		 */
+		return DISCARDED_PROMOTE_THRESHOLD;
+
+	return data_dir == READ ?
+		(mq->promote_threshold + READ_PROMOTE_THRESHOLD) :
+		(mq->promote_threshold + WRITE_PROMOTE_THRESHOLD);
+}
+
+static bool should_promote(struct mq_policy *mq, struct entry *e,
+			   bool discarded_oblock, int data_dir)
+{
+	return e->hit_count >=
+		adjusted_promote_threshold(mq, discarded_oblock, data_dir);
+}
+
+static int cache_entry_found(struct mq_policy *mq,
+			     struct entry *e,
+			     struct policy_result *result)
+{
+	requeue_and_update_tick(mq, e);
+
+	if (e->in_cache) {
+		result->op = POLICY_HIT;
+		result->cblock = e->cblock;
+	}
+
+	return 0;
+}
+
+/*
+ * Moves and entry from the pre_cache to the cache.  The main work is
+ * finding which cache block to use.
+ */
+static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
+			      struct policy_result *result)
+{
+	dm_cblock_t cblock;
+
+	if (find_free_cblock(mq, &cblock) == -ENOSPC) {
+		result->op = POLICY_REPLACE;
+		cblock = demote_cblock(mq, &result->old_oblock);
+	} else
+		result->op = POLICY_NEW;
+
+	result->cblock = e->cblock = cblock;
+
+	del(mq, e);
+	e->in_cache = true;
+	push(mq, e);
+
+	return 0;
+}
+
+static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e,
+				 bool can_migrate, bool discarded_oblock,
+				 int data_dir, struct policy_result *result)
+{
+	int r = 0;
+	bool updated = updated_this_tick(mq, e);
+
+	requeue_and_update_tick(mq, e);
+
+	if ((!discarded_oblock && updated) ||
+	    !should_promote(mq, e, discarded_oblock, data_dir))
+		result->op = POLICY_MISS;
+	else if (!can_migrate)
+		r = -EWOULDBLOCK;
+	else
+		r = pre_cache_to_cache(mq, e, result);
+
+	return r;
+}
+
+static void insert_in_pre_cache(struct mq_policy *mq,
+				dm_oblock_t oblock)
+{
+	struct entry *e = alloc_entry(mq);
+
+	if (!e)
+		/*
+		 * There's no spare entry structure, so we grab the least
+		 * used one from the pre_cache.
+		 */
+		e = pop(mq, &mq->pre_cache);
+
+	if (unlikely(!e)) {
+		DMWARN("couldn't pop from pre cache");
+		return;
+	}
+
+	e->in_cache = false;
+	e->oblock = oblock;
+	e->hit_count = 1;
+	e->generation = mq->generation;
+	push(mq, e);
+}
+
+static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
+			    struct policy_result *result)
+{
+	struct entry *e;
+	dm_cblock_t cblock;
+
+	if (find_free_cblock(mq, &cblock) == -ENOSPC) {
+		result->op = POLICY_MISS;
+		insert_in_pre_cache(mq, oblock);
+		return;
+	}
+
+	e = alloc_entry(mq);
+	if (unlikely(!e)) {
+		result->op = POLICY_MISS;
+		return;
+	}
+
+	e->oblock = oblock;
+	e->cblock = cblock;
+	e->in_cache = true;
+	e->hit_count = 1;
+	e->generation = mq->generation;
+	push(mq, e);
+
+	result->op = POLICY_NEW;
+	result->cblock = e->cblock;
+}
+
+static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock,
+			  bool can_migrate, bool discarded_oblock,
+			  int data_dir, struct policy_result *result)
+{
+	if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) == 1) {
+		if (can_migrate)
+			insert_in_cache(mq, oblock, result);
+		else
+			return -EWOULDBLOCK;
+	} else {
+		insert_in_pre_cache(mq, oblock);
+		result->op = POLICY_MISS;
+	}
+
+	return 0;
+}
+
+/*
+ * Looks the oblock up in the hash table, then decides whether to put in
+ * pre_cache, or cache etc.
+ */
+static int map(struct mq_policy *mq, dm_oblock_t oblock,
+	       bool can_migrate, bool discarded_oblock,
+	       int data_dir, struct policy_result *result)
+{
+	int r = 0;
+	struct entry *e = hash_lookup(mq, oblock);
+
+	if (e && e->in_cache)
+		r = cache_entry_found(mq, e, result);
+	else if (iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL)
+		result->op = POLICY_MISS;
+	else if (e)
+		r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock,
+					  data_dir, result);
+	else
+		r = no_entry_found(mq, oblock, can_migrate, discarded_oblock,
+				   data_dir, result);
+
+	if (r == -EWOULDBLOCK)
+		result->op = POLICY_MISS;
+
+	return r;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Public interface, via the policy struct.  See dm-cache-policy.h for a
+ * description of these.
+ */
+
+static struct mq_policy *to_mq_policy(struct dm_cache_policy *p)
+{
+	return container_of(p, struct mq_policy, policy);
+}
+
+static void mq_destroy(struct dm_cache_policy *p)
+{
+	struct mq_policy *mq = to_mq_policy(p);
+
+	free_bitset(mq->allocation_bitset);
+	kfree(mq->table);
+	free_entries(mq);
+	kfree(mq);
+}
+
+static void copy_tick(struct mq_policy *mq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&mq->tick_lock, flags);
+	mq->tick = mq->tick_protected;
+	spin_unlock_irqrestore(&mq->tick_lock, flags);
+}
+
+static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
+		  bool can_block, bool can_migrate, bool discarded_oblock,
+		  struct bio *bio, struct policy_result *result)
+{
+	int r;
+	struct mq_policy *mq = to_mq_policy(p);
+
+	result->op = POLICY_MISS;
+
+	if (can_block)
+		mutex_lock(&mq->lock);
+	else if (!mutex_trylock(&mq->lock))
+		return -EWOULDBLOCK;
+
+	copy_tick(mq);
+
+	iot_examine_bio(&mq->tracker, bio);
+	r = map(mq, oblock, can_migrate, discarded_oblock,
+		bio_data_dir(bio), result);
+
+	mutex_unlock(&mq->lock);
+
+	return r;
+}
+
+static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
+{
+	int r;
+	struct mq_policy *mq = to_mq_policy(p);
+	struct entry *e;
+
+	if (!mutex_trylock(&mq->lock))
+		return -EWOULDBLOCK;
+
+	e = hash_lookup(mq, oblock);
+	if (e && e->in_cache) {
+		*cblock = e->cblock;
+		r = 0;
+	} else
+		r = -ENOENT;
+
+	mutex_unlock(&mq->lock);
+
+	return r;
+}
+
+static int mq_load_mapping(struct dm_cache_policy *p,
+			   dm_oblock_t oblock, dm_cblock_t cblock,
+			   uint32_t hint, bool hint_valid)
+{
+	struct mq_policy *mq = to_mq_policy(p);
+	struct entry *e;
+
+	e = alloc_entry(mq);
+	if (!e)
+		return -ENOMEM;
+
+	e->cblock = cblock;
+	e->oblock = oblock;
+	e->in_cache = true;
+	e->hit_count = hint_valid ? hint : 1;
+	e->generation = mq->generation;
+	push(mq, e);
+
+	return 0;
+}
+
+static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
+			    void *context)
+{
+	struct mq_policy *mq = to_mq_policy(p);
+	int r = 0;
+	struct entry *e;
+	unsigned level;
+
+	mutex_lock(&mq->lock);
+
+	for (level = 0; level < NR_QUEUE_LEVELS; level++)
+		list_for_each_entry(e, &mq->cache.qs[level], list) {
+			r = fn(context, e->cblock, e->oblock, e->hit_count);
+			if (r)
+				goto out;
+		}
+
+out:
+	mutex_unlock(&mq->lock);
+
+	return r;
+}
+
+static void remove_mapping(struct mq_policy *mq, dm_oblock_t oblock)
+{
+	struct entry *e = hash_lookup(mq, oblock);
+
+	BUG_ON(!e || !e->in_cache);
+
+	del(mq, e);
+	e->in_cache = false;
+	push(mq, e);
+}
+
+static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
+{
+	struct mq_policy *mq = to_mq_policy(p);
+
+	mutex_lock(&mq->lock);
+	remove_mapping(mq, oblock);
+	mutex_unlock(&mq->lock);
+}
+
+static void force_mapping(struct mq_policy *mq,
+			  dm_oblock_t current_oblock, dm_oblock_t new_oblock)
+{
+	struct entry *e = hash_lookup(mq, current_oblock);
+
+	BUG_ON(!e || !e->in_cache);
+
+	del(mq, e);
+	e->oblock = new_oblock;
+	push(mq, e);
+}
+
+static void mq_force_mapping(struct dm_cache_policy *p,
+			     dm_oblock_t current_oblock, dm_oblock_t new_oblock)
+{
+	struct mq_policy *mq = to_mq_policy(p);
+
+	mutex_lock(&mq->lock);
+	force_mapping(mq, current_oblock, new_oblock);
+	mutex_unlock(&mq->lock);
+}
+
+static dm_cblock_t mq_residency(struct dm_cache_policy *p)
+{
+	struct mq_policy *mq = to_mq_policy(p);
+
+	/* FIXME: lock mutex, not sure we can block here */
+	return to_cblock(mq->nr_cblocks_allocated);
+}
+
+static void mq_tick(struct dm_cache_policy *p)
+{
+	struct mq_policy *mq = to_mq_policy(p);
+	unsigned long flags;
+
+	spin_lock_irqsave(&mq->tick_lock, flags);
+	mq->tick_protected++;
+	spin_unlock_irqrestore(&mq->tick_lock, flags);
+}
+
+static int mq_set_config_value(struct dm_cache_policy *p,
+			       const char *key, const char *value)
+{
+	struct mq_policy *mq = to_mq_policy(p);
+	enum io_pattern pattern;
+	unsigned long tmp;
+
+	if (!strcasecmp(key, "random_threshold"))
+		pattern = PATTERN_RANDOM;
+	else if (!strcasecmp(key, "sequential_threshold"))
+		pattern = PATTERN_SEQUENTIAL;
+	else
+		return -EINVAL;
+
+	if (kstrtoul(value, 10, &tmp))
+		return -EINVAL;
+
+	mq->tracker.thresholds[pattern] = tmp;
+
+	return 0;
+}
+
+static int mq_emit_config_values(struct dm_cache_policy *p, char *result, unsigned maxlen)
+{
+	ssize_t sz = 0;
+	struct mq_policy *mq = to_mq_policy(p);
+
+	DMEMIT("4 random_threshold %u sequential_threshold %u",
+	       mq->tracker.thresholds[PATTERN_RANDOM],
+	       mq->tracker.thresholds[PATTERN_SEQUENTIAL]);
+
+	return 0;
+}
+
+/* Init the policy plugin interface function pointers. */
+static void init_policy_functions(struct mq_policy *mq)
+{
+	mq->policy.destroy = mq_destroy;
+	mq->policy.map = mq_map;
+	mq->policy.lookup = mq_lookup;
+	mq->policy.load_mapping = mq_load_mapping;
+	mq->policy.walk_mappings = mq_walk_mappings;
+	mq->policy.remove_mapping = mq_remove_mapping;
+	mq->policy.writeback_work = NULL;
+	mq->policy.force_mapping = mq_force_mapping;
+	mq->policy.residency = mq_residency;
+	mq->policy.tick = mq_tick;
+	mq->policy.emit_config_values = mq_emit_config_values;
+	mq->policy.set_config_value = mq_set_config_value;
+}
+
+static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
+					 sector_t origin_size,
+					 sector_t cache_block_size)
+{
+	int r;
+	struct mq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL);
+
+	if (!mq)
+		return NULL;
+
+	init_policy_functions(mq);
+	iot_init(&mq->tracker, SEQUENTIAL_THRESHOLD_DEFAULT, RANDOM_THRESHOLD_DEFAULT);
+
+	mq->cache_size = cache_size;
+	mq->tick_protected = 0;
+	mq->tick = 0;
+	mq->hit_count = 0;
+	mq->generation = 0;
+	mq->promote_threshold = 0;
+	mutex_init(&mq->lock);
+	spin_lock_init(&mq->tick_lock);
+	mq->find_free_nr_words = dm_div_up(from_cblock(mq->cache_size), BITS_PER_LONG);
+	mq->find_free_last_word = 0;
+
+	queue_init(&mq->pre_cache);
+	queue_init(&mq->cache);
+	mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U);
+
+	mq->nr_entries = 2 * from_cblock(cache_size);
+	r = alloc_entries(mq, mq->nr_entries);
+	if (r)
+		goto bad_cache_alloc;
+
+	mq->nr_entries_allocated = 0;
+	mq->nr_cblocks_allocated = 0;
+
+	mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16);
+	mq->hash_bits = ffs(mq->nr_buckets) - 1;
+	mq->table = kzalloc(sizeof(*mq->table) * mq->nr_buckets, GFP_KERNEL);
+	if (!mq->table)
+		goto bad_alloc_table;
+
+	mq->allocation_bitset = alloc_bitset(from_cblock(cache_size));
+	if (!mq->allocation_bitset)
+		goto bad_alloc_bitset;
+
+	return &mq->policy;
+
+bad_alloc_bitset:
+	kfree(mq->table);
+bad_alloc_table:
+	free_entries(mq);
+bad_cache_alloc:
+	kfree(mq);
+
+	return NULL;
+}
+
+/*----------------------------------------------------------------*/
+
+static struct dm_cache_policy_type mq_policy_type = {
+	.name = "mq",
+	.hint_size = 4,
+	.owner = THIS_MODULE,
+	.create = mq_create
+};
+
+static struct dm_cache_policy_type default_policy_type = {
+	.name = "default",
+	.hint_size = 4,
+	.owner = THIS_MODULE,
+	.create = mq_create
+};
+
+static int __init mq_init(void)
+{
+	int r;
+
+	mq_entry_cache = kmem_cache_create("dm_mq_policy_cache_entry",
+					   sizeof(struct entry),
+					   __alignof__(struct entry),
+					   0, NULL);
+	if (!mq_entry_cache)
+		goto bad;
+
+	r = dm_cache_policy_register(&mq_policy_type);
+	if (r) {
+		DMERR("register failed %d", r);
+		goto bad_register_mq;
+	}
+
+	r = dm_cache_policy_register(&default_policy_type);
+	if (!r) {
+		DMINFO("version " MQ_VERSION " loaded");
+		return 0;
+	}
+
+	DMERR("register failed (as default) %d", r);
+
+	dm_cache_policy_unregister(&mq_policy_type);
+bad_register_mq:
+	kmem_cache_destroy(mq_entry_cache);
+bad:
+	return -ENOMEM;
+}
+
+static void __exit mq_exit(void)
+{
+	dm_cache_policy_unregister(&mq_policy_type);
+	dm_cache_policy_unregister(&default_policy_type);
+
+	kmem_cache_destroy(mq_entry_cache);
+}
+
+module_init(mq_init);
+module_exit(mq_exit);
+
+MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("mq cache policy");
+
+MODULE_ALIAS("dm-cache-default");
diff --git a/drivers/md/dm-cache-policy.c b/drivers/md/dm-cache-policy.c
new file mode 100644
index 000000000000..2cbf5fdaac52
--- /dev/null
+++ b/drivers/md/dm-cache-policy.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright (C) 2012 Red Hat. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-cache-policy-internal.h"
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/slab.h>
+
+/*----------------------------------------------------------------*/
+
+#define DM_MSG_PREFIX "cache-policy"
+
+static DEFINE_SPINLOCK(register_lock);
+static LIST_HEAD(register_list);
+
+static struct dm_cache_policy_type *__find_policy(const char *name)
+{
+	struct dm_cache_policy_type *t;
+
+	list_for_each_entry(t, &register_list, list)
+		if (!strcmp(t->name, name))
+			return t;
+
+	return NULL;
+}
+
+static struct dm_cache_policy_type *__get_policy_once(const char *name)
+{
+	struct dm_cache_policy_type *t = __find_policy(name);
+
+	if (t && !try_module_get(t->owner)) {
+		DMWARN("couldn't get module %s", name);
+		t = ERR_PTR(-EINVAL);
+	}
+
+	return t;
+}
+
+static struct dm_cache_policy_type *get_policy_once(const char *name)
+{
+	struct dm_cache_policy_type *t;
+
+	spin_lock(&register_lock);
+	t = __get_policy_once(name);
+	spin_unlock(&register_lock);
+
+	return t;
+}
+
+static struct dm_cache_policy_type *get_policy(const char *name)
+{
+	struct dm_cache_policy_type *t;
+
+	t = get_policy_once(name);
+	if (IS_ERR(t))
+		return NULL;
+
+	if (t)
+		return t;
+
+	request_module("dm-cache-%s", name);
+
+	t = get_policy_once(name);
+	if (IS_ERR(t))
+		return NULL;
+
+	return t;
+}
+
+static void put_policy(struct dm_cache_policy_type *t)
+{
+	module_put(t->owner);
+}
+
+int dm_cache_policy_register(struct dm_cache_policy_type *type)
+{
+	int r;
+
+	/* One size fits all for now */
+	if (type->hint_size != 0 && type->hint_size != 4) {
+		DMWARN("hint size must be 0 or 4 but %llu supplied.", (unsigned long long) type->hint_size);
+		return -EINVAL;
+	}
+
+	spin_lock(&register_lock);
+	if (__find_policy(type->name)) {
+		DMWARN("attempt to register policy under duplicate name %s", type->name);
+		r = -EINVAL;
+	} else {
+		list_add(&type->list, &register_list);
+		r = 0;
+	}
+	spin_unlock(&register_lock);
+
+	return r;
+}
+EXPORT_SYMBOL_GPL(dm_cache_policy_register);
+
+void dm_cache_policy_unregister(struct dm_cache_policy_type *type)
+{
+	spin_lock(&register_lock);
+	list_del_init(&type->list);
+	spin_unlock(&register_lock);
+}
+EXPORT_SYMBOL_GPL(dm_cache_policy_unregister);
+
+struct dm_cache_policy *dm_cache_policy_create(const char *name,
+					       dm_cblock_t cache_size,
+					       sector_t origin_size,
+					       sector_t cache_block_size)
+{
+	struct dm_cache_policy *p = NULL;
+	struct dm_cache_policy_type *type;
+
+	type = get_policy(name);
+	if (!type) {
+		DMWARN("unknown policy type");
+		return NULL;
+	}
+
+	p = type->create(cache_size, origin_size, cache_block_size);
+	if (!p) {
+		put_policy(type);
+		return NULL;
+	}
+	p->private = type;
+
+	return p;
+}
+EXPORT_SYMBOL_GPL(dm_cache_policy_create);
+
+void dm_cache_policy_destroy(struct dm_cache_policy *p)
+{
+	struct dm_cache_policy_type *t = p->private;
+
+	p->destroy(p);
+	put_policy(t);
+}
+EXPORT_SYMBOL_GPL(dm_cache_policy_destroy);
+
+const char *dm_cache_policy_get_name(struct dm_cache_policy *p)
+{
+	struct dm_cache_policy_type *t = p->private;
+
+	return t->name;
+}
+EXPORT_SYMBOL_GPL(dm_cache_policy_get_name);
+
+size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p)
+{
+	struct dm_cache_policy_type *t = p->private;
+
+	return t->hint_size;
+}
+EXPORT_SYMBOL_GPL(dm_cache_policy_get_hint_size);
+
+/*----------------------------------------------------------------*/
diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h
new file mode 100644
index 000000000000..f0f51b260544
--- /dev/null
+++ b/drivers/md/dm-cache-policy.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2012 Red Hat. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_CACHE_POLICY_H
+#define DM_CACHE_POLICY_H
+
+#include "dm-cache-block-types.h"
+
+#include <linux/device-mapper.h>
+
+/*----------------------------------------------------------------*/
+
+/* FIXME: make it clear which methods are optional.  Get debug policy to
+ * double check this at start.
+ */
+
+/*
+ * The cache policy makes the important decisions about which blocks get to
+ * live on the faster cache device.
+ *
+ * When the core target has to remap a bio it calls the 'map' method of the
+ * policy.  This returns an instruction telling the core target what to do.
+ *
+ * POLICY_HIT:
+ *   That block is in the cache.  Remap to the cache and carry on.
+ *
+ * POLICY_MISS:
+ *   This block is on the origin device.  Remap and carry on.
+ *
+ * POLICY_NEW:
+ *   This block is currently on the origin device, but the policy wants to
+ *   move it.  The core should:
+ *
+ *   - hold any further io to this origin block
+ *   - copy the origin to the given cache block
+ *   - release all the held blocks
+ *   - remap the original block to the cache
+ *
+ * POLICY_REPLACE:
+ *   This block is currently on the origin device.  The policy wants to
+ *   move it to the cache, with the added complication that the destination
+ *   cache block needs a writeback first.  The core should:
+ *
+ *   - hold any further io to this origin block
+ *   - hold any further io to the origin block that's being written back
+ *   - writeback
+ *   - copy new block to cache
+ *   - release held blocks
+ *   - remap bio to cache and reissue.
+ *
+ * Should the core run into trouble while processing a POLICY_NEW or
+ * POLICY_REPLACE instruction it will roll back the policies mapping using
+ * remove_mapping() or force_mapping().  These methods must not fail.  This
+ * approach avoids having transactional semantics in the policy (ie, the
+ * core informing the policy when a migration is complete), and hence makes
+ * it easier to write new policies.
+ *
+ * In general policy methods should never block, except in the case of the
+ * map function when can_migrate is set.  So be careful to implement using
+ * bounded, preallocated memory.
+ */
+enum policy_operation {
+	POLICY_HIT,
+	POLICY_MISS,
+	POLICY_NEW,
+	POLICY_REPLACE
+};
+
+/*
+ * This is the instruction passed back to the core target.
+ */
+struct policy_result {
+	enum policy_operation op;
+	dm_oblock_t old_oblock;	/* POLICY_REPLACE */
+	dm_cblock_t cblock;	/* POLICY_HIT, POLICY_NEW, POLICY_REPLACE */
+};
+
+typedef int (*policy_walk_fn)(void *context, dm_cblock_t cblock,
+			      dm_oblock_t oblock, uint32_t hint);
+
+/*
+ * The cache policy object.  Just a bunch of methods.  It is envisaged that
+ * this structure will be embedded in a bigger, policy specific structure
+ * (ie. use container_of()).
+ */
+struct dm_cache_policy {
+
+	/*
+	 * FIXME: make it clear which methods are optional, and which may
+	 * block.
+	 */
+
+	/*
+	 * Destroys this object.
+	 */
+	void (*destroy)(struct dm_cache_policy *p);
+
+	/*
+	 * See large comment above.
+	 *
+	 * oblock      - the origin block we're interested in.
+	 *
+	 * can_block - indicates whether the current thread is allowed to
+	 *             block.  -EWOULDBLOCK returned if it can't and would.
+	 *
+	 * can_migrate - gives permission for POLICY_NEW or POLICY_REPLACE
+	 *               instructions.  If denied and the policy would have
+	 *               returned one of these instructions it should
+	 *               return -EWOULDBLOCK.
+	 *
+	 * discarded_oblock - indicates whether the whole origin block is
+	 *               in a discarded state (FIXME: better to tell the
+	 *               policy about this sooner, so it can recycle that
+	 *               cache block if it wants.)
+	 * bio         - the bio that triggered this call.
+	 * result      - gets filled in with the instruction.
+	 *
+	 * May only return 0, or -EWOULDBLOCK (if !can_migrate)
+	 */
+	int (*map)(struct dm_cache_policy *p, dm_oblock_t oblock,
+		   bool can_block, bool can_migrate, bool discarded_oblock,
+		   struct bio *bio, struct policy_result *result);
+
+	/*
+	 * Sometimes we want to see if a block is in the cache, without
+	 * triggering any update of stats.  (ie. it's not a real hit).
+	 *
+	 * Must not block.
+	 *
+	 * Returns 1 iff in cache, 0 iff not, < 0 on error (-EWOULDBLOCK
+	 * would be typical).
+	 */
+	int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock);
+
+	/*
+	 * oblock must be a mapped block.  Must not block.
+	 */
+	void (*set_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock);
+	void (*clear_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock);
+
+	/*
+	 * Called when a cache target is first created.  Used to load a
+	 * mapping from the metadata device into the policy.
+	 */
+	int (*load_mapping)(struct dm_cache_policy *p, dm_oblock_t oblock,
+			    dm_cblock_t cblock, uint32_t hint, bool hint_valid);
+
+	int (*walk_mappings)(struct dm_cache_policy *p, policy_walk_fn fn,
+			     void *context);
+
+	/*
+	 * Override functions used on the error paths of the core target.
+	 * They must succeed.
+	 */
+	void (*remove_mapping)(struct dm_cache_policy *p, dm_oblock_t oblock);
+	void (*force_mapping)(struct dm_cache_policy *p, dm_oblock_t current_oblock,
+			      dm_oblock_t new_oblock);
+
+	int (*writeback_work)(struct dm_cache_policy *p, dm_oblock_t *oblock, dm_cblock_t *cblock);
+
+
+	/*
+	 * How full is the cache?
+	 */
+	dm_cblock_t (*residency)(struct dm_cache_policy *p);
+
+	/*
+	 * Because of where we sit in the block layer, we can be asked to
+	 * map a lot of little bios that are all in the same block (no
+	 * queue merging has occurred).  To stop the policy being fooled by
+	 * these the core target sends regular tick() calls to the policy.
+	 * The policy should only count an entry as hit once per tick.
+	 */
+	void (*tick)(struct dm_cache_policy *p);
+
+	/*
+	 * Configuration.
+	 */
+	int (*emit_config_values)(struct dm_cache_policy *p,
+				  char *result, unsigned maxlen);
+	int (*set_config_value)(struct dm_cache_policy *p,
+				const char *key, const char *value);
+
+	/*
+	 * Book keeping ptr for the policy register, not for general use.
+	 */
+	void *private;
+};
+
+/*----------------------------------------------------------------*/
+
+/*
+ * We maintain a little register of the different policy types.
+ */
+#define CACHE_POLICY_NAME_SIZE 16
+
+struct dm_cache_policy_type {
+	/* For use by the register code only. */
+	struct list_head list;
+
+	/*
+	 * Policy writers should fill in these fields.  The name field is
+	 * what gets passed on the target line to select your policy.
+	 */
+	char name[CACHE_POLICY_NAME_SIZE];
+
+	/*
+	 * Policies may store a hint for each each cache block.
+	 * Currently the size of this hint must be 0 or 4 bytes but we
+	 * expect to relax this in future.
+	 */
+	size_t hint_size;
+
+	struct module *owner;
+	struct dm_cache_policy *(*create)(dm_cblock_t cache_size,
+					  sector_t origin_size,
+					  sector_t block_size);
+};
+
+int dm_cache_policy_register(struct dm_cache_policy_type *type);
+void dm_cache_policy_unregister(struct dm_cache_policy_type *type);
+
+/*----------------------------------------------------------------*/
+
+#endif	/* DM_CACHE_POLICY_H */
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
new file mode 100644
index 000000000000..0f4e84b15c30
--- /dev/null
+++ b/drivers/md/dm-cache-target.c
@@ -0,0 +1,2584 @@
+/*
+ * Copyright (C) 2012 Red Hat. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+#include "dm-bio-prison.h"
+#include "dm-cache-metadata.h"
+
+#include <linux/dm-io.h>
+#include <linux/dm-kcopyd.h>
+#include <linux/init.h>
+#include <linux/mempool.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#define DM_MSG_PREFIX "cache"
+
+DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
+	"A percentage of time allocated for copying to and/or from cache");
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Glossary:
+ *
+ * oblock: index of an origin block
+ * cblock: index of a cache block
+ * promotion: movement of a block from origin to cache
+ * demotion: movement of a block from cache to origin
+ * migration: movement of a block between the origin and cache device,
+ *	      either direction
+ */
+
+/*----------------------------------------------------------------*/
+
+static size_t bitset_size_in_bytes(unsigned nr_entries)
+{
+	return sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
+}
+
+static unsigned long *alloc_bitset(unsigned nr_entries)
+{
+	size_t s = bitset_size_in_bytes(nr_entries);
+	return vzalloc(s);
+}
+
+static void clear_bitset(void *bitset, unsigned nr_entries)
+{
+	size_t s = bitset_size_in_bytes(nr_entries);
+	memset(bitset, 0, s);
+}
+
+static void free_bitset(unsigned long *bits)
+{
+	vfree(bits);
+}
+
+/*----------------------------------------------------------------*/
+
+#define PRISON_CELLS 1024
+#define MIGRATION_POOL_SIZE 128
+#define COMMIT_PERIOD HZ
+#define MIGRATION_COUNT_WINDOW 10
+
+/*
+ * The block size of the device holding cache data must be >= 32KB
+ */
+#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
+
+/*
+ * FIXME: the cache is read/write for the time being.
+ */
+enum cache_mode {
+	CM_WRITE,		/* metadata may be changed */
+	CM_READ_ONLY,		/* metadata may not be changed */
+};
+
+struct cache_features {
+	enum cache_mode mode;
+	bool write_through:1;
+};
+
+struct cache_stats {
+	atomic_t read_hit;
+	atomic_t read_miss;
+	atomic_t write_hit;
+	atomic_t write_miss;
+	atomic_t demotion;
+	atomic_t promotion;
+	atomic_t copies_avoided;
+	atomic_t cache_cell_clash;
+	atomic_t commit_count;
+	atomic_t discard_count;
+};
+
+struct cache {
+	struct dm_target *ti;
+	struct dm_target_callbacks callbacks;
+
+	/*
+	 * Metadata is written to this device.
+	 */
+	struct dm_dev *metadata_dev;
+
+	/*
+	 * The slower of the two data devices.  Typically a spindle.
+	 */
+	struct dm_dev *origin_dev;
+
+	/*
+	 * The faster of the two data devices.  Typically an SSD.
+	 */
+	struct dm_dev *cache_dev;
+
+	/*
+	 * Cache features such as write-through.
+	 */
+	struct cache_features features;
+
+	/*
+	 * Size of the origin device in _complete_ blocks and native sectors.
+	 */
+	dm_oblock_t origin_blocks;
+	sector_t origin_sectors;
+
+	/*
+	 * Size of the cache device in blocks.
+	 */
+	dm_cblock_t cache_size;
+
+	/*
+	 * Fields for converting from sectors to blocks.
+	 */
+	uint32_t sectors_per_block;
+	int sectors_per_block_shift;
+
+	struct dm_cache_metadata *cmd;
+
+	spinlock_t lock;
+	struct bio_list deferred_bios;
+	struct bio_list deferred_flush_bios;
+	struct list_head quiesced_migrations;
+	struct list_head completed_migrations;
+	struct list_head need_commit_migrations;
+	sector_t migration_threshold;
+	atomic_t nr_migrations;
+	wait_queue_head_t migration_wait;
+
+	/*
+	 * cache_size entries, dirty if set
+	 */
+	dm_cblock_t nr_dirty;
+	unsigned long *dirty_bitset;
+
+	/*
+	 * origin_blocks entries, discarded if set.
+	 */
+	sector_t discard_block_size; /* a power of 2 times sectors per block */
+	dm_dblock_t discard_nr_blocks;
+	unsigned long *discard_bitset;
+
+	struct dm_kcopyd_client *copier;
+	struct workqueue_struct *wq;
+	struct work_struct worker;
+
+	struct delayed_work waker;
+	unsigned long last_commit_jiffies;
+
+	struct dm_bio_prison *prison;
+	struct dm_deferred_set *all_io_ds;
+
+	mempool_t *migration_pool;
+	struct dm_cache_migration *next_migration;
+
+	struct dm_cache_policy *policy;
+	unsigned policy_nr_args;
+
+	bool need_tick_bio:1;
+	bool sized:1;
+	bool quiescing:1;
+	bool commit_requested:1;
+	bool loaded_mappings:1;
+	bool loaded_discards:1;
+
+	struct cache_stats stats;
+
+	/*
+	 * Rather than reconstructing the table line for the status we just
+	 * save it and regurgitate.
+	 */
+	unsigned nr_ctr_args;
+	const char **ctr_args;
+};
+
+struct per_bio_data {
+	bool tick:1;
+	unsigned req_nr:2;
+	struct dm_deferred_entry *all_io_entry;
+};
+
+struct dm_cache_migration {
+	struct list_head list;
+	struct cache *cache;
+
+	unsigned long start_jiffies;
+	dm_oblock_t old_oblock;
+	dm_oblock_t new_oblock;
+	dm_cblock_t cblock;
+
+	bool err:1;
+	bool writeback:1;
+	bool demote:1;
+	bool promote:1;
+
+	struct dm_bio_prison_cell *old_ocell;
+	struct dm_bio_prison_cell *new_ocell;
+};
+
+/*
+ * Processing a bio in the worker thread may require these memory
+ * allocations.  We prealloc to avoid deadlocks (the same worker thread
+ * frees them back to the mempool).
+ */
+struct prealloc {
+	struct dm_cache_migration *mg;
+	struct dm_bio_prison_cell *cell1;
+	struct dm_bio_prison_cell *cell2;
+};
+
+static void wake_worker(struct cache *cache)
+{
+	queue_work(cache->wq, &cache->worker);
+}
+
+/*----------------------------------------------------------------*/
+
+static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
+{
+	/* FIXME: change to use a local slab. */
+	return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT);
+}
+
+static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
+{
+	dm_bio_prison_free_cell(cache->prison, cell);
+}
+
+static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
+{
+	if (!p->mg) {
+		p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
+		if (!p->mg)
+			return -ENOMEM;
+	}
+
+	if (!p->cell1) {
+		p->cell1 = alloc_prison_cell(cache);
+		if (!p->cell1)
+			return -ENOMEM;
+	}
+
+	if (!p->cell2) {
+		p->cell2 = alloc_prison_cell(cache);
+		if (!p->cell2)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
+{
+	if (p->cell2)
+		free_prison_cell(cache, p->cell2);
+
+	if (p->cell1)
+		free_prison_cell(cache, p->cell1);
+
+	if (p->mg)
+		mempool_free(p->mg, cache->migration_pool);
+}
+
+static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
+{
+	struct dm_cache_migration *mg = p->mg;
+
+	BUG_ON(!mg);
+	p->mg = NULL;
+
+	return mg;
+}
+
+/*
+ * You must have a cell within the prealloc struct to return.  If not this
+ * function will BUG() rather than returning NULL.
+ */
+static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
+{
+	struct dm_bio_prison_cell *r = NULL;
+
+	if (p->cell1) {
+		r = p->cell1;
+		p->cell1 = NULL;
+
+	} else if (p->cell2) {
+		r = p->cell2;
+		p->cell2 = NULL;
+	} else
+		BUG();
+
+	return r;
+}
+
+/*
+ * You can't have more than two cells in a prealloc struct.  BUG() will be
+ * called if you try and overfill.
+ */
+static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
+{
+	if (!p->cell2)
+		p->cell2 = cell;
+
+	else if (!p->cell1)
+		p->cell1 = cell;
+
+	else
+		BUG();
+}
+
+/*----------------------------------------------------------------*/
+
+static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
+{
+	key->virtual = 0;
+	key->dev = 0;
+	key->block = from_oblock(oblock);
+}
+
+/*
+ * The caller hands in a preallocated cell, and a free function for it.
+ * The cell will be freed if there's an error, or if it wasn't used because
+ * a cell with that key already exists.
+ */
+typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);
+
+static int bio_detain(struct cache *cache, dm_oblock_t oblock,
+		      struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
+		      cell_free_fn free_fn, void *free_context,
+		      struct dm_bio_prison_cell **cell_result)
+{
+	int r;
+	struct dm_cell_key key;
+
+	build_key(oblock, &key);
+	r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
+	if (r)
+		free_fn(free_context, cell_prealloc);
+
+	return r;
+}
+
+static int get_cell(struct cache *cache,
+		    dm_oblock_t oblock,
+		    struct prealloc *structs,
+		    struct dm_bio_prison_cell **cell_result)
+{
+	int r;
+	struct dm_cell_key key;
+	struct dm_bio_prison_cell *cell_prealloc;
+
+	cell_prealloc = prealloc_get_cell(structs);
+
+	build_key(oblock, &key);
+	r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
+	if (r)
+		prealloc_put_cell(structs, cell_prealloc);
+
+	return r;
+}
+
+ /*----------------------------------------------------------------*/
+
+static bool is_dirty(struct cache *cache, dm_cblock_t b)
+{
+	return test_bit(from_cblock(b), cache->dirty_bitset);
+}
+
+static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
+{
+	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
+		cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) + 1);
+		policy_set_dirty(cache->policy, oblock);
+	}
+}
+
+static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
+{
+	if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
+		policy_clear_dirty(cache->policy, oblock);
+		cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) - 1);
+		if (!from_cblock(cache->nr_dirty))
+			dm_table_event(cache->ti->table);
+	}
+}
+
+/*----------------------------------------------------------------*/
+static bool block_size_is_power_of_two(struct cache *cache)
+{
+	return cache->sectors_per_block_shift >= 0;
+}
+
+static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
+{
+	sector_t discard_blocks = cache->discard_block_size;
+	dm_block_t b = from_oblock(oblock);
+
+	if (!block_size_is_power_of_two(cache))
+		(void) sector_div(discard_blocks, cache->sectors_per_block);
+	else
+		discard_blocks >>= cache->sectors_per_block_shift;
+
+	(void) sector_div(b, discard_blocks);
+
+	return to_dblock(b);
+}
+
+static void set_discard(struct cache *cache, dm_dblock_t b)
+{
+	unsigned long flags;
+
+	atomic_inc(&cache->stats.discard_count);
+
+	spin_lock_irqsave(&cache->lock, flags);
+	set_bit(from_dblock(b), cache->discard_bitset);
+	spin_unlock_irqrestore(&cache->lock, flags);
+}
+
+static void clear_discard(struct cache *cache, dm_dblock_t b)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cache->lock, flags);
+	clear_bit(from_dblock(b), cache->discard_bitset);
+	spin_unlock_irqrestore(&cache->lock, flags);
+}
+
+static bool is_discarded(struct cache *cache, dm_dblock_t b)
+{
+	int r;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cache->lock, flags);
+	r = test_bit(from_dblock(b), cache->discard_bitset);
+	spin_unlock_irqrestore(&cache->lock, flags);
+
+	return r;
+}
+
+static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
+{
+	int r;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cache->lock, flags);
+	r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
+		     cache->discard_bitset);
+	spin_unlock_irqrestore(&cache->lock, flags);
+
+	return r;
+}
+
+/*----------------------------------------------------------------*/
+
+static void load_stats(struct cache *cache)
+{
+	struct dm_cache_statistics stats;
+
+	dm_cache_metadata_get_stats(cache->cmd, &stats);
+	atomic_set(&cache->stats.read_hit, stats.read_hits);
+	atomic_set(&cache->stats.read_miss, stats.read_misses);
+	atomic_set(&cache->stats.write_hit, stats.write_hits);
+	atomic_set(&cache->stats.write_miss, stats.write_misses);
+}
+
+static void save_stats(struct cache *cache)
+{
+	struct dm_cache_statistics stats;
+
+	stats.read_hits = atomic_read(&cache->stats.read_hit);
+	stats.read_misses = atomic_read(&cache->stats.read_miss);
+	stats.write_hits = atomic_read(&cache->stats.write_hit);
+	stats.write_misses = atomic_read(&cache->stats.write_miss);
+
+	dm_cache_metadata_set_stats(cache->cmd, &stats);
+}
+
+/*----------------------------------------------------------------
+ * Per bio data
+ *--------------------------------------------------------------*/
+static struct per_bio_data *get_per_bio_data(struct bio *bio)
+{
+	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
+	BUG_ON(!pb);
+	return pb;
+}
+
+static struct per_bio_data *init_per_bio_data(struct bio *bio)
+{
+	struct per_bio_data *pb = get_per_bio_data(bio);
+
+	pb->tick = false;
+	pb->req_nr = dm_bio_get_target_bio_nr(bio);
+	pb->all_io_entry = NULL;
+
+	return pb;
+}
+
+/*----------------------------------------------------------------
+ * Remapping
+ *--------------------------------------------------------------*/
+static void remap_to_origin(struct cache *cache, struct bio *bio)
+{
+	bio->bi_bdev = cache->origin_dev->bdev;
+}
+
+static void remap_to_cache(struct cache *cache, struct bio *bio,
+			   dm_cblock_t cblock)
+{
+	sector_t bi_sector = bio->bi_sector;
+
+	bio->bi_bdev = cache->cache_dev->bdev;
+	if (!block_size_is_power_of_two(cache))
+		bio->bi_sector = (from_cblock(cblock) * cache->sectors_per_block) +
+				sector_div(bi_sector, cache->sectors_per_block);
+	else
+		bio->bi_sector = (from_cblock(cblock) << cache->sectors_per_block_shift) |
+				(bi_sector & (cache->sectors_per_block - 1));
+}
+
+static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
+{
+	unsigned long flags;
+	struct per_bio_data *pb = get_per_bio_data(bio);
+
+	spin_lock_irqsave(&cache->lock, flags);
+	if (cache->need_tick_bio &&
+	    !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) {
+		pb->tick = true;
+		cache->need_tick_bio = false;
+	}
+	spin_unlock_irqrestore(&cache->lock, flags);
+}
+
+static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
+				  dm_oblock_t oblock)
+{
+	check_if_tick_bio_needed(cache, bio);
+	remap_to_origin(cache, bio);
+	if (bio_data_dir(bio) == WRITE)
+		clear_discard(cache, oblock_to_dblock(cache, oblock));
+}
+
+static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
+				 dm_oblock_t oblock, dm_cblock_t cblock)
+{
+	remap_to_cache(cache, bio, cblock);
+	if (bio_data_dir(bio) == WRITE) {
+		set_dirty(cache, oblock, cblock);
+		clear_discard(cache, oblock_to_dblock(cache, oblock));
+	}
+}
+
+static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
+{
+	sector_t block_nr = bio->bi_sector;
+
+	if (!block_size_is_power_of_two(cache))
+		(void) sector_div(block_nr, cache->sectors_per_block);
+	else
+		block_nr >>= cache->sectors_per_block_shift;
+
+	return to_oblock(block_nr);
+}
+
+static int bio_triggers_commit(struct cache *cache, struct bio *bio)
+{
+	return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
+}
+
+static void issue(struct cache *cache, struct bio *bio)
+{
+	unsigned long flags;
+
+	if (!bio_triggers_commit(cache, bio)) {
+		generic_make_request(bio);
+		return;
+	}
+
+	/*
+	 * Batch together any bios that trigger commits and then issue a
+	 * single commit for them in do_worker().
+	 */
+	spin_lock_irqsave(&cache->lock, flags);
+	cache->commit_requested = true;
+	bio_list_add(&cache->deferred_flush_bios, bio);
+	spin_unlock_irqrestore(&cache->lock, flags);
+}
+
+/*----------------------------------------------------------------
+ * Migration processing
+ *
+ * Migration covers moving data from the origin device to the cache, or
+ * vice versa.
+ *--------------------------------------------------------------*/
+static void free_migration(struct dm_cache_migration *mg)
+{
+	mempool_free(mg, mg->cache->migration_pool);
+}
+
+static void inc_nr_migrations(struct cache *cache)
+{
+	atomic_inc(&cache->nr_migrations);
+}
+
+static void dec_nr_migrations(struct cache *cache)
+{
+	atomic_dec(&cache->nr_migrations);
+
+	/*
+	 * Wake the worker in case we're suspending the target.
+	 */
+	wake_up(&cache->migration_wait);
+}
+
+static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
+			 bool holder)
+{
+	(holder ? dm_cell_release : dm_cell_release_no_holder)
+		(cache->prison, cell, &cache->deferred_bios);
+	free_prison_cell(cache, cell);
+}
+
+static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
+		       bool holder)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cache->lock, flags);
+	__cell_defer(cache, cell, holder);
+	spin_unlock_irqrestore(&cache->lock, flags);
+
+	wake_worker(cache);
+}
+
+static void cleanup_migration(struct dm_cache_migration *mg)
+{
+	dec_nr_migrations(mg->cache);
+	free_migration(mg);
+}
+
+static void migration_failure(struct dm_cache_migration *mg)
+{
+	struct cache *cache = mg->cache;
+
+	if (mg->writeback) {
+		DMWARN_LIMIT("writeback failed; couldn't copy block");
+		set_dirty(cache, mg->old_oblock, mg->cblock);
+		cell_defer(cache, mg->old_ocell, false);
+
+	} else if (mg->demote) {
+		DMWARN_LIMIT("demotion failed; couldn't copy block");
+		policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);
+
+		cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1);
+		if (mg->promote)
+			cell_defer(cache, mg->new_ocell, 1);
+	} else {
+		DMWARN_LIMIT("promotion failed; couldn't copy block");
+		policy_remove_mapping(cache->policy, mg->new_oblock);
+		cell_defer(cache, mg->new_ocell, 1);
+	}
+
+	cleanup_migration(mg);
+}
+
+static void migration_success_pre_commit(struct dm_cache_migration *mg)
+{
+	unsigned long flags;
+	struct cache *cache = mg->cache;
+
+	if (mg->writeback) {
+		cell_defer(cache, mg->old_ocell, false);
+		clear_dirty(cache, mg->old_oblock, mg->cblock);
+		cleanup_migration(mg);
+		return;
+
+	} else if (mg->demote) {
+		if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) {
+			DMWARN_LIMIT("demotion failed; couldn't update on disk metadata");
+			policy_force_mapping(cache->policy, mg->new_oblock,
+					     mg->old_oblock);
+			if (mg->promote)
+				cell_defer(cache, mg->new_ocell, true);
+			cleanup_migration(mg);
+			return;
+		}
+	} else {
+		if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
+			DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
+			policy_remove_mapping(cache->policy, mg->new_oblock);
+			cleanup_migration(mg);
+			return;
+		}
+	}
+
+	spin_lock_irqsave(&cache->lock, flags);
+	list_add_tail(&mg->list, &cache->need_commit_migrations);
+	cache->commit_requested = true;
+	spin_unlock_irqrestore(&cache->lock, flags);
+}
+
+static void migration_success_post_commit(struct dm_cache_migration *mg)
+{
+	unsigned long flags;
+	struct cache *cache = mg->cache;
+
+	if (mg->writeback) {
+		DMWARN("writeback unexpectedly triggered commit");
+		return;
+
+	} else if (mg->demote) {
+		cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1);
+
+		if (mg->promote) {
+			mg->demote = false;
+
+			spin_lock_irqsave(&cache->lock, flags);
+			list_add_tail(&mg->list, &cache->quiesced_migrations);
+			spin_unlock_irqrestore(&cache->lock, flags);
+
+		} else
+			cleanup_migration(mg);
+
+	} else {
+		cell_defer(cache, mg->new_ocell, true);
+		clear_dirty(cache, mg->new_oblock, mg->cblock);
+		cleanup_migration(mg);
+	}
+}
+
+static void copy_complete(int read_err, unsigned long write_err, void *context)
+{
+	unsigned long flags;
+	struct dm_cache_migration *mg = (struct dm_cache_migration *) context;
+	struct cache *cache = mg->cache;
+
+	if (read_err || write_err)
+		mg->err = true;
+
+	spin_lock_irqsave(&cache->lock, flags);
+	list_add_tail(&mg->list, &cache->completed_migrations);
+	spin_unlock_irqrestore(&cache->lock, flags);
+
+	wake_worker(cache);
+}
+
+static void issue_copy_real(struct dm_cache_migration *mg)
+{
+	int r;
+	struct dm_io_region o_region, c_region;
+	struct cache *cache = mg->cache;
+
+	o_region.bdev = cache->origin_dev->bdev;
+	o_region.count = cache->sectors_per_block;
+
+	c_region.bdev = cache->cache_dev->bdev;
+	c_region.sector = from_cblock(mg->cblock) * cache->sectors_per_block;
+	c_region.count = cache->sectors_per_block;
+
+	if (mg->writeback || mg->demote) {
+		/* demote */
+		o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block;
+		r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg);
+	} else {
+		/* promote */
+		o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block;
+		r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
+	}
+
+	if (r < 0)
+		migration_failure(mg);
+}
+
+static void avoid_copy(struct dm_cache_migration *mg)
+{
+	atomic_inc(&mg->cache->stats.copies_avoided);
+	migration_success_pre_commit(mg);
+}
+
+static void issue_copy(struct dm_cache_migration *mg)
+{
+	bool avoid;
+	struct cache *cache = mg->cache;
+
+	if (mg->writeback || mg->demote)
+		avoid = !is_dirty(cache, mg->cblock) ||
+			is_discarded_oblock(cache, mg->old_oblock);
+	else
+		avoid = is_discarded_oblock(cache, mg->new_oblock);
+
+	avoid ? avoid_copy(mg) : issue_copy_real(mg);
+}
+
+static void complete_migration(struct dm_cache_migration *mg)
+{
+	if (mg->err)
+		migration_failure(mg);
+	else
+		migration_success_pre_commit(mg);
+}
+
+static void process_migrations(struct cache *cache, struct list_head *head,
+			       void (*fn)(struct dm_cache_migration *))
+{
+	unsigned long flags;
+	struct list_head list;
+	struct dm_cache_migration *mg, *tmp;
+
+	INIT_LIST_HEAD(&list);
+	spin_lock_irqsave(&cache->lock, flags);
+	list_splice_init(head, &list);
+	spin_unlock_irqrestore(&cache->lock, flags);
+
+	list_for_each_entry_safe(mg, tmp, &list, list)
+		fn(mg);
+}
+
+static void __queue_quiesced_migration(struct dm_cache_migration *mg)
+{
+	list_add_tail(&mg->list, &mg->cache->quiesced_migrations);
+}
+
+static void queue_quiesced_migration(struct dm_cache_migration *mg)
+{
+	unsigned long flags;
+	struct cache *cache = mg->cache;
+
+	spin_lock_irqsave(&cache->lock, flags);
+	__queue_quiesced_migration(mg);
+	spin_unlock_irqrestore(&cache->lock, flags);
+
+	wake_worker(cache);
+}
+
+static void queue_quiesced_migrations(struct cache *cache, struct list_head *work)
+{
+	unsigned long flags;
+	struct dm_cache_migration *mg, *tmp;
+
+	spin_lock_irqsave(&cache->lock, flags);
+	list_for_each_entry_safe(mg, tmp, work, list)
+		__queue_quiesced_migration(mg);
+	spin_unlock_irqrestore(&cache->lock, flags);
+
+	wake_worker(cache);
+}
+
+static void check_for_quiesced_migrations(struct cache *cache,
+					  struct per_bio_data *pb)
+{
+	struct list_head work;
+
+	if (!pb->all_io_entry)
+		return;
+
+	INIT_LIST_HEAD(&work);
+	if (pb->all_io_entry)
+		dm_deferred_entry_dec(pb->all_io_entry, &work);
+
+	if (!list_empty(&work))
+		queue_quiesced_migrations(cache, &work);
+}
+
+static void quiesce_migration(struct dm_cache_migration *mg)
+{
+	if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list))
+		queue_quiesced_migration(mg);
+}
+
+static void promote(struct cache *cache, struct prealloc *structs,
+		    dm_oblock_t oblock, dm_cblock_t cblock,
+		    struct dm_bio_prison_cell *cell)
+{
+	struct dm_cache_migration *mg = prealloc_get_migration(structs);
+
+	mg->err = false;
+	mg->writeback = false;
+	mg->demote = false;
+	mg->promote = true;
+	mg->cache = cache;
+	mg->new_oblock = oblock;
+	mg->cblock = cblock;
+	mg->old_ocell = NULL;
+	mg->new_ocell = cell;
+	mg->start_jiffies = jiffies;
+
+	inc_nr_migrations(cache);
+	quiesce_migration(mg);
+}
+
+static void writeback(struct cache *cache, struct prealloc *structs,
+		      dm_oblock_t oblock, dm_cblock_t cblock,
+		      struct dm_bio_prison_cell *cell)
+{
+	struct dm_cache_migration *mg = prealloc_get_migration(structs);
+
+	mg->err = false;
+	mg->writeback = true;
+	mg->demote = false;
+	mg->promote = false;
+	mg->cache = cache;
+	mg->old_oblock = oblock;
+	mg->cblock = cblock;
+	mg->old_ocell = cell;
+	mg->new_ocell = NULL;
+	mg->start_jiffies = jiffies;
+
+	inc_nr_migrations(cache);
+	quiesce_migration(mg);
+}
+
+static void demote_then_promote(struct cache *cache, struct prealloc *structs,
+				dm_oblock_t old_oblock, dm_oblock_t new_oblock,
+				dm_cblock_t cblock,
+				struct dm_bio_prison_cell *old_ocell,
+				struct dm_bio_prison_cell *new_ocell)
+{
+	struct dm_cache_migration *mg = prealloc_get_migration(structs);
+
+	mg->err = false;
+	mg->writeback = false;
+	mg->demote = true;
+	mg->promote = true;
+	mg->cache = cache;
+	mg->old_oblock = old_oblock;
+	mg->new_oblock = new_oblock;
+	mg->cblock = cblock;
+	mg->old_ocell = old_ocell;
+	mg->new_ocell = new_ocell;
+	mg->start_jiffies = jiffies;
+
+	inc_nr_migrations(cache);
+	quiesce_migration(mg);
+}
+
+/*----------------------------------------------------------------
+ * bio processing
+ *--------------------------------------------------------------*/
+static void defer_bio(struct cache *cache, struct bio *bio)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cache->lock, flags);
+	bio_list_add(&cache->deferred_bios, bio);
+	spin_unlock_irqrestore(&cache->lock, flags);
+
+	wake_worker(cache);
+}
+
+static void process_flush_bio(struct cache *cache, struct bio *bio)
+{
+	struct per_bio_data *pb = get_per_bio_data(bio);
+
+	BUG_ON(bio->bi_size);
+	if (!pb->req_nr)
+		remap_to_origin(cache, bio);
+	else
+		remap_to_cache(cache, bio, 0);
+
+	issue(cache, bio);
+}
+
+/*
+ * People generally discard large parts of a device, eg, the whole device
+ * when formatting.  Splitting these large discards up into cache block
+ * sized ios and then quiescing (always neccessary for discard) takes too
+ * long.
+ *
+ * We keep it simple, and allow any size of discard to come in, and just
+ * mark off blocks on the discard bitset.  No passdown occurs!
+ *
+ * To implement passdown we need to change the bio_prison such that a cell
+ * can have a key that spans many blocks.
+ */
+static void process_discard_bio(struct cache *cache, struct bio *bio)
+{
+	dm_block_t start_block = dm_sector_div_up(bio->bi_sector,
+						  cache->discard_block_size);
+	dm_block_t end_block = bio->bi_sector + bio_sectors(bio);
+	dm_block_t b;
+
+	(void) sector_div(end_block, cache->discard_block_size);
+
+	for (b = start_block; b < end_block; b++)
+		set_discard(cache, to_dblock(b));
+
+	bio_endio(bio, 0);
+}
+
+static bool spare_migration_bandwidth(struct cache *cache)
+{
+	sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) *
+		cache->sectors_per_block;
+	return current_volume < cache->migration_threshold;
+}
+
+static bool is_writethrough_io(struct cache *cache, struct bio *bio,
+			       dm_cblock_t cblock)
+{
+	return bio_data_dir(bio) == WRITE &&
+		cache->features.write_through && !is_dirty(cache, cblock);
+}
+
+static void inc_hit_counter(struct cache *cache, struct bio *bio)
+{
+	atomic_inc(bio_data_dir(bio) == READ ?
+		   &cache->stats.read_hit : &cache->stats.write_hit);
+}
+
+static void inc_miss_counter(struct cache *cache, struct bio *bio)
+{
+	atomic_inc(bio_data_dir(bio) == READ ?
+		   &cache->stats.read_miss : &cache->stats.write_miss);
+}
+
+static void process_bio(struct cache *cache, struct prealloc *structs,
+			struct bio *bio)
+{
+	int r;
+	bool release_cell = true;
+	dm_oblock_t block = get_bio_block(cache, bio);
+	struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
+	struct policy_result lookup_result;
+	struct per_bio_data *pb = get_per_bio_data(bio);
+	bool discarded_block = is_discarded_oblock(cache, block);
+	bool can_migrate = discarded_block || spare_migration_bandwidth(cache);
+
+	/*
+	 * Check to see if that block is currently migrating.
+	 */
+	cell_prealloc = prealloc_get_cell(structs);
+	r = bio_detain(cache, block, bio, cell_prealloc,
+		       (cell_free_fn) prealloc_put_cell,
+		       structs, &new_ocell);
+	if (r > 0)
+		return;
+
+	r = policy_map(cache->policy, block, true, can_migrate, discarded_block,
+		       bio, &lookup_result);
+
+	if (r == -EWOULDBLOCK)
+		/* migration has been denied */
+		lookup_result.op = POLICY_MISS;
+
+	switch (lookup_result.op) {
+	case POLICY_HIT:
+		inc_hit_counter(cache, bio);
+		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+
+		if (is_writethrough_io(cache, bio, lookup_result.cblock)) {
+			/*
+			 * No need to mark anything dirty in write through mode.
+			 */
+			pb->req_nr == 0 ?
+				remap_to_cache(cache, bio, lookup_result.cblock) :
+				remap_to_origin_clear_discard(cache, bio, block);
+		} else
+			remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+
+		issue(cache, bio);
+		break;
+
+	case POLICY_MISS:
+		inc_miss_counter(cache, bio);
+		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+
+		if (pb->req_nr != 0) {
+			/*
+			 * This is a duplicate writethrough io that is no
+			 * longer needed because the block has been demoted.
+			 */
+			bio_endio(bio, 0);
+		} else {
+			remap_to_origin_clear_discard(cache, bio, block);
+			issue(cache, bio);
+		}
+		break;
+
+	case POLICY_NEW:
+		atomic_inc(&cache->stats.promotion);
+		promote(cache, structs, block, lookup_result.cblock, new_ocell);
+		release_cell = false;
+		break;
+
+	case POLICY_REPLACE:
+		cell_prealloc = prealloc_get_cell(structs);
+		r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc,
+			       (cell_free_fn) prealloc_put_cell,
+			       structs, &old_ocell);
+		if (r > 0) {
+			/*
+			 * We have to be careful to avoid lock inversion of
+			 * the cells.  So we back off, and wait for the
+			 * old_ocell to become free.
+			 */
+			policy_force_mapping(cache->policy, block,
+					     lookup_result.old_oblock);
+			atomic_inc(&cache->stats.cache_cell_clash);
+			break;
+		}
+		atomic_inc(&cache->stats.demotion);
+		atomic_inc(&cache->stats.promotion);
+
+		demote_then_promote(cache, structs, lookup_result.old_oblock,
+				    block, lookup_result.cblock,
+				    old_ocell, new_ocell);
+		release_cell = false;
+		break;
+
+	default:
+		DMERR_LIMIT("%s: erroring bio, unknown policy op: %u", __func__,
+			    (unsigned) lookup_result.op);
+		bio_io_error(bio);
+	}
+
+	if (release_cell)
+		cell_defer(cache, new_ocell, false);
+}
+
+static int need_commit_due_to_time(struct cache *cache)
+{
+	return jiffies < cache->last_commit_jiffies ||
+	       jiffies > cache->last_commit_jiffies + COMMIT_PERIOD;
+}
+
+static int commit_if_needed(struct cache *cache)
+{
+	if (dm_cache_changed_this_transaction(cache->cmd) &&
+	    (cache->commit_requested || need_commit_due_to_time(cache))) {
+		atomic_inc(&cache->stats.commit_count);
+		cache->last_commit_jiffies = jiffies;
+		cache->commit_requested = false;
+		return dm_cache_commit(cache->cmd, false);
+	}
+
+	return 0;
+}
+
+static void process_deferred_bios(struct cache *cache)
+{
+	unsigned long flags;
+	struct bio_list bios;
+	struct bio *bio;
+	struct prealloc structs;
+
+	memset(&structs, 0, sizeof(structs));
+	bio_list_init(&bios);
+
+	spin_lock_irqsave(&cache->lock, flags);
+	bio_list_merge(&bios, &cache->deferred_bios);
+	bio_list_init(&cache->deferred_bios);
+	spin_unlock_irqrestore(&cache->lock, flags);
+
+	while (!bio_list_empty(&bios)) {
+		/*
+		 * If we've got no free migration structs, and processing
+		 * this bio might require one, we pause until there are some
+		 * prepared mappings to process.
+		 */
+		if (prealloc_data_structs(cache, &structs)) {
+			spin_lock_irqsave(&cache->lock, flags);
+			bio_list_merge(&cache->deferred_bios, &bios);
+			spin_unlock_irqrestore(&cache->lock, flags);
+			break;
+		}
+
+		bio = bio_list_pop(&bios);
+
+		if (bio->bi_rw & REQ_FLUSH)
+			process_flush_bio(cache, bio);
+		else if (bio->bi_rw & REQ_DISCARD)
+			process_discard_bio(cache, bio);
+		else
+			process_bio(cache, &structs, bio);
+	}
+
+	prealloc_free_structs(cache, &structs);
+}
+
+static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
+{
+	unsigned long flags;
+	struct bio_list bios;
+	struct bio *bio;
+
+	bio_list_init(&bios);
+
+	spin_lock_irqsave(&cache->lock, flags);
+	bio_list_merge(&bios, &cache->deferred_flush_bios);
+	bio_list_init(&cache->deferred_flush_bios);
+	spin_unlock_irqrestore(&cache->lock, flags);
+
+	while ((bio = bio_list_pop(&bios)))
+		submit_bios ? generic_make_request(bio) : bio_io_error(bio);
+}
+
+static void writeback_some_dirty_blocks(struct cache *cache)
+{
+	int r = 0;
+	dm_oblock_t oblock;
+	dm_cblock_t cblock;
+	struct prealloc structs;
+	struct dm_bio_prison_cell *old_ocell;
+
+	memset(&structs, 0, sizeof(structs));
+
+	while (spare_migration_bandwidth(cache)) {
+		if (prealloc_data_structs(cache, &structs))
+			break;
+
+		r = policy_writeback_work(cache->policy, &oblock, &cblock);
+		if (r)
+			break;
+
+		r = get_cell(cache, oblock, &structs, &old_ocell);
+		if (r) {
+			policy_set_dirty(cache->policy, oblock);
+			break;
+		}
+
+		writeback(cache, &structs, oblock, cblock, old_ocell);
+	}
+
+	prealloc_free_structs(cache, &structs);
+}
+
+/*----------------------------------------------------------------
+ * Main worker loop
+ *--------------------------------------------------------------*/
+static void start_quiescing(struct cache *cache)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cache->lock, flags);
+	cache->quiescing = 1;
+	spin_unlock_irqrestore(&cache->lock, flags);
+}
+
+static void stop_quiescing(struct cache *cache)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cache->lock, flags);
+	cache->quiescing = 0;
+	spin_unlock_irqrestore(&cache->lock, flags);
+}
+
+static bool is_quiescing(struct cache *cache)
+{
+	int r;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cache->lock, flags);
+	r = cache->quiescing;
+	spin_unlock_irqrestore(&cache->lock, flags);
+
+	return r;
+}
+
+static void wait_for_migrations(struct cache *cache)
+{
+	wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations));
+}
+
+static void stop_worker(struct cache *cache)
+{
+	cancel_delayed_work(&cache->waker);
+	flush_workqueue(cache->wq);
+}
+
+static void requeue_deferred_io(struct cache *cache)
+{
+	struct bio *bio;
+	struct bio_list bios;
+
+	bio_list_init(&bios);
+	bio_list_merge(&bios, &cache->deferred_bios);
+	bio_list_init(&cache->deferred_bios);
+
+	while ((bio = bio_list_pop(&bios)))
+		bio_endio(bio, DM_ENDIO_REQUEUE);
+}
+
+static int more_work(struct cache *cache)
+{
+	if (is_quiescing(cache))
+		return !list_empty(&cache->quiesced_migrations) ||
+			!list_empty(&cache->completed_migrations) ||
+			!list_empty(&cache->need_commit_migrations);
+	else
+		return !bio_list_empty(&cache->deferred_bios) ||
+			!bio_list_empty(&cache->deferred_flush_bios) ||
+			!list_empty(&cache->quiesced_migrations) ||
+			!list_empty(&cache->completed_migrations) ||
+			!list_empty(&cache->need_commit_migrations);
+}
+
+static void do_worker(struct work_struct *ws)
+{
+	struct cache *cache = container_of(ws, struct cache, worker);
+
+	do {
+		if (!is_quiescing(cache))
+			process_deferred_bios(cache);
+
+		process_migrations(cache, &cache->quiesced_migrations, issue_copy);
+		process_migrations(cache, &cache->completed_migrations, complete_migration);
+
+		writeback_some_dirty_blocks(cache);
+
+		if (commit_if_needed(cache)) {
+			process_deferred_flush_bios(cache, false);
+
+			/*
+			 * FIXME: rollback metadata or just go into a
+			 * failure mode and error everything
+			 */
+		} else {
+			process_deferred_flush_bios(cache, true);
+			process_migrations(cache, &cache->need_commit_migrations,
+					   migration_success_post_commit);
+		}
+	} while (more_work(cache));
+}
+
+/*
+ * We want to commit periodically so that not too much
+ * unwritten metadata builds up.
+ */
+static void do_waker(struct work_struct *ws)
+{
+	struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
+	wake_worker(cache);
+	queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
+}
+
+/*----------------------------------------------------------------*/
+
+static int is_congested(struct dm_dev *dev, int bdi_bits)
+{
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+	return bdi_congested(&q->backing_dev_info, bdi_bits);
+}
+
+static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
+{
+	struct cache *cache = container_of(cb, struct cache, callbacks);
+
+	return is_congested(cache->origin_dev, bdi_bits) ||
+		is_congested(cache->cache_dev, bdi_bits);
+}
+
+/*----------------------------------------------------------------
+ * Target methods
+ *--------------------------------------------------------------*/
+
+/*
+ * This function gets called on the error paths of the constructor, so we
+ * have to cope with a partially initialised struct.
+ */
+static void destroy(struct cache *cache)
+{
+	unsigned i;
+
+	if (cache->next_migration)
+		mempool_free(cache->next_migration, cache->migration_pool);
+
+	if (cache->migration_pool)
+		mempool_destroy(cache->migration_pool);
+
+	if (cache->all_io_ds)
+		dm_deferred_set_destroy(cache->all_io_ds);
+
+	if (cache->prison)
+		dm_bio_prison_destroy(cache->prison);
+
+	if (cache->wq)
+		destroy_workqueue(cache->wq);
+
+	if (cache->dirty_bitset)
+		free_bitset(cache->dirty_bitset);
+
+	if (cache->discard_bitset)
+		free_bitset(cache->discard_bitset);
+
+	if (cache->copier)
+		dm_kcopyd_client_destroy(cache->copier);
+
+	if (cache->cmd)
+		dm_cache_metadata_close(cache->cmd);
+
+	if (cache->metadata_dev)
+		dm_put_device(cache->ti, cache->metadata_dev);
+
+	if (cache->origin_dev)
+		dm_put_device(cache->ti, cache->origin_dev);
+
+	if (cache->cache_dev)
+		dm_put_device(cache->ti, cache->cache_dev);
+
+	if (cache->policy)
+		dm_cache_policy_destroy(cache->policy);
+
+	for (i = 0; i < cache->nr_ctr_args ; i++)
+		kfree(cache->ctr_args[i]);
+	kfree(cache->ctr_args);
+
+	kfree(cache);
+}
+
+static void cache_dtr(struct dm_target *ti)
+{
+	struct cache *cache = ti->private;
+
+	destroy(cache);
+}
+
+static sector_t get_dev_size(struct dm_dev *dev)
+{
+	return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Construct a cache device mapping.
+ *
+ * cache <metadata dev> <cache dev> <origin dev> <block size>
+ *       <#feature args> [<feature arg>]*
+ *       <policy> <#policy args> [<policy arg>]*
+ *
+ * metadata dev    : fast device holding the persistent metadata
+ * cache dev	   : fast device holding cached data blocks
+ * origin dev	   : slow device holding original data blocks
+ * block size	   : cache unit size in sectors
+ *
+ * #feature args   : number of feature arguments passed
+ * feature args    : writethrough.  (The default is writeback.)
+ *
+ * policy	   : the replacement policy to use
+ * #policy args    : an even number of policy arguments corresponding
+ *		     to key/value pairs passed to the policy
+ * policy args	   : key/value pairs passed to the policy
+ *		     E.g. 'sequential_threshold 1024'
+ *		     See cache-policies.txt for details.
+ *
+ * Optional feature arguments are:
+ *   writethrough  : write through caching that prohibits cache block
+ *		     content from being different from origin block content.
+ *		     Without this argument, the default behaviour is to write
+ *		     back cache block contents later for performance reasons,
+ *		     so they may differ from the corresponding origin blocks.
+ */
+struct cache_args {
+	struct dm_target *ti;
+
+	struct dm_dev *metadata_dev;
+
+	struct dm_dev *cache_dev;
+	sector_t cache_sectors;
+
+	struct dm_dev *origin_dev;
+	sector_t origin_sectors;
+
+	uint32_t block_size;
+
+	const char *policy_name;
+	int policy_argc;
+	const char **policy_argv;
+
+	struct cache_features features;
+};
+
+static void destroy_cache_args(struct cache_args *ca)
+{
+	if (ca->metadata_dev)
+		dm_put_device(ca->ti, ca->metadata_dev);
+
+	if (ca->cache_dev)
+		dm_put_device(ca->ti, ca->cache_dev);
+
+	if (ca->origin_dev)
+		dm_put_device(ca->ti, ca->origin_dev);
+
+	kfree(ca);
+}
+
+static bool at_least_one_arg(struct dm_arg_set *as, char **error)
+{
+	if (!as->argc) {
+		*error = "Insufficient args";
+		return false;
+	}
+
+	return true;
+}
+
+static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
+			      char **error)
+{
+	int r;
+	sector_t metadata_dev_size;
+	char b[BDEVNAME_SIZE];
+
+	if (!at_least_one_arg(as, error))
+		return -EINVAL;
+
+	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
+			  &ca->metadata_dev);
+	if (r) {
+		*error = "Error opening metadata device";
+		return r;
+	}
+
+	metadata_dev_size = get_dev_size(ca->metadata_dev);
+	if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
+		DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
+		       bdevname(ca->metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS);
+
+	return 0;
+}
+
+static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
+			   char **error)
+{
+	int r;
+
+	if (!at_least_one_arg(as, error))
+		return -EINVAL;
+
+	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
+			  &ca->cache_dev);
+	if (r) {
+		*error = "Error opening cache device";
+		return r;
+	}
+	ca->cache_sectors = get_dev_size(ca->cache_dev);
+
+	return 0;
+}
+
+static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
+			    char **error)
+{
+	int r;
+
+	if (!at_least_one_arg(as, error))
+		return -EINVAL;
+
+	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
+			  &ca->origin_dev);
+	if (r) {
+		*error = "Error opening origin device";
+		return r;
+	}
+
+	ca->origin_sectors = get_dev_size(ca->origin_dev);
+	if (ca->ti->len > ca->origin_sectors) {
+		*error = "Device size larger than cached device";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
+			    char **error)
+{
+	unsigned long tmp;
+
+	if (!at_least_one_arg(as, error))
+		return -EINVAL;
+
+	if (kstrtoul(dm_shift_arg(as), 10, &tmp) || !tmp ||
+	    tmp < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
+	    tmp & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
+		*error = "Invalid data block size";
+		return -EINVAL;
+	}
+
+	if (tmp > ca->cache_sectors) {
+		*error = "Data block size is larger than the cache device";
+		return -EINVAL;
+	}
+
+	ca->block_size = tmp;
+
+	return 0;
+}
+
+static void init_features(struct cache_features *cf)
+{
+	cf->mode = CM_WRITE;
+	cf->write_through = false;
+}
+
+static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
+			  char **error)
+{
+	static struct dm_arg _args[] = {
+		{0, 1, "Invalid number of cache feature arguments"},
+	};
+
+	int r;
+	unsigned argc;
+	const char *arg;
+	struct cache_features *cf = &ca->features;
+
+	init_features(cf);
+
+	r = dm_read_arg_group(_args, as, &argc, error);
+	if (r)
+		return -EINVAL;
+
+	while (argc--) {
+		arg = dm_shift_arg(as);
+
+		if (!strcasecmp(arg, "writeback"))
+			cf->write_through = false;
+
+		else if (!strcasecmp(arg, "writethrough"))
+			cf->write_through = true;
+
+		else {
+			*error = "Unrecognised cache feature requested";
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
+			char **error)
+{
+	static struct dm_arg _args[] = {
+		{0, 1024, "Invalid number of policy arguments"},
+	};
+
+	int r;
+
+	if (!at_least_one_arg(as, error))
+		return -EINVAL;
+
+	ca->policy_name = dm_shift_arg(as);
+
+	r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
+	if (r)
+		return -EINVAL;
+
+	ca->policy_argv = (const char **)as->argv;
+	dm_consume_args(as, ca->policy_argc);
+
+	return 0;
+}
+
+static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
+			    char **error)
+{
+	int r;
+	struct dm_arg_set as;
+
+	as.argc = argc;
+	as.argv = argv;
+
+	r = parse_metadata_dev(ca, &as, error);
+	if (r)
+		return r;
+
+	r = parse_cache_dev(ca, &as, error);
+	if (r)
+		return r;
+
+	r = parse_origin_dev(ca, &as, error);
+	if (r)
+		return r;
+
+	r = parse_block_size(ca, &as, error);
+	if (r)
+		return r;
+
+	r = parse_features(ca, &as, error);
+	if (r)
+		return r;
+
+	r = parse_policy(ca, &as, error);
+	if (r)
+		return r;
+
+	return 0;
+}
+
+/*----------------------------------------------------------------*/
+
+static struct kmem_cache *migration_cache;
+
+static int set_config_values(struct dm_cache_policy *p, int argc, const char **argv)
+{
+	int r = 0;
+
+	if (argc & 1) {
+		DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
+		return -EINVAL;
+	}
+
+	while (argc) {
+		r = policy_set_config_value(p, argv[0], argv[1]);
+		if (r) {
+			DMWARN("policy_set_config_value failed: key = '%s', value = '%s'",
+			       argv[0], argv[1]);
+			return r;
+		}
+
+		argc -= 2;
+		argv += 2;
+	}
+
+	return r;
+}
+
+static int create_cache_policy(struct cache *cache, struct cache_args *ca,
+			       char **error)
+{
+	int r;
+
+	cache->policy =	dm_cache_policy_create(ca->policy_name,
+					       cache->cache_size,
+					       cache->origin_sectors,
+					       cache->sectors_per_block);
+	if (!cache->policy) {
+		*error = "Error creating cache's policy";
+		return -ENOMEM;
+	}
+
+	r = set_config_values(cache->policy, ca->policy_argc, ca->policy_argv);
+	if (r)
+		dm_cache_policy_destroy(cache->policy);
+
+	return r;
+}
+
+/*
+ * We want the discard block size to be a power of two, at least the size
+ * of the cache block size, and have no more than 2^14 discard blocks
+ * across the origin.
+ */
+#define MAX_DISCARD_BLOCKS (1 << 14)
+
+static bool too_many_discard_blocks(sector_t discard_block_size,
+				    sector_t origin_size)
+{
+	(void) sector_div(origin_size, discard_block_size);
+
+	return origin_size > MAX_DISCARD_BLOCKS;
+}
+
+static sector_t calculate_discard_block_size(sector_t cache_block_size,
+					     sector_t origin_size)
+{
+	sector_t discard_block_size;
+
+	discard_block_size = roundup_pow_of_two(cache_block_size);
+
+	if (origin_size)
+		while (too_many_discard_blocks(discard_block_size, origin_size))
+			discard_block_size *= 2;
+
+	return discard_block_size;
+}
+
+#define DEFAULT_MIGRATION_THRESHOLD (2048 * 100)
+
+static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio);
+
+static int cache_create(struct cache_args *ca, struct cache **result)
+{
+	int r = 0;
+	char **error = &ca->ti->error;
+	struct cache *cache;
+	struct dm_target *ti = ca->ti;
+	dm_block_t origin_blocks;
+	struct dm_cache_metadata *cmd;
+	bool may_format = ca->features.mode == CM_WRITE;
+
+	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+	if (!cache)
+		return -ENOMEM;
+
+	cache->ti = ca->ti;
+	ti->private = cache;
+	ti->per_bio_data_size = sizeof(struct per_bio_data);
+	ti->num_flush_bios = 2;
+	ti->flush_supported = true;
+
+	ti->num_discard_bios = 1;
+	ti->discards_supported = true;
+	ti->discard_zeroes_data_unsupported = true;
+
+	memcpy(&cache->features, &ca->features, sizeof(cache->features));
+
+	if (cache->features.write_through)
+		ti->num_write_bios = cache_num_write_bios;
+
+	cache->callbacks.congested_fn = cache_is_congested;
+	dm_table_add_target_callbacks(ti->table, &cache->callbacks);
+
+	cache->metadata_dev = ca->metadata_dev;
+	cache->origin_dev = ca->origin_dev;
+	cache->cache_dev = ca->cache_dev;
+
+	ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
+
+	/* FIXME: factor out this whole section */
+	origin_blocks = cache->origin_sectors = ca->origin_sectors;
+	(void) sector_div(origin_blocks, ca->block_size);
+	cache->origin_blocks = to_oblock(origin_blocks);
+
+	cache->sectors_per_block = ca->block_size;
+	if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
+		r = -EINVAL;
+		goto bad;
+	}
+
+	if (ca->block_size & (ca->block_size - 1)) {
+		dm_block_t cache_size = ca->cache_sectors;
+
+		cache->sectors_per_block_shift = -1;
+		(void) sector_div(cache_size, ca->block_size);
+		cache->cache_size = to_cblock(cache_size);
+	} else {
+		cache->sectors_per_block_shift = __ffs(ca->block_size);
+		cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift);
+	}
+
+	r = create_cache_policy(cache, ca, error);
+	if (r)
+		goto bad;
+	cache->policy_nr_args = ca->policy_argc;
+
+	cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
+				     ca->block_size, may_format,
+				     dm_cache_policy_get_hint_size(cache->policy));
+	if (IS_ERR(cmd)) {
+		*error = "Error creating metadata object";
+		r = PTR_ERR(cmd);
+		goto bad;
+	}
+	cache->cmd = cmd;
+
+	spin_lock_init(&cache->lock);
+	bio_list_init(&cache->deferred_bios);
+	bio_list_init(&cache->deferred_flush_bios);
+	INIT_LIST_HEAD(&cache->quiesced_migrations);
+	INIT_LIST_HEAD(&cache->completed_migrations);
+	INIT_LIST_HEAD(&cache->need_commit_migrations);
+	cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
+	atomic_set(&cache->nr_migrations, 0);
+	init_waitqueue_head(&cache->migration_wait);
+
+	cache->nr_dirty = 0;
+	cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
+	if (!cache->dirty_bitset) {
+		*error = "could not allocate dirty bitset";
+		goto bad;
+	}
+	clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
+
+	cache->discard_block_size =
+		calculate_discard_block_size(cache->sectors_per_block,
+					     cache->origin_sectors);
+	cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks);
+	cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
+	if (!cache->discard_bitset) {
+		*error = "could not allocate discard bitset";
+		goto bad;
+	}
+	clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
+
+	cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
+	if (IS_ERR(cache->copier)) {
+		*error = "could not create kcopyd client";
+		r = PTR_ERR(cache->copier);
+		goto bad;
+	}
+
+	cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
+	if (!cache->wq) {
+		*error = "could not create workqueue for metadata object";
+		goto bad;
+	}
+	INIT_WORK(&cache->worker, do_worker);
+	INIT_DELAYED_WORK(&cache->waker, do_waker);
+	cache->last_commit_jiffies = jiffies;
+
+	cache->prison = dm_bio_prison_create(PRISON_CELLS);
+	if (!cache->prison) {
+		*error = "could not create bio prison";
+		goto bad;
+	}
+
+	cache->all_io_ds = dm_deferred_set_create();
+	if (!cache->all_io_ds) {
+		*error = "could not create all_io deferred set";
+		goto bad;
+	}
+
+	cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE,
+							 migration_cache);
+	if (!cache->migration_pool) {
+		*error = "Error creating cache's migration mempool";
+		goto bad;
+	}
+
+	cache->next_migration = NULL;
+
+	cache->need_tick_bio = true;
+	cache->sized = false;
+	cache->quiescing = false;
+	cache->commit_requested = false;
+	cache->loaded_mappings = false;
+	cache->loaded_discards = false;
+
+	load_stats(cache);
+
+	atomic_set(&cache->stats.demotion, 0);
+	atomic_set(&cache->stats.promotion, 0);
+	atomic_set(&cache->stats.copies_avoided, 0);
+	atomic_set(&cache->stats.cache_cell_clash, 0);
+	atomic_set(&cache->stats.commit_count, 0);
+	atomic_set(&cache->stats.discard_count, 0);
+
+	*result = cache;
+	return 0;
+
+bad:
+	destroy(cache);
+	return r;
+}
+
+static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
+{
+	unsigned i;
+	const char **copy;
+
+	copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
+	if (!copy)
+		return -ENOMEM;
+	for (i = 0; i < argc; i++) {
+		copy[i] = kstrdup(argv[i], GFP_KERNEL);
+		if (!copy[i]) {
+			while (i--)
+				kfree(copy[i]);
+			kfree(copy);
+			return -ENOMEM;
+		}
+	}
+
+	cache->nr_ctr_args = argc;
+	cache->ctr_args = copy;
+
+	return 0;
+}
+
+static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
+{
+	int r = -EINVAL;
+	struct cache_args *ca;
+	struct cache *cache = NULL;
+
+	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+	if (!ca) {
+		ti->error = "Error allocating memory for cache";
+		return -ENOMEM;
+	}
+	ca->ti = ti;
+
+	r = parse_cache_args(ca, argc, argv, &ti->error);
+	if (r)
+		goto out;
+
+	r = cache_create(ca, &cache);
+
+	r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
+	if (r) {
+		destroy(cache);
+		goto out;
+	}
+
+	ti->private = cache;
+
+out:
+	destroy_cache_args(ca);
+	return r;
+}
+
+static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio)
+{
+	int r;
+	struct cache *cache = ti->private;
+	dm_oblock_t block = get_bio_block(cache, bio);
+	dm_cblock_t cblock;
+
+	r = policy_lookup(cache->policy, block, &cblock);
+	if (r < 0)
+		return 2;	/* assume the worst */
+
+	return (!r && !is_dirty(cache, cblock)) ? 2 : 1;
+}
+
+static int cache_map(struct dm_target *ti, struct bio *bio)
+{
+	struct cache *cache = ti->private;
+
+	int r;
+	dm_oblock_t block = get_bio_block(cache, bio);
+	bool can_migrate = false;
+	bool discarded_block;
+	struct dm_bio_prison_cell *cell;
+	struct policy_result lookup_result;
+	struct per_bio_data *pb;
+
+	if (from_oblock(block) > from_oblock(cache->origin_blocks)) {
+		/*
+		 * This can only occur if the io goes to a partial block at
+		 * the end of the origin device.  We don't cache these.
+		 * Just remap to the origin and carry on.
+		 */
+		remap_to_origin_clear_discard(cache, bio, block);
+		return DM_MAPIO_REMAPPED;
+	}
+
+	pb = init_per_bio_data(bio);
+
+	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) {
+		defer_bio(cache, bio);
+		return DM_MAPIO_SUBMITTED;
+	}
+
+	/*
+	 * Check to see if that block is currently migrating.
+	 */
+	cell = alloc_prison_cell(cache);
+	if (!cell) {
+		defer_bio(cache, bio);
+		return DM_MAPIO_SUBMITTED;
+	}
+
+	r = bio_detain(cache, block, bio, cell,
+		       (cell_free_fn) free_prison_cell,
+		       cache, &cell);
+	if (r) {
+		if (r < 0)
+			defer_bio(cache, bio);
+
+		return DM_MAPIO_SUBMITTED;
+	}
+
+	discarded_block = is_discarded_oblock(cache, block);
+
+	r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
+		       bio, &lookup_result);
+	if (r == -EWOULDBLOCK) {
+		cell_defer(cache, cell, true);
+		return DM_MAPIO_SUBMITTED;
+
+	} else if (r) {
+		DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
+		bio_io_error(bio);
+		return DM_MAPIO_SUBMITTED;
+	}
+
+	switch (lookup_result.op) {
+	case POLICY_HIT:
+		inc_hit_counter(cache, bio);
+		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+
+		if (is_writethrough_io(cache, bio, lookup_result.cblock)) {
+			/*
+			 * No need to mark anything dirty in write through mode.
+			 */
+			pb->req_nr == 0 ?
+				remap_to_cache(cache, bio, lookup_result.cblock) :
+				remap_to_origin_clear_discard(cache, bio, block);
+			cell_defer(cache, cell, false);
+		} else {
+			remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+			cell_defer(cache, cell, false);
+		}
+		break;
+
+	case POLICY_MISS:
+		inc_miss_counter(cache, bio);
+		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+
+		if (pb->req_nr != 0) {
+			/*
+			 * This is a duplicate writethrough io that is no
+			 * longer needed because the block has been demoted.
+			 */
+			bio_endio(bio, 0);
+			cell_defer(cache, cell, false);
+			return DM_MAPIO_SUBMITTED;
+		} else {
+			remap_to_origin_clear_discard(cache, bio, block);
+			cell_defer(cache, cell, false);
+		}
+		break;
+
+	default:
+		DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
+			    (unsigned) lookup_result.op);
+		bio_io_error(bio);
+		return DM_MAPIO_SUBMITTED;
+	}
+
+	return DM_MAPIO_REMAPPED;
+}
+
+static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
+{
+	struct cache *cache = ti->private;
+	unsigned long flags;
+	struct per_bio_data *pb = get_per_bio_data(bio);
+
+	if (pb->tick) {
+		policy_tick(cache->policy);
+
+		spin_lock_irqsave(&cache->lock, flags);
+		cache->need_tick_bio = true;
+		spin_unlock_irqrestore(&cache->lock, flags);
+	}
+
+	check_for_quiesced_migrations(cache, pb);
+
+	return 0;
+}
+
+static int write_dirty_bitset(struct cache *cache)
+{
+	unsigned i, r;
+
+	for (i = 0; i < from_cblock(cache->cache_size); i++) {
+		r = dm_cache_set_dirty(cache->cmd, to_cblock(i),
+				       is_dirty(cache, to_cblock(i)));
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+static int write_discard_bitset(struct cache *cache)
+{
+	unsigned i, r;
+
+	r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
+					   cache->discard_nr_blocks);
+	if (r) {
+		DMERR("could not resize on-disk discard bitset");
+		return r;
+	}
+
+	for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
+		r = dm_cache_set_discard(cache->cmd, to_dblock(i),
+					 is_discarded(cache, to_dblock(i)));
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock,
+		     uint32_t hint)
+{
+	struct cache *cache = context;
+	return dm_cache_save_hint(cache->cmd, cblock, hint);
+}
+
+static int write_hints(struct cache *cache)
+{
+	int r;
+
+	r = dm_cache_begin_hints(cache->cmd, cache->policy);
+	if (r) {
+		DMERR("dm_cache_begin_hints failed");
+		return r;
+	}
+
+	r = policy_walk_mappings(cache->policy, save_hint, cache);
+	if (r)
+		DMERR("policy_walk_mappings failed");
+
+	return r;
+}
+
+/*
+ * returns true on success
+ */
+static bool sync_metadata(struct cache *cache)
+{
+	int r1, r2, r3, r4;
+
+	r1 = write_dirty_bitset(cache);
+	if (r1)
+		DMERR("could not write dirty bitset");
+
+	r2 = write_discard_bitset(cache);
+	if (r2)
+		DMERR("could not write discard bitset");
+
+	save_stats(cache);
+
+	r3 = write_hints(cache);
+	if (r3)
+		DMERR("could not write hints");
+
+	/*
+	 * If writing the above metadata failed, we still commit, but don't
+	 * set the clean shutdown flag.  This will effectively force every
+	 * dirty bit to be set on reload.
+	 */
+	r4 = dm_cache_commit(cache->cmd, !r1 && !r2 && !r3);
+	if (r4)
+		DMERR("could not write cache metadata.  Data loss may occur.");
+
+	return !r1 && !r2 && !r3 && !r4;
+}
+
+static void cache_postsuspend(struct dm_target *ti)
+{
+	struct cache *cache = ti->private;
+
+	start_quiescing(cache);
+	wait_for_migrations(cache);
+	stop_worker(cache);
+	requeue_deferred_io(cache);
+	stop_quiescing(cache);
+
+	(void) sync_metadata(cache);
+}
+
+static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
+			bool dirty, uint32_t hint, bool hint_valid)
+{
+	int r;
+	struct cache *cache = context;
+
+	r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid);
+	if (r)
+		return r;
+
+	if (dirty)
+		set_dirty(cache, oblock, cblock);
+	else
+		clear_dirty(cache, oblock, cblock);
+
+	return 0;
+}
+
+static int load_discard(void *context, sector_t discard_block_size,
+			dm_dblock_t dblock, bool discard)
+{
+	struct cache *cache = context;
+
+	/* FIXME: handle mis-matched block size */
+
+	if (discard)
+		set_discard(cache, dblock);
+	else
+		clear_discard(cache, dblock);
+
+	return 0;
+}
+
+static int cache_preresume(struct dm_target *ti)
+{
+	int r = 0;
+	struct cache *cache = ti->private;
+	sector_t actual_cache_size = get_dev_size(cache->cache_dev);
+	(void) sector_div(actual_cache_size, cache->sectors_per_block);
+
+	/*
+	 * Check to see if the cache has resized.
+	 */
+	if (from_cblock(cache->cache_size) != actual_cache_size || !cache->sized) {
+		cache->cache_size = to_cblock(actual_cache_size);
+
+		r = dm_cache_resize(cache->cmd, cache->cache_size);
+		if (r) {
+			DMERR("could not resize cache metadata");
+			return r;
+		}
+
+		cache->sized = true;
+	}
+
+	if (!cache->loaded_mappings) {
+		r = dm_cache_load_mappings(cache->cmd,
+					   dm_cache_policy_get_name(cache->policy),
+					   load_mapping, cache);
+		if (r) {
+			DMERR("could not load cache mappings");
+			return r;
+		}
+
+		cache->loaded_mappings = true;
+	}
+
+	if (!cache->loaded_discards) {
+		r = dm_cache_load_discards(cache->cmd, load_discard, cache);
+		if (r) {
+			DMERR("could not load origin discards");
+			return r;
+		}
+
+		cache->loaded_discards = true;
+	}
+
+	return r;
+}
+
+static void cache_resume(struct dm_target *ti)
+{
+	struct cache *cache = ti->private;
+
+	cache->need_tick_bio = true;
+	do_waker(&cache->waker.work);
+}
+
+/*
+ * Status format:
+ *
+ * <#used metadata blocks>/<#total metadata blocks>
+ * <#read hits> <#read misses> <#write hits> <#write misses>
+ * <#demotions> <#promotions> <#blocks in cache> <#dirty>
+ * <#features> <features>*
+ * <#core args> <core args>
+ * <#policy args> <policy args>*
+ */
+static void cache_status(struct dm_target *ti, status_type_t type,
+			 unsigned status_flags, char *result, unsigned maxlen)
+{
+	int r = 0;
+	unsigned i;
+	ssize_t sz = 0;
+	dm_block_t nr_free_blocks_metadata = 0;
+	dm_block_t nr_blocks_metadata = 0;
+	char buf[BDEVNAME_SIZE];
+	struct cache *cache = ti->private;
+	dm_cblock_t residency;
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		/* Commit to ensure statistics aren't out-of-date */
+		if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) {
+			r = dm_cache_commit(cache->cmd, false);
+			if (r)
+				DMERR("could not commit metadata for accurate status");
+		}
+
+		r = dm_cache_get_free_metadata_block_count(cache->cmd,
+							   &nr_free_blocks_metadata);
+		if (r) {
+			DMERR("could not get metadata free block count");
+			goto err;
+		}
+
+		r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
+		if (r) {
+			DMERR("could not get metadata device size");
+			goto err;
+		}
+
+		residency = policy_residency(cache->policy);
+
+		DMEMIT("%llu/%llu %u %u %u %u %u %u %llu %u ",
+		       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
+		       (unsigned long long)nr_blocks_metadata,
+		       (unsigned) atomic_read(&cache->stats.read_hit),
+		       (unsigned) atomic_read(&cache->stats.read_miss),
+		       (unsigned) atomic_read(&cache->stats.write_hit),
+		       (unsigned) atomic_read(&cache->stats.write_miss),
+		       (unsigned) atomic_read(&cache->stats.demotion),
+		       (unsigned) atomic_read(&cache->stats.promotion),
+		       (unsigned long long) from_cblock(residency),
+		       cache->nr_dirty);
+
+		if (cache->features.write_through)
+			DMEMIT("1 writethrough ");
+		else
+			DMEMIT("0 ");
+
+		DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
+		if (sz < maxlen) {
+			r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz);
+			if (r)
+				DMERR("policy_emit_config_values returned %d", r);
+		}
+
+		break;
+
+	case STATUSTYPE_TABLE:
+		format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
+		DMEMIT("%s ", buf);
+		format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
+		DMEMIT("%s ", buf);
+		format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
+		DMEMIT("%s", buf);
+
+		for (i = 0; i < cache->nr_ctr_args - 1; i++)
+			DMEMIT(" %s", cache->ctr_args[i]);
+		if (cache->nr_ctr_args)
+			DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
+	}
+
+	return;
+
+err:
+	DMEMIT("Error");
+}
+
+#define NOT_CORE_OPTION 1
+
+static int process_config_option(struct cache *cache, char **argv)
+{
+	unsigned long tmp;
+
+	if (!strcasecmp(argv[0], "migration_threshold")) {
+		if (kstrtoul(argv[1], 10, &tmp))
+			return -EINVAL;
+
+		cache->migration_threshold = tmp;
+		return 0;
+	}
+
+	return NOT_CORE_OPTION;
+}
+
+/*
+ * Supports <key> <value>.
+ *
+ * The key migration_threshold is supported by the cache target core.
+ */
+static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
+{
+	int r;
+	struct cache *cache = ti->private;
+
+	if (argc != 2)
+		return -EINVAL;
+
+	r = process_config_option(cache, argv);
+	if (r == NOT_CORE_OPTION)
+		return policy_set_config_value(cache->policy, argv[0], argv[1]);
+
+	return r;
+}
+
+static int cache_iterate_devices(struct dm_target *ti,
+				 iterate_devices_callout_fn fn, void *data)
+{
+	int r = 0;
+	struct cache *cache = ti->private;
+
+	r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
+	if (!r)
+		r = fn(ti, cache->origin_dev, 0, ti->len, data);
+
+	return r;
+}
+
+/*
+ * We assume I/O is going to the origin (which is the volume
+ * more likely to have restrictions e.g. by being striped).
+ * (Looking up the exact location of the data would be expensive
+ * and could always be out of date by the time the bio is submitted.)
+ */
+static int cache_bvec_merge(struct dm_target *ti,
+			    struct bvec_merge_data *bvm,
+			    struct bio_vec *biovec, int max_size)
+{
+	struct cache *cache = ti->private;
+	struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev);
+
+	if (!q->merge_bvec_fn)
+		return max_size;
+
+	bvm->bi_bdev = cache->origin_dev->bdev;
+	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
+}
+
+static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
+{
+	/*
+	 * FIXME: these limits may be incompatible with the cache device
+	 */
+	limits->max_discard_sectors = cache->discard_block_size * 1024;
+	limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
+}
+
+static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+	struct cache *cache = ti->private;
+
+	blk_limits_io_min(limits, 0);
+	blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
+	set_discard_limits(cache, limits);
+}
+
+/*----------------------------------------------------------------*/
+
+static struct target_type cache_target = {
+	.name = "cache",
+	.version = {1, 0, 0},
+	.module = THIS_MODULE,
+	.ctr = cache_ctr,
+	.dtr = cache_dtr,
+	.map = cache_map,
+	.end_io = cache_end_io,
+	.postsuspend = cache_postsuspend,
+	.preresume = cache_preresume,
+	.resume = cache_resume,
+	.status = cache_status,
+	.message = cache_message,
+	.iterate_devices = cache_iterate_devices,
+	.merge = cache_bvec_merge,
+	.io_hints = cache_io_hints,
+};
+
+static int __init dm_cache_init(void)
+{
+	int r;
+
+	r = dm_register_target(&cache_target);
+	if (r) {
+		DMERR("cache target registration failed: %d", r);
+		return r;
+	}
+
+	migration_cache = KMEM_CACHE(dm_cache_migration, 0);
+	if (!migration_cache) {
+		dm_unregister_target(&cache_target);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void __exit dm_cache_exit(void)
+{
+	dm_unregister_target(&cache_target);
+	kmem_cache_destroy(migration_cache);
+}
+
+module_init(dm_cache_init);
+module_exit(dm_cache_exit);
+
+MODULE_DESCRIPTION(DM_NAME " cache target");
+MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index f7369f9d8595..13c15480d940 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1234,20 +1234,6 @@ static int crypt_decode_key(u8 *key, char *hex, unsigned int size)
 	return 0;
 }
 
-/*
- * Encode key into its hex representation
- */
-static void crypt_encode_key(char *hex, u8 *key, unsigned int size)
-{
-	unsigned int i;
-
-	for (i = 0; i < size; i++) {
-		sprintf(hex, "%02x", *key);
-		hex += 2;
-		key++;
-	}
-}
-
 static void crypt_free_tfms(struct crypt_config *cc)
 {
 	unsigned i;
@@ -1651,7 +1637,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 		if (opt_params == 1 && opt_string &&
 		    !strcasecmp(opt_string, "allow_discards"))
-			ti->num_discard_requests = 1;
+			ti->num_discard_bios = 1;
 		else if (opt_params) {
 			ret = -EINVAL;
 			ti->error = "Invalid feature arguments";
@@ -1679,7 +1665,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	ti->num_flush_requests = 1;
+	ti->num_flush_bios = 1;
 	ti->discard_zeroes_data_unsupported = true;
 
 	return 0;
@@ -1717,11 +1703,11 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_SUBMITTED;
 }
 
-static int crypt_status(struct dm_target *ti, status_type_t type,
-			unsigned status_flags, char *result, unsigned maxlen)
+static void crypt_status(struct dm_target *ti, status_type_t type,
+			 unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct crypt_config *cc = ti->private;
-	unsigned int sz = 0;
+	unsigned i, sz = 0;
 
 	switch (type) {
 	case STATUSTYPE_INFO:
@@ -1731,27 +1717,20 @@ static int crypt_status(struct dm_target *ti, status_type_t type,
 	case STATUSTYPE_TABLE:
 		DMEMIT("%s ", cc->cipher_string);
 
-		if (cc->key_size > 0) {
-			if ((maxlen - sz) < ((cc->key_size << 1) + 1))
-				return -ENOMEM;
-
-			crypt_encode_key(result + sz, cc->key, cc->key_size);
-			sz += cc->key_size << 1;
-		} else {
-			if (sz >= maxlen)
-				return -ENOMEM;
-			result[sz++] = '-';
-		}
+		if (cc->key_size > 0)
+			for (i = 0; i < cc->key_size; i++)
+				DMEMIT("%02x", cc->key[i]);
+		else
+			DMEMIT("-");
 
 		DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
 				cc->dev->name, (unsigned long long)cc->start);
 
-		if (ti->num_discard_requests)
+		if (ti->num_discard_bios)
 			DMEMIT(" 1 allow_discards");
 
 		break;
 	}
-	return 0;
 }
 
 static void crypt_postsuspend(struct dm_target *ti)
@@ -1845,7 +1824,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
 
 static struct target_type crypt_target = {
 	.name   = "crypt",
-	.version = {1, 12, 0},
+	.version = {1, 12, 1},
 	.module = THIS_MODULE,
 	.ctr    = crypt_ctr,
 	.dtr    = crypt_dtr,
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index cc1bd048acb2..496d5f3646a5 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -198,8 +198,8 @@ out:
 	mutex_init(&dc->timer_lock);
 	atomic_set(&dc->may_delay, 1);
 
-	ti->num_flush_requests = 1;
-	ti->num_discard_requests = 1;
+	ti->num_flush_bios = 1;
+	ti->num_discard_bios = 1;
 	ti->private = dc;
 	return 0;
 
@@ -293,8 +293,8 @@ static int delay_map(struct dm_target *ti, struct bio *bio)
 	return delay_bio(dc, dc->read_delay, bio);
 }
 
-static int delay_status(struct dm_target *ti, status_type_t type,
-			unsigned status_flags, char *result, unsigned maxlen)
+static void delay_status(struct dm_target *ti, status_type_t type,
+			 unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct delay_c *dc = ti->private;
 	int sz = 0;
@@ -314,8 +314,6 @@ static int delay_status(struct dm_target *ti, status_type_t type,
 			       dc->write_delay);
 		break;
 	}
-
-	return 0;
 }
 
 static int delay_iterate_devices(struct dm_target *ti,
@@ -337,7 +335,7 @@ out:
 
 static struct target_type delay_target = {
 	.name	     = "delay",
-	.version     = {1, 2, 0},
+	.version     = {1, 2, 1},
 	.module      = THIS_MODULE,
 	.ctr	     = delay_ctr,
 	.dtr	     = delay_dtr,
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 9721f2ffb1a2..7fcf21cb4ff8 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -216,8 +216,8 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	ti->num_flush_requests = 1;
-	ti->num_discard_requests = 1;
+	ti->num_flush_bios = 1;
+	ti->num_discard_bios = 1;
 	ti->per_bio_data_size = sizeof(struct per_bio_data);
 	ti->private = fc;
 	return 0;
@@ -337,8 +337,8 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
 	return error;
 }
 
-static int flakey_status(struct dm_target *ti, status_type_t type,
-			 unsigned status_flags, char *result, unsigned maxlen)
+static void flakey_status(struct dm_target *ti, status_type_t type,
+			  unsigned status_flags, char *result, unsigned maxlen)
 {
 	unsigned sz = 0;
 	struct flakey_c *fc = ti->private;
@@ -368,7 +368,6 @@ static int flakey_status(struct dm_target *ti, status_type_t type,
 
 		break;
 	}
-	return 0;
 }
 
 static int flakey_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg)
@@ -411,7 +410,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_
 
 static struct target_type flakey_target = {
 	.name   = "flakey",
-	.version = {1, 3, 0},
+	.version = {1, 3, 1},
 	.module = THIS_MODULE,
 	.ctr    = flakey_ctr,
 	.dtr    = flakey_dtr,
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 0666b5d14b88..aa04f0224642 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1067,6 +1067,7 @@ static void retrieve_status(struct dm_table *table,
 	num_targets = dm_table_get_num_targets(table);
 	for (i = 0; i < num_targets; i++) {
 		struct dm_target *ti = dm_table_get_target(table, i);
+		size_t l;
 
 		remaining = len - (outptr - outbuf);
 		if (remaining <= sizeof(struct dm_target_spec)) {
@@ -1093,14 +1094,17 @@ static void retrieve_status(struct dm_table *table,
 		if (ti->type->status) {
 			if (param->flags & DM_NOFLUSH_FLAG)
 				status_flags |= DM_STATUS_NOFLUSH_FLAG;
-			if (ti->type->status(ti, type, status_flags, outptr, remaining)) {
-				param->flags |= DM_BUFFER_FULL_FLAG;
-				break;
-			}
+			ti->type->status(ti, type, status_flags, outptr, remaining);
 		} else
 			outptr[0] = '\0';
 
-		outptr += strlen(outptr) + 1;
+		l = strlen(outptr) + 1;
+		if (l == remaining) {
+			param->flags |= DM_BUFFER_FULL_FLAG;
+			break;
+		}
+
+		outptr += l;
 		used = param->data_start + (outptr - outbuf);
 
 		outptr = align_ptr(outptr);
@@ -1410,6 +1414,22 @@ static int table_status(struct dm_ioctl *param, size_t param_size)
 	return 0;
 }
 
+static bool buffer_test_overflow(char *result, unsigned maxlen)
+{
+	return !maxlen || strlen(result) + 1 >= maxlen;
+}
+
+/*
+ * Process device-mapper dependent messages.
+ * Returns a number <= 1 if message was processed by device mapper.
+ * Returns 2 if message should be delivered to the target.
+ */
+static int message_for_md(struct mapped_device *md, unsigned argc, char **argv,
+			  char *result, unsigned maxlen)
+{
+	return 2;
+}
+
 /*
  * Pass a message to the target that's at the supplied device offset.
  */
@@ -1421,6 +1441,8 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
 	struct dm_table *table;
 	struct dm_target *ti;
 	struct dm_target_msg *tmsg = (void *) param + param->data_start;
+	size_t maxlen;
+	char *result = get_result_buffer(param, param_size, &maxlen);
 
 	md = find_device(param);
 	if (!md)
@@ -1444,6 +1466,10 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
 		goto out_argv;
 	}
 
+	r = message_for_md(md, argc, argv, result, maxlen);
+	if (r <= 1)
+		goto out_argv;
+
 	table = dm_get_live_table(md);
 	if (!table)
 		goto out_argv;
@@ -1469,44 +1495,68 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
  out_argv:
 	kfree(argv);
  out:
-	param->data_size = 0;
+	if (r >= 0)
+		__dev_status(md, param);
+
+	if (r == 1) {
+		param->flags |= DM_DATA_OUT_FLAG;
+		if (buffer_test_overflow(result, maxlen))
+			param->flags |= DM_BUFFER_FULL_FLAG;
+		else
+			param->data_size = param->data_start + strlen(result) + 1;
+		r = 0;
+	}
+
 	dm_put(md);
 	return r;
 }
 
+/*
+ * The ioctl parameter block consists of two parts, a dm_ioctl struct
+ * followed by a data buffer.  This flag is set if the second part,
+ * which has a variable size, is not used by the function processing
+ * the ioctl.
+ */
+#define IOCTL_FLAGS_NO_PARAMS	1
+
 /*-----------------------------------------------------------------
  * Implementation of open/close/ioctl on the special char
  * device.
  *---------------------------------------------------------------*/
-static ioctl_fn lookup_ioctl(unsigned int cmd)
+static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags)
 {
 	static struct {
 		int cmd;
+		int flags;
 		ioctl_fn fn;
 	} _ioctls[] = {
-		{DM_VERSION_CMD, NULL},	/* version is dealt with elsewhere */
-		{DM_REMOVE_ALL_CMD, remove_all},
-		{DM_LIST_DEVICES_CMD, list_devices},
-
-		{DM_DEV_CREATE_CMD, dev_create},
-		{DM_DEV_REMOVE_CMD, dev_remove},
-		{DM_DEV_RENAME_CMD, dev_rename},
-		{DM_DEV_SUSPEND_CMD, dev_suspend},
-		{DM_DEV_STATUS_CMD, dev_status},
-		{DM_DEV_WAIT_CMD, dev_wait},
-
-		{DM_TABLE_LOAD_CMD, table_load},
-		{DM_TABLE_CLEAR_CMD, table_clear},
-		{DM_TABLE_DEPS_CMD, table_deps},
-		{DM_TABLE_STATUS_CMD, table_status},
-
-		{DM_LIST_VERSIONS_CMD, list_versions},
-
-		{DM_TARGET_MSG_CMD, target_message},
-		{DM_DEV_SET_GEOMETRY_CMD, dev_set_geometry}
+		{DM_VERSION_CMD, 0, NULL}, /* version is dealt with elsewhere */
+		{DM_REMOVE_ALL_CMD, IOCTL_FLAGS_NO_PARAMS, remove_all},
+		{DM_LIST_DEVICES_CMD, 0, list_devices},
+
+		{DM_DEV_CREATE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_create},
+		{DM_DEV_REMOVE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_remove},
+		{DM_DEV_RENAME_CMD, 0, dev_rename},
+		{DM_DEV_SUSPEND_CMD, IOCTL_FLAGS_NO_PARAMS, dev_suspend},
+		{DM_DEV_STATUS_CMD, IOCTL_FLAGS_NO_PARAMS, dev_status},
+		{DM_DEV_WAIT_CMD, 0, dev_wait},
+
+		{DM_TABLE_LOAD_CMD, 0, table_load},
+		{DM_TABLE_CLEAR_CMD, IOCTL_FLAGS_NO_PARAMS, table_clear},
+		{DM_TABLE_DEPS_CMD, 0, table_deps},
+		{DM_TABLE_STATUS_CMD, 0, table_status},
+
+		{DM_LIST_VERSIONS_CMD, 0, list_versions},
+
+		{DM_TARGET_MSG_CMD, 0, target_message},
+		{DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry}
 	};
 
-	return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn;
+	if (unlikely(cmd >= ARRAY_SIZE(_ioctls)))
+		return NULL;
+
+	*ioctl_flags = _ioctls[cmd].flags;
+	return _ioctls[cmd].fn;
 }
 
 /*
@@ -1543,7 +1593,8 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user)
 	return r;
 }
 
-#define DM_PARAMS_VMALLOC	0x0001	/* Params alloced with vmalloc not kmalloc */
+#define DM_PARAMS_KMALLOC	0x0001	/* Params alloced with kmalloc */
+#define DM_PARAMS_VMALLOC	0x0002	/* Params alloced with vmalloc */
 #define DM_WIPE_BUFFER		0x0010	/* Wipe input buffer before returning from ioctl */
 
 static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags)
@@ -1551,66 +1602,80 @@ static void free_params(struct dm_ioctl *param, size_t param_size, int param_fla
 	if (param_flags & DM_WIPE_BUFFER)
 		memset(param, 0, param_size);
 
+	if (param_flags & DM_PARAMS_KMALLOC)
+		kfree(param);
 	if (param_flags & DM_PARAMS_VMALLOC)
 		vfree(param);
-	else
-		kfree(param);
 }
 
-static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param, int *param_flags)
+static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel,
+		       int ioctl_flags,
+		       struct dm_ioctl **param, int *param_flags)
 {
-	struct dm_ioctl tmp, *dmi;
+	struct dm_ioctl *dmi;
 	int secure_data;
+	const size_t minimum_data_size = sizeof(*param_kernel) - sizeof(param_kernel->data);
 
-	if (copy_from_user(&tmp, user, sizeof(tmp) - sizeof(tmp.data)))
+	if (copy_from_user(param_kernel, user, minimum_data_size))
 		return -EFAULT;
 
-	if (tmp.data_size < (sizeof(tmp) - sizeof(tmp.data)))
+	if (param_kernel->data_size < minimum_data_size)
 		return -EINVAL;
 
-	secure_data = tmp.flags & DM_SECURE_DATA_FLAG;
+	secure_data = param_kernel->flags & DM_SECURE_DATA_FLAG;
 
 	*param_flags = secure_data ? DM_WIPE_BUFFER : 0;
 
+	if (ioctl_flags & IOCTL_FLAGS_NO_PARAMS) {
+		dmi = param_kernel;
+		dmi->data_size = minimum_data_size;
+		goto data_copied;
+	}
+
 	/*
 	 * Try to avoid low memory issues when a device is suspended.
 	 * Use kmalloc() rather than vmalloc() when we can.
 	 */
 	dmi = NULL;
-	if (tmp.data_size <= KMALLOC_MAX_SIZE)
-		dmi = kmalloc(tmp.data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+	if (param_kernel->data_size <= KMALLOC_MAX_SIZE) {
+		dmi = kmalloc(param_kernel->data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+		if (dmi)
+			*param_flags |= DM_PARAMS_KMALLOC;
+	}
 
 	if (!dmi) {
-		dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL);
-		*param_flags |= DM_PARAMS_VMALLOC;
+		dmi = __vmalloc(param_kernel->data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL);
+		if (dmi)
+			*param_flags |= DM_PARAMS_VMALLOC;
 	}
 
 	if (!dmi) {
-		if (secure_data && clear_user(user, tmp.data_size))
+		if (secure_data && clear_user(user, param_kernel->data_size))
 			return -EFAULT;
 		return -ENOMEM;
 	}
 
-	if (copy_from_user(dmi, user, tmp.data_size))
+	if (copy_from_user(dmi, user, param_kernel->data_size))
 		goto bad;
 
+data_copied:
 	/*
 	 * Abort if something changed the ioctl data while it was being copied.
 	 */
-	if (dmi->data_size != tmp.data_size) {
+	if (dmi->data_size != param_kernel->data_size) {
 		DMERR("rejecting ioctl: data size modified while processing parameters");
 		goto bad;
 	}
 
 	/* Wipe the user buffer so we do not return it to userspace */
-	if (secure_data && clear_user(user, tmp.data_size))
+	if (secure_data && clear_user(user, param_kernel->data_size))
 		goto bad;
 
 	*param = dmi;
 	return 0;
 
 bad:
-	free_params(dmi, tmp.data_size, *param_flags);
+	free_params(dmi, param_kernel->data_size, *param_flags);
 
 	return -EFAULT;
 }
@@ -1621,6 +1686,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param)
 	param->flags &= ~DM_BUFFER_FULL_FLAG;
 	param->flags &= ~DM_UEVENT_GENERATED_FLAG;
 	param->flags &= ~DM_SECURE_DATA_FLAG;
+	param->flags &= ~DM_DATA_OUT_FLAG;
 
 	/* Ignores parameters */
 	if (cmd == DM_REMOVE_ALL_CMD ||
@@ -1648,11 +1714,13 @@ static int validate_params(uint cmd, struct dm_ioctl *param)
 static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
 {
 	int r = 0;
+	int ioctl_flags;
 	int param_flags;
 	unsigned int cmd;
 	struct dm_ioctl *uninitialized_var(param);
 	ioctl_fn fn = NULL;
 	size_t input_param_size;
+	struct dm_ioctl param_kernel;
 
 	/* only root can play with this */
 	if (!capable(CAP_SYS_ADMIN))
@@ -1677,7 +1745,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
 	if (cmd == DM_VERSION_CMD)
 		return 0;
 
-	fn = lookup_ioctl(cmd);
+	fn = lookup_ioctl(cmd, &ioctl_flags);
 	if (!fn) {
 		DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
 		return -ENOTTY;
@@ -1686,7 +1754,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
 	/*
 	 * Copy the parameters into kernel space.
 	 */
-	r = copy_params(user, &param, &param_flags);
+	r = copy_params(user, &param_kernel, ioctl_flags, &param, &param_flags);
 
 	if (r)
 		return r;
@@ -1699,6 +1767,10 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
 	param->data_size = sizeof(*param);
 	r = fn(param, input_param_size);
 
+	if (unlikely(param->flags & DM_BUFFER_FULL_FLAG) &&
+	    unlikely(ioctl_flags & IOCTL_FLAGS_NO_PARAMS))
+		DMERR("ioctl %d tried to output some data but has IOCTL_FLAGS_NO_PARAMS set", cmd);
+
 	/*
 	 * Copy the results back to userland.
 	 */
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 68c02673263b..d581fe5d2faf 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -22,6 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
+#include <linux/delay.h>
 #include <linux/device-mapper.h>
 #include <linux/dm-kcopyd.h>
 
@@ -51,6 +52,8 @@ struct dm_kcopyd_client {
 	struct workqueue_struct *kcopyd_wq;
 	struct work_struct kcopyd_work;
 
+	struct dm_kcopyd_throttle *throttle;
+
 /*
  * We maintain three lists of jobs:
  *
@@ -68,6 +71,117 @@ struct dm_kcopyd_client {
 
 static struct page_list zero_page_list;
 
+static DEFINE_SPINLOCK(throttle_spinlock);
+
+/*
+ * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period.
+ * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided
+ * by 2.
+ */
+#define ACCOUNT_INTERVAL_SHIFT		SHIFT_HZ
+
+/*
+ * Sleep this number of milliseconds.
+ *
+ * The value was decided experimentally.
+ * Smaller values seem to cause an increased copy rate above the limit.
+ * The reason for this is unknown but possibly due to jiffies rounding errors
+ * or read/write cache inside the disk.
+ */
+#define SLEEP_MSEC			100
+
+/*
+ * Maximum number of sleep events. There is a theoretical livelock if more
+ * kcopyd clients do work simultaneously which this limit avoids.
+ */
+#define MAX_SLEEPS			10
+
+static void io_job_start(struct dm_kcopyd_throttle *t)
+{
+	unsigned throttle, now, difference;
+	int slept = 0, skew;
+
+	if (unlikely(!t))
+		return;
+
+try_again:
+	spin_lock_irq(&throttle_spinlock);
+
+	throttle = ACCESS_ONCE(t->throttle);
+
+	if (likely(throttle >= 100))
+		goto skip_limit;
+
+	now = jiffies;
+	difference = now - t->last_jiffies;
+	t->last_jiffies = now;
+	if (t->num_io_jobs)
+		t->io_period += difference;
+	t->total_period += difference;
+
+	/*
+	 * Maintain sane values if we got a temporary overflow.
+	 */
+	if (unlikely(t->io_period > t->total_period))
+		t->io_period = t->total_period;
+
+	if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) {
+		int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT);
+		t->total_period >>= shift;
+		t->io_period >>= shift;
+	}
+
+	skew = t->io_period - throttle * t->total_period / 100;
+
+	if (unlikely(skew > 0) && slept < MAX_SLEEPS) {
+		slept++;
+		spin_unlock_irq(&throttle_spinlock);
+		msleep(SLEEP_MSEC);
+		goto try_again;
+	}
+
+skip_limit:
+	t->num_io_jobs++;
+
+	spin_unlock_irq(&throttle_spinlock);
+}
+
+static void io_job_finish(struct dm_kcopyd_throttle *t)
+{
+	unsigned long flags;
+
+	if (unlikely(!t))
+		return;
+
+	spin_lock_irqsave(&throttle_spinlock, flags);
+
+	t->num_io_jobs--;
+
+	if (likely(ACCESS_ONCE(t->throttle) >= 100))
+		goto skip_limit;
+
+	if (!t->num_io_jobs) {
+		unsigned now, difference;
+
+		now = jiffies;
+		difference = now - t->last_jiffies;
+		t->last_jiffies = now;
+
+		t->io_period += difference;
+		t->total_period += difference;
+
+		/*
+		 * Maintain sane values if we got a temporary overflow.
+		 */
+		if (unlikely(t->io_period > t->total_period))
+			t->io_period = t->total_period;
+	}
+
+skip_limit:
+	spin_unlock_irqrestore(&throttle_spinlock, flags);
+}
+
+
 static void wake(struct dm_kcopyd_client *kc)
 {
 	queue_work(kc->kcopyd_wq, &kc->kcopyd_work);
@@ -348,6 +462,8 @@ static void complete_io(unsigned long error, void *context)
 	struct kcopyd_job *job = (struct kcopyd_job *) context;
 	struct dm_kcopyd_client *kc = job->kc;
 
+	io_job_finish(kc->throttle);
+
 	if (error) {
 		if (job->rw & WRITE)
 			job->write_err |= error;
@@ -389,6 +505,8 @@ static int run_io_job(struct kcopyd_job *job)
 		.client = job->kc->io_client,
 	};
 
+	io_job_start(job->kc->throttle);
+
 	if (job->rw == READ)
 		r = dm_io(&io_req, 1, &job->source, NULL);
 	else
@@ -695,7 +813,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block)
 /*-----------------------------------------------------------------
  * Client setup
  *---------------------------------------------------------------*/
-struct dm_kcopyd_client *dm_kcopyd_client_create(void)
+struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle)
 {
 	int r = -ENOMEM;
 	struct dm_kcopyd_client *kc;
@@ -708,6 +826,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(void)
 	INIT_LIST_HEAD(&kc->complete_jobs);
 	INIT_LIST_HEAD(&kc->io_jobs);
 	INIT_LIST_HEAD(&kc->pages_jobs);
+	kc->throttle = throttle;
 
 	kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
 	if (!kc->job_pool)
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 328cad5617ab..4f99d267340c 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -53,9 +53,9 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	ti->num_flush_requests = 1;
-	ti->num_discard_requests = 1;
-	ti->num_write_same_requests = 1;
+	ti->num_flush_bios = 1;
+	ti->num_discard_bios = 1;
+	ti->num_write_same_bios = 1;
 	ti->private = lc;
 	return 0;
 
@@ -95,8 +95,8 @@ static int linear_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_REMAPPED;
 }
 
-static int linear_status(struct dm_target *ti, status_type_t type,
-			 unsigned status_flags, char *result, unsigned maxlen)
+static void linear_status(struct dm_target *ti, status_type_t type,
+			  unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct linear_c *lc = (struct linear_c *) ti->private;
 
@@ -110,7 +110,6 @@ static int linear_status(struct dm_target *ti, status_type_t type,
 				(unsigned long long)lc->start);
 		break;
 	}
-	return 0;
 }
 
 static int linear_ioctl(struct dm_target *ti, unsigned int cmd,
@@ -155,7 +154,7 @@ static int linear_iterate_devices(struct dm_target *ti,
 
 static struct target_type linear_target = {
 	.name   = "linear",
-	.version = {1, 2, 0},
+	.version = {1, 2, 1},
 	.module = THIS_MODULE,
 	.ctr    = linear_ctr,
 	.dtr    = linear_dtr,
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 573bd04591bf..51bb81676be3 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -905,8 +905,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 		goto bad;
 	}
 
-	ti->num_flush_requests = 1;
-	ti->num_discard_requests = 1;
+	ti->num_flush_bios = 1;
+	ti->num_discard_bios = 1;
 
 	return 0;
 
@@ -1378,8 +1378,8 @@ static void multipath_resume(struct dm_target *ti)
  *     [priority selector-name num_ps_args [ps_args]*
  *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
  */
-static int multipath_status(struct dm_target *ti, status_type_t type,
-			    unsigned status_flags, char *result, unsigned maxlen)
+static void multipath_status(struct dm_target *ti, status_type_t type,
+			     unsigned status_flags, char *result, unsigned maxlen)
 {
 	int sz = 0;
 	unsigned long flags;
@@ -1485,8 +1485,6 @@ static int multipath_status(struct dm_target *ti, status_type_t type,
 	}
 
 	spin_unlock_irqrestore(&m->lock, flags);
-
-	return 0;
 }
 
 static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
@@ -1695,7 +1693,7 @@ out:
  *---------------------------------------------------------------*/
 static struct target_type multipath_target = {
 	.name = "multipath",
-	.version = {1, 5, 0},
+	.version = {1, 5, 1},
 	.module = THIS_MODULE,
 	.ctr = multipath_ctr,
 	.dtr = multipath_dtr,
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 9e58dbd8d8cb..9a01d1e4c783 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1151,7 +1151,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 
 	INIT_WORK(&rs->md.event_work, do_table_event);
 	ti->private = rs;
-	ti->num_flush_requests = 1;
+	ti->num_flush_bios = 1;
 
 	mutex_lock(&rs->md.reconfig_mutex);
 	ret = md_run(&rs->md);
@@ -1201,8 +1201,8 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_SUBMITTED;
 }
 
-static int raid_status(struct dm_target *ti, status_type_t type,
-		       unsigned status_flags, char *result, unsigned maxlen)
+static void raid_status(struct dm_target *ti, status_type_t type,
+			unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct raid_set *rs = ti->private;
 	unsigned raid_param_cnt = 1; /* at least 1 for chunksize */
@@ -1344,8 +1344,6 @@ static int raid_status(struct dm_target *ti, status_type_t type,
 				DMEMIT(" -");
 		}
 	}
-
-	return 0;
 }
 
 static int raid_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data)
@@ -1405,7 +1403,7 @@ static void raid_resume(struct dm_target *ti)
 
 static struct target_type raid_target = {
 	.name = "raid",
-	.version = {1, 4, 1},
+	.version = {1, 4, 2},
 	.module = THIS_MODULE,
 	.ctr = raid_ctr,
 	.dtr = raid_dtr,
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index fa519185ebba..d053098c6a91 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -82,6 +82,9 @@ struct mirror_set {
 	struct mirror mirror[0];
 };
 
+DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(raid1_resync_throttle,
+		"A percentage of time allocated for raid resynchronization");
+
 static void wakeup_mirrord(void *context)
 {
 	struct mirror_set *ms = context;
@@ -1072,8 +1075,8 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	if (r)
 		goto err_free_context;
 
-	ti->num_flush_requests = 1;
-	ti->num_discard_requests = 1;
+	ti->num_flush_bios = 1;
+	ti->num_discard_bios = 1;
 	ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record);
 	ti->discard_zeroes_data_unsupported = true;
 
@@ -1111,7 +1114,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto err_destroy_wq;
 	}
 
-	ms->kcopyd_client = dm_kcopyd_client_create();
+	ms->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
 	if (IS_ERR(ms->kcopyd_client)) {
 		r = PTR_ERR(ms->kcopyd_client);
 		goto err_destroy_wq;
@@ -1347,8 +1350,8 @@ static char device_status_char(struct mirror *m)
 }
 
 
-static int mirror_status(struct dm_target *ti, status_type_t type,
-			 unsigned status_flags, char *result, unsigned maxlen)
+static void mirror_status(struct dm_target *ti, status_type_t type,
+			  unsigned status_flags, char *result, unsigned maxlen)
 {
 	unsigned int m, sz = 0;
 	struct mirror_set *ms = (struct mirror_set *) ti->private;
@@ -1383,8 +1386,6 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
 		if (ms->features & DM_RAID1_HANDLE_ERRORS)
 			DMEMIT(" 1 handle_errors");
 	}
-
-	return 0;
 }
 
 static int mirror_iterate_devices(struct dm_target *ti,
@@ -1403,7 +1404,7 @@ static int mirror_iterate_devices(struct dm_target *ti,
 
 static struct target_type mirror_target = {
 	.name	 = "mirror",
-	.version = {1, 13, 1},
+	.version = {1, 13, 2},
 	.module	 = THIS_MODULE,
 	.ctr	 = mirror_ctr,
 	.dtr	 = mirror_dtr,
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 59fc18ae52c2..c0e07026a8d1 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -124,6 +124,9 @@ struct dm_snapshot {
 #define RUNNING_MERGE          0
 #define SHUTDOWN_MERGE         1
 
+DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
+		"A percentage of time allocated for copy on write");
+
 struct dm_dev *dm_snap_origin(struct dm_snapshot *s)
 {
 	return s->origin;
@@ -227,12 +230,11 @@ static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio)
 static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
 {
 	struct dm_snap_tracked_chunk *c;
-	struct hlist_node *hn;
 	int found = 0;
 
 	spin_lock_irq(&s->tracked_chunk_lock);
 
-	hlist_for_each_entry(c, hn,
+	hlist_for_each_entry(c,
 	    &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) {
 		if (c->chunk == chunk) {
 			found = 1;
@@ -1038,7 +1040,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	int i;
 	int r = -EINVAL;
 	char *origin_path, *cow_path;
-	unsigned args_used, num_flush_requests = 1;
+	unsigned args_used, num_flush_bios = 1;
 	fmode_t origin_mode = FMODE_READ;
 
 	if (argc != 4) {
@@ -1048,7 +1050,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	if (dm_target_is_snapshot_merge(ti)) {
-		num_flush_requests = 2;
+		num_flush_bios = 2;
 		origin_mode = FMODE_WRITE;
 	}
 
@@ -1109,7 +1111,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad_hash_tables;
 	}
 
-	s->kcopyd_client = dm_kcopyd_client_create();
+	s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
 	if (IS_ERR(s->kcopyd_client)) {
 		r = PTR_ERR(s->kcopyd_client);
 		ti->error = "Could not create kcopyd client";
@@ -1128,7 +1130,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	spin_lock_init(&s->tracked_chunk_lock);
 
 	ti->private = s;
-	ti->num_flush_requests = num_flush_requests;
+	ti->num_flush_bios = num_flush_bios;
 	ti->per_bio_data_size = sizeof(struct dm_snap_tracked_chunk);
 
 	/* Add snapshot to the list of snapshots for this origin */
@@ -1692,7 +1694,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
 	init_tracked_chunk(bio);
 
 	if (bio->bi_rw & REQ_FLUSH) {
-		if (!dm_bio_get_target_request_nr(bio))
+		if (!dm_bio_get_target_bio_nr(bio))
 			bio->bi_bdev = s->origin->bdev;
 		else
 			bio->bi_bdev = s->cow->bdev;
@@ -1837,8 +1839,8 @@ static void snapshot_merge_resume(struct dm_target *ti)
 	start_merge(s);
 }
 
-static int snapshot_status(struct dm_target *ti, status_type_t type,
-			   unsigned status_flags, char *result, unsigned maxlen)
+static void snapshot_status(struct dm_target *ti, status_type_t type,
+			    unsigned status_flags, char *result, unsigned maxlen)
 {
 	unsigned sz = 0;
 	struct dm_snapshot *snap = ti->private;
@@ -1884,8 +1886,6 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
 					  maxlen - sz);
 		break;
 	}
-
-	return 0;
 }
 
 static int snapshot_iterate_devices(struct dm_target *ti,
@@ -2105,7 +2105,7 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	ti->private = dev;
-	ti->num_flush_requests = 1;
+	ti->num_flush_bios = 1;
 
 	return 0;
 }
@@ -2139,8 +2139,8 @@ static void origin_resume(struct dm_target *ti)
 	ti->max_io_len = get_origin_minimum_chunksize(dev->bdev);
 }
 
-static int origin_status(struct dm_target *ti, status_type_t type,
-			 unsigned status_flags, char *result, unsigned maxlen)
+static void origin_status(struct dm_target *ti, status_type_t type,
+			  unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct dm_dev *dev = ti->private;
 
@@ -2153,8 +2153,6 @@ static int origin_status(struct dm_target *ti, status_type_t type,
 		snprintf(result, maxlen, "%s", dev->name);
 		break;
 	}
-
-	return 0;
 }
 
 static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
@@ -2181,7 +2179,7 @@ static int origin_iterate_devices(struct dm_target *ti,
 
 static struct target_type origin_target = {
 	.name    = "snapshot-origin",
-	.version = {1, 8, 0},
+	.version = {1, 8, 1},
 	.module  = THIS_MODULE,
 	.ctr     = origin_ctr,
 	.dtr     = origin_dtr,
@@ -2194,7 +2192,7 @@ static struct target_type origin_target = {
 
 static struct target_type snapshot_target = {
 	.name    = "snapshot",
-	.version = {1, 11, 0},
+	.version = {1, 11, 1},
 	.module  = THIS_MODULE,
 	.ctr     = snapshot_ctr,
 	.dtr     = snapshot_dtr,
@@ -2307,3 +2305,5 @@ module_exit(dm_snapshot_exit);
 MODULE_DESCRIPTION(DM_NAME " snapshot target");
 MODULE_AUTHOR("Joe Thornber");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("dm-snapshot-origin");
+MODULE_ALIAS("dm-snapshot-merge");
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index c89cde86d400..d8837d313f54 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -160,9 +160,9 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	if (r)
 		return r;
 
-	ti->num_flush_requests = stripes;
-	ti->num_discard_requests = stripes;
-	ti->num_write_same_requests = stripes;
+	ti->num_flush_bios = stripes;
+	ti->num_discard_bios = stripes;
+	ti->num_write_same_bios = stripes;
 
 	sc->chunk_size = chunk_size;
 	if (chunk_size & (chunk_size - 1))
@@ -276,19 +276,19 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
 {
 	struct stripe_c *sc = ti->private;
 	uint32_t stripe;
-	unsigned target_request_nr;
+	unsigned target_bio_nr;
 
 	if (bio->bi_rw & REQ_FLUSH) {
-		target_request_nr = dm_bio_get_target_request_nr(bio);
-		BUG_ON(target_request_nr >= sc->stripes);
-		bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
+		target_bio_nr = dm_bio_get_target_bio_nr(bio);
+		BUG_ON(target_bio_nr >= sc->stripes);
+		bio->bi_bdev = sc->stripe[target_bio_nr].dev->bdev;
 		return DM_MAPIO_REMAPPED;
 	}
 	if (unlikely(bio->bi_rw & REQ_DISCARD) ||
 	    unlikely(bio->bi_rw & REQ_WRITE_SAME)) {
-		target_request_nr = dm_bio_get_target_request_nr(bio);
-		BUG_ON(target_request_nr >= sc->stripes);
-		return stripe_map_range(sc, bio, target_request_nr);
+		target_bio_nr = dm_bio_get_target_bio_nr(bio);
+		BUG_ON(target_bio_nr >= sc->stripes);
+		return stripe_map_range(sc, bio, target_bio_nr);
 	}
 
 	stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector);
@@ -312,8 +312,8 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
  *
  */
 
-static int stripe_status(struct dm_target *ti, status_type_t type,
-			 unsigned status_flags, char *result, unsigned maxlen)
+static void stripe_status(struct dm_target *ti, status_type_t type,
+			  unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct stripe_c *sc = (struct stripe_c *) ti->private;
 	char buffer[sc->stripes + 1];
@@ -340,7 +340,6 @@ static int stripe_status(struct dm_target *ti, status_type_t type,
 			    (unsigned long long)sc->stripe[i].physical_start);
 		break;
 	}
-	return 0;
 }
 
 static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
@@ -428,7 +427,7 @@ static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
 
 static struct target_type stripe_target = {
 	.name   = "striped",
-	.version = {1, 5, 0},
+	.version = {1, 5, 1},
 	.module = THIS_MODULE,
 	.ctr    = stripe_ctr,
 	.dtr    = stripe_dtr,
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index daf25d0890b3..e50dad0c65f4 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -217,7 +217,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
 
 	if (alloc_targets(t, num_targets)) {
 		kfree(t);
-		t = NULL;
 		return -ENOMEM;
 	}
 
@@ -823,8 +822,8 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 
 	t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
 
-	if (!tgt->num_discard_requests && tgt->discards_supported)
-		DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.",
+	if (!tgt->num_discard_bios && tgt->discards_supported)
+		DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.",
 		       dm_device_name(t->md), type);
 
 	return 0;
@@ -1360,7 +1359,7 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush)
 	while (i < dm_table_get_num_targets(t)) {
 		ti = dm_table_get_target(t, i++);
 
-		if (!ti->num_flush_requests)
+		if (!ti->num_flush_bios)
 			continue;
 
 		if (ti->flush_supported)
@@ -1439,7 +1438,7 @@ static bool dm_table_supports_write_same(struct dm_table *t)
 	while (i < dm_table_get_num_targets(t)) {
 		ti = dm_table_get_target(t, i++);
 
-		if (!ti->num_write_same_requests)
+		if (!ti->num_write_same_bios)
 			return false;
 
 		if (!ti->type->iterate_devices ||
@@ -1657,7 +1656,7 @@ bool dm_table_supports_discards(struct dm_table *t)
 	while (i < dm_table_get_num_targets(t)) {
 		ti = dm_table_get_target(t, i++);
 
-		if (!ti->num_discard_requests)
+		if (!ti->num_discard_bios)
 			continue;
 
 		if (ti->discards_supported)
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 617d21a77256..37ba5db71cd9 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -116,7 +116,7 @@ static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
 	/*
 	 * Return error for discards instead of -EOPNOTSUPP
 	 */
-	tt->num_discard_requests = 1;
+	tt->num_discard_bios = 1;
 
 	return 0;
 }
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 4d6e85367b84..00cee02f8fc9 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -280,7 +280,7 @@ static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t)
 	*t = v & ((1 << 24) - 1);
 }
 
-static void data_block_inc(void *context, void *value_le)
+static void data_block_inc(void *context, const void *value_le)
 {
 	struct dm_space_map *sm = context;
 	__le64 v_le;
@@ -292,7 +292,7 @@ static void data_block_inc(void *context, void *value_le)
 	dm_sm_inc_block(sm, b);
 }
 
-static void data_block_dec(void *context, void *value_le)
+static void data_block_dec(void *context, const void *value_le)
 {
 	struct dm_space_map *sm = context;
 	__le64 v_le;
@@ -304,7 +304,7 @@ static void data_block_dec(void *context, void *value_le)
 	dm_sm_dec_block(sm, b);
 }
 
-static int data_block_equal(void *context, void *value1_le, void *value2_le)
+static int data_block_equal(void *context, const void *value1_le, const void *value2_le)
 {
 	__le64 v1_le, v2_le;
 	uint64_t b1, b2;
@@ -318,7 +318,7 @@ static int data_block_equal(void *context, void *value1_le, void *value2_le)
 	return b1 == b2;
 }
 
-static void subtree_inc(void *context, void *value)
+static void subtree_inc(void *context, const void *value)
 {
 	struct dm_btree_info *info = context;
 	__le64 root_le;
@@ -329,7 +329,7 @@ static void subtree_inc(void *context, void *value)
 	dm_tm_inc(info->tm, root);
 }
 
-static void subtree_dec(void *context, void *value)
+static void subtree_dec(void *context, const void *value)
 {
 	struct dm_btree_info *info = context;
 	__le64 root_le;
@@ -341,7 +341,7 @@ static void subtree_dec(void *context, void *value)
 		DMERR("btree delete failed\n");
 }
 
-static int subtree_equal(void *context, void *value1_le, void *value2_le)
+static int subtree_equal(void *context, const void *value1_le, const void *value2_le)
 {
 	__le64 v1_le, v2_le;
 	memcpy(&v1_le, value1_le, sizeof(v1_le));
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 5409607d4875..009339d62828 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -26,6 +26,9 @@
 #define PRISON_CELLS 1024
 #define COMMIT_PERIOD HZ
 
+DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
+		"A percentage of time allocated for copy on write");
+
 /*
  * The block size of the device holding pool data must be
  * between 64KB and 1GB.
@@ -227,6 +230,78 @@ struct thin_c {
 /*----------------------------------------------------------------*/
 
 /*
+ * wake_worker() is used when new work is queued and when pool_resume is
+ * ready to continue deferred IO processing.
+ */
+static void wake_worker(struct pool *pool)
+{
+	queue_work(pool->wq, &pool->worker);
+}
+
+/*----------------------------------------------------------------*/
+
+static int bio_detain(struct pool *pool, struct dm_cell_key *key, struct bio *bio,
+		      struct dm_bio_prison_cell **cell_result)
+{
+	int r;
+	struct dm_bio_prison_cell *cell_prealloc;
+
+	/*
+	 * Allocate a cell from the prison's mempool.
+	 * This might block but it can't fail.
+	 */
+	cell_prealloc = dm_bio_prison_alloc_cell(pool->prison, GFP_NOIO);
+
+	r = dm_bio_detain(pool->prison, key, bio, cell_prealloc, cell_result);
+	if (r)
+		/*
+		 * We reused an old cell; we can get rid of
+		 * the new one.
+		 */
+		dm_bio_prison_free_cell(pool->prison, cell_prealloc);
+
+	return r;
+}
+
+static void cell_release(struct pool *pool,
+			 struct dm_bio_prison_cell *cell,
+			 struct bio_list *bios)
+{
+	dm_cell_release(pool->prison, cell, bios);
+	dm_bio_prison_free_cell(pool->prison, cell);
+}
+
+static void cell_release_no_holder(struct pool *pool,
+				   struct dm_bio_prison_cell *cell,
+				   struct bio_list *bios)
+{
+	dm_cell_release_no_holder(pool->prison, cell, bios);
+	dm_bio_prison_free_cell(pool->prison, cell);
+}
+
+static void cell_defer_no_holder_no_free(struct thin_c *tc,
+					 struct dm_bio_prison_cell *cell)
+{
+	struct pool *pool = tc->pool;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pool->lock, flags);
+	dm_cell_release_no_holder(pool->prison, cell, &pool->deferred_bios);
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	wake_worker(pool);
+}
+
+static void cell_error(struct pool *pool,
+		       struct dm_bio_prison_cell *cell)
+{
+	dm_cell_error(pool->prison, cell);
+	dm_bio_prison_free_cell(pool->prison, cell);
+}
+
+/*----------------------------------------------------------------*/
+
+/*
  * A global list of pools that uses a struct mapped_device as a key.
  */
 static struct dm_thin_pool_table {
@@ -330,14 +405,20 @@ static void requeue_io(struct thin_c *tc)
  * target.
  */
 
+static bool block_size_is_power_of_two(struct pool *pool)
+{
+	return pool->sectors_per_block_shift >= 0;
+}
+
 static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
 {
+	struct pool *pool = tc->pool;
 	sector_t block_nr = bio->bi_sector;
 
-	if (tc->pool->sectors_per_block_shift < 0)
-		(void) sector_div(block_nr, tc->pool->sectors_per_block);
+	if (block_size_is_power_of_two(pool))
+		block_nr >>= pool->sectors_per_block_shift;
 	else
-		block_nr >>= tc->pool->sectors_per_block_shift;
+		(void) sector_div(block_nr, pool->sectors_per_block);
 
 	return block_nr;
 }
@@ -348,12 +429,12 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
 	sector_t bi_sector = bio->bi_sector;
 
 	bio->bi_bdev = tc->pool_dev->bdev;
-	if (tc->pool->sectors_per_block_shift < 0)
-		bio->bi_sector = (block * pool->sectors_per_block) +
-				 sector_div(bi_sector, pool->sectors_per_block);
-	else
+	if (block_size_is_power_of_two(pool))
 		bio->bi_sector = (block << pool->sectors_per_block_shift) |
 				(bi_sector & (pool->sectors_per_block - 1));
+	else
+		bio->bi_sector = (block * pool->sectors_per_block) +
+				 sector_div(bi_sector, pool->sectors_per_block);
 }
 
 static void remap_to_origin(struct thin_c *tc, struct bio *bio)
@@ -420,15 +501,6 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio,
 	issue(tc, bio);
 }
 
-/*
- * wake_worker() is used when new work is queued and when pool_resume is
- * ready to continue deferred IO processing.
- */
-static void wake_worker(struct pool *pool)
-{
-	queue_work(pool->wq, &pool->worker);
-}
-
 /*----------------------------------------------------------------*/
 
 /*
@@ -515,14 +587,14 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell)
 	unsigned long flags;
 
 	spin_lock_irqsave(&pool->lock, flags);
-	dm_cell_release(cell, &pool->deferred_bios);
+	cell_release(pool, cell, &pool->deferred_bios);
 	spin_unlock_irqrestore(&tc->pool->lock, flags);
 
 	wake_worker(pool);
 }
 
 /*
- * Same as cell_defer except it omits the original holder of the cell.
+ * Same as cell_defer above, except it omits the original holder of the cell.
  */
 static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell)
 {
@@ -530,7 +602,7 @@ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *c
 	unsigned long flags;
 
 	spin_lock_irqsave(&pool->lock, flags);
-	dm_cell_release_no_holder(cell, &pool->deferred_bios);
+	cell_release_no_holder(pool, cell, &pool->deferred_bios);
 	spin_unlock_irqrestore(&pool->lock, flags);
 
 	wake_worker(pool);
@@ -540,13 +612,15 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
 {
 	if (m->bio)
 		m->bio->bi_end_io = m->saved_bi_end_io;
-	dm_cell_error(m->cell);
+	cell_error(m->tc->pool, m->cell);
 	list_del(&m->list);
 	mempool_free(m, m->tc->pool->mapping_pool);
 }
+
 static void process_prepared_mapping(struct dm_thin_new_mapping *m)
 {
 	struct thin_c *tc = m->tc;
+	struct pool *pool = tc->pool;
 	struct bio *bio;
 	int r;
 
@@ -555,7 +629,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
 		bio->bi_end_io = m->saved_bi_end_io;
 
 	if (m->err) {
-		dm_cell_error(m->cell);
+		cell_error(pool, m->cell);
 		goto out;
 	}
 
@@ -567,7 +641,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
 	r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block);
 	if (r) {
 		DMERR_LIMIT("dm_thin_insert_block() failed");
-		dm_cell_error(m->cell);
+		cell_error(pool, m->cell);
 		goto out;
 	}
 
@@ -585,7 +659,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
 
 out:
 	list_del(&m->list);
-	mempool_free(m, tc->pool->mapping_pool);
+	mempool_free(m, pool->mapping_pool);
 }
 
 static void process_prepared_discard_fail(struct dm_thin_new_mapping *m)
@@ -736,7 +810,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
 		if (r < 0) {
 			mempool_free(m, pool->mapping_pool);
 			DMERR_LIMIT("dm_kcopyd_copy() failed");
-			dm_cell_error(cell);
+			cell_error(pool, cell);
 		}
 	}
 }
@@ -802,7 +876,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
 		if (r < 0) {
 			mempool_free(m, pool->mapping_pool);
 			DMERR_LIMIT("dm_kcopyd_zero() failed");
-			dm_cell_error(cell);
+			cell_error(pool, cell);
 		}
 	}
 }
@@ -908,13 +982,13 @@ static void retry_on_resume(struct bio *bio)
 	spin_unlock_irqrestore(&pool->lock, flags);
 }
 
-static void no_space(struct dm_bio_prison_cell *cell)
+static void no_space(struct pool *pool, struct dm_bio_prison_cell *cell)
 {
 	struct bio *bio;
 	struct bio_list bios;
 
 	bio_list_init(&bios);
-	dm_cell_release(cell, &bios);
+	cell_release(pool, cell, &bios);
 
 	while ((bio = bio_list_pop(&bios)))
 		retry_on_resume(bio);
@@ -932,7 +1006,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
 	struct dm_thin_new_mapping *m;
 
 	build_virtual_key(tc->td, block, &key);
-	if (dm_bio_detain(tc->pool->prison, &key, bio, &cell))
+	if (bio_detain(tc->pool, &key, bio, &cell))
 		return;
 
 	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
@@ -944,7 +1018,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
 		 * on this block.
 		 */
 		build_data_key(tc->td, lookup_result.block, &key2);
-		if (dm_bio_detain(tc->pool->prison, &key2, bio, &cell2)) {
+		if (bio_detain(tc->pool, &key2, bio, &cell2)) {
 			cell_defer_no_holder(tc, cell);
 			break;
 		}
@@ -1020,13 +1094,13 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
 		break;
 
 	case -ENOSPC:
-		no_space(cell);
+		no_space(tc->pool, cell);
 		break;
 
 	default:
 		DMERR_LIMIT("%s: alloc_data_block() failed: error = %d",
 			    __func__, r);
-		dm_cell_error(cell);
+		cell_error(tc->pool, cell);
 		break;
 	}
 }
@@ -1044,7 +1118,7 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio,
 	 * of being broken so we have nothing further to do here.
 	 */
 	build_data_key(tc->td, lookup_result->block, &key);
-	if (dm_bio_detain(pool->prison, &key, bio, &cell))
+	if (bio_detain(pool, &key, bio, &cell))
 		return;
 
 	if (bio_data_dir(bio) == WRITE && bio->bi_size)
@@ -1065,12 +1139,13 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
 {
 	int r;
 	dm_block_t data_block;
+	struct pool *pool = tc->pool;
 
 	/*
 	 * Remap empty bios (flushes) immediately, without provisioning.
 	 */
 	if (!bio->bi_size) {
-		inc_all_io_entry(tc->pool, bio);
+		inc_all_io_entry(pool, bio);
 		cell_defer_no_holder(tc, cell);
 
 		remap_and_issue(tc, bio, 0);
@@ -1097,14 +1172,14 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
 		break;
 
 	case -ENOSPC:
-		no_space(cell);
+		no_space(pool, cell);
 		break;
 
 	default:
 		DMERR_LIMIT("%s: alloc_data_block() failed: error = %d",
 			    __func__, r);
-		set_pool_mode(tc->pool, PM_READ_ONLY);
-		dm_cell_error(cell);
+		set_pool_mode(pool, PM_READ_ONLY);
+		cell_error(pool, cell);
 		break;
 	}
 }
@@ -1112,6 +1187,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
 static void process_bio(struct thin_c *tc, struct bio *bio)
 {
 	int r;
+	struct pool *pool = tc->pool;
 	dm_block_t block = get_bio_block(tc, bio);
 	struct dm_bio_prison_cell *cell;
 	struct dm_cell_key key;
@@ -1122,7 +1198,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
 	 * being provisioned so we have nothing further to do here.
 	 */
 	build_virtual_key(tc->td, block, &key);
-	if (dm_bio_detain(tc->pool->prison, &key, bio, &cell))
+	if (bio_detain(pool, &key, bio, &cell))
 		return;
 
 	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
@@ -1130,9 +1206,9 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
 	case 0:
 		if (lookup_result.shared) {
 			process_shared_bio(tc, bio, block, &lookup_result);
-			cell_defer_no_holder(tc, cell);
+			cell_defer_no_holder(tc, cell); /* FIXME: pass this cell into process_shared? */
 		} else {
-			inc_all_io_entry(tc->pool, bio);
+			inc_all_io_entry(pool, bio);
 			cell_defer_no_holder(tc, cell);
 
 			remap_and_issue(tc, bio, lookup_result.block);
@@ -1141,7 +1217,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
 
 	case -ENODATA:
 		if (bio_data_dir(bio) == READ && tc->origin_dev) {
-			inc_all_io_entry(tc->pool, bio);
+			inc_all_io_entry(pool, bio);
 			cell_defer_no_holder(tc, cell);
 
 			remap_to_origin_and_issue(tc, bio);
@@ -1378,7 +1454,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
 	dm_block_t block = get_bio_block(tc, bio);
 	struct dm_thin_device *td = tc->td;
 	struct dm_thin_lookup_result result;
-	struct dm_bio_prison_cell *cell1, *cell2;
+	struct dm_bio_prison_cell cell1, cell2;
+	struct dm_bio_prison_cell *cell_result;
 	struct dm_cell_key key;
 
 	thin_hook_bio(tc, bio);
@@ -1420,18 +1497,18 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
 		}
 
 		build_virtual_key(tc->td, block, &key);
-		if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1))
+		if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1, &cell_result))
 			return DM_MAPIO_SUBMITTED;
 
 		build_data_key(tc->td, result.block, &key);
-		if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2)) {
-			cell_defer_no_holder(tc, cell1);
+		if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2, &cell_result)) {
+			cell_defer_no_holder_no_free(tc, &cell1);
 			return DM_MAPIO_SUBMITTED;
 		}
 
 		inc_all_io_entry(tc->pool, bio);
-		cell_defer_no_holder(tc, cell2);
-		cell_defer_no_holder(tc, cell1);
+		cell_defer_no_holder_no_free(tc, &cell2);
+		cell_defer_no_holder_no_free(tc, &cell1);
 
 		remap(tc, bio, result.block);
 		return DM_MAPIO_REMAPPED;
@@ -1636,7 +1713,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 		goto bad_prison;
 	}
 
-	pool->copier = dm_kcopyd_client_create();
+	pool->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
 	if (IS_ERR(pool->copier)) {
 		r = PTR_ERR(pool->copier);
 		*error = "Error creating pool's kcopyd client";
@@ -1938,7 +2015,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	pt->data_dev = data_dev;
 	pt->low_water_blocks = low_water_blocks;
 	pt->adjusted_pf = pt->requested_pf = pf;
-	ti->num_flush_requests = 1;
+	ti->num_flush_bios = 1;
 
 	/*
 	 * Only need to enable discards if the pool should pass
@@ -1946,7 +2023,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	 * processing will cause mappings to be removed from the btree.
 	 */
 	if (pf.discard_enabled && pf.discard_passdown) {
-		ti->num_discard_requests = 1;
+		ti->num_discard_bios = 1;
 
 		/*
 		 * Setting 'discards_supported' circumvents the normal
@@ -2299,8 +2376,8 @@ static void emit_flags(struct pool_features *pf, char *result,
  *    <transaction id> <used metadata sectors>/<total metadata sectors>
  *    <used data sectors>/<total data sectors> <held metadata root>
  */
-static int pool_status(struct dm_target *ti, status_type_t type,
-		       unsigned status_flags, char *result, unsigned maxlen)
+static void pool_status(struct dm_target *ti, status_type_t type,
+			unsigned status_flags, char *result, unsigned maxlen)
 {
 	int r;
 	unsigned sz = 0;
@@ -2326,32 +2403,41 @@ static int pool_status(struct dm_target *ti, status_type_t type,
 		if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
 			(void) commit_or_fallback(pool);
 
-		r = dm_pool_get_metadata_transaction_id(pool->pmd,
-							&transaction_id);
-		if (r)
-			return r;
+		r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id);
+		if (r) {
+			DMERR("dm_pool_get_metadata_transaction_id returned %d", r);
+			goto err;
+		}
 
-		r = dm_pool_get_free_metadata_block_count(pool->pmd,
-							  &nr_free_blocks_metadata);
-		if (r)
-			return r;
+		r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free_blocks_metadata);
+		if (r) {
+			DMERR("dm_pool_get_free_metadata_block_count returned %d", r);
+			goto err;
+		}
 
 		r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata);
-		if (r)
-			return r;
+		if (r) {
+			DMERR("dm_pool_get_metadata_dev_size returned %d", r);
+			goto err;
+		}
 
-		r = dm_pool_get_free_block_count(pool->pmd,
-						 &nr_free_blocks_data);
-		if (r)
-			return r;
+		r = dm_pool_get_free_block_count(pool->pmd, &nr_free_blocks_data);
+		if (r) {
+			DMERR("dm_pool_get_free_block_count returned %d", r);
+			goto err;
+		}
 
 		r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data);
-		if (r)
-			return r;
+		if (r) {
+			DMERR("dm_pool_get_data_dev_size returned %d", r);
+			goto err;
+		}
 
 		r = dm_pool_get_metadata_snap(pool->pmd, &held_root);
-		if (r)
-			return r;
+		if (r) {
+			DMERR("dm_pool_get_metadata_snap returned %d", r);
+			goto err;
+		}
 
 		DMEMIT("%llu %llu/%llu %llu/%llu ",
 		       (unsigned long long)transaction_id,
@@ -2388,8 +2474,10 @@ static int pool_status(struct dm_target *ti, status_type_t type,
 		emit_flags(&pt->requested_pf, result, sz, maxlen);
 		break;
 	}
+	return;
 
-	return 0;
+err:
+	DMEMIT("Error");
 }
 
 static int pool_iterate_devices(struct dm_target *ti,
@@ -2414,11 +2502,6 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
 	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
 }
 
-static bool block_size_is_power_of_two(struct pool *pool)
-{
-	return pool->sectors_per_block_shift >= 0;
-}
-
 static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits)
 {
 	struct pool *pool = pt->pool;
@@ -2432,15 +2515,8 @@ static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits)
 	if (pt->adjusted_pf.discard_passdown) {
 		data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits;
 		limits->discard_granularity = data_limits->discard_granularity;
-	} else if (block_size_is_power_of_two(pool))
+	} else
 		limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
-	else
-		/*
-		 * Use largest power of 2 that is a factor of sectors_per_block
-		 * but at least DATA_DEV_BLOCK_SIZE_MIN_SECTORS.
-		 */
-		limits->discard_granularity = max(1 << (ffs(pool->sectors_per_block) - 1),
-						  DATA_DEV_BLOCK_SIZE_MIN_SECTORS) << SECTOR_SHIFT;
 }
 
 static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
@@ -2468,7 +2544,7 @@ static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 6, 0},
+	.version = {1, 6, 1},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -2588,17 +2664,17 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	if (r)
 		goto bad_thin_open;
 
-	ti->num_flush_requests = 1;
+	ti->num_flush_bios = 1;
 	ti->flush_supported = true;
 	ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook);
 
 	/* In case the pool supports discards, pass them on. */
 	if (tc->pool->pf.discard_enabled) {
 		ti->discards_supported = true;
-		ti->num_discard_requests = 1;
+		ti->num_discard_bios = 1;
 		ti->discard_zeroes_data_unsupported = true;
-		/* Discard requests must be split on a block boundary */
-		ti->split_discard_requests = true;
+		/* Discard bios must be split on a block boundary */
+		ti->split_discard_bios = true;
 	}
 
 	dm_put(pool_md);
@@ -2676,8 +2752,8 @@ static void thin_postsuspend(struct dm_target *ti)
 /*
  * <nr mapped sectors> <highest mapped sector>
  */
-static int thin_status(struct dm_target *ti, status_type_t type,
-		       unsigned status_flags, char *result, unsigned maxlen)
+static void thin_status(struct dm_target *ti, status_type_t type,
+			unsigned status_flags, char *result, unsigned maxlen)
 {
 	int r;
 	ssize_t sz = 0;
@@ -2687,7 +2763,7 @@ static int thin_status(struct dm_target *ti, status_type_t type,
 
 	if (get_pool_mode(tc->pool) == PM_FAIL) {
 		DMEMIT("Fail");
-		return 0;
+		return;
 	}
 
 	if (!tc->td)
@@ -2696,12 +2772,16 @@ static int thin_status(struct dm_target *ti, status_type_t type,
 		switch (type) {
 		case STATUSTYPE_INFO:
 			r = dm_thin_get_mapped_count(tc->td, &mapped);
-			if (r)
-				return r;
+			if (r) {
+				DMERR("dm_thin_get_mapped_count returned %d", r);
+				goto err;
+			}
 
 			r = dm_thin_get_highest_mapped_block(tc->td, &highest);
-			if (r < 0)
-				return r;
+			if (r < 0) {
+				DMERR("dm_thin_get_highest_mapped_block returned %d", r);
+				goto err;
+			}
 
 			DMEMIT("%llu ", mapped * tc->pool->sectors_per_block);
 			if (r)
@@ -2721,7 +2801,10 @@ static int thin_status(struct dm_target *ti, status_type_t type,
 		}
 	}
 
-	return 0;
+	return;
+
+err:
+	DMEMIT("Error");
 }
 
 static int thin_iterate_devices(struct dm_target *ti,
@@ -2748,7 +2831,7 @@ static int thin_iterate_devices(struct dm_target *ti,
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 7, 0},
+	.version = {1, 7, 1},
 	.module	= THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 52cde982164a..6ad538375c3c 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -508,8 +508,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
 /*
  * Status: V (valid) or C (corruption found)
  */
-static int verity_status(struct dm_target *ti, status_type_t type,
-			 unsigned status_flags, char *result, unsigned maxlen)
+static void verity_status(struct dm_target *ti, status_type_t type,
+			  unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct dm_verity *v = ti->private;
 	unsigned sz = 0;
@@ -540,8 +540,6 @@ static int verity_status(struct dm_target *ti, status_type_t type,
 				DMEMIT("%02x", v->salt[x]);
 		break;
 	}
-
-	return 0;
 }
 
 static int verity_ioctl(struct dm_target *ti, unsigned cmd,
@@ -860,7 +858,7 @@ bad:
 
 static struct target_type verity_target = {
 	.name		= "verity",
-	.version	= {1, 1, 0},
+	.version	= {1, 1, 1},
 	.module		= THIS_MODULE,
 	.ctr		= verity_ctr,
 	.dtr		= verity_dtr,
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index 69a5c3b3b340..c99003e0d47a 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -25,7 +25,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	/*
 	 * Silently drop discards, avoiding -EOPNOTSUPP.
 	 */
-	ti->num_discard_requests = 1;
+	ti->num_discard_bios = 1;
 
 	return 0;
 }
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 314a0e2faf79..7e469260fe5e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -163,7 +163,6 @@ struct mapped_device {
 	 * io objects are allocated from here.
 	 */
 	mempool_t *io_pool;
-	mempool_t *tio_pool;
 
 	struct bio_set *bs;
 
@@ -197,7 +196,6 @@ struct mapped_device {
  */
 struct dm_md_mempools {
 	mempool_t *io_pool;
-	mempool_t *tio_pool;
 	struct bio_set *bs;
 };
 
@@ -205,12 +203,6 @@ struct dm_md_mempools {
 static struct kmem_cache *_io_cache;
 static struct kmem_cache *_rq_tio_cache;
 
-/*
- * Unused now, and needs to be deleted. But since io_pool is overloaded and it's
- * still used for _io_cache, I'm leaving this for a later cleanup
- */
-static struct kmem_cache *_rq_bio_info_cache;
-
 static int __init local_init(void)
 {
 	int r = -ENOMEM;
@@ -224,13 +216,9 @@ static int __init local_init(void)
 	if (!_rq_tio_cache)
 		goto out_free_io_cache;
 
-	_rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0);
-	if (!_rq_bio_info_cache)
-		goto out_free_rq_tio_cache;
-
 	r = dm_uevent_init();
 	if (r)
-		goto out_free_rq_bio_info_cache;
+		goto out_free_rq_tio_cache;
 
 	_major = major;
 	r = register_blkdev(_major, _name);
@@ -244,8 +232,6 @@ static int __init local_init(void)
 
 out_uevent_exit:
 	dm_uevent_exit();
-out_free_rq_bio_info_cache:
-	kmem_cache_destroy(_rq_bio_info_cache);
 out_free_rq_tio_cache:
 	kmem_cache_destroy(_rq_tio_cache);
 out_free_io_cache:
@@ -256,7 +242,6 @@ out_free_io_cache:
 
 static void local_exit(void)
 {
-	kmem_cache_destroy(_rq_bio_info_cache);
 	kmem_cache_destroy(_rq_tio_cache);
 	kmem_cache_destroy(_io_cache);
 	unregister_blkdev(_major, _name);
@@ -318,7 +303,6 @@ static void __exit dm_exit(void)
 	/*
 	 * Should be empty by this point.
 	 */
-	idr_remove_all(&_minor_idr);
 	idr_destroy(&_minor_idr);
 }
 
@@ -449,12 +433,12 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
 static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md,
 					    gfp_t gfp_mask)
 {
-	return mempool_alloc(md->tio_pool, gfp_mask);
+	return mempool_alloc(md->io_pool, gfp_mask);
 }
 
 static void free_rq_tio(struct dm_rq_target_io *tio)
 {
-	mempool_free(tio, tio->md->tio_pool);
+	mempool_free(tio, tio->md->io_pool);
 }
 
 static int md_in_flight(struct mapped_device *md)
@@ -627,7 +611,6 @@ static void dec_pending(struct dm_io *io, int error)
 			queue_io(md, bio);
 		} else {
 			/* done with normal IO or empty flush */
-			trace_block_bio_complete(md->queue, bio, io_error);
 			bio_endio(bio, io_error);
 		}
 	}
@@ -987,12 +970,13 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
 }
 EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
 
-static void __map_bio(struct dm_target *ti, struct dm_target_io *tio)
+static void __map_bio(struct dm_target_io *tio)
 {
 	int r;
 	sector_t sector;
 	struct mapped_device *md;
 	struct bio *clone = &tio->clone;
+	struct dm_target *ti = tio->ti;
 
 	clone->bi_end_io = clone_endio;
 	clone->bi_private = tio;
@@ -1033,32 +1017,54 @@ struct clone_info {
 	unsigned short idx;
 };
 
+static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len)
+{
+	bio->bi_sector = sector;
+	bio->bi_size = to_bytes(len);
+}
+
+static void bio_setup_bv(struct bio *bio, unsigned short idx, unsigned short bv_count)
+{
+	bio->bi_idx = idx;
+	bio->bi_vcnt = idx + bv_count;
+	bio->bi_flags &= ~(1 << BIO_SEG_VALID);
+}
+
+static void clone_bio_integrity(struct bio *bio, struct bio *clone,
+				unsigned short idx, unsigned len, unsigned offset,
+				unsigned trim)
+{
+	if (!bio_integrity(bio))
+		return;
+
+	bio_integrity_clone(clone, bio, GFP_NOIO);
+
+	if (trim)
+		bio_integrity_trim(clone, bio_sector_offset(bio, idx, offset), len);
+}
+
 /*
  * Creates a little bio that just does part of a bvec.
  */
-static void split_bvec(struct dm_target_io *tio, struct bio *bio,
-		       sector_t sector, unsigned short idx, unsigned int offset,
-		       unsigned int len, struct bio_set *bs)
+static void clone_split_bio(struct dm_target_io *tio, struct bio *bio,
+			    sector_t sector, unsigned short idx,
+			    unsigned offset, unsigned len)
 {
 	struct bio *clone = &tio->clone;
 	struct bio_vec *bv = bio->bi_io_vec + idx;
 
 	*clone->bi_io_vec = *bv;
 
-	clone->bi_sector = sector;
+	bio_setup_sector(clone, sector, len);
+
 	clone->bi_bdev = bio->bi_bdev;
 	clone->bi_rw = bio->bi_rw;
 	clone->bi_vcnt = 1;
-	clone->bi_size = to_bytes(len);
 	clone->bi_io_vec->bv_offset = offset;
 	clone->bi_io_vec->bv_len = clone->bi_size;
 	clone->bi_flags |= 1 << BIO_CLONED;
 
-	if (bio_integrity(bio)) {
-		bio_integrity_clone(clone, bio, GFP_NOIO);
-		bio_integrity_trim(clone,
-				   bio_sector_offset(bio, idx, offset), len);
-	}
+	clone_bio_integrity(bio, clone, idx, len, offset, 1);
 }
 
 /*
@@ -1066,29 +1072,23 @@ static void split_bvec(struct dm_target_io *tio, struct bio *bio,
  */
 static void clone_bio(struct dm_target_io *tio, struct bio *bio,
 		      sector_t sector, unsigned short idx,
-		      unsigned short bv_count, unsigned int len,
-		      struct bio_set *bs)
+		      unsigned short bv_count, unsigned len)
 {
 	struct bio *clone = &tio->clone;
+	unsigned trim = 0;
 
 	__bio_clone(clone, bio);
-	clone->bi_sector = sector;
-	clone->bi_idx = idx;
-	clone->bi_vcnt = idx + bv_count;
-	clone->bi_size = to_bytes(len);
-	clone->bi_flags &= ~(1 << BIO_SEG_VALID);
-
-	if (bio_integrity(bio)) {
-		bio_integrity_clone(clone, bio, GFP_NOIO);
-
-		if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
-			bio_integrity_trim(clone,
-					   bio_sector_offset(bio, idx, 0), len);
-	}
+	bio_setup_sector(clone, sector, len);
+	bio_setup_bv(clone, idx, bv_count);
+
+	if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
+		trim = 1;
+	clone_bio_integrity(bio, clone, idx, len, 0, trim);
 }
 
 static struct dm_target_io *alloc_tio(struct clone_info *ci,
-				      struct dm_target *ti, int nr_iovecs)
+				      struct dm_target *ti, int nr_iovecs,
+				      unsigned target_bio_nr)
 {
 	struct dm_target_io *tio;
 	struct bio *clone;
@@ -1099,96 +1099,104 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci,
 	tio->io = ci->io;
 	tio->ti = ti;
 	memset(&tio->info, 0, sizeof(tio->info));
-	tio->target_request_nr = 0;
+	tio->target_bio_nr = target_bio_nr;
 
 	return tio;
 }
 
-static void __issue_target_request(struct clone_info *ci, struct dm_target *ti,
-				   unsigned request_nr, sector_t len)
+static void __clone_and_map_simple_bio(struct clone_info *ci,
+				       struct dm_target *ti,
+				       unsigned target_bio_nr, sector_t len)
 {
-	struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs);
+	struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr);
 	struct bio *clone = &tio->clone;
 
-	tio->target_request_nr = request_nr;
-
 	/*
 	 * Discard requests require the bio's inline iovecs be initialized.
 	 * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
 	 * and discard, so no need for concern about wasted bvec allocations.
 	 */
-
 	 __bio_clone(clone, ci->bio);
-	if (len) {
-		clone->bi_sector = ci->sector;
-		clone->bi_size = to_bytes(len);
-	}
+	if (len)
+		bio_setup_sector(clone, ci->sector, len);
 
-	__map_bio(ti, tio);
+	__map_bio(tio);
 }
 
-static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
-				    unsigned num_requests, sector_t len)
+static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
+				  unsigned num_bios, sector_t len)
 {
-	unsigned request_nr;
+	unsigned target_bio_nr;
 
-	for (request_nr = 0; request_nr < num_requests; request_nr++)
-		__issue_target_request(ci, ti, request_nr, len);
+	for (target_bio_nr = 0; target_bio_nr < num_bios; target_bio_nr++)
+		__clone_and_map_simple_bio(ci, ti, target_bio_nr, len);
 }
 
-static int __clone_and_map_empty_flush(struct clone_info *ci)
+static int __send_empty_flush(struct clone_info *ci)
 {
 	unsigned target_nr = 0;
 	struct dm_target *ti;
 
 	BUG_ON(bio_has_data(ci->bio));
 	while ((ti = dm_table_get_target(ci->map, target_nr++)))
-		__issue_target_requests(ci, ti, ti->num_flush_requests, 0);
+		__send_duplicate_bios(ci, ti, ti->num_flush_bios, 0);
 
 	return 0;
 }
 
-/*
- * Perform all io with a single clone.
- */
-static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti)
+static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
+				     sector_t sector, int nr_iovecs,
+				     unsigned short idx, unsigned short bv_count,
+				     unsigned offset, unsigned len,
+				     unsigned split_bvec)
 {
 	struct bio *bio = ci->bio;
 	struct dm_target_io *tio;
+	unsigned target_bio_nr;
+	unsigned num_target_bios = 1;
 
-	tio = alloc_tio(ci, ti, bio->bi_max_vecs);
-	clone_bio(tio, bio, ci->sector, ci->idx, bio->bi_vcnt - ci->idx,
-		  ci->sector_count, ci->md->bs);
-	__map_bio(ti, tio);
-	ci->sector_count = 0;
+	/*
+	 * Does the target want to receive duplicate copies of the bio?
+	 */
+	if (bio_data_dir(bio) == WRITE && ti->num_write_bios)
+		num_target_bios = ti->num_write_bios(ti, bio);
+
+	for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
+		tio = alloc_tio(ci, ti, nr_iovecs, target_bio_nr);
+		if (split_bvec)
+			clone_split_bio(tio, bio, sector, idx, offset, len);
+		else
+			clone_bio(tio, bio, sector, idx, bv_count, len);
+		__map_bio(tio);
+	}
 }
 
-typedef unsigned (*get_num_requests_fn)(struct dm_target *ti);
+typedef unsigned (*get_num_bios_fn)(struct dm_target *ti);
 
-static unsigned get_num_discard_requests(struct dm_target *ti)
+static unsigned get_num_discard_bios(struct dm_target *ti)
 {
-	return ti->num_discard_requests;
+	return ti->num_discard_bios;
 }
 
-static unsigned get_num_write_same_requests(struct dm_target *ti)
+static unsigned get_num_write_same_bios(struct dm_target *ti)
 {
-	return ti->num_write_same_requests;
+	return ti->num_write_same_bios;
 }
 
 typedef bool (*is_split_required_fn)(struct dm_target *ti);
 
 static bool is_split_required_for_discard(struct dm_target *ti)
 {
-	return ti->split_discard_requests;
+	return ti->split_discard_bios;
 }
 
-static int __clone_and_map_changing_extent_only(struct clone_info *ci,
-						get_num_requests_fn get_num_requests,
-						is_split_required_fn is_split_required)
+static int __send_changing_extent_only(struct clone_info *ci,
+				       get_num_bios_fn get_num_bios,
+				       is_split_required_fn is_split_required)
 {
 	struct dm_target *ti;
 	sector_t len;
-	unsigned num_requests;
+	unsigned num_bios;
 
 	do {
 		ti = dm_table_find_target(ci->map, ci->sector);
@@ -1201,8 +1209,8 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci,
 		 * reconfiguration might also have changed that since the
 		 * check was performed.
 		 */
-		num_requests = get_num_requests ? get_num_requests(ti) : 0;
-		if (!num_requests)
+		num_bios = get_num_bios ? get_num_bios(ti) : 0;
+		if (!num_bios)
 			return -EOPNOTSUPP;
 
 		if (is_split_required && !is_split_required(ti))
@@ -1210,7 +1218,7 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci,
 		else
 			len = min(ci->sector_count, max_io_len(ci->sector, ti));
 
-		__issue_target_requests(ci, ti, num_requests, len);
+		__send_duplicate_bios(ci, ti, num_bios, len);
 
 		ci->sector += len;
 	} while (ci->sector_count -= len);
@@ -1218,108 +1226,129 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci,
 	return 0;
 }
 
-static int __clone_and_map_discard(struct clone_info *ci)
+static int __send_discard(struct clone_info *ci)
 {
-	return __clone_and_map_changing_extent_only(ci, get_num_discard_requests,
-						    is_split_required_for_discard);
+	return __send_changing_extent_only(ci, get_num_discard_bios,
+					   is_split_required_for_discard);
 }
 
-static int __clone_and_map_write_same(struct clone_info *ci)
+static int __send_write_same(struct clone_info *ci)
 {
-	return __clone_and_map_changing_extent_only(ci, get_num_write_same_requests, NULL);
+	return __send_changing_extent_only(ci, get_num_write_same_bios, NULL);
 }
 
-static int __clone_and_map(struct clone_info *ci)
+/*
+ * Find maximum number of sectors / bvecs we can process with a single bio.
+ */
+static sector_t __len_within_target(struct clone_info *ci, sector_t max, int *idx)
 {
 	struct bio *bio = ci->bio;
-	struct dm_target *ti;
-	sector_t len = 0, max;
-	struct dm_target_io *tio;
+	sector_t bv_len, total_len = 0;
 
-	if (unlikely(bio->bi_rw & REQ_DISCARD))
-		return __clone_and_map_discard(ci);
-	else if (unlikely(bio->bi_rw & REQ_WRITE_SAME))
-		return __clone_and_map_write_same(ci);
+	for (*idx = ci->idx; max && (*idx < bio->bi_vcnt); (*idx)++) {
+		bv_len = to_sector(bio->bi_io_vec[*idx].bv_len);
 
-	ti = dm_table_find_target(ci->map, ci->sector);
-	if (!dm_target_is_valid(ti))
-		return -EIO;
-
-	max = max_io_len(ci->sector, ti);
+		if (bv_len > max)
+			break;
 
-	if (ci->sector_count <= max) {
-		/*
-		 * Optimise for the simple case where we can do all of
-		 * the remaining io with a single clone.
-		 */
-		__clone_and_map_simple(ci, ti);
+		max -= bv_len;
+		total_len += bv_len;
+	}
 
-	} else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
-		/*
-		 * There are some bvecs that don't span targets.
-		 * Do as many of these as possible.
-		 */
-		int i;
-		sector_t remaining = max;
-		sector_t bv_len;
+	return total_len;
+}
 
-		for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) {
-			bv_len = to_sector(bio->bi_io_vec[i].bv_len);
+static int __split_bvec_across_targets(struct clone_info *ci,
+				       struct dm_target *ti, sector_t max)
+{
+	struct bio *bio = ci->bio;
+	struct bio_vec *bv = bio->bi_io_vec + ci->idx;
+	sector_t remaining = to_sector(bv->bv_len);
+	unsigned offset = 0;
+	sector_t len;
 
-			if (bv_len > remaining)
-				break;
+	do {
+		if (offset) {
+			ti = dm_table_find_target(ci->map, ci->sector);
+			if (!dm_target_is_valid(ti))
+				return -EIO;
 
-			remaining -= bv_len;
-			len += bv_len;
+			max = max_io_len(ci->sector, ti);
 		}
 
-		tio = alloc_tio(ci, ti, bio->bi_max_vecs);
-		clone_bio(tio, bio, ci->sector, ci->idx, i - ci->idx, len,
-			  ci->md->bs);
-		__map_bio(ti, tio);
+		len = min(remaining, max);
+
+		__clone_and_map_data_bio(ci, ti, ci->sector, 1, ci->idx, 0,
+					 bv->bv_offset + offset, len, 1);
 
 		ci->sector += len;
 		ci->sector_count -= len;
-		ci->idx = i;
+		offset += to_bytes(len);
+	} while (remaining -= len);
 
-	} else {
-		/*
-		 * Handle a bvec that must be split between two or more targets.
-		 */
-		struct bio_vec *bv = bio->bi_io_vec + ci->idx;
-		sector_t remaining = to_sector(bv->bv_len);
-		unsigned int offset = 0;
+	ci->idx++;
 
-		do {
-			if (offset) {
-				ti = dm_table_find_target(ci->map, ci->sector);
-				if (!dm_target_is_valid(ti))
-					return -EIO;
+	return 0;
+}
 
-				max = max_io_len(ci->sector, ti);
-			}
+/*
+ * Select the correct strategy for processing a non-flush bio.
+ */
+static int __split_and_process_non_flush(struct clone_info *ci)
+{
+	struct bio *bio = ci->bio;
+	struct dm_target *ti;
+	sector_t len, max;
+	int idx;
 
-			len = min(remaining, max);
+	if (unlikely(bio->bi_rw & REQ_DISCARD))
+		return __send_discard(ci);
+	else if (unlikely(bio->bi_rw & REQ_WRITE_SAME))
+		return __send_write_same(ci);
 
-			tio = alloc_tio(ci, ti, 1);
-			split_bvec(tio, bio, ci->sector, ci->idx,
-				   bv->bv_offset + offset, len, ci->md->bs);
+	ti = dm_table_find_target(ci->map, ci->sector);
+	if (!dm_target_is_valid(ti))
+		return -EIO;
 
-			__map_bio(ti, tio);
+	max = max_io_len(ci->sector, ti);
+
+	/*
+	 * Optimise for the simple case where we can do all of
+	 * the remaining io with a single clone.
+	 */
+	if (ci->sector_count <= max) {
+		__clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs,
+					 ci->idx, bio->bi_vcnt - ci->idx, 0,
+					 ci->sector_count, 0);
+		ci->sector_count = 0;
+		return 0;
+	}
+
+	/*
+	 * There are some bvecs that don't span targets.
+	 * Do as many of these as possible.
+	 */
+	if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
+		len = __len_within_target(ci, max, &idx);
 
-			ci->sector += len;
-			ci->sector_count -= len;
-			offset += to_bytes(len);
-		} while (remaining -= len);
+		__clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs,
+					 ci->idx, idx - ci->idx, 0, len, 0);
 
-		ci->idx++;
+		ci->sector += len;
+		ci->sector_count -= len;
+		ci->idx = idx;
+
+		return 0;
 	}
 
-	return 0;
+	/*
+	 * Handle a bvec that must be split between two or more targets.
+	 */
+	return __split_bvec_across_targets(ci, ti, max);
 }
 
 /*
- * Split the bio into several clones and submit it to targets.
+ * Entry point to split a bio into clones and submit them to the targets.
  */
 static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 {
@@ -1343,16 +1372,17 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 	ci.idx = bio->bi_idx;
 
 	start_io_acct(ci.io);
+
 	if (bio->bi_rw & REQ_FLUSH) {
 		ci.bio = &ci.md->flush_bio;
 		ci.sector_count = 0;
-		error = __clone_and_map_empty_flush(&ci);
+		error = __send_empty_flush(&ci);
 		/* dec_pending submits any data associated with flush */
 	} else {
 		ci.bio = bio;
 		ci.sector_count = bio_sectors(bio);
 		while (ci.sector_count && !error)
-			error = __clone_and_map(&ci);
+			error = __split_and_process_non_flush(&ci);
 	}
 
 	/* drop the extra reference count */
@@ -1756,62 +1786,38 @@ static void free_minor(int minor)
  */
 static int specific_minor(int minor)
 {
-	int r, m;
+	int r;
 
 	if (minor >= (1 << MINORBITS))
 		return -EINVAL;
 
-	r = idr_pre_get(&_minor_idr, GFP_KERNEL);
-	if (!r)
-		return -ENOMEM;
-
+	idr_preload(GFP_KERNEL);
 	spin_lock(&_minor_lock);
 
-	if (idr_find(&_minor_idr, minor)) {
-		r = -EBUSY;
-		goto out;
-	}
+	r = idr_alloc(&_minor_idr, MINOR_ALLOCED, minor, minor + 1, GFP_NOWAIT);
 
-	r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m);
-	if (r)
-		goto out;
-
-	if (m != minor) {
-		idr_remove(&_minor_idr, m);
-		r = -EBUSY;
-		goto out;
-	}
-
-out:
 	spin_unlock(&_minor_lock);
-	return r;
+	idr_preload_end();
+	if (r < 0)
+		return r == -ENOSPC ? -EBUSY : r;
+	return 0;
 }
 
 static int next_free_minor(int *minor)
 {
-	int r, m;
-
-	r = idr_pre_get(&_minor_idr, GFP_KERNEL);
-	if (!r)
-		return -ENOMEM;
+	int r;
 
+	idr_preload(GFP_KERNEL);
 	spin_lock(&_minor_lock);
 
-	r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m);
-	if (r)
-		goto out;
+	r = idr_alloc(&_minor_idr, MINOR_ALLOCED, 0, 1 << MINORBITS, GFP_NOWAIT);
 
-	if (m >= (1 << MINORBITS)) {
-		idr_remove(&_minor_idr, m);
-		r = -ENOSPC;
-		goto out;
-	}
-
-	*minor = m;
-
-out:
 	spin_unlock(&_minor_lock);
-	return r;
+	idr_preload_end();
+	if (r < 0)
+		return r;
+	*minor = r;
+	return 0;
 }
 
 static const struct block_device_operations dm_blk_dops;
@@ -1949,8 +1955,6 @@ static void free_dev(struct mapped_device *md)
 	unlock_fs(md);
 	bdput(md->bdev);
 	destroy_workqueue(md->wq);
-	if (md->tio_pool)
-		mempool_destroy(md->tio_pool);
 	if (md->io_pool)
 		mempool_destroy(md->io_pool);
 	if (md->bs)
@@ -1973,24 +1977,33 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
 {
 	struct dm_md_mempools *p = dm_table_get_md_mempools(t);
 
-	if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) {
-		/*
-		 * The md already has necessary mempools. Reload just the
-		 * bioset because front_pad may have changed because
-		 * a different table was loaded.
-		 */
-		bioset_free(md->bs);
-		md->bs = p->bs;
-		p->bs = NULL;
+	if (md->io_pool && md->bs) {
+		/* The md already has necessary mempools. */
+		if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) {
+			/*
+			 * Reload bioset because front_pad may have changed
+			 * because a different table was loaded.
+			 */
+			bioset_free(md->bs);
+			md->bs = p->bs;
+			p->bs = NULL;
+		} else if (dm_table_get_type(t) == DM_TYPE_REQUEST_BASED) {
+			/*
+			 * There's no need to reload with request-based dm
+			 * because the size of front_pad doesn't change.
+			 * Note for future: If you are to reload bioset,
+			 * prep-ed requests in the queue may refer
+			 * to bio from the old bioset, so you must walk
+			 * through the queue to unprep.
+			 */
+		}
 		goto out;
 	}
 
-	BUG_ON(!p || md->io_pool || md->tio_pool || md->bs);
+	BUG_ON(!p || md->io_pool || md->bs);
 
 	md->io_pool = p->io_pool;
 	p->io_pool = NULL;
-	md->tio_pool = p->tio_pool;
-	p->tio_pool = NULL;
 	md->bs = p->bs;
 	p->bs = NULL;
 
@@ -2421,7 +2434,7 @@ static void dm_queue_flush(struct mapped_device *md)
  */
 struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
 {
-	struct dm_table *live_map, *map = ERR_PTR(-EINVAL);
+	struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL);
 	struct queue_limits limits;
 	int r;
 
@@ -2444,10 +2457,12 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
 		dm_table_put(live_map);
 	}
 
-	r = dm_calculate_queue_limits(table, &limits);
-	if (r) {
-		map = ERR_PTR(r);
-		goto out;
+	if (!live_map) {
+		r = dm_calculate_queue_limits(table, &limits);
+		if (r) {
+			map = ERR_PTR(r);
+			goto out;
+		}
 	}
 
 	map = __bind(md, table, &limits);
@@ -2745,52 +2760,42 @@ EXPORT_SYMBOL_GPL(dm_noflush_suspending);
 
 struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size)
 {
-	struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL);
-	unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS;
+	struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL);
+	struct kmem_cache *cachep;
+	unsigned int pool_size;
+	unsigned int front_pad;
 
 	if (!pools)
 		return NULL;
 
-	per_bio_data_size = roundup(per_bio_data_size, __alignof__(struct dm_target_io));
+	if (type == DM_TYPE_BIO_BASED) {
+		cachep = _io_cache;
+		pool_size = 16;
+		front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
+	} else if (type == DM_TYPE_REQUEST_BASED) {
+		cachep = _rq_tio_cache;
+		pool_size = MIN_IOS;
+		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
+		/* per_bio_data_size is not used. See __bind_mempools(). */
+		WARN_ON(per_bio_data_size != 0);
+	} else
+		goto out;
 
-	pools->io_pool = (type == DM_TYPE_BIO_BASED) ?
-			 mempool_create_slab_pool(MIN_IOS, _io_cache) :
-			 mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache);
+	pools->io_pool = mempool_create_slab_pool(MIN_IOS, cachep);
 	if (!pools->io_pool)
-		goto free_pools_and_out;
-
-	pools->tio_pool = NULL;
-	if (type == DM_TYPE_REQUEST_BASED) {
-		pools->tio_pool = mempool_create_slab_pool(MIN_IOS, _rq_tio_cache);
-		if (!pools->tio_pool)
-			goto free_io_pool_and_out;
-	}
+		goto out;
 
-	pools->bs = (type == DM_TYPE_BIO_BASED) ?
-		bioset_create(pool_size,
-			      per_bio_data_size + offsetof(struct dm_target_io, clone)) :
-		bioset_create(pool_size,
-			      offsetof(struct dm_rq_clone_bio_info, clone));
+	pools->bs = bioset_create(pool_size, front_pad);
 	if (!pools->bs)
-		goto free_tio_pool_and_out;
+		goto out;
 
 	if (integrity && bioset_integrity_create(pools->bs, pool_size))
-		goto free_bioset_and_out;
+		goto out;
 
 	return pools;
 
-free_bioset_and_out:
-	bioset_free(pools->bs);
-
-free_tio_pool_and_out:
-	if (pools->tio_pool)
-		mempool_destroy(pools->tio_pool);
-
-free_io_pool_and_out:
-	mempool_destroy(pools->io_pool);
-
-free_pools_and_out:
-	kfree(pools);
+out:
+	dm_free_md_mempools(pools);
 
 	return NULL;
 }
@@ -2803,9 +2808,6 @@ void dm_free_md_mempools(struct dm_md_mempools *pools)
 	if (pools->io_pool)
 		mempool_destroy(pools->io_pool);
 
-	if (pools->tio_pool)
-		mempool_destroy(pools->tio_pool);
-
 	if (pools->bs)
 		bioset_free(pools->bs);
 
diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig
index ceb359050a59..19b268795415 100644
--- a/drivers/md/persistent-data/Kconfig
+++ b/drivers/md/persistent-data/Kconfig
@@ -1,6 +1,6 @@
 config DM_PERSISTENT_DATA
        tristate
-       depends on BLK_DEV_DM && EXPERIMENTAL
+       depends on BLK_DEV_DM
        select LIBCRC32C
        select DM_BUFIO
        ---help---
diff --git a/drivers/md/persistent-data/Makefile b/drivers/md/persistent-data/Makefile
index d8e7cb767c1e..ff528792c358 100644
--- a/drivers/md/persistent-data/Makefile
+++ b/drivers/md/persistent-data/Makefile
@@ -1,5 +1,7 @@
 obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o
 dm-persistent-data-objs := \
+	dm-array.o \
+	dm-bitset.o \
 	dm-block-manager.o \
 	dm-space-map-common.o \
 	dm-space-map-disk.o \
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
new file mode 100644
index 000000000000..172147eb1d40
--- /dev/null
+++ b/drivers/md/persistent-data/dm-array.c
@@ -0,0 +1,808 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-array.h"
+#include "dm-space-map.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/export.h>
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "array"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * The array is implemented as a fully populated btree, which points to
+ * blocks that contain the packed values.  This is more space efficient
+ * than just using a btree since we don't store 1 key per value.
+ */
+struct array_block {
+	__le32 csum;
+	__le32 max_entries;
+	__le32 nr_entries;
+	__le32 value_size;
+	__le64 blocknr; /* Block this node is supposed to live in. */
+} __packed;
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Validator methods.  As usual we calculate a checksum, and also write the
+ * block location into the header (paranoia about ssds remapping areas by
+ * mistake).
+ */
+#define CSUM_XOR 595846735
+
+static void array_block_prepare_for_write(struct dm_block_validator *v,
+					  struct dm_block *b,
+					  size_t size_of_block)
+{
+	struct array_block *bh_le = dm_block_data(b);
+
+	bh_le->blocknr = cpu_to_le64(dm_block_location(b));
+	bh_le->csum = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
+						 size_of_block - sizeof(__le32),
+						 CSUM_XOR));
+}
+
+static int array_block_check(struct dm_block_validator *v,
+			     struct dm_block *b,
+			     size_t size_of_block)
+{
+	struct array_block *bh_le = dm_block_data(b);
+	__le32 csum_disk;
+
+	if (dm_block_location(b) != le64_to_cpu(bh_le->blocknr)) {
+		DMERR_LIMIT("array_block_check failed: blocknr %llu != wanted %llu",
+			    (unsigned long long) le64_to_cpu(bh_le->blocknr),
+			    (unsigned long long) dm_block_location(b));
+		return -ENOTBLK;
+	}
+
+	csum_disk = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
+					       size_of_block - sizeof(__le32),
+					       CSUM_XOR));
+	if (csum_disk != bh_le->csum) {
+		DMERR_LIMIT("array_block_check failed: csum %u != wanted %u",
+			    (unsigned) le32_to_cpu(csum_disk),
+			    (unsigned) le32_to_cpu(bh_le->csum));
+		return -EILSEQ;
+	}
+
+	return 0;
+}
+
+static struct dm_block_validator array_validator = {
+	.name = "array",
+	.prepare_for_write = array_block_prepare_for_write,
+	.check = array_block_check
+};
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Functions for manipulating the array blocks.
+ */
+
+/*
+ * Returns a pointer to a value within an array block.
+ *
+ * index - The index into _this_ specific block.
+ */
+static void *element_at(struct dm_array_info *info, struct array_block *ab,
+			unsigned index)
+{
+	unsigned char *entry = (unsigned char *) (ab + 1);
+
+	entry += index * info->value_type.size;
+
+	return entry;
+}
+
+/*
+ * Utility function that calls one of the value_type methods on every value
+ * in an array block.
+ */
+static void on_entries(struct dm_array_info *info, struct array_block *ab,
+		       void (*fn)(void *, const void *))
+{
+	unsigned i, nr_entries = le32_to_cpu(ab->nr_entries);
+
+	for (i = 0; i < nr_entries; i++)
+		fn(info->value_type.context, element_at(info, ab, i));
+}
+
+/*
+ * Increment every value in an array block.
+ */
+static void inc_ablock_entries(struct dm_array_info *info, struct array_block *ab)
+{
+	struct dm_btree_value_type *vt = &info->value_type;
+
+	if (vt->inc)
+		on_entries(info, ab, vt->inc);
+}
+
+/*
+ * Decrement every value in an array block.
+ */
+static void dec_ablock_entries(struct dm_array_info *info, struct array_block *ab)
+{
+	struct dm_btree_value_type *vt = &info->value_type;
+
+	if (vt->dec)
+		on_entries(info, ab, vt->dec);
+}
+
+/*
+ * Each array block can hold this many values.
+ */
+static uint32_t calc_max_entries(size_t value_size, size_t size_of_block)
+{
+	return (size_of_block - sizeof(struct array_block)) / value_size;
+}
+
+/*
+ * Allocate a new array block.  The caller will need to unlock block.
+ */
+static int alloc_ablock(struct dm_array_info *info, size_t size_of_block,
+			uint32_t max_entries,
+			struct dm_block **block, struct array_block **ab)
+{
+	int r;
+
+	r = dm_tm_new_block(info->btree_info.tm, &array_validator, block);
+	if (r)
+		return r;
+
+	(*ab) = dm_block_data(*block);
+	(*ab)->max_entries = cpu_to_le32(max_entries);
+	(*ab)->nr_entries = cpu_to_le32(0);
+	(*ab)->value_size = cpu_to_le32(info->value_type.size);
+
+	return 0;
+}
+
+/*
+ * Pad an array block out with a particular value.  Every instance will
+ * cause an increment of the value_type.  new_nr must always be more than
+ * the current number of entries.
+ */
+static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
+			const void *value, unsigned new_nr)
+{
+	unsigned i;
+	uint32_t nr_entries;
+	struct dm_btree_value_type *vt = &info->value_type;
+
+	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
+	BUG_ON(new_nr < le32_to_cpu(ab->nr_entries));
+
+	nr_entries = le32_to_cpu(ab->nr_entries);
+	for (i = nr_entries; i < new_nr; i++) {
+		if (vt->inc)
+			vt->inc(vt->context, value);
+		memcpy(element_at(info, ab, i), value, vt->size);
+	}
+	ab->nr_entries = cpu_to_le32(new_nr);
+}
+
+/*
+ * Remove some entries from the back of an array block.  Every value
+ * removed will be decremented.  new_nr must be <= the current number of
+ * entries.
+ */
+static void trim_ablock(struct dm_array_info *info, struct array_block *ab,
+			unsigned new_nr)
+{
+	unsigned i;
+	uint32_t nr_entries;
+	struct dm_btree_value_type *vt = &info->value_type;
+
+	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
+	BUG_ON(new_nr > le32_to_cpu(ab->nr_entries));
+
+	nr_entries = le32_to_cpu(ab->nr_entries);
+	for (i = nr_entries; i > new_nr; i--)
+		if (vt->dec)
+			vt->dec(vt->context, element_at(info, ab, i - 1));
+	ab->nr_entries = cpu_to_le32(new_nr);
+}
+
+/*
+ * Read locks a block, and coerces it to an array block.  The caller must
+ * unlock 'block' when finished.
+ */
+static int get_ablock(struct dm_array_info *info, dm_block_t b,
+		      struct dm_block **block, struct array_block **ab)
+{
+	int r;
+
+	r = dm_tm_read_lock(info->btree_info.tm, b, &array_validator, block);
+	if (r)
+		return r;
+
+	*ab = dm_block_data(*block);
+	return 0;
+}
+
+/*
+ * Unlocks an array block.
+ */
+static int unlock_ablock(struct dm_array_info *info, struct dm_block *block)
+{
+	return dm_tm_unlock(info->btree_info.tm, block);
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Btree manipulation.
+ */
+
+/*
+ * Looks up an array block in the btree, and then read locks it.
+ *
+ * index is the index of the index of the array_block, (ie. the array index
+ * / max_entries).
+ */
+static int lookup_ablock(struct dm_array_info *info, dm_block_t root,
+			 unsigned index, struct dm_block **block,
+			 struct array_block **ab)
+{
+	int r;
+	uint64_t key = index;
+	__le64 block_le;
+
+	r = dm_btree_lookup(&info->btree_info, root, &key, &block_le);
+	if (r)
+		return r;
+
+	return get_ablock(info, le64_to_cpu(block_le), block, ab);
+}
+
+/*
+ * Insert an array block into the btree.  The block is _not_ unlocked.
+ */
+static int insert_ablock(struct dm_array_info *info, uint64_t index,
+			 struct dm_block *block, dm_block_t *root)
+{
+	__le64 block_le = cpu_to_le64(dm_block_location(block));
+
+	__dm_bless_for_disk(block_le);
+	return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root);
+}
+
+/*
+ * Looks up an array block in the btree.  Then shadows it, and updates the
+ * btree to point to this new shadow.  'root' is an input/output parameter
+ * for both the current root block, and the new one.
+ */
+static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
+			 unsigned index, struct dm_block **block,
+			 struct array_block **ab)
+{
+	int r, inc;
+	uint64_t key = index;
+	dm_block_t b;
+	__le64 block_le;
+
+	/*
+	 * lookup
+	 */
+	r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le);
+	if (r)
+		return r;
+	b = le64_to_cpu(block_le);
+
+	/*
+	 * shadow
+	 */
+	r = dm_tm_shadow_block(info->btree_info.tm, b,
+			       &array_validator, block, &inc);
+	if (r)
+		return r;
+
+	*ab = dm_block_data(*block);
+	if (inc)
+		inc_ablock_entries(info, *ab);
+
+	/*
+	 * Reinsert.
+	 *
+	 * The shadow op will often be a noop.  Only insert if it really
+	 * copied data.
+	 */
+	if (dm_block_location(*block) != b)
+		r = insert_ablock(info, index, *block, root);
+
+	return r;
+}
+
+/*
+ * Allocate an new array block, and fill it with some values.
+ */
+static int insert_new_ablock(struct dm_array_info *info, size_t size_of_block,
+			     uint32_t max_entries,
+			     unsigned block_index, uint32_t nr,
+			     const void *value, dm_block_t *root)
+{
+	int r;
+	struct dm_block *block;
+	struct array_block *ab;
+
+	r = alloc_ablock(info, size_of_block, max_entries, &block, &ab);
+	if (r)
+		return r;
+
+	fill_ablock(info, ab, value, nr);
+	r = insert_ablock(info, block_index, block, root);
+	unlock_ablock(info, block);
+
+	return r;
+}
+
+static int insert_full_ablocks(struct dm_array_info *info, size_t size_of_block,
+			       unsigned begin_block, unsigned end_block,
+			       unsigned max_entries, const void *value,
+			       dm_block_t *root)
+{
+	int r = 0;
+
+	for (; !r && begin_block != end_block; begin_block++)
+		r = insert_new_ablock(info, size_of_block, max_entries, begin_block, max_entries, value, root);
+
+	return r;
+}
+
+/*
+ * There are a bunch of functions involved with resizing an array.  This
+ * structure holds information that commonly needed by them.  Purely here
+ * to reduce parameter count.
+ */
+struct resize {
+	/*
+	 * Describes the array.
+	 */
+	struct dm_array_info *info;
+
+	/*
+	 * The current root of the array.  This gets updated.
+	 */
+	dm_block_t root;
+
+	/*
+	 * Metadata block size.  Used to calculate the nr entries in an
+	 * array block.
+	 */
+	size_t size_of_block;
+
+	/*
+	 * Maximum nr entries in an array block.
+	 */
+	unsigned max_entries;
+
+	/*
+	 * nr of completely full blocks in the array.
+	 *
+	 * 'old' refers to before the resize, 'new' after.
+	 */
+	unsigned old_nr_full_blocks, new_nr_full_blocks;
+
+	/*
+	 * Number of entries in the final block.  0 iff only full blocks in
+	 * the array.
+	 */
+	unsigned old_nr_entries_in_last_block, new_nr_entries_in_last_block;
+
+	/*
+	 * The default value used when growing the array.
+	 */
+	const void *value;
+};
+
+/*
+ * Removes a consecutive set of array blocks from the btree.  The values
+ * in block are decremented as a side effect of the btree remove.
+ *
+ * begin_index - the index of the first array block to remove.
+ * end_index - the one-past-the-end value.  ie. this block is not removed.
+ */
+static int drop_blocks(struct resize *resize, unsigned begin_index,
+		       unsigned end_index)
+{
+	int r;
+
+	while (begin_index != end_index) {
+		uint64_t key = begin_index++;
+		r = dm_btree_remove(&resize->info->btree_info, resize->root,
+				    &key, &resize->root);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/*
+ * Calculates how many blocks are needed for the array.
+ */
+static unsigned total_nr_blocks_needed(unsigned nr_full_blocks,
+				       unsigned nr_entries_in_last_block)
+{
+	return nr_full_blocks + (nr_entries_in_last_block ? 1 : 0);
+}
+
+/*
+ * Shrink an array.
+ */
+static int shrink(struct resize *resize)
+{
+	int r;
+	unsigned begin, end;
+	struct dm_block *block;
+	struct array_block *ab;
+
+	/*
+	 * Lose some blocks from the back?
+	 */
+	if (resize->new_nr_full_blocks < resize->old_nr_full_blocks) {
+		begin = total_nr_blocks_needed(resize->new_nr_full_blocks,
+					       resize->new_nr_entries_in_last_block);
+		end = total_nr_blocks_needed(resize->old_nr_full_blocks,
+					     resize->old_nr_entries_in_last_block);
+
+		r = drop_blocks(resize, begin, end);
+		if (r)
+			return r;
+	}
+
+	/*
+	 * Trim the new tail block
+	 */
+	if (resize->new_nr_entries_in_last_block) {
+		r = shadow_ablock(resize->info, &resize->root,
+				  resize->new_nr_full_blocks, &block, &ab);
+		if (r)
+			return r;
+
+		trim_ablock(resize->info, ab, resize->new_nr_entries_in_last_block);
+		unlock_ablock(resize->info, block);
+	}
+
+	return 0;
+}
+
+/*
+ * Grow an array.
+ */
+static int grow_extend_tail_block(struct resize *resize, uint32_t new_nr_entries)
+{
+	int r;
+	struct dm_block *block;
+	struct array_block *ab;
+
+	r = shadow_ablock(resize->info, &resize->root,
+			  resize->old_nr_full_blocks, &block, &ab);
+	if (r)
+		return r;
+
+	fill_ablock(resize->info, ab, resize->value, new_nr_entries);
+	unlock_ablock(resize->info, block);
+
+	return r;
+}
+
+static int grow_add_tail_block(struct resize *resize)
+{
+	return insert_new_ablock(resize->info, resize->size_of_block,
+				 resize->max_entries,
+				 resize->new_nr_full_blocks,
+				 resize->new_nr_entries_in_last_block,
+				 resize->value, &resize->root);
+}
+
+static int grow_needs_more_blocks(struct resize *resize)
+{
+	int r;
+
+	if (resize->old_nr_entries_in_last_block > 0) {
+		r = grow_extend_tail_block(resize, resize->max_entries);
+		if (r)
+			return r;
+	}
+
+	r = insert_full_ablocks(resize->info, resize->size_of_block,
+				resize->old_nr_full_blocks,
+				resize->new_nr_full_blocks,
+				resize->max_entries, resize->value,
+				&resize->root);
+	if (r)
+		return r;
+
+	if (resize->new_nr_entries_in_last_block)
+		r = grow_add_tail_block(resize);
+
+	return r;
+}
+
+static int grow(struct resize *resize)
+{
+	if (resize->new_nr_full_blocks > resize->old_nr_full_blocks)
+		return grow_needs_more_blocks(resize);
+
+	else if (resize->old_nr_entries_in_last_block)
+		return grow_extend_tail_block(resize, resize->new_nr_entries_in_last_block);
+
+	else
+		return grow_add_tail_block(resize);
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * These are the value_type functions for the btree elements, which point
+ * to array blocks.
+ */
+static void block_inc(void *context, const void *value)
+{
+	__le64 block_le;
+	struct dm_array_info *info = context;
+
+	memcpy(&block_le, value, sizeof(block_le));
+	dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le));
+}
+
+static void block_dec(void *context, const void *value)
+{
+	int r;
+	uint64_t b;
+	__le64 block_le;
+	uint32_t ref_count;
+	struct dm_block *block;
+	struct array_block *ab;
+	struct dm_array_info *info = context;
+
+	memcpy(&block_le, value, sizeof(block_le));
+	b = le64_to_cpu(block_le);
+
+	r = dm_tm_ref(info->btree_info.tm, b, &ref_count);
+	if (r) {
+		DMERR_LIMIT("couldn't get reference count for block %llu",
+			    (unsigned long long) b);
+		return;
+	}
+
+	if (ref_count == 1) {
+		/*
+		 * We're about to drop the last reference to this ablock.
+		 * So we need to decrement the ref count of the contents.
+		 */
+		r = get_ablock(info, b, &block, &ab);
+		if (r) {
+			DMERR_LIMIT("couldn't get array block %llu",
+				    (unsigned long long) b);
+			return;
+		}
+
+		dec_ablock_entries(info, ab);
+		unlock_ablock(info, block);
+	}
+
+	dm_tm_dec(info->btree_info.tm, b);
+}
+
+static int block_equal(void *context, const void *value1, const void *value2)
+{
+	return !memcmp(value1, value2, sizeof(__le64));
+}
+
+/*----------------------------------------------------------------*/
+
+void dm_array_info_init(struct dm_array_info *info,
+			struct dm_transaction_manager *tm,
+			struct dm_btree_value_type *vt)
+{
+	struct dm_btree_value_type *bvt = &info->btree_info.value_type;
+
+	memcpy(&info->value_type, vt, sizeof(info->value_type));
+	info->btree_info.tm = tm;
+	info->btree_info.levels = 1;
+
+	bvt->context = info;
+	bvt->size = sizeof(__le64);
+	bvt->inc = block_inc;
+	bvt->dec = block_dec;
+	bvt->equal = block_equal;
+}
+EXPORT_SYMBOL_GPL(dm_array_info_init);
+
+int dm_array_empty(struct dm_array_info *info, dm_block_t *root)
+{
+	return dm_btree_empty(&info->btree_info, root);
+}
+EXPORT_SYMBOL_GPL(dm_array_empty);
+
+static int array_resize(struct dm_array_info *info, dm_block_t root,
+			uint32_t old_size, uint32_t new_size,
+			const void *value, dm_block_t *new_root)
+{
+	int r;
+	struct resize resize;
+
+	if (old_size == new_size)
+		return 0;
+
+	resize.info = info;
+	resize.root = root;
+	resize.size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
+	resize.max_entries = calc_max_entries(info->value_type.size,
+					      resize.size_of_block);
+
+	resize.old_nr_full_blocks = old_size / resize.max_entries;
+	resize.old_nr_entries_in_last_block = old_size % resize.max_entries;
+	resize.new_nr_full_blocks = new_size / resize.max_entries;
+	resize.new_nr_entries_in_last_block = new_size % resize.max_entries;
+	resize.value = value;
+
+	r = ((new_size > old_size) ? grow : shrink)(&resize);
+	if (r)
+		return r;
+
+	*new_root = resize.root;
+	return 0;
+}
+
+int dm_array_resize(struct dm_array_info *info, dm_block_t root,
+		    uint32_t old_size, uint32_t new_size,
+		    const void *value, dm_block_t *new_root)
+		    __dm_written_to_disk(value)
+{
+	int r = array_resize(info, root, old_size, new_size, value, new_root);
+	__dm_unbless_for_disk(value);
+	return r;
+}
+EXPORT_SYMBOL_GPL(dm_array_resize);
+
+int dm_array_del(struct dm_array_info *info, dm_block_t root)
+{
+	return dm_btree_del(&info->btree_info, root);
+}
+EXPORT_SYMBOL_GPL(dm_array_del);
+
+int dm_array_get_value(struct dm_array_info *info, dm_block_t root,
+		       uint32_t index, void *value_le)
+{
+	int r;
+	struct dm_block *block;
+	struct array_block *ab;
+	size_t size_of_block;
+	unsigned entry, max_entries;
+
+	size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
+	max_entries = calc_max_entries(info->value_type.size, size_of_block);
+
+	r = lookup_ablock(info, root, index / max_entries, &block, &ab);
+	if (r)
+		return r;
+
+	entry = index % max_entries;
+	if (entry >= le32_to_cpu(ab->nr_entries))
+		r = -ENODATA;
+	else
+		memcpy(value_le, element_at(info, ab, entry),
+		       info->value_type.size);
+
+	unlock_ablock(info, block);
+	return r;
+}
+EXPORT_SYMBOL_GPL(dm_array_get_value);
+
+static int array_set_value(struct dm_array_info *info, dm_block_t root,
+			   uint32_t index, const void *value, dm_block_t *new_root)
+{
+	int r;
+	struct dm_block *block;
+	struct array_block *ab;
+	size_t size_of_block;
+	unsigned max_entries;
+	unsigned entry;
+	void *old_value;
+	struct dm_btree_value_type *vt = &info->value_type;
+
+	size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
+	max_entries = calc_max_entries(info->value_type.size, size_of_block);
+
+	r = shadow_ablock(info, &root, index / max_entries, &block, &ab);
+	if (r)
+		return r;
+	*new_root = root;
+
+	entry = index % max_entries;
+	if (entry >= le32_to_cpu(ab->nr_entries)) {
+		r = -ENODATA;
+		goto out;
+	}
+
+	old_value = element_at(info, ab, entry);
+	if (vt->dec &&
+	    (!vt->equal || !vt->equal(vt->context, old_value, value))) {
+		vt->dec(vt->context, old_value);
+		if (vt->inc)
+			vt->inc(vt->context, value);
+	}
+
+	memcpy(old_value, value, info->value_type.size);
+
+out:
+	unlock_ablock(info, block);
+	return r;
+}
+
+int dm_array_set_value(struct dm_array_info *info, dm_block_t root,
+		 uint32_t index, const void *value, dm_block_t *new_root)
+		 __dm_written_to_disk(value)
+{
+	int r;
+
+	r = array_set_value(info, root, index, value, new_root);
+	__dm_unbless_for_disk(value);
+	return r;
+}
+EXPORT_SYMBOL_GPL(dm_array_set_value);
+
+struct walk_info {
+	struct dm_array_info *info;
+	int (*fn)(void *context, uint64_t key, void *leaf);
+	void *context;
+};
+
+static int walk_ablock(void *context, uint64_t *keys, void *leaf)
+{
+	struct walk_info *wi = context;
+
+	int r;
+	unsigned i;
+	__le64 block_le;
+	unsigned nr_entries, max_entries;
+	struct dm_block *block;
+	struct array_block *ab;
+
+	memcpy(&block_le, leaf, sizeof(block_le));
+	r = get_ablock(wi->info, le64_to_cpu(block_le), &block, &ab);
+	if (r)
+		return r;
+
+	max_entries = le32_to_cpu(ab->max_entries);
+	nr_entries = le32_to_cpu(ab->nr_entries);
+	for (i = 0; i < nr_entries; i++) {
+		r = wi->fn(wi->context, keys[0] * max_entries + i,
+			   element_at(wi->info, ab, i));
+
+		if (r)
+			break;
+	}
+
+	unlock_ablock(wi->info, block);
+	return r;
+}
+
+int dm_array_walk(struct dm_array_info *info, dm_block_t root,
+		  int (*fn)(void *, uint64_t key, void *leaf),
+		  void *context)
+{
+	struct walk_info wi;
+
+	wi.info = info;
+	wi.fn = fn;
+	wi.context = context;
+
+	return dm_btree_walk(&info->btree_info, root, walk_ablock, &wi);
+}
+EXPORT_SYMBOL_GPL(dm_array_walk);
+
+/*----------------------------------------------------------------*/
diff --git a/drivers/md/persistent-data/dm-array.h b/drivers/md/persistent-data/dm-array.h
new file mode 100644
index 000000000000..ea177d6fa58f
--- /dev/null
+++ b/drivers/md/persistent-data/dm-array.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+#ifndef _LINUX_DM_ARRAY_H
+#define _LINUX_DM_ARRAY_H
+
+#include "dm-btree.h"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * The dm-array is a persistent version of an array.  It packs the data
+ * more efficiently than a btree which will result in less disk space use,
+ * and a performance boost.  The element get and set operations are still
+ * O(ln(n)), but with a much smaller constant.
+ *
+ * The value type structure is reused from the btree type to support proper
+ * reference counting of values.
+ *
+ * The arrays implicitly know their length, and bounds are checked for
+ * lookups and updated.  It doesn't store this in an accessible place
+ * because it would waste a whole metadata block.  Make sure you store the
+ * size along with the array root in your encompassing data.
+ *
+ * Array entries are indexed via an unsigned integer starting from zero.
+ * Arrays are not sparse; if you resize an array to have 'n' entries then
+ * 'n - 1' will be the last valid index.
+ *
+ * Typical use:
+ *
+ * a) initialise a dm_array_info structure.  This describes the array
+ *    values and ties it into a specific transaction manager.  It holds no
+ *    instance data; the same info can be used for many similar arrays if
+ *    you wish.
+ *
+ * b) Get yourself a root.  The root is the index of a block of data on the
+ *    disk that holds a particular instance of an array.  You may have a
+ *    pre existing root in your metadata that you wish to use, or you may
+ *    want to create a brand new, empty array with dm_array_empty().
+ *
+ * Like the other data structures in this library, dm_array objects are
+ * immutable between transactions.  Update functions will return you the
+ * root for a _new_ array.  If you've incremented the old root, via
+ * dm_tm_inc(), before calling the update function you may continue to use
+ * it in parallel with the new root.
+ *
+ * c) resize an array with dm_array_resize().
+ *
+ * d) Get a value from the array with dm_array_get_value().
+ *
+ * e) Set a value in the array with dm_array_set_value().
+ *
+ * f) Walk an array of values in index order with dm_array_walk().  More
+ *    efficient than making many calls to dm_array_get_value().
+ *
+ * g) Destroy the array with dm_array_del().  This tells the transaction
+ *    manager that you're no longer using this data structure so it can
+ *    recycle it's blocks.  (dm_array_dec() would be a better name for it,
+ *    but del is in keeping with dm_btree_del()).
+ */
+
+/*
+ * Describes an array.  Don't initialise this structure yourself, use the
+ * init function below.
+ */
+struct dm_array_info {
+	struct dm_transaction_manager *tm;
+	struct dm_btree_value_type value_type;
+	struct dm_btree_info btree_info;
+};
+
+/*
+ * Sets up a dm_array_info structure.  You don't need to do anything with
+ * this structure when you finish using it.
+ *
+ * info - the structure being filled in.
+ * tm   - the transaction manager that should supervise this structure.
+ * vt   - describes the leaf values.
+ */
+void dm_array_info_init(struct dm_array_info *info,
+			struct dm_transaction_manager *tm,
+			struct dm_btree_value_type *vt);
+
+/*
+ * Create an empty, zero length array.
+ *
+ * info - describes the array
+ * root - on success this will be filled out with the root block
+ */
+int dm_array_empty(struct dm_array_info *info, dm_block_t *root);
+
+/*
+ * Resizes the array.
+ *
+ * info - describes the array
+ * root - the root block of the array on disk
+ * old_size - the caller is responsible for remembering the size of
+ *            the array
+ * new_size - can be bigger or smaller than old_size
+ * value - if we're growing the array the new entries will have this value
+ * new_root - on success, points to the new root block
+ *
+ * If growing the inc function for 'value' will be called the appropriate
+ * number of times.  So if the caller is holding a reference they may want
+ * to drop it.
+ */
+int dm_array_resize(struct dm_array_info *info, dm_block_t root,
+		    uint32_t old_size, uint32_t new_size,
+		    const void *value, dm_block_t *new_root)
+	__dm_written_to_disk(value);
+
+/*
+ * Frees a whole array.  The value_type's decrement operation will be called
+ * for all values in the array
+ */
+int dm_array_del(struct dm_array_info *info, dm_block_t root);
+
+/*
+ * Lookup a value in the array
+ *
+ * info - describes the array
+ * root - root block of the array
+ * index - array index
+ * value - the value to be read.  Will be in on-disk format of course.
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_array_get_value(struct dm_array_info *info, dm_block_t root,
+		       uint32_t index, void *value);
+
+/*
+ * Set an entry in the array.
+ *
+ * info - describes the array
+ * root - root block of the array
+ * index - array index
+ * value - value to be written to disk.  Make sure you confirm the value is
+ *         in on-disk format with__dm_bless_for_disk() before calling.
+ * new_root - the new root block
+ *
+ * The old value being overwritten will be decremented, the new value
+ * incremented.
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_array_set_value(struct dm_array_info *info, dm_block_t root,
+		       uint32_t index, const void *value, dm_block_t *new_root)
+	__dm_written_to_disk(value);
+
+/*
+ * Walk through all the entries in an array.
+ *
+ * info - describes the array
+ * root - root block of the array
+ * fn - called back for every element
+ * context - passed to the callback
+ */
+int dm_array_walk(struct dm_array_info *info, dm_block_t root,
+		  int (*fn)(void *context, uint64_t key, void *leaf),
+		  void *context);
+
+/*----------------------------------------------------------------*/
+
+#endif	/* _LINUX_DM_ARRAY_H */
diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c
new file mode 100644
index 000000000000..cd9a86d4cdf0
--- /dev/null
+++ b/drivers/md/persistent-data/dm-bitset.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-bitset.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/export.h>
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "bitset"
+#define BITS_PER_ARRAY_ENTRY 64
+
+/*----------------------------------------------------------------*/
+
+static struct dm_btree_value_type bitset_bvt = {
+	.context = NULL,
+	.size = sizeof(__le64),
+	.inc = NULL,
+	.dec = NULL,
+	.equal = NULL,
+};
+
+/*----------------------------------------------------------------*/
+
+void dm_disk_bitset_init(struct dm_transaction_manager *tm,
+			 struct dm_disk_bitset *info)
+{
+	dm_array_info_init(&info->array_info, tm, &bitset_bvt);
+	info->current_index_set = false;
+}
+EXPORT_SYMBOL_GPL(dm_disk_bitset_init);
+
+int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *root)
+{
+	return dm_array_empty(&info->array_info, root);
+}
+EXPORT_SYMBOL_GPL(dm_bitset_empty);
+
+int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t root,
+		     uint32_t old_nr_entries, uint32_t new_nr_entries,
+		     bool default_value, dm_block_t *new_root)
+{
+	uint32_t old_blocks = dm_div_up(old_nr_entries, BITS_PER_ARRAY_ENTRY);
+	uint32_t new_blocks = dm_div_up(new_nr_entries, BITS_PER_ARRAY_ENTRY);
+	__le64 value = default_value ? cpu_to_le64(~0) : cpu_to_le64(0);
+
+	__dm_bless_for_disk(&value);
+	return dm_array_resize(&info->array_info, root, old_blocks, new_blocks,
+			       &value, new_root);
+}
+EXPORT_SYMBOL_GPL(dm_bitset_resize);
+
+int dm_bitset_del(struct dm_disk_bitset *info, dm_block_t root)
+{
+	return dm_array_del(&info->array_info, root);
+}
+EXPORT_SYMBOL_GPL(dm_bitset_del);
+
+int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root,
+		    dm_block_t *new_root)
+{
+	int r;
+	__le64 value;
+
+	if (!info->current_index_set)
+		return 0;
+
+	value = cpu_to_le64(info->current_bits);
+
+	__dm_bless_for_disk(&value);
+	r = dm_array_set_value(&info->array_info, root, info->current_index,
+			       &value, new_root);
+	if (r)
+		return r;
+
+	info->current_index_set = false;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bitset_flush);
+
+static int read_bits(struct dm_disk_bitset *info, dm_block_t root,
+		     uint32_t array_index)
+{
+	int r;
+	__le64 value;
+
+	r = dm_array_get_value(&info->array_info, root, array_index, &value);
+	if (r)
+		return r;
+
+	info->current_bits = le64_to_cpu(value);
+	info->current_index_set = true;
+	info->current_index = array_index;
+	return 0;
+}
+
+static int get_array_entry(struct dm_disk_bitset *info, dm_block_t root,
+			   uint32_t index, dm_block_t *new_root)
+{
+	int r;
+	unsigned array_index = index / BITS_PER_ARRAY_ENTRY;
+
+	if (info->current_index_set) {
+		if (info->current_index == array_index)
+			return 0;
+
+		r = dm_bitset_flush(info, root, new_root);
+		if (r)
+			return r;
+	}
+
+	return read_bits(info, root, array_index);
+}
+
+int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root,
+		      uint32_t index, dm_block_t *new_root)
+{
+	int r;
+	unsigned b = index % BITS_PER_ARRAY_ENTRY;
+
+	r = get_array_entry(info, root, index, new_root);
+	if (r)
+		return r;
+
+	set_bit(b, (unsigned long *) &info->current_bits);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bitset_set_bit);
+
+int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root,
+			uint32_t index, dm_block_t *new_root)
+{
+	int r;
+	unsigned b = index % BITS_PER_ARRAY_ENTRY;
+
+	r = get_array_entry(info, root, index, new_root);
+	if (r)
+		return r;
+
+	clear_bit(b, (unsigned long *) &info->current_bits);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bitset_clear_bit);
+
+int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root,
+		       uint32_t index, dm_block_t *new_root, bool *result)
+{
+	int r;
+	unsigned b = index % BITS_PER_ARRAY_ENTRY;
+
+	r = get_array_entry(info, root, index, new_root);
+	if (r)
+		return r;
+
+	*result = test_bit(b, (unsigned long *) &info->current_bits);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bitset_test_bit);
+
+/*----------------------------------------------------------------*/
diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h
new file mode 100644
index 000000000000..e1b9bea14aa1
--- /dev/null
+++ b/drivers/md/persistent-data/dm-bitset.h
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+#ifndef _LINUX_DM_BITSET_H
+#define _LINUX_DM_BITSET_H
+
+#include "dm-array.h"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * This bitset type is a thin wrapper round a dm_array of 64bit words.  It
+ * uses a tiny, one word cache to reduce the number of array lookups and so
+ * increase performance.
+ *
+ * Like the dm-array that it's based on, the caller needs to keep track of
+ * the size of the bitset separately.  The underlying dm-array implicitly
+ * knows how many words it's storing and will return -ENODATA if you try
+ * and access an out of bounds word.  However, an out of bounds bit in the
+ * final word will _not_ be detected, you have been warned.
+ *
+ * Bits are indexed from zero.
+
+ * Typical use:
+ *
+ * a) Initialise a dm_disk_bitset structure with dm_disk_bitset_init().
+ *    This describes the bitset and includes the cache.  It's not called it
+ *    dm_bitset_info in line with other data structures because it does
+ *    include instance data.
+ *
+ * b) Get yourself a root.  The root is the index of a block of data on the
+ *    disk that holds a particular instance of an bitset.  You may have a
+ *    pre existing root in your metadata that you wish to use, or you may
+ *    want to create a brand new, empty bitset with dm_bitset_empty().
+ *
+ * Like the other data structures in this library, dm_bitset objects are
+ * immutable between transactions.  Update functions will return you the
+ * root for a _new_ array.  If you've incremented the old root, via
+ * dm_tm_inc(), before calling the update function you may continue to use
+ * it in parallel with the new root.
+ *
+ * Even read operations may trigger the cache to be flushed and as such
+ * return a root for a new, updated bitset.
+ *
+ * c) resize a bitset with dm_bitset_resize().
+ *
+ * d) Set a bit with dm_bitset_set_bit().
+ *
+ * e) Clear a bit with dm_bitset_clear_bit().
+ *
+ * f) Test a bit with dm_bitset_test_bit().
+ *
+ * g) Flush all updates from the cache with dm_bitset_flush().
+ *
+ * h) Destroy the bitset with dm_bitset_del().  This tells the transaction
+ *    manager that you're no longer using this data structure so it can
+ *    recycle it's blocks.  (dm_bitset_dec() would be a better name for it,
+ *    but del is in keeping with dm_btree_del()).
+ */
+
+/*
+ * Opaque object.  Unlike dm_array_info, you should have one of these per
+ * bitset.  Initialise with dm_disk_bitset_init().
+ */
+struct dm_disk_bitset {
+	struct dm_array_info array_info;
+
+	uint32_t current_index;
+	uint64_t current_bits;
+
+	bool current_index_set:1;
+};
+
+/*
+ * Sets up a dm_disk_bitset structure.  You don't need to do anything with
+ * this structure when you finish using it.
+ *
+ * tm - the transaction manager that should supervise this structure
+ * info - the structure being initialised
+ */
+void dm_disk_bitset_init(struct dm_transaction_manager *tm,
+			 struct dm_disk_bitset *info);
+
+/*
+ * Create an empty, zero length bitset.
+ *
+ * info - describes the bitset
+ * new_root - on success, points to the new root block
+ */
+int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *new_root);
+
+/*
+ * Resize the bitset.
+ *
+ * info - describes the bitset
+ * old_root - the root block of the array on disk
+ * old_nr_entries - the number of bits in the old bitset
+ * new_nr_entries - the number of bits you want in the new bitset
+ * default_value - the value for any new bits
+ * new_root - on success, points to the new root block
+ */
+int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t old_root,
+		     uint32_t old_nr_entries, uint32_t new_nr_entries,
+		     bool default_value, dm_block_t *new_root);
+
+/*
+ * Frees the bitset.
+ */
+int dm_bitset_del(struct dm_disk_bitset *info, dm_block_t root);
+
+/*
+ * Set a bit.
+ *
+ * info - describes the bitset
+ * root - the root block of the bitset
+ * index - the bit index
+ * new_root - on success, points to the new root block
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root,
+		      uint32_t index, dm_block_t *new_root);
+
+/*
+ * Clears a bit.
+ *
+ * info - describes the bitset
+ * root - the root block of the bitset
+ * index - the bit index
+ * new_root - on success, points to the new root block
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root,
+			uint32_t index, dm_block_t *new_root);
+
+/*
+ * Tests a bit.
+ *
+ * info - describes the bitset
+ * root - the root block of the bitset
+ * index - the bit index
+ * new_root - on success, points to the new root block (cached values may have been written)
+ * result - the bit value you're after
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root,
+		       uint32_t index, dm_block_t *new_root, bool *result);
+
+/*
+ * Flush any cached changes to disk.
+ *
+ * info - describes the bitset
+ * root - the root block of the bitset
+ * new_root - on success, points to the new root block
+ */
+int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root,
+		    dm_block_t *new_root);
+
+/*----------------------------------------------------------------*/
+
+#endif /* _LINUX_DM_BITSET_H */
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index 28c3ed072a79..81b513890e2b 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -613,6 +613,7 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
 
 	return dm_bufio_write_dirty_buffers(bm->bufio);
 }
+EXPORT_SYMBOL_GPL(dm_bm_flush_and_unlock);
 
 void dm_bm_set_read_only(struct dm_block_manager *bm)
 {
diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h
index accbb05f17b6..37d367bb9aa8 100644
--- a/drivers/md/persistent-data/dm-btree-internal.h
+++ b/drivers/md/persistent-data/dm-btree-internal.h
@@ -64,6 +64,7 @@ struct ro_spine {
 void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info);
 int exit_ro_spine(struct ro_spine *s);
 int ro_step(struct ro_spine *s, dm_block_t new_child);
+void ro_pop(struct ro_spine *s);
 struct btree_node *ro_node(struct ro_spine *s);
 
 struct shadow_spine {
diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c
index f199a0c4ed04..cf9fd676ae44 100644
--- a/drivers/md/persistent-data/dm-btree-spine.c
+++ b/drivers/md/persistent-data/dm-btree-spine.c
@@ -164,6 +164,13 @@ int ro_step(struct ro_spine *s, dm_block_t new_child)
 	return r;
 }
 
+void ro_pop(struct ro_spine *s)
+{
+	BUG_ON(!s->count);
+	--s->count;
+	unlock_block(s->info, s->nodes[s->count]);
+}
+
 struct btree_node *ro_node(struct ro_spine *s)
 {
 	struct dm_block *block;
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 4caf66918cdb..35865425e4b4 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -807,3 +807,55 @@ int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
 	return r ? r : count;
 }
 EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);
+
+/*
+ * FIXME: We shouldn't use a recursive algorithm when we have limited stack
+ * space.  Also this only works for single level trees.
+ */
+static int walk_node(struct ro_spine *s, dm_block_t block,
+		     int (*fn)(void *context, uint64_t *keys, void *leaf),
+		     void *context)
+{
+	int r;
+	unsigned i, nr;
+	struct btree_node *n;
+	uint64_t keys;
+
+	r = ro_step(s, block);
+	n = ro_node(s);
+
+	nr = le32_to_cpu(n->header.nr_entries);
+	for (i = 0; i < nr; i++) {
+		if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) {
+			r = walk_node(s, value64(n, i), fn, context);
+			if (r)
+				goto out;
+		} else {
+			keys = le64_to_cpu(*key_ptr(n, i));
+			r = fn(context, &keys, value_ptr(n, i));
+			if (r)
+				goto out;
+		}
+	}
+
+out:
+	ro_pop(s);
+	return r;
+}
+
+int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
+		  int (*fn)(void *context, uint64_t *keys, void *leaf),
+		  void *context)
+{
+	int r;
+	struct ro_spine spine;
+
+	BUG_ON(info->levels > 1);
+
+	init_ro_spine(&spine, info);
+	r = walk_node(&spine, root, fn, context);
+	exit_ro_spine(&spine);
+
+	return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_walk);
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
index a2cd50441ca1..8672d159e0b5 100644
--- a/drivers/md/persistent-data/dm-btree.h
+++ b/drivers/md/persistent-data/dm-btree.h
@@ -58,21 +58,21 @@ struct dm_btree_value_type {
 	 * somewhere.) This method is _not_ called for insertion of a new
 	 * value: It is assumed the ref count is already 1.
 	 */
-	void (*inc)(void *context, void *value);
+	void (*inc)(void *context, const void *value);
 
 	/*
 	 * This value is being deleted.  The btree takes care of freeing
 	 * the memory pointed to by @value.  Often the del function just
 	 * needs to decrement a reference count somewhere.
 	 */
-	void (*dec)(void *context, void *value);
+	void (*dec)(void *context, const void *value);
 
 	/*
 	 * A test for equality between two values.  When a value is
 	 * overwritten with a new one, the old one has the dec method
 	 * called _unless_ the new and old value are deemed equal.
 	 */
-	int (*equal)(void *context, void *value1, void *value2);
+	int (*equal)(void *context, const void *value1, const void *value2);
 };
 
 /*
@@ -142,4 +142,13 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
 int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
 			      uint64_t *result_keys);
 
+/*
+ * Iterate through the a btree, calling fn() on each entry.
+ * It only works for single level trees and is internally recursive, so
+ * monitor stack usage carefully.
+ */
+int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
+		  int (*fn)(void *context, uint64_t *keys, void *leaf),
+		  void *context);
+
 #endif	/* _LINUX_DM_BTREE_H */
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index 7b17a1fdeaf9..81da1a26042e 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -46,10 +46,9 @@ static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b)
 	int r = 0;
 	unsigned bucket = dm_hash_block(b, DM_HASH_MASK);
 	struct shadow_info *si;
-	struct hlist_node *n;
 
 	spin_lock(&tm->lock);
-	hlist_for_each_entry(si, n, tm->buckets + bucket, hlist)
+	hlist_for_each_entry(si, tm->buckets + bucket, hlist)
 		if (si->where == b) {
 			r = 1;
 			break;
@@ -81,14 +80,14 @@ static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b)
 static void wipe_shadow_table(struct dm_transaction_manager *tm)
 {
 	struct shadow_info *si;
-	struct hlist_node *n, *tmp;
+	struct hlist_node *tmp;
 	struct hlist_head *bucket;
 	int i;
 
 	spin_lock(&tm->lock);
 	for (i = 0; i < DM_HASH_SIZE; i++) {
 		bucket = tm->buckets + i;
-		hlist_for_each_entry_safe(si, n, tmp, bucket, hlist)
+		hlist_for_each_entry_safe(si, tmp, bucket, hlist)
 			kfree(si);
 
 		INIT_HLIST_HEAD(bucket);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 19d77a026639..5af2d2709081 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -184,8 +184,6 @@ static void return_io(struct bio *return_bi)
 		return_bi = bi->bi_next;
 		bi->bi_next = NULL;
 		bi->bi_size = 0;
-		trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
-					 bi, 0);
 		bio_endio(bi, 0);
 		bi = return_bi;
 	}
@@ -365,10 +363,9 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
 					 short generation)
 {
 	struct stripe_head *sh;
-	struct hlist_node *hn;
 
 	pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector);
-	hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
+	hlist_for_each_entry(sh, stripe_hash(conf, sector), hash)
 		if (sh->sector == sector && sh->generation == generation)
 			return sh;
 	pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector);
@@ -3917,8 +3914,6 @@ static void raid5_align_endio(struct bio *bi, int error)
 	rdev_dec_pending(rdev, conf->mddev);
 
 	if (!error && uptodate) {
-		trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
-					 raid_bi, 0);
 		bio_endio(raid_bi, 0);
 		if (atomic_dec_and_test(&conf->active_aligned_reads))
 			wake_up(&conf->wait_for_stripe);
@@ -4377,8 +4372,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 		if ( rw == WRITE )
 			md_write_end(mddev);
 
-		trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
-					 bi, 0);
 		bio_endio(bi, 0);
 	}
 }
@@ -4755,11 +4748,8 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 		handled++;
 	}
 	remaining = raid5_dec_bi_active_stripes(raid_bio);
-	if (remaining == 0) {
-		trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev),
-					 raid_bio, 0);
+	if (remaining == 0)
 		bio_endio(raid_bio, 0);
-	}
 	if (atomic_dec_and_test(&conf->active_aligned_reads))
 		wake_up(&conf->wait_for_stripe);
 	return handled;
diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c
index 383a727b8aa0..6e5ad8ec0a22 100644
--- a/drivers/media/platform/omap3isp/isp.c
+++ b/drivers/media/platform/omap3isp/isp.c
@@ -1338,28 +1338,15 @@ static int isp_enable_clocks(struct isp_device *isp)
 {
 	int r;
 	unsigned long rate;
-	int divisor;
-
-	/*
-	 * cam_mclk clock chain:
-	 *   dpll4 -> dpll4_m5 -> dpll4_m5x2 -> cam_mclk
-	 *
-	 * In OMAP3630 dpll4_m5x2 != 2 x dpll4_m5 but both are
-	 * set to the same value. Hence the rate set for dpll4_m5
-	 * has to be twice of what is set on OMAP3430 to get
-	 * the required value for cam_mclk
-	 */
-	divisor = isp->revision == ISP_REVISION_15_0 ? 1 : 2;
 
 	r = clk_prepare_enable(isp->clock[ISP_CLK_CAM_ICK]);
 	if (r) {
 		dev_err(isp->dev, "failed to enable cam_ick clock\n");
 		goto out_clk_enable_ick;
 	}
-	r = clk_set_rate(isp->clock[ISP_CLK_DPLL4_M5_CK],
-			 CM_CAM_MCLK_HZ/divisor);
+	r = clk_set_rate(isp->clock[ISP_CLK_CAM_MCLK], CM_CAM_MCLK_HZ);
 	if (r) {
-		dev_err(isp->dev, "clk_set_rate for dpll4_m5_ck failed\n");
+		dev_err(isp->dev, "clk_set_rate for cam_mclk failed\n");
 		goto out_clk_enable_mclk;
 	}
 	r = clk_prepare_enable(isp->clock[ISP_CLK_CAM_MCLK]);
@@ -1401,7 +1388,6 @@ static void isp_disable_clocks(struct isp_device *isp)
 static const char *isp_clocks[] = {
 	"cam_ick",
 	"cam_mclk",
-	"dpll4_m5_ck",
 	"csi2_96m_fck",
 	"l3_ick",
 };
diff --git a/drivers/media/platform/omap3isp/isp.h b/drivers/media/platform/omap3isp/isp.h
index 517d348ce32b..c77e1f2ae5ca 100644
--- a/drivers/media/platform/omap3isp/isp.h
+++ b/drivers/media/platform/omap3isp/isp.h
@@ -147,7 +147,6 @@ struct isp_platform_callback {
  * @ref_count: Reference count for handling multiple ISP requests.
  * @cam_ick: Pointer to camera interface clock structure.
  * @cam_mclk: Pointer to camera functional clock structure.
- * @dpll4_m5_ck: Pointer to DPLL4 M5 clock structure.
  * @csi2_fck: Pointer to camera CSI2 complexIO clock structure.
  * @l3_ick: Pointer to OMAP3 L3 bus interface clock.
  * @irq: Currently attached ISP ISR callbacks information structure.
@@ -189,10 +188,9 @@ struct isp_device {
 	u32 xclk_divisor[2];	/* Two clocks, a and b. */
 #define ISP_CLK_CAM_ICK		0
 #define ISP_CLK_CAM_MCLK	1
-#define ISP_CLK_DPLL4_M5_CK	2
-#define ISP_CLK_CSI2_FCK	3
-#define ISP_CLK_L3_ICK		4
-	struct clk *clock[5];
+#define ISP_CLK_CSI2_FCK	2
+#define ISP_CLK_L3_ICK		3
+	struct clk *clock[4];
 
 	/* ISP modules */
 	struct ispstat isp_af;
diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c
index 56ff19cdc2ad..ffcb10ac4341 100644
--- a/drivers/memstick/core/memstick.c
+++ b/drivers/memstick/core/memstick.c
@@ -512,18 +512,17 @@ int memstick_add_host(struct memstick_host *host)
 {
 	int rc;
 
-	while (1) {
-		if (!idr_pre_get(&memstick_host_idr, GFP_KERNEL))
-			return -ENOMEM;
+	idr_preload(GFP_KERNEL);
+	spin_lock(&memstick_host_lock);
 
-		spin_lock(&memstick_host_lock);
-		rc = idr_get_new(&memstick_host_idr, host, &host->id);
-		spin_unlock(&memstick_host_lock);
-		if (!rc)
-			break;
-		else if (rc != -EAGAIN)
-			return rc;
-	}
+	rc = idr_alloc(&memstick_host_idr, host, 0, 0, GFP_NOWAIT);
+	if (rc >= 0)
+		host->id = rc;
+
+	spin_unlock(&memstick_host_lock);
+	idr_preload_end();
+	if (rc < 0)
+		return rc;
 
 	dev_set_name(&host->dev, "memstick%u", host->id);
 
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 9729b92fbfdd..f12b78dbce04 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1213,21 +1213,10 @@ static int mspro_block_init_disk(struct memstick_dev *card)
 	msb->page_size = be16_to_cpu(sys_info->unit_size);
 
 	mutex_lock(&mspro_block_disk_lock);
-	if (!idr_pre_get(&mspro_block_disk_idr, GFP_KERNEL)) {
-		mutex_unlock(&mspro_block_disk_lock);
-		return -ENOMEM;
-	}
-
-	rc = idr_get_new(&mspro_block_disk_idr, card, &disk_id);
+	disk_id = idr_alloc(&mspro_block_disk_idr, card, 0, 256, GFP_KERNEL);
 	mutex_unlock(&mspro_block_disk_lock);
-
-	if (rc)
-		return rc;
-
-	if ((disk_id << MSPRO_BLOCK_PART_SHIFT) > 255) {
-		rc = -ENOSPC;
-		goto out_release_id;
-	}
+	if (disk_id < 0)
+		return disk_id;
 
 	msb->disk = alloc_disk(1 << MSPRO_BLOCK_PART_SHIFT);
 	if (!msb->disk) {
diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c
index 29b2172ae18f..a7c5b31c0d50 100644
--- a/drivers/memstick/host/r592.c
+++ b/drivers/memstick/host/r592.c
@@ -454,7 +454,7 @@ static int r592_transfer_fifo_pio(struct r592_device *dev)
 /* Executes one TPC (data is read/written from small or large fifo) */
 static void r592_execute_tpc(struct r592_device *dev)
 {
-	bool is_write = dev->req->tpc >= MS_TPC_SET_RW_REG_ADRS;
+	bool is_write;
 	int len, error;
 	u32 status, reg;
 
@@ -463,6 +463,7 @@ static void r592_execute_tpc(struct r592_device *dev)
 		return;
 	}
 
+	is_write = dev->req->tpc >= MS_TPC_SET_RW_REG_ADRS;
 	len = dev->req->long_data ?
 		dev->req->sg.length : dev->req->data_len;
 
diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index 481a98a10ecd..2f12cc13489a 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -1091,15 +1091,14 @@ static int rtsx_pci_probe(struct pci_dev *pcidev,
 	}
 	handle->pcr = pcr;
 
-	if (!idr_pre_get(&rtsx_pci_idr, GFP_KERNEL)) {
-		ret = -ENOMEM;
-		goto free_handle;
-	}
-
+	idr_preload(GFP_KERNEL);
 	spin_lock(&rtsx_pci_lock);
-	ret = idr_get_new(&rtsx_pci_idr, pcr, &pcr->id);
+	ret = idr_alloc(&rtsx_pci_idr, pcr, 0, 0, GFP_NOWAIT);
+	if (ret >= 0)
+		pcr->id = ret;
 	spin_unlock(&rtsx_pci_lock);
-	if (ret)
+	idr_preload_end();
+	if (ret < 0)
 		goto free_handle;
 
 	pcr->pci = pcidev;
diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c
index f428d86bfc10..f32550a74bdd 100644
--- a/drivers/misc/c2port/core.c
+++ b/drivers/misc/c2port/core.c
@@ -885,7 +885,7 @@ struct c2port_device *c2port_device_register(char *name,
 					struct c2port_ops *ops, void *devdata)
 {
 	struct c2port_device *c2dev;
-	int id, ret;
+	int ret;
 
 	if (unlikely(!ops) || unlikely(!ops->access) || \
 		unlikely(!ops->c2d_dir) || unlikely(!ops->c2ck_set) || \
@@ -897,22 +897,18 @@ struct c2port_device *c2port_device_register(char *name,
 	if (unlikely(!c2dev))
 		return ERR_PTR(-ENOMEM);
 
-	ret = idr_pre_get(&c2port_idr, GFP_KERNEL);
-	if (!ret) {
-		ret = -ENOMEM;
-		goto error_idr_get_new;
-	}
-
+	idr_preload(GFP_KERNEL);
 	spin_lock_irq(&c2port_idr_lock);
-	ret = idr_get_new(&c2port_idr, c2dev, &id);
+	ret = idr_alloc(&c2port_idr, c2dev, 0, 0, GFP_NOWAIT);
 	spin_unlock_irq(&c2port_idr_lock);
+	idr_preload_end();
 
 	if (ret < 0)
-		goto error_idr_get_new;
-	c2dev->id = id;
+		goto error_idr_alloc;
+	c2dev->id = ret;
 
 	c2dev->dev = device_create(c2port_class, NULL, 0, c2dev,
-					"c2port%d", id);
+				   "c2port%d", c2dev->id);
 	if (unlikely(IS_ERR(c2dev->dev))) {
 		ret = PTR_ERR(c2dev->dev);
 		goto error_device_create;
@@ -946,10 +942,10 @@ error_device_create_bin_file:
 
 error_device_create:
 	spin_lock_irq(&c2port_idr_lock);
-	idr_remove(&c2port_idr, id);
+	idr_remove(&c2port_idr, c2dev->id);
 	spin_unlock_irq(&c2port_idr_lock);
 
-error_idr_get_new:
+error_idr_alloc:
 	kfree(c2dev);
 
 	return ERR_PTR(ret);
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
index 3aa9a969b373..36f5d52775a9 100644
--- a/drivers/misc/kgdbts.c
+++ b/drivers/misc/kgdbts.c
@@ -103,6 +103,7 @@
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
+#include <asm/sections.h>
 
 #define v1printk(a...) do { \
 	if (verbose) \
@@ -222,6 +223,7 @@ static unsigned long lookup_addr(char *arg)
 		addr = (unsigned long)do_fork;
 	else if (!strcmp(arg, "hw_break_val"))
 		addr = (unsigned long)&hw_break_val;
+	addr = (unsigned long) dereference_function_descriptor((void *)addr);
 	return addr;
 }
 
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
index 240a6d361665..2129274ef7ab 100644
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -280,11 +280,10 @@ static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
 			const struct mmu_notifier_ops *ops)
 {
 	struct mmu_notifier *mn, *gru_mn = NULL;
-	struct hlist_node *n;
 
 	if (mm->mmu_notifier_mm) {
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list,
+		hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list,
 					 hlist)
 		    if (mn->ops == ops) {
 			gru_mn = mn;
diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c
index 0bd5349b0422..0ab7c922212c 100644
--- a/drivers/misc/tifm_core.c
+++ b/drivers/misc/tifm_core.c
@@ -196,13 +196,14 @@ int tifm_add_adapter(struct tifm_adapter *fm)
 {
 	int rc;
 
-	if (!idr_pre_get(&tifm_adapter_idr, GFP_KERNEL))
-		return -ENOMEM;
-
+	idr_preload(GFP_KERNEL);
 	spin_lock(&tifm_adapter_lock);
-	rc = idr_get_new(&tifm_adapter_idr, fm, &fm->id);
+	rc = idr_alloc(&tifm_adapter_idr, fm, 0, 0, GFP_NOWAIT);
+	if (rc >= 0)
+		fm->id = rc;
 	spin_unlock(&tifm_adapter_lock);
-	if (rc)
+	idr_preload_end();
+	if (rc < 0)
 		return rc;
 
 	dev_set_name(&fm->dev, "tifm%u", fm->id);
diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.c b/drivers/misc/vmw_vmci/vmci_doorbell.c
index c3e8397f62ed..a8cee33ae8d2 100644
--- a/drivers/misc/vmw_vmci/vmci_doorbell.c
+++ b/drivers/misc/vmw_vmci/vmci_doorbell.c
@@ -127,9 +127,8 @@ static struct dbell_entry *dbell_index_table_find(u32 idx)
 {
 	u32 bucket = VMCI_DOORBELL_HASH(idx);
 	struct dbell_entry *dbell;
-	struct hlist_node *node;
 
-	hlist_for_each_entry(dbell, node, &vmci_doorbell_it.entries[bucket],
+	hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket],
 			     node) {
 		if (idx == dbell->idx)
 			return dbell;
@@ -359,12 +358,10 @@ static void dbell_fire_entries(u32 notify_idx)
 {
 	u32 bucket = VMCI_DOORBELL_HASH(notify_idx);
 	struct dbell_entry *dbell;
-	struct hlist_node *node;
 
 	spin_lock_bh(&vmci_doorbell_it.lock);
 
-	hlist_for_each_entry(dbell, node,
-			     &vmci_doorbell_it.entries[bucket], node) {
+	hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket], node) {
 		if (dbell->idx == notify_idx &&
 		    atomic_read(&dbell->active) == 1) {
 			if (dbell->run_delayed) {
diff --git a/drivers/misc/vmw_vmci/vmci_resource.c b/drivers/misc/vmw_vmci/vmci_resource.c
index a196f84a4fd2..9a53a30de445 100644
--- a/drivers/misc/vmw_vmci/vmci_resource.c
+++ b/drivers/misc/vmw_vmci/vmci_resource.c
@@ -46,11 +46,10 @@ static struct vmci_resource *vmci_resource_lookup(struct vmci_handle handle,
 						  enum vmci_resource_type type)
 {
 	struct vmci_resource *r, *resource = NULL;
-	struct hlist_node *node;
 	unsigned int idx = vmci_resource_hash(handle);
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(r, node,
+	hlist_for_each_entry_rcu(r,
 				 &vmci_resource_table.entries[idx], node) {
 		u32 cid = r->handle.context;
 		u32 rid = r->handle.resource;
@@ -146,12 +145,11 @@ void vmci_resource_remove(struct vmci_resource *resource)
 	struct vmci_handle handle = resource->handle;
 	unsigned int idx = vmci_resource_hash(handle);
 	struct vmci_resource *r;
-	struct hlist_node *node;
 
 	/* Remove resource from hash table. */
 	spin_lock(&vmci_resource_table.lock);
 
-	hlist_for_each_entry(r, node, &vmci_resource_table.entries[idx], node) {
+	hlist_for_each_entry(r, &vmci_resource_table.entries[idx], node) {
 		if (vmci_handle_is_equal(r->handle, resource->handle)) {
 			hlist_del_init_rcu(&r->node);
 			break;
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 821cd8224137..2a3593d9f87d 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -429,19 +429,20 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
 	int err;
 	struct mmc_host *host;
 
-	if (!idr_pre_get(&mmc_host_idr, GFP_KERNEL))
-		return NULL;
-
 	host = kzalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL);
 	if (!host)
 		return NULL;
 
 	/* scanning will be enabled when we're ready */
 	host->rescan_disable = 1;
+	idr_preload(GFP_KERNEL);
 	spin_lock(&mmc_host_lock);
-	err = idr_get_new(&mmc_host_idr, host, &host->index);
+	err = idr_alloc(&mmc_host_idr, host, 0, 0, GFP_NOWAIT);
+	if (err >= 0)
+		host->index = err;
 	spin_unlock(&mmc_host_lock);
-	if (err)
+	idr_preload_end();
+	if (err < 0)
 		goto free;
 
 	dev_set_name(&host->class_dev, "mmc%d", host->index);
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 60063ccb4c4b..98342213ed21 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -1453,7 +1453,7 @@ static void dw_mci_read_data_pio(struct dw_mci *host)
 		if (!sg_miter_next(sg_miter))
 			goto done;
 
-		host->sg = sg_miter->__sg;
+		host->sg = sg_miter->piter.sg;
 		buf = sg_miter->addr;
 		remain = sg_miter->length;
 		offset = 0;
@@ -1508,7 +1508,7 @@ static void dw_mci_write_data_pio(struct dw_mci *host)
 		if (!sg_miter_next(sg_miter))
 			goto done;
 
-		host->sg = sg_miter->__sg;
+		host->sg = sg_miter->piter.sg;
 		buf = sg_miter->addr;
 		remain = sg_miter->length;
 		offset = 0;
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 03f2eb5627ec..557bec599f4f 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -74,8 +74,8 @@ config MTD_REDBOOT_PARTS_READONLY
 endif # MTD_REDBOOT_PARTS
 
 config MTD_CMDLINE_PARTS
-	bool "Command line partition table parsing"
-	depends on MTD = "y"
+	tristate "Command line partition table parsing"
+	depends on MTD
 	---help---
 	  Allow generic configuration of the MTD partition tables via the kernel
 	  command line. Multiple flash resources are supported for hardware where
diff --git a/drivers/mtd/ar7part.c b/drivers/mtd/ar7part.c
index 7c057a05adb6..ddc0a4287a4b 100644
--- a/drivers/mtd/ar7part.c
+++ b/drivers/mtd/ar7part.c
@@ -142,7 +142,13 @@ static int __init ar7_parser_init(void)
 	return register_mtd_parser(&ar7_parser);
 }
 
+static void __exit ar7_parser_exit(void)
+{
+	deregister_mtd_parser(&ar7_parser);
+}
+
 module_init(ar7_parser_init);
+module_exit(ar7_parser_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR(	"Felix Fietkau <nbd@openwrt.org>, "
diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c
index e06d782489a6..63feb75cc8e0 100644
--- a/drivers/mtd/bcm47xxpart.c
+++ b/drivers/mtd/bcm47xxpart.c
@@ -14,17 +14,11 @@
 #include <linux/slab.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
-#include <asm/mach-bcm47xx/nvram.h>
+#include <bcm47xx_nvram.h>
 
 /* 10 parts were found on sflash on Netgear WNDR4500 */
 #define BCM47XXPART_MAX_PARTS		12
 
-/*
- * Amount of bytes we read when analyzing each block of flash memory.
- * Set it big enough to allow detecting partition and reading important data.
- */
-#define BCM47XXPART_BYTES_TO_READ	0x404
-
 /* Magics */
 #define BOARD_DATA_MAGIC		0x5246504D	/* MPFR */
 #define POT_MAGIC1			0x54544f50	/* POTT */
@@ -59,13 +53,21 @@ static int bcm47xxpart_parse(struct mtd_info *master,
 	uint32_t *buf;
 	size_t bytes_read;
 	uint32_t offset;
-	uint32_t blocksize = 0x10000;
+	uint32_t blocksize = master->erasesize;
 	struct trx_header *trx;
+	int trx_part = -1;
+	int last_trx_part = -1;
+	int max_bytes_to_read = 0x8004;
+
+	if (blocksize <= 0x10000)
+		blocksize = 0x10000;
+	if (blocksize == 0x20000)
+		max_bytes_to_read = 0x18004;
 
 	/* Alloc */
 	parts = kzalloc(sizeof(struct mtd_partition) * BCM47XXPART_MAX_PARTS,
 			GFP_KERNEL);
-	buf = kzalloc(BCM47XXPART_BYTES_TO_READ, GFP_KERNEL);
+	buf = kzalloc(max_bytes_to_read, GFP_KERNEL);
 
 	/* Parse block by block looking for magics */
 	for (offset = 0; offset <= master->size - blocksize;
@@ -80,7 +82,7 @@ static int bcm47xxpart_parse(struct mtd_info *master,
 		}
 
 		/* Read beginning of the block */
-		if (mtd_read(master, offset, BCM47XXPART_BYTES_TO_READ,
+		if (mtd_read(master, offset, max_bytes_to_read,
 			     &bytes_read, (uint8_t *)buf) < 0) {
 			pr_err("mtd_read error while parsing (offset: 0x%X)!\n",
 			       offset);
@@ -95,9 +97,16 @@ static int bcm47xxpart_parse(struct mtd_info *master,
 		}
 
 		/* Standard NVRAM */
-		if (buf[0x000 / 4] == NVRAM_HEADER) {
+		if (buf[0x000 / 4] == NVRAM_HEADER ||
+		    buf[0x1000 / 4] == NVRAM_HEADER ||
+		    buf[0x8000 / 4] == NVRAM_HEADER ||
+		    (blocksize == 0x20000 && (
+		      buf[0x10000 / 4] == NVRAM_HEADER ||
+		      buf[0x11000 / 4] == NVRAM_HEADER ||
+		      buf[0x18000 / 4] == NVRAM_HEADER))) {
 			bcm47xxpart_add_part(&parts[curr_part++], "nvram",
 					     offset, 0);
+			offset = rounddown(offset, blocksize);
 			continue;
 		}
 
@@ -131,6 +140,10 @@ static int bcm47xxpart_parse(struct mtd_info *master,
 		if (buf[0x000 / 4] == TRX_MAGIC) {
 			trx = (struct trx_header *)buf;
 
+			trx_part = curr_part;
+			bcm47xxpart_add_part(&parts[curr_part++], "firmware",
+					     offset, 0);
+
 			i = 0;
 			/* We have LZMA loader if offset[2] points to sth */
 			if (trx->offset[2]) {
@@ -154,6 +167,8 @@ static int bcm47xxpart_parse(struct mtd_info *master,
 					     offset + trx->offset[i], 0);
 			i++;
 
+			last_trx_part = curr_part - 1;
+
 			/*
 			 * We have whole TRX scanned, skip to the next part. Use
 			 * roundown (not roundup), as the loop will increase
@@ -169,11 +184,15 @@ static int bcm47xxpart_parse(struct mtd_info *master,
 	 * Assume that partitions end at the beginning of the one they are
 	 * followed by.
 	 */
-	for (i = 0; i < curr_part - 1; i++)
-		parts[i].size = parts[i + 1].offset - parts[i].offset;
-	if (curr_part > 0)
-		parts[curr_part - 1].size =
-				master->size - parts[curr_part - 1].offset;
+	for (i = 0; i < curr_part; i++) {
+		u64 next_part_offset = (i < curr_part - 1) ?
+				       parts[i + 1].offset : master->size;
+
+		parts[i].size = next_part_offset - parts[i].offset;
+		if (i == last_trx_part && trx_part >= 0)
+			parts[trx_part].size = next_part_offset -
+					       parts[trx_part].offset;
+	}
 
 	*pparts = parts;
 	return curr_part;
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index b86197286f24..fff665d59a0d 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -33,6 +33,8 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/reboot.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
 #include <linux/mtd/map.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/cfi.h>
@@ -74,6 +76,10 @@ static void put_chip(struct map_info *map, struct flchip *chip, unsigned long ad
 static int cfi_atmel_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
 static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
 
+static int cfi_ppb_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
+static int cfi_ppb_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
+static int cfi_ppb_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len);
+
 static struct mtd_chip_driver cfi_amdstd_chipdrv = {
 	.probe		= NULL, /* Not usable directly */
 	.destroy	= cfi_amdstd_destroy,
@@ -496,6 +502,7 @@ static void cfi_fixup_m29ew_delay_after_resume(struct cfi_private *cfi)
 struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary)
 {
 	struct cfi_private *cfi = map->fldrv_priv;
+	struct device_node __maybe_unused *np = map->device_node;
 	struct mtd_info *mtd;
 	int i;
 
@@ -570,6 +577,17 @@ struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary)
 			cfi_tell_features(extp);
 #endif
 
+#ifdef CONFIG_OF
+			if (np && of_property_read_bool(
+				    np, "use-advanced-sector-protection")
+			    && extp->BlkProtUnprot == 8) {
+				printk(KERN_INFO "  Advanced Sector Protection (PPB Locking) supported\n");
+				mtd->_lock = cfi_ppb_lock;
+				mtd->_unlock = cfi_ppb_unlock;
+				mtd->_is_locked = cfi_ppb_is_locked;
+			}
+#endif
+
 			bootloc = extp->TopBottom;
 			if ((bootloc < 2) || (bootloc > 5)) {
 				printk(KERN_WARNING "%s: CFI contains unrecognised boot "
@@ -2172,6 +2190,205 @@ static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	return cfi_varsize_frob(mtd, do_atmel_unlock, ofs, len, NULL);
 }
 
+/*
+ * Advanced Sector Protection - PPB (Persistent Protection Bit) locking
+ */
+
+struct ppb_lock {
+	struct flchip *chip;
+	loff_t offset;
+	int locked;
+};
+
+#define MAX_SECTORS			512
+
+#define DO_XXLOCK_ONEBLOCK_LOCK		((void *)1)
+#define DO_XXLOCK_ONEBLOCK_UNLOCK	((void *)2)
+#define DO_XXLOCK_ONEBLOCK_GETLOCK	((void *)3)
+
+static int __maybe_unused do_ppb_xxlock(struct map_info *map,
+					struct flchip *chip,
+					unsigned long adr, int len, void *thunk)
+{
+	struct cfi_private *cfi = map->fldrv_priv;
+	unsigned long timeo;
+	int ret;
+
+	mutex_lock(&chip->mutex);
+	ret = get_chip(map, chip, adr + chip->start, FL_LOCKING);
+	if (ret) {
+		mutex_unlock(&chip->mutex);
+		return ret;
+	}
+
+	pr_debug("MTD %s(): XXLOCK 0x%08lx len %d\n", __func__, adr, len);
+
+	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	/* PPB entry command */
+	cfi_send_gen_cmd(0xC0, cfi->addr_unlock1, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+
+	if (thunk == DO_XXLOCK_ONEBLOCK_LOCK) {
+		chip->state = FL_LOCKING;
+		map_write(map, CMD(0xA0), chip->start + adr);
+		map_write(map, CMD(0x00), chip->start + adr);
+	} else if (thunk == DO_XXLOCK_ONEBLOCK_UNLOCK) {
+		/*
+		 * Unlocking of one specific sector is not supported, so we
+		 * have to unlock all sectors of this device instead
+		 */
+		chip->state = FL_UNLOCKING;
+		map_write(map, CMD(0x80), chip->start);
+		map_write(map, CMD(0x30), chip->start);
+	} else if (thunk == DO_XXLOCK_ONEBLOCK_GETLOCK) {
+		chip->state = FL_JEDEC_QUERY;
+		/* Return locked status: 0->locked, 1->unlocked */
+		ret = !cfi_read_query(map, adr);
+	} else
+		BUG();
+
+	/*
+	 * Wait for some time as unlocking of all sectors takes quite long
+	 */
+	timeo = jiffies + msecs_to_jiffies(2000);	/* 2s max (un)locking */
+	for (;;) {
+		if (chip_ready(map, adr))
+			break;
+
+		if (time_after(jiffies, timeo)) {
+			printk(KERN_ERR "Waiting for chip to be ready timed out.\n");
+			ret = -EIO;
+			break;
+		}
+
+		UDELAY(map, chip, adr, 1);
+	}
+
+	/* Exit BC commands */
+	map_write(map, CMD(0x90), chip->start);
+	map_write(map, CMD(0x00), chip->start);
+
+	chip->state = FL_READY;
+	put_chip(map, chip, adr + chip->start);
+	mutex_unlock(&chip->mutex);
+
+	return ret;
+}
+
+static int __maybe_unused cfi_ppb_lock(struct mtd_info *mtd, loff_t ofs,
+				       uint64_t len)
+{
+	return cfi_varsize_frob(mtd, do_ppb_xxlock, ofs, len,
+				DO_XXLOCK_ONEBLOCK_LOCK);
+}
+
+static int __maybe_unused cfi_ppb_unlock(struct mtd_info *mtd, loff_t ofs,
+					 uint64_t len)
+{
+	struct mtd_erase_region_info *regions = mtd->eraseregions;
+	struct map_info *map = mtd->priv;
+	struct cfi_private *cfi = map->fldrv_priv;
+	struct ppb_lock *sect;
+	unsigned long adr;
+	loff_t offset;
+	uint64_t length;
+	int chipnum;
+	int i;
+	int sectors;
+	int ret;
+
+	/*
+	 * PPB unlocking always unlocks all sectors of the flash chip.
+	 * We need to re-lock all previously locked sectors. So lets
+	 * first check the locking status of all sectors and save
+	 * it for future use.
+	 */
+	sect = kzalloc(MAX_SECTORS * sizeof(struct ppb_lock), GFP_KERNEL);
+	if (!sect)
+		return -ENOMEM;
+
+	/*
+	 * This code to walk all sectors is a slightly modified version
+	 * of the cfi_varsize_frob() code.
+	 */
+	i = 0;
+	chipnum = 0;
+	adr = 0;
+	sectors = 0;
+	offset = 0;
+	length = mtd->size;
+
+	while (length) {
+		int size = regions[i].erasesize;
+
+		/*
+		 * Only test sectors that shall not be unlocked. The other
+		 * sectors shall be unlocked, so lets keep their locking
+		 * status at "unlocked" (locked=0) for the final re-locking.
+		 */
+		if ((adr < ofs) || (adr >= (ofs + len))) {
+			sect[sectors].chip = &cfi->chips[chipnum];
+			sect[sectors].offset = offset;
+			sect[sectors].locked = do_ppb_xxlock(
+				map, &cfi->chips[chipnum], adr, 0,
+				DO_XXLOCK_ONEBLOCK_GETLOCK);
+		}
+
+		adr += size;
+		offset += size;
+		length -= size;
+
+		if (offset == regions[i].offset + size * regions[i].numblocks)
+			i++;
+
+		if (adr >> cfi->chipshift) {
+			adr = 0;
+			chipnum++;
+
+			if (chipnum >= cfi->numchips)
+				break;
+		}
+
+		sectors++;
+		if (sectors >= MAX_SECTORS) {
+			printk(KERN_ERR "Only %d sectors for PPB locking supported!\n",
+			       MAX_SECTORS);
+			kfree(sect);
+			return -EINVAL;
+		}
+	}
+
+	/* Now unlock the whole chip */
+	ret = cfi_varsize_frob(mtd, do_ppb_xxlock, ofs, len,
+			       DO_XXLOCK_ONEBLOCK_UNLOCK);
+	if (ret) {
+		kfree(sect);
+		return ret;
+	}
+
+	/*
+	 * PPB unlocking always unlocks all sectors of the flash chip.
+	 * We need to re-lock all previously locked sectors.
+	 */
+	for (i = 0; i < sectors; i++) {
+		if (sect[i].locked)
+			do_ppb_xxlock(map, sect[i].chip, sect[i].offset, 0,
+				      DO_XXLOCK_ONEBLOCK_LOCK);
+	}
+
+	kfree(sect);
+	return ret;
+}
+
+static int __maybe_unused cfi_ppb_is_locked(struct mtd_info *mtd, loff_t ofs,
+					    uint64_t len)
+{
+	return cfi_varsize_frob(mtd, do_ppb_xxlock, ofs, len,
+				DO_XXLOCK_ONEBLOCK_GETLOCK) ? 1 : 0;
+}
 
 static void cfi_amdstd_sync (struct mtd_info *mtd)
 {
diff --git a/drivers/mtd/cmdlinepart.c b/drivers/mtd/cmdlinepart.c
index c533f27d863f..721caebbc5cc 100644
--- a/drivers/mtd/cmdlinepart.c
+++ b/drivers/mtd/cmdlinepart.c
@@ -22,11 +22,22 @@
  *
  * mtdparts=<mtddef>[;<mtddef]
  * <mtddef>  := <mtd-id>:<partdef>[,<partdef>]
- *              where <mtd-id> is the name from the "cat /proc/mtd" command
- * <partdef> := <size>[@offset][<name>][ro][lk]
+ * <partdef> := <size>[@<offset>][<name>][ro][lk]
  * <mtd-id>  := unique name used in mapping driver/device (mtd->name)
  * <size>    := standard linux memsize OR "-" to denote all remaining space
+ *              size is automatically truncated at end of device
+ *              if specified or trucated size is 0 the part is skipped
+ * <offset>  := standard linux memsize
+ *              if omitted the part will immediately follow the previous part
+ *              or 0 if the first part
  * <name>    := '(' NAME ')'
+ *              NAME will appear in /proc/mtd
+ *
+ * <size> and <offset> can be specified such that the parts are out of order
+ * in physical memory and may even overlap.
+ *
+ * The parts are assigned MTD numbers in the order they are specified in the
+ * command line regardless of their order in physical memory.
  *
  * Examples:
  *
@@ -70,6 +81,7 @@ struct cmdline_mtd_partition {
 static struct cmdline_mtd_partition *partitions;
 
 /* the command line passed to mtdpart_setup() */
+static char *mtdparts;
 static char *cmdline;
 static int cmdline_parsed;
 
@@ -330,6 +342,14 @@ static int parse_cmdline_partitions(struct mtd_info *master,
 		if (part->parts[i].size == SIZE_REMAINING)
 			part->parts[i].size = master->size - offset;
 
+		if (offset + part->parts[i].size > master->size) {
+			printk(KERN_WARNING ERRP
+			       "%s: partitioning exceeds flash size, truncating\n",
+			       part->mtd_id);
+			part->parts[i].size = master->size - offset;
+		}
+		offset += part->parts[i].size;
+
 		if (part->parts[i].size == 0) {
 			printk(KERN_WARNING ERRP
 			       "%s: skipping zero sized partition\n",
@@ -337,16 +357,8 @@ static int parse_cmdline_partitions(struct mtd_info *master,
 			part->num_parts--;
 			memmove(&part->parts[i], &part->parts[i + 1],
 				sizeof(*part->parts) * (part->num_parts - i));
-			continue;
-		}
-
-		if (offset + part->parts[i].size > master->size) {
-			printk(KERN_WARNING ERRP
-			       "%s: partitioning exceeds flash size, truncating\n",
-			       part->mtd_id);
-			part->parts[i].size = master->size - offset;
+			i--;
 		}
-		offset += part->parts[i].size;
 	}
 
 	*pparts = kmemdup(part->parts, sizeof(*part->parts) * part->num_parts,
@@ -365,7 +377,7 @@ static int parse_cmdline_partitions(struct mtd_info *master,
  *
  * This function needs to be visible for bootloaders.
  */
-static int mtdpart_setup(char *s)
+static int __init mtdpart_setup(char *s)
 {
 	cmdline = s;
 	return 1;
@@ -381,10 +393,21 @@ static struct mtd_part_parser cmdline_parser = {
 
 static int __init cmdline_parser_init(void)
 {
+	if (mtdparts)
+		mtdpart_setup(mtdparts);
 	return register_mtd_parser(&cmdline_parser);
 }
 
+static void __exit cmdline_parser_exit(void)
+{
+	deregister_mtd_parser(&cmdline_parser);
+}
+
 module_init(cmdline_parser_init);
+module_exit(cmdline_parser_exit);
+
+MODULE_PARM_DESC(mtdparts, "Partitioning specification");
+module_param(mtdparts, charp, 0);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marius Groeger <mag@sysgo.de>");
diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile
index 395733a30ef4..369a1943ca25 100644
--- a/drivers/mtd/devices/Makefile
+++ b/drivers/mtd/devices/Makefile
@@ -17,8 +17,10 @@ obj-$(CONFIG_MTD_LART)		+= lart.o
 obj-$(CONFIG_MTD_BLOCK2MTD)	+= block2mtd.o
 obj-$(CONFIG_MTD_DATAFLASH)	+= mtd_dataflash.o
 obj-$(CONFIG_MTD_M25P80)	+= m25p80.o
+obj-$(CONFIG_MTD_NAND_OMAP_BCH)	+= elm.o
 obj-$(CONFIG_MTD_SPEAR_SMI)	+= spear_smi.o
 obj-$(CONFIG_MTD_SST25L)	+= sst25l.o
 obj-$(CONFIG_MTD_BCM47XXSFLASH)	+= bcm47xxsflash.o
 
-CFLAGS_docg3.o			+= -I$(src)
-\ No newline at end of file
+
+CFLAGS_docg3.o			+= -I$(src)
diff --git a/drivers/mtd/devices/bcm47xxsflash.c b/drivers/mtd/devices/bcm47xxsflash.c
index 4714584aa993..95266285acb1 100644
--- a/drivers/mtd/devices/bcm47xxsflash.c
+++ b/drivers/mtd/devices/bcm47xxsflash.c
@@ -5,6 +5,8 @@
 #include <linux/platform_device.h>
 #include <linux/bcma/bcma.h>
 
+#include "bcm47xxsflash.h"
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Serial flash driver for BCMA bus");
 
@@ -13,26 +15,28 @@ static const char *probes[] = { "bcm47xxpart", NULL };
 static int bcm47xxsflash_read(struct mtd_info *mtd, loff_t from, size_t len,
 			      size_t *retlen, u_char *buf)
 {
-	struct bcma_sflash *sflash = mtd->priv;
+	struct bcm47xxsflash *b47s = mtd->priv;
 
 	/* Check address range */
 	if ((from + len) > mtd->size)
 		return -EINVAL;
 
-	memcpy_fromio(buf, (void __iomem *)KSEG0ADDR(sflash->window + from),
+	memcpy_fromio(buf, (void __iomem *)KSEG0ADDR(b47s->window + from),
 		      len);
+	*retlen = len;
 
 	return len;
 }
 
-static void bcm47xxsflash_fill_mtd(struct bcma_sflash *sflash,
-				   struct mtd_info *mtd)
+static void bcm47xxsflash_fill_mtd(struct bcm47xxsflash *b47s)
 {
-	mtd->priv = sflash;
+	struct mtd_info *mtd = &b47s->mtd;
+
+	mtd->priv = b47s;
 	mtd->name = "bcm47xxsflash";
 	mtd->owner = THIS_MODULE;
 	mtd->type = MTD_ROM;
-	mtd->size = sflash->size;
+	mtd->size = b47s->size;
 	mtd->_read = bcm47xxsflash_read;
 
 	/* TODO: implement writing support and verify/change following code */
@@ -40,19 +44,30 @@ static void bcm47xxsflash_fill_mtd(struct bcma_sflash *sflash,
 	mtd->writebufsize = mtd->writesize = 1;
 }
 
-static int bcm47xxsflash_probe(struct platform_device *pdev)
+/**************************************************
+ * BCMA
+ **************************************************/
+
+static int bcm47xxsflash_bcma_probe(struct platform_device *pdev)
 {
 	struct bcma_sflash *sflash = dev_get_platdata(&pdev->dev);
+	struct bcm47xxsflash *b47s;
 	int err;
 
-	sflash->mtd = kzalloc(sizeof(struct mtd_info), GFP_KERNEL);
-	if (!sflash->mtd) {
+	b47s = kzalloc(sizeof(*b47s), GFP_KERNEL);
+	if (!b47s) {
 		err = -ENOMEM;
 		goto out;
 	}
-	bcm47xxsflash_fill_mtd(sflash, sflash->mtd);
+	sflash->priv = b47s;
 
-	err = mtd_device_parse_register(sflash->mtd, probes, NULL, NULL, 0);
+	b47s->window = sflash->window;
+	b47s->blocksize = sflash->blocksize;
+	b47s->numblocks = sflash->numblocks;
+	b47s->size = sflash->size;
+	bcm47xxsflash_fill_mtd(b47s);
+
+	err = mtd_device_parse_register(&b47s->mtd, probes, NULL, NULL, 0);
 	if (err) {
 		pr_err("Failed to register MTD device: %d\n", err);
 		goto err_dev_reg;
@@ -61,34 +76,40 @@ static int bcm47xxsflash_probe(struct platform_device *pdev)
 	return 0;
 
 err_dev_reg:
-	kfree(sflash->mtd);
+	kfree(&b47s->mtd);
 out:
 	return err;
 }
 
-static int bcm47xxsflash_remove(struct platform_device *pdev)
+static int bcm47xxsflash_bcma_remove(struct platform_device *pdev)
 {
 	struct bcma_sflash *sflash = dev_get_platdata(&pdev->dev);
+	struct bcm47xxsflash *b47s = sflash->priv;
 
-	mtd_device_unregister(sflash->mtd);
-	kfree(sflash->mtd);
+	mtd_device_unregister(&b47s->mtd);
+	kfree(b47s);
 
 	return 0;
 }
 
 static struct platform_driver bcma_sflash_driver = {
-	.remove = bcm47xxsflash_remove,
+	.probe	= bcm47xxsflash_bcma_probe,
+	.remove = bcm47xxsflash_bcma_remove,
 	.driver = {
 		.name = "bcma_sflash",
 		.owner = THIS_MODULE,
 	},
 };
 
+/**************************************************
+ * Init
+ **************************************************/
+
 static int __init bcm47xxsflash_init(void)
 {
 	int err;
 
-	err = platform_driver_probe(&bcma_sflash_driver, bcm47xxsflash_probe);
+	err = platform_driver_register(&bcma_sflash_driver);
 	if (err)
 		pr_err("Failed to register BCMA serial flash driver: %d\n",
 		       err);
diff --git a/drivers/mtd/devices/bcm47xxsflash.h b/drivers/mtd/devices/bcm47xxsflash.h
new file mode 100644
index 000000000000..ebf6f710e23c
--- /dev/null
+++ b/drivers/mtd/devices/bcm47xxsflash.h
@@ -0,0 +1,15 @@
+#ifndef __BCM47XXSFLASH_H
+#define __BCM47XXSFLASH_H
+
+#include <linux/mtd/mtd.h>
+
+struct bcm47xxsflash {
+	u32 window;
+	u32 blocksize;
+	u16 numblocks;
+	u32 size;
+
+	struct mtd_info mtd;
+};
+
+#endif /* BCM47XXSFLASH */
diff --git a/drivers/mtd/devices/elm.c b/drivers/mtd/devices/elm.c
new file mode 100644
index 000000000000..2ec5da9ee248
--- /dev/null
+++ b/drivers/mtd/devices/elm.c
@@ -0,0 +1,404 @@
+/*
+ * Error Location Module
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/pm_runtime.h>
+#include <linux/platform_data/elm.h>
+
+#define ELM_IRQSTATUS			0x018
+#define ELM_IRQENABLE			0x01c
+#define ELM_LOCATION_CONFIG		0x020
+#define ELM_PAGE_CTRL			0x080
+#define ELM_SYNDROME_FRAGMENT_0		0x400
+#define ELM_SYNDROME_FRAGMENT_6		0x418
+#define ELM_LOCATION_STATUS		0x800
+#define ELM_ERROR_LOCATION_0		0x880
+
+/* ELM Interrupt Status Register */
+#define INTR_STATUS_PAGE_VALID		BIT(8)
+
+/* ELM Interrupt Enable Register */
+#define INTR_EN_PAGE_MASK		BIT(8)
+
+/* ELM Location Configuration Register */
+#define ECC_BCH_LEVEL_MASK		0x3
+
+/* ELM syndrome */
+#define ELM_SYNDROME_VALID		BIT(16)
+
+/* ELM_LOCATION_STATUS Register */
+#define ECC_CORRECTABLE_MASK		BIT(8)
+#define ECC_NB_ERRORS_MASK		0x1f
+
+/* ELM_ERROR_LOCATION_0-15 Registers */
+#define ECC_ERROR_LOCATION_MASK		0x1fff
+
+#define ELM_ECC_SIZE			0x7ff
+
+#define SYNDROME_FRAGMENT_REG_SIZE	0x40
+#define ERROR_LOCATION_SIZE		0x100
+
+struct elm_info {
+	struct device *dev;
+	void __iomem *elm_base;
+	struct completion elm_completion;
+	struct list_head list;
+	enum bch_ecc bch_type;
+};
+
+static LIST_HEAD(elm_devices);
+
+static void elm_write_reg(struct elm_info *info, int offset, u32 val)
+{
+	writel(val, info->elm_base + offset);
+}
+
+static u32 elm_read_reg(struct elm_info *info, int offset)
+{
+	return readl(info->elm_base + offset);
+}
+
+/**
+ * elm_config - Configure ELM module
+ * @dev:	ELM device
+ * @bch_type:	Type of BCH ecc
+ */
+void elm_config(struct device *dev, enum bch_ecc bch_type)
+{
+	u32 reg_val;
+	struct elm_info *info = dev_get_drvdata(dev);
+
+	reg_val = (bch_type & ECC_BCH_LEVEL_MASK) | (ELM_ECC_SIZE << 16);
+	elm_write_reg(info, ELM_LOCATION_CONFIG, reg_val);
+	info->bch_type = bch_type;
+}
+EXPORT_SYMBOL(elm_config);
+
+/**
+ * elm_configure_page_mode - Enable/Disable page mode
+ * @info:	elm info
+ * @index:	index number of syndrome fragment vector
+ * @enable:	enable/disable flag for page mode
+ *
+ * Enable page mode for syndrome fragment index
+ */
+static void elm_configure_page_mode(struct elm_info *info, int index,
+		bool enable)
+{
+	u32 reg_val;
+
+	reg_val = elm_read_reg(info, ELM_PAGE_CTRL);
+	if (enable)
+		reg_val |= BIT(index);	/* enable page mode */
+	else
+		reg_val &= ~BIT(index);	/* disable page mode */
+
+	elm_write_reg(info, ELM_PAGE_CTRL, reg_val);
+}
+
+/**
+ * elm_load_syndrome - Load ELM syndrome reg
+ * @info:	elm info
+ * @err_vec:	elm error vectors
+ * @ecc:	buffer with calculated ecc
+ *
+ * Load syndrome fragment registers with calculated ecc in reverse order.
+ */
+static void elm_load_syndrome(struct elm_info *info,
+		struct elm_errorvec *err_vec, u8 *ecc)
+{
+	int i, offset;
+	u32 val;
+
+	for (i = 0; i < ERROR_VECTOR_MAX; i++) {
+
+		/* Check error reported */
+		if (err_vec[i].error_reported) {
+			elm_configure_page_mode(info, i, true);
+			offset = ELM_SYNDROME_FRAGMENT_0 +
+				SYNDROME_FRAGMENT_REG_SIZE * i;
+
+			/* BCH8 */
+			if (info->bch_type) {
+
+				/* syndrome fragment 0 = ecc[9-12B] */
+				val = cpu_to_be32(*(u32 *) &ecc[9]);
+				elm_write_reg(info, offset, val);
+
+				/* syndrome fragment 1 = ecc[5-8B] */
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[5]);
+				elm_write_reg(info, offset, val);
+
+				/* syndrome fragment 2 = ecc[1-4B] */
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[1]);
+				elm_write_reg(info, offset, val);
+
+				/* syndrome fragment 3 = ecc[0B] */
+				offset += 4;
+				val = ecc[0];
+				elm_write_reg(info, offset, val);
+			} else {
+				/* syndrome fragment 0 = ecc[20-52b] bits */
+				val = (cpu_to_be32(*(u32 *) &ecc[3]) >> 4) |
+					((ecc[2] & 0xf) << 28);
+				elm_write_reg(info, offset, val);
+
+				/* syndrome fragment 1 = ecc[0-20b] bits */
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[0]) >> 12;
+				elm_write_reg(info, offset, val);
+			}
+		}
+
+		/* Update ecc pointer with ecc byte size */
+		ecc += info->bch_type ? BCH8_SIZE : BCH4_SIZE;
+	}
+}
+
+/**
+ * elm_start_processing - start elm syndrome processing
+ * @info:	elm info
+ * @err_vec:	elm error vectors
+ *
+ * Set syndrome valid bit for syndrome fragment registers for which
+ * elm syndrome fragment registers are loaded. This enables elm module
+ * to start processing syndrome vectors.
+ */
+static void elm_start_processing(struct elm_info *info,
+		struct elm_errorvec *err_vec)
+{
+	int i, offset;
+	u32 reg_val;
+
+	/*
+	 * Set syndrome vector valid, so that ELM module
+	 * will process it for vectors error is reported
+	 */
+	for (i = 0; i < ERROR_VECTOR_MAX; i++) {
+		if (err_vec[i].error_reported) {
+			offset = ELM_SYNDROME_FRAGMENT_6 +
+				SYNDROME_FRAGMENT_REG_SIZE * i;
+			reg_val = elm_read_reg(info, offset);
+			reg_val |= ELM_SYNDROME_VALID;
+			elm_write_reg(info, offset, reg_val);
+		}
+	}
+}
+
+/**
+ * elm_error_correction - locate correctable error position
+ * @info:	elm info
+ * @err_vec:	elm error vectors
+ *
+ * On completion of processing by elm module, error location status
+ * register updated with correctable/uncorrectable error information.
+ * In case of correctable errors, number of errors located from
+ * elm location status register & read the positions from
+ * elm error location register.
+ */
+static void elm_error_correction(struct elm_info *info,
+		struct elm_errorvec *err_vec)
+{
+	int i, j, errors = 0;
+	int offset;
+	u32 reg_val;
+
+	for (i = 0; i < ERROR_VECTOR_MAX; i++) {
+
+		/* Check error reported */
+		if (err_vec[i].error_reported) {
+			offset = ELM_LOCATION_STATUS + ERROR_LOCATION_SIZE * i;
+			reg_val = elm_read_reg(info, offset);
+
+			/* Check correctable error or not */
+			if (reg_val & ECC_CORRECTABLE_MASK) {
+				offset = ELM_ERROR_LOCATION_0 +
+					ERROR_LOCATION_SIZE * i;
+
+				/* Read count of correctable errors */
+				err_vec[i].error_count = reg_val &
+					ECC_NB_ERRORS_MASK;
+
+				/* Update the error locations in error vector */
+				for (j = 0; j < err_vec[i].error_count; j++) {
+
+					reg_val = elm_read_reg(info, offset);
+					err_vec[i].error_loc[j] = reg_val &
+						ECC_ERROR_LOCATION_MASK;
+
+					/* Update error location register */
+					offset += 4;
+				}
+
+				errors += err_vec[i].error_count;
+			} else {
+				err_vec[i].error_uncorrectable = true;
+			}
+
+			/* Clearing interrupts for processed error vectors */
+			elm_write_reg(info, ELM_IRQSTATUS, BIT(i));
+
+			/* Disable page mode */
+			elm_configure_page_mode(info, i, false);
+		}
+	}
+}
+
+/**
+ * elm_decode_bch_error_page - Locate error position
+ * @dev:	device pointer
+ * @ecc_calc:	calculated ECC bytes from GPMC
+ * @err_vec:	elm error vectors
+ *
+ * Called with one or more error reported vectors & vectors with
+ * error reported is updated in err_vec[].error_reported
+ */
+void elm_decode_bch_error_page(struct device *dev, u8 *ecc_calc,
+		struct elm_errorvec *err_vec)
+{
+	struct elm_info *info = dev_get_drvdata(dev);
+	u32 reg_val;
+
+	/* Enable page mode interrupt */
+	reg_val = elm_read_reg(info, ELM_IRQSTATUS);
+	elm_write_reg(info, ELM_IRQSTATUS, reg_val & INTR_STATUS_PAGE_VALID);
+	elm_write_reg(info, ELM_IRQENABLE, INTR_EN_PAGE_MASK);
+
+	/* Load valid ecc byte to syndrome fragment register */
+	elm_load_syndrome(info, err_vec, ecc_calc);
+
+	/* Enable syndrome processing for which syndrome fragment is updated */
+	elm_start_processing(info, err_vec);
+
+	/* Wait for ELM module to finish locating error correction */
+	wait_for_completion(&info->elm_completion);
+
+	/* Disable page mode interrupt */
+	reg_val = elm_read_reg(info, ELM_IRQENABLE);
+	elm_write_reg(info, ELM_IRQENABLE, reg_val & ~INTR_EN_PAGE_MASK);
+	elm_error_correction(info, err_vec);
+}
+EXPORT_SYMBOL(elm_decode_bch_error_page);
+
+static irqreturn_t elm_isr(int this_irq, void *dev_id)
+{
+	u32 reg_val;
+	struct elm_info *info = dev_id;
+
+	reg_val = elm_read_reg(info, ELM_IRQSTATUS);
+
+	/* All error vectors processed */
+	if (reg_val & INTR_STATUS_PAGE_VALID) {
+		elm_write_reg(info, ELM_IRQSTATUS,
+				reg_val & INTR_STATUS_PAGE_VALID);
+		complete(&info->elm_completion);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static int elm_probe(struct platform_device *pdev)
+{
+	int ret = 0;
+	struct resource *res, *irq;
+	struct elm_info *info;
+
+	info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		dev_err(&pdev->dev, "failed to allocate memory\n");
+		return -ENOMEM;
+	}
+
+	info->dev = &pdev->dev;
+
+	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!irq) {
+		dev_err(&pdev->dev, "no irq resource defined\n");
+		return -ENODEV;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "no memory resource defined\n");
+		return -ENODEV;
+	}
+
+	info->elm_base = devm_request_and_ioremap(&pdev->dev, res);
+	if (!info->elm_base)
+		return -EADDRNOTAVAIL;
+
+	ret = devm_request_irq(&pdev->dev, irq->start, elm_isr, 0,
+			pdev->name, info);
+	if (ret) {
+		dev_err(&pdev->dev, "failure requesting irq %i\n", irq->start);
+		return ret;
+	}
+
+	pm_runtime_enable(&pdev->dev);
+	if (pm_runtime_get_sync(&pdev->dev)) {
+		ret = -EINVAL;
+		pm_runtime_disable(&pdev->dev);
+		dev_err(&pdev->dev, "can't enable clock\n");
+		return ret;
+	}
+
+	init_completion(&info->elm_completion);
+	INIT_LIST_HEAD(&info->list);
+	list_add(&info->list, &elm_devices);
+	platform_set_drvdata(pdev, info);
+	return ret;
+}
+
+static int elm_remove(struct platform_device *pdev)
+{
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	platform_set_drvdata(pdev, NULL);
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id elm_of_match[] = {
+	{ .compatible = "ti,am3352-elm" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, elm_of_match);
+#endif
+
+static struct platform_driver elm_driver = {
+	.driver	= {
+		.name	= "elm",
+		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(elm_of_match),
+	},
+	.probe	= elm_probe,
+	.remove	= elm_remove,
+};
+
+module_platform_driver(elm_driver);
+
+MODULE_DESCRIPTION("ELM driver for BCH error correction");
+MODULE_AUTHOR("Texas Instruments");
+MODULE_ALIAS("platform: elm");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 4eeeb2d7f6ea..5b6b0728be21 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -565,6 +565,96 @@ time_out:
 	return ret;
 }
 
+static int m25p80_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	struct m25p *flash = mtd_to_m25p(mtd);
+	uint32_t offset = ofs;
+	uint8_t status_old, status_new;
+	int res = 0;
+
+	mutex_lock(&flash->lock);
+	/* Wait until finished previous command */
+	if (wait_till_ready(flash)) {
+		res = 1;
+		goto err;
+	}
+
+	status_old = read_sr(flash);
+
+	if (offset < flash->mtd.size-(flash->mtd.size/2))
+		status_new = status_old | SR_BP2 | SR_BP1 | SR_BP0;
+	else if (offset < flash->mtd.size-(flash->mtd.size/4))
+		status_new = (status_old & ~SR_BP0) | SR_BP2 | SR_BP1;
+	else if (offset < flash->mtd.size-(flash->mtd.size/8))
+		status_new = (status_old & ~SR_BP1) | SR_BP2 | SR_BP0;
+	else if (offset < flash->mtd.size-(flash->mtd.size/16))
+		status_new = (status_old & ~(SR_BP0|SR_BP1)) | SR_BP2;
+	else if (offset < flash->mtd.size-(flash->mtd.size/32))
+		status_new = (status_old & ~SR_BP2) | SR_BP1 | SR_BP0;
+	else if (offset < flash->mtd.size-(flash->mtd.size/64))
+		status_new = (status_old & ~(SR_BP2|SR_BP0)) | SR_BP1;
+	else
+		status_new = (status_old & ~(SR_BP2|SR_BP1)) | SR_BP0;
+
+	/* Only modify protection if it will not unlock other areas */
+	if ((status_new&(SR_BP2|SR_BP1|SR_BP0)) >
+					(status_old&(SR_BP2|SR_BP1|SR_BP0))) {
+		write_enable(flash);
+		if (write_sr(flash, status_new) < 0) {
+			res = 1;
+			goto err;
+		}
+	}
+
+err:	mutex_unlock(&flash->lock);
+	return res;
+}
+
+static int m25p80_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	struct m25p *flash = mtd_to_m25p(mtd);
+	uint32_t offset = ofs;
+	uint8_t status_old, status_new;
+	int res = 0;
+
+	mutex_lock(&flash->lock);
+	/* Wait until finished previous command */
+	if (wait_till_ready(flash)) {
+		res = 1;
+		goto err;
+	}
+
+	status_old = read_sr(flash);
+
+	if (offset+len > flash->mtd.size-(flash->mtd.size/64))
+		status_new = status_old & ~(SR_BP2|SR_BP1|SR_BP0);
+	else if (offset+len > flash->mtd.size-(flash->mtd.size/32))
+		status_new = (status_old & ~(SR_BP2|SR_BP1)) | SR_BP0;
+	else if (offset+len > flash->mtd.size-(flash->mtd.size/16))
+		status_new = (status_old & ~(SR_BP2|SR_BP0)) | SR_BP1;
+	else if (offset+len > flash->mtd.size-(flash->mtd.size/8))
+		status_new = (status_old & ~SR_BP2) | SR_BP1 | SR_BP0;
+	else if (offset+len > flash->mtd.size-(flash->mtd.size/4))
+		status_new = (status_old & ~(SR_BP0|SR_BP1)) | SR_BP2;
+	else if (offset+len > flash->mtd.size-(flash->mtd.size/2))
+		status_new = (status_old & ~SR_BP1) | SR_BP2 | SR_BP0;
+	else
+		status_new = (status_old & ~SR_BP0) | SR_BP2 | SR_BP1;
+
+	/* Only modify protection if it will not lock other areas */
+	if ((status_new&(SR_BP2|SR_BP1|SR_BP0)) <
+					(status_old&(SR_BP2|SR_BP1|SR_BP0))) {
+		write_enable(flash);
+		if (write_sr(flash, status_new) < 0) {
+			res = 1;
+			goto err;
+		}
+	}
+
+err:	mutex_unlock(&flash->lock);
+	return res;
+}
+
 /****************************************************************************/
 
 /*
@@ -642,6 +732,10 @@ static const struct spi_device_id m25p_ids[] = {
 	/* Everspin */
 	{ "mr25h256", CAT25_INFO(  32 * 1024, 1, 256, 2) },
 
+	/* GigaDevice */
+	{ "gd25q32", INFO(0xc84016, 0, 64 * 1024,  64, SECT_4K) },
+	{ "gd25q64", INFO(0xc84017, 0, 64 * 1024, 128, SECT_4K) },
+
 	/* Intel/Numonyx -- xxxs33b */
 	{ "160s33b",  INFO(0x898911, 0, 64 * 1024,  32, 0) },
 	{ "320s33b",  INFO(0x898912, 0, 64 * 1024,  64, 0) },
@@ -899,6 +993,12 @@ static int m25p_probe(struct spi_device *spi)
 	flash->mtd._erase = m25p80_erase;
 	flash->mtd._read = m25p80_read;
 
+	/* flash protection support for STmicro chips */
+	if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ST) {
+		flash->mtd._lock = m25p80_lock;
+		flash->mtd._unlock = m25p80_unlock;
+	}
+
 	/* sst flash chips use AAI word program */
 	if (JEDEC_MFR(info->jedec_id) == CFI_MFR_SST)
 		flash->mtd._write = sst_write;
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 62ba82c396c2..3ed17c4d4358 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -429,7 +429,7 @@ config MTD_GPIO_ADDR
 
 config MTD_UCLINUX
 	bool "Generic uClinux RAM/ROM filesystem support"
-	depends on MTD_RAM=y && (!MMU || COLDFIRE)
+	depends on (MTD_RAM=y || MTD_ROM=y) && (!MMU || COLDFIRE)
 	help
 	  Map driver to support image based filesystems for uClinux.
 
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index 7901d72c9242..363939dfad05 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -68,9 +68,6 @@ static int of_flash_remove(struct platform_device *dev)
 			kfree(info->list[i].res);
 		}
 	}
-
-	kfree(info);
-
 	return 0;
 }
 
@@ -199,8 +196,9 @@ static int of_flash_probe(struct platform_device *dev)
 	map_indirect = of_property_read_bool(dp, "no-unaligned-direct-access");
 
 	err = -ENOMEM;
-	info = kzalloc(sizeof(struct of_flash) +
-		       sizeof(struct of_flash_list) * count, GFP_KERNEL);
+	info = devm_kzalloc(&dev->dev,
+			    sizeof(struct of_flash) +
+			    sizeof(struct of_flash_list) * count, GFP_KERNEL);
 	if (!info)
 		goto err_flash_remove;
 
@@ -241,6 +239,7 @@ static int of_flash_probe(struct platform_device *dev)
 		info->list[i].map.phys = res.start;
 		info->list[i].map.size = res_size;
 		info->list[i].map.bankwidth = be32_to_cpup(width);
+		info->list[i].map.device_node = dp;
 
 		err = -ENOMEM;
 		info->list[i].map.virt = ioremap(info->list[i].map.phys,
diff --git a/drivers/mtd/maps/uclinux.c b/drivers/mtd/maps/uclinux.c
index 299bf88a6f41..c1af83db5202 100644
--- a/drivers/mtd/maps/uclinux.c
+++ b/drivers/mtd/maps/uclinux.c
@@ -23,12 +23,26 @@
 
 /****************************************************************************/
 
+#ifdef CONFIG_MTD_ROM
+#define MAP_NAME "rom"
+#else
+#define MAP_NAME "ram"
+#endif
+
+/*
+ * Blackfin uses uclinux_ram_map during startup, so it must not be static.
+ * Provide a dummy declaration to make sparse happy.
+ */
+extern struct map_info uclinux_ram_map;
+
 struct map_info uclinux_ram_map = {
-	.name = "RAM",
-	.phys = (unsigned long)__bss_stop,
+	.name = MAP_NAME,
 	.size = 0,
 };
 
+static unsigned long physaddr = -1;
+module_param(physaddr, ulong, S_IRUGO);
+
 static struct mtd_info *uclinux_ram_mtdinfo;
 
 /****************************************************************************/
@@ -60,11 +74,17 @@ static int __init uclinux_mtd_init(void)
 	struct map_info *mapp;
 
 	mapp = &uclinux_ram_map;
+
+	if (physaddr == -1)
+		mapp->phys = (resource_size_t)__bss_stop;
+	else
+		mapp->phys = physaddr;
+
 	if (!mapp->size)
 		mapp->size = PAGE_ALIGN(ntohl(*((unsigned long *)(mapp->phys + 8))));
 	mapp->bankwidth = 4;
 
-	printk("uclinux[mtd]: RAM probe address=0x%x size=0x%x\n",
+	printk("uclinux[mtd]: probe address=0x%x size=0x%x\n",
 	       	(int) mapp->phys, (int) mapp->size);
 
 	/*
@@ -82,7 +102,7 @@ static int __init uclinux_mtd_init(void)
 
 	simple_map_init(mapp);
 
-	mtd = do_map_probe("map_ram", mapp);
+	mtd = do_map_probe("map_" MAP_NAME, mapp);
 	if (!mtd) {
 		printk("uclinux[mtd]: failed to find a mapping?\n");
 		return(-ENXIO);
@@ -118,6 +138,6 @@ module_exit(uclinux_mtd_cleanup);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Greg Ungerer <gerg@snapgear.com>");
-MODULE_DESCRIPTION("Generic RAM based MTD for uClinux");
+MODULE_DESCRIPTION("Generic MTD for uClinux");
 
 /****************************************************************************/
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index ec794a72975d..61d5f56473e1 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -349,13 +349,8 @@ int add_mtd_device(struct mtd_info *mtd)
 	BUG_ON(mtd->writesize == 0);
 	mutex_lock(&mtd_table_mutex);
 
-	do {
-		if (!idr_pre_get(&mtd_idr, GFP_KERNEL))
-			goto fail_locked;
-		error = idr_get_new(&mtd_idr, mtd, &i);
-	} while (error == -EAGAIN);
-
-	if (error)
+	i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL);
+	if (i < 0)
 		goto fail_locked;
 
 	mtd->index = i;
diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index c516a9408087..ffcbcca2fd2d 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -101,6 +101,8 @@ struct atmel_nand_host {
 	u8			pmecc_corr_cap;
 	u16			pmecc_sector_size;
 	u32			pmecc_lookup_table_offset;
+	u32			pmecc_lookup_table_offset_512;
+	u32			pmecc_lookup_table_offset_1024;
 
 	int			pmecc_bytes_per_sector;
 	int			pmecc_sector_number;
@@ -908,6 +910,84 @@ static void atmel_pmecc_core_init(struct mtd_info *mtd)
 	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_ENABLE);
 }
 
+/*
+ * Get ECC requirement in ONFI parameters, returns -1 if ONFI
+ * parameters is not supported.
+ * return 0 if success to get the ECC requirement.
+ */
+static int get_onfi_ecc_param(struct nand_chip *chip,
+		int *ecc_bits, int *sector_size)
+{
+	*ecc_bits = *sector_size = 0;
+
+	if (chip->onfi_params.ecc_bits == 0xff)
+		/* TODO: the sector_size and ecc_bits need to be find in
+		 * extended ecc parameter, currently we don't support it.
+		 */
+		return -1;
+
+	*ecc_bits = chip->onfi_params.ecc_bits;
+
+	/* The default sector size (ecc codeword size) is 512 */
+	*sector_size = 512;
+
+	return 0;
+}
+
+/*
+ * Get ecc requirement from ONFI parameters ecc requirement.
+ * If pmecc-cap, pmecc-sector-size in DTS are not specified, this function
+ * will set them according to ONFI ecc requirement. Otherwise, use the
+ * value in DTS file.
+ * return 0 if success. otherwise return error code.
+ */
+static int pmecc_choose_ecc(struct atmel_nand_host *host,
+		int *cap, int *sector_size)
+{
+	/* Get ECC requirement from ONFI parameters */
+	*cap = *sector_size = 0;
+	if (host->nand_chip.onfi_version) {
+		if (!get_onfi_ecc_param(&host->nand_chip, cap, sector_size))
+			dev_info(host->dev, "ONFI params, minimum required ECC: %d bits in %d bytes\n",
+				*cap, *sector_size);
+		else
+			dev_info(host->dev, "NAND chip ECC reqirement is in Extended ONFI parameter, we don't support yet.\n");
+	} else {
+		dev_info(host->dev, "NAND chip is not ONFI compliant, assume ecc_bits is 2 in 512 bytes");
+	}
+	if (*cap == 0 && *sector_size == 0) {
+		*cap = 2;
+		*sector_size = 512;
+	}
+
+	/* If dts file doesn't specify then use the one in ONFI parameters */
+	if (host->pmecc_corr_cap == 0) {
+		/* use the most fitable ecc bits (the near bigger one ) */
+		if (*cap <= 2)
+			host->pmecc_corr_cap = 2;
+		else if (*cap <= 4)
+			host->pmecc_corr_cap = 4;
+		else if (*cap < 8)
+			host->pmecc_corr_cap = 8;
+		else if (*cap < 12)
+			host->pmecc_corr_cap = 12;
+		else if (*cap < 24)
+			host->pmecc_corr_cap = 24;
+		else
+			return -EINVAL;
+	}
+	if (host->pmecc_sector_size == 0) {
+		/* use the most fitable sector size (the near smaller one ) */
+		if (*sector_size >= 1024)
+			host->pmecc_sector_size = 1024;
+		else if (*sector_size >= 512)
+			host->pmecc_sector_size = 512;
+		else
+			return -EINVAL;
+	}
+	return 0;
+}
+
 static int __init atmel_pmecc_nand_init_params(struct platform_device *pdev,
 					 struct atmel_nand_host *host)
 {
@@ -916,8 +996,22 @@ static int __init atmel_pmecc_nand_init_params(struct platform_device *pdev,
 	struct resource *regs, *regs_pmerr, *regs_rom;
 	int cap, sector_size, err_no;
 
+	err_no = pmecc_choose_ecc(host, &cap, &sector_size);
+	if (err_no) {
+		dev_err(host->dev, "The NAND flash's ECC requirement are not support!");
+		return err_no;
+	}
+
+	if (cap != host->pmecc_corr_cap ||
+			sector_size != host->pmecc_sector_size)
+		dev_info(host->dev, "WARNING: Be Caution! Using different PMECC parameters from Nand ONFI ECC reqirement.\n");
+
 	cap = host->pmecc_corr_cap;
 	sector_size = host->pmecc_sector_size;
+	host->pmecc_lookup_table_offset = (sector_size == 512) ?
+			host->pmecc_lookup_table_offset_512 :
+			host->pmecc_lookup_table_offset_1024;
+
 	dev_info(host->dev, "Initialize PMECC params, cap: %d, sector: %d\n",
 		 cap, sector_size);
 
@@ -1215,7 +1309,7 @@ static void atmel_nand_hwctl(struct mtd_info *mtd, int mode)
 static int atmel_of_init_port(struct atmel_nand_host *host,
 			      struct device_node *np)
 {
-	u32 val, table_offset;
+	u32 val;
 	u32 offset[2];
 	int ecc_mode;
 	struct atmel_nand_data *board = &host->board;
@@ -1259,42 +1353,41 @@ static int atmel_of_init_port(struct atmel_nand_host *host,
 
 	/* use PMECC, get correction capability, sector size and lookup
 	 * table offset.
+	 * If correction bits and sector size are not specified, then find
+	 * them from NAND ONFI parameters.
 	 */
-	if (of_property_read_u32(np, "atmel,pmecc-cap", &val) != 0) {
-		dev_err(host->dev, "Cannot decide PMECC Capability\n");
-		return -EINVAL;
-	} else if ((val != 2) && (val != 4) && (val != 8) && (val != 12) &&
-	    (val != 24)) {
-		dev_err(host->dev,
-			"Unsupported PMECC correction capability: %d; should be 2, 4, 8, 12 or 24\n",
-			val);
-		return -EINVAL;
+	if (of_property_read_u32(np, "atmel,pmecc-cap", &val) == 0) {
+		if ((val != 2) && (val != 4) && (val != 8) && (val != 12) &&
+				(val != 24)) {
+			dev_err(host->dev,
+				"Unsupported PMECC correction capability: %d; should be 2, 4, 8, 12 or 24\n",
+				val);
+			return -EINVAL;
+		}
+		host->pmecc_corr_cap = (u8)val;
 	}
-	host->pmecc_corr_cap = (u8)val;
 
-	if (of_property_read_u32(np, "atmel,pmecc-sector-size", &val) != 0) {
-		dev_err(host->dev, "Cannot decide PMECC Sector Size\n");
-		return -EINVAL;
-	} else if ((val != 512) && (val != 1024)) {
-		dev_err(host->dev,
-			"Unsupported PMECC sector size: %d; should be 512 or 1024 bytes\n",
-			val);
-		return -EINVAL;
+	if (of_property_read_u32(np, "atmel,pmecc-sector-size", &val) == 0) {
+		if ((val != 512) && (val != 1024)) {
+			dev_err(host->dev,
+				"Unsupported PMECC sector size: %d; should be 512 or 1024 bytes\n",
+				val);
+			return -EINVAL;
+		}
+		host->pmecc_sector_size = (u16)val;
 	}
-	host->pmecc_sector_size = (u16)val;
 
 	if (of_property_read_u32_array(np, "atmel,pmecc-lookup-table-offset",
 			offset, 2) != 0) {
 		dev_err(host->dev, "Cannot get PMECC lookup table offset\n");
 		return -EINVAL;
 	}
-	table_offset = host->pmecc_sector_size == 512 ? offset[0] : offset[1];
-
-	if (!table_offset) {
+	if (!offset[0] && !offset[1]) {
 		dev_err(host->dev, "Invalid PMECC lookup table offset\n");
 		return -EINVAL;
 	}
-	host->pmecc_lookup_table_offset = table_offset;
+	host->pmecc_lookup_table_offset_512 = offset[0];
+	host->pmecc_lookup_table_offset_1024 = offset[1];
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/bcm47xxnflash/bcm47xxnflash.h b/drivers/mtd/nand/bcm47xxnflash/bcm47xxnflash.h
index 0bdb2ce4da75..c005a62330b1 100644
--- a/drivers/mtd/nand/bcm47xxnflash/bcm47xxnflash.h
+++ b/drivers/mtd/nand/bcm47xxnflash/bcm47xxnflash.h
@@ -1,6 +1,10 @@
 #ifndef __BCM47XXNFLASH_H
 #define __BCM47XXNFLASH_H
 
+#ifndef pr_fmt
+#define pr_fmt(fmt)		KBUILD_MODNAME ": " fmt
+#endif
+
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
 
diff --git a/drivers/mtd/nand/bcm47xxnflash/main.c b/drivers/mtd/nand/bcm47xxnflash/main.c
index 8363a9a5fa3f..7bae569fdc79 100644
--- a/drivers/mtd/nand/bcm47xxnflash/main.c
+++ b/drivers/mtd/nand/bcm47xxnflash/main.c
@@ -9,14 +9,14 @@
  *
  */
 
+#include "bcm47xxnflash.h"
+
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/bcma/bcma.h>
 
-#include "bcm47xxnflash.h"
-
 MODULE_DESCRIPTION("NAND flash driver for BCMA bus");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Rafał Miłecki");
@@ -77,6 +77,7 @@ static int bcm47xxnflash_remove(struct platform_device *pdev)
 }
 
 static struct platform_driver bcm47xxnflash_driver = {
+	.probe	= bcm47xxnflash_probe,
 	.remove = bcm47xxnflash_remove,
 	.driver = {
 		.name = "bcma_nflash",
@@ -88,13 +89,10 @@ static int __init bcm47xxnflash_init(void)
 {
 	int err;
 
-	/*
-	 * Platform device "bcma_nflash" exists on SoCs and is registered very
-	 * early, it won't be added during runtime (use platform_driver_probe).
-	 */
-	err = platform_driver_probe(&bcm47xxnflash_driver, bcm47xxnflash_probe);
+	err = platform_driver_register(&bcm47xxnflash_driver);
 	if (err)
-		pr_err("Failed to register serial flash driver: %d\n", err);
+		pr_err("Failed to register bcm47xx nand flash driver: %d\n",
+		       err);
 
 	return err;
 }
diff --git a/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c
index 595de4012e71..b2ab373c9eef 100644
--- a/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c
@@ -9,13 +9,13 @@
  *
  */
 
+#include "bcm47xxnflash.h"
+
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/bcma/bcma.h>
 
-#include "bcm47xxnflash.h"
-
 /* Broadcom uses 1'000'000 but it seems to be too many. Tests on WNDR4500 has
  * shown ~1000 retries as maxiumum. */
 #define NFLASH_READY_RETRIES		10000
diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index feae55c7b880..94e17af8e450 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c
@@ -606,7 +606,7 @@ static int __init nand_davinci_probe(struct platform_device *pdev)
 	if (pdev->id < 0 || pdev->id > 3)
 		return -ENODEV;
 
-	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
 	if (!info) {
 		dev_err(&pdev->dev, "unable to allocate memory\n");
 		ret = -ENOMEM;
@@ -623,11 +623,11 @@ static int __init nand_davinci_probe(struct platform_device *pdev)
 		goto err_nomem;
 	}
 
-	vaddr = ioremap(res1->start, resource_size(res1));
-	base = ioremap(res2->start, resource_size(res2));
+	vaddr = devm_request_and_ioremap(&pdev->dev, res1);
+	base = devm_request_and_ioremap(&pdev->dev, res2);
 	if (!vaddr || !base) {
 		dev_err(&pdev->dev, "ioremap failed\n");
-		ret = -EINVAL;
+		ret = -EADDRNOTAVAIL;
 		goto err_ioremap;
 	}
 
@@ -717,7 +717,7 @@ static int __init nand_davinci_probe(struct platform_device *pdev)
 	}
 	info->chip.ecc.mode = ecc_mode;
 
-	info->clk = clk_get(&pdev->dev, "aemif");
+	info->clk = devm_clk_get(&pdev->dev, "aemif");
 	if (IS_ERR(info->clk)) {
 		ret = PTR_ERR(info->clk);
 		dev_dbg(&pdev->dev, "unable to get AEMIF clock, err %d\n", ret);
@@ -845,8 +845,6 @@ err_timing:
 	clk_disable_unprepare(info->clk);
 
 err_clk_enable:
-	clk_put(info->clk);
-
 	spin_lock_irq(&davinci_nand_lock);
 	if (ecc_mode == NAND_ECC_HW_SYNDROME)
 		ecc4_busy = false;
@@ -855,13 +853,7 @@ err_clk_enable:
 err_ecc:
 err_clk:
 err_ioremap:
-	if (base)
-		iounmap(base);
-	if (vaddr)
-		iounmap(vaddr);
-
 err_nomem:
-	kfree(info);
 	return ret;
 }
 
@@ -874,15 +866,9 @@ static int __exit nand_davinci_remove(struct platform_device *pdev)
 		ecc4_busy = false;
 	spin_unlock_irq(&davinci_nand_lock);
 
-	iounmap(info->base);
-	iounmap(info->vaddr);
-
 	nand_release(&info->mtd);
 
 	clk_disable_unprepare(info->clk);
-	clk_put(info->clk);
-
-	kfree(info);
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
index ad6222627fed..f1f7f12ab501 100644
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c
@@ -176,8 +176,8 @@ static void set_addr(struct mtd_info *mtd, int column, int page_addr, int oob)
 
 	ifc_nand_ctrl->page = page_addr;
 	/* Program ROW0/COL0 */
-	out_be32(&ifc->ifc_nand.row0, page_addr);
-	out_be32(&ifc->ifc_nand.col0, (oob ? IFC_NAND_COL_MS : 0) | column);
+	iowrite32be(page_addr, &ifc->ifc_nand.row0);
+	iowrite32be((oob ? IFC_NAND_COL_MS : 0) | column, &ifc->ifc_nand.col0);
 
 	buf_num = page_addr & priv->bufnum_mask;
 
@@ -239,18 +239,19 @@ static void fsl_ifc_run_command(struct mtd_info *mtd)
 	int i;
 
 	/* set the chip select for NAND Transaction */
-	out_be32(&ifc->ifc_nand.nand_csel, priv->bank << IFC_NAND_CSEL_SHIFT);
+	iowrite32be(priv->bank << IFC_NAND_CSEL_SHIFT,
+		    &ifc->ifc_nand.nand_csel);
 
 	dev_vdbg(priv->dev,
 			"%s: fir0=%08x fcr0=%08x\n",
 			__func__,
-			in_be32(&ifc->ifc_nand.nand_fir0),
-			in_be32(&ifc->ifc_nand.nand_fcr0));
+			ioread32be(&ifc->ifc_nand.nand_fir0),
+			ioread32be(&ifc->ifc_nand.nand_fcr0));
 
 	ctrl->nand_stat = 0;
 
 	/* start read/write seq */
-	out_be32(&ifc->ifc_nand.nandseq_strt, IFC_NAND_SEQ_STRT_FIR_STRT);
+	iowrite32be(IFC_NAND_SEQ_STRT_FIR_STRT, &ifc->ifc_nand.nandseq_strt);
 
 	/* wait for command complete flag or timeout */
 	wait_event_timeout(ctrl->nand_wait, ctrl->nand_stat,
@@ -273,7 +274,7 @@ static void fsl_ifc_run_command(struct mtd_info *mtd)
 		int sector_end = sector + chip->ecc.steps - 1;
 
 		for (i = sector / 4; i <= sector_end / 4; i++)
-			eccstat[i] = in_be32(&ifc->ifc_nand.nand_eccstat[i]);
+			eccstat[i] = ioread32be(&ifc->ifc_nand.nand_eccstat[i]);
 
 		for (i = sector; i <= sector_end; i++) {
 			errors = check_read_ecc(mtd, ctrl, eccstat, i);
@@ -313,31 +314,33 @@ static void fsl_ifc_do_read(struct nand_chip *chip,
 
 	/* Program FIR/IFC_NAND_FCR0 for Small/Large page */
 	if (mtd->writesize > 512) {
-		out_be32(&ifc->ifc_nand.nand_fir0,
-			 (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
-			 (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) |
-			 (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) |
-			 (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP3_SHIFT) |
-			 (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP4_SHIFT));
-		out_be32(&ifc->ifc_nand.nand_fir1, 0x0);
-
-		out_be32(&ifc->ifc_nand.nand_fcr0,
-			(NAND_CMD_READ0 << IFC_NAND_FCR0_CMD0_SHIFT) |
-			(NAND_CMD_READSTART << IFC_NAND_FCR0_CMD1_SHIFT));
+		iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
+			    (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) |
+			    (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) |
+			    (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP3_SHIFT) |
+			    (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP4_SHIFT),
+			    &ifc->ifc_nand.nand_fir0);
+		iowrite32be(0x0, &ifc->ifc_nand.nand_fir1);
+
+		iowrite32be((NAND_CMD_READ0 << IFC_NAND_FCR0_CMD0_SHIFT) |
+			    (NAND_CMD_READSTART << IFC_NAND_FCR0_CMD1_SHIFT),
+			    &ifc->ifc_nand.nand_fcr0);
 	} else {
-		out_be32(&ifc->ifc_nand.nand_fir0,
-			 (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
-			 (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) |
-			 (IFC_FIR_OP_RA0  << IFC_NAND_FIR0_OP2_SHIFT) |
-			 (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP3_SHIFT));
-		out_be32(&ifc->ifc_nand.nand_fir1, 0x0);
+		iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
+			    (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) |
+			    (IFC_FIR_OP_RA0  << IFC_NAND_FIR0_OP2_SHIFT) |
+			    (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP3_SHIFT),
+			    &ifc->ifc_nand.nand_fir0);
+		iowrite32be(0x0, &ifc->ifc_nand.nand_fir1);
 
 		if (oob)
-			out_be32(&ifc->ifc_nand.nand_fcr0,
-				 NAND_CMD_READOOB << IFC_NAND_FCR0_CMD0_SHIFT);
+			iowrite32be(NAND_CMD_READOOB <<
+				    IFC_NAND_FCR0_CMD0_SHIFT,
+				    &ifc->ifc_nand.nand_fcr0);
 		else
-			out_be32(&ifc->ifc_nand.nand_fcr0,
-				NAND_CMD_READ0 << IFC_NAND_FCR0_CMD0_SHIFT);
+			iowrite32be(NAND_CMD_READ0 <<
+				    IFC_NAND_FCR0_CMD0_SHIFT,
+				    &ifc->ifc_nand.nand_fcr0);
 	}
 }
 
@@ -357,7 +360,7 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 	switch (command) {
 	/* READ0 read the entire buffer to use hardware ECC. */
 	case NAND_CMD_READ0:
-		out_be32(&ifc->ifc_nand.nand_fbcr, 0);
+		iowrite32be(0, &ifc->ifc_nand.nand_fbcr);
 		set_addr(mtd, 0, page_addr, 0);
 
 		ifc_nand_ctrl->read_bytes = mtd->writesize + mtd->oobsize;
@@ -372,7 +375,7 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 
 	/* READOOB reads only the OOB because no ECC is performed. */
 	case NAND_CMD_READOOB:
-		out_be32(&ifc->ifc_nand.nand_fbcr, mtd->oobsize - column);
+		iowrite32be(mtd->oobsize - column, &ifc->ifc_nand.nand_fbcr);
 		set_addr(mtd, column, page_addr, 1);
 
 		ifc_nand_ctrl->read_bytes = mtd->writesize + mtd->oobsize;
@@ -388,19 +391,19 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 		if (command == NAND_CMD_PARAM)
 			timing = IFC_FIR_OP_RBCD;
 
-		out_be32(&ifc->ifc_nand.nand_fir0,
-				(IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
-				(IFC_FIR_OP_UA  << IFC_NAND_FIR0_OP1_SHIFT) |
-				(timing << IFC_NAND_FIR0_OP2_SHIFT));
-		out_be32(&ifc->ifc_nand.nand_fcr0,
-				command << IFC_NAND_FCR0_CMD0_SHIFT);
-		out_be32(&ifc->ifc_nand.row3, column);
+		iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
+			    (IFC_FIR_OP_UA  << IFC_NAND_FIR0_OP1_SHIFT) |
+			    (timing << IFC_NAND_FIR0_OP2_SHIFT),
+			    &ifc->ifc_nand.nand_fir0);
+		iowrite32be(command << IFC_NAND_FCR0_CMD0_SHIFT,
+			    &ifc->ifc_nand.nand_fcr0);
+		iowrite32be(column, &ifc->ifc_nand.row3);
 
 		/*
 		 * although currently it's 8 bytes for READID, we always read
 		 * the maximum 256 bytes(for PARAM)
 		 */
-		out_be32(&ifc->ifc_nand.nand_fbcr, 256);
+		iowrite32be(256, &ifc->ifc_nand.nand_fbcr);
 		ifc_nand_ctrl->read_bytes = 256;
 
 		set_addr(mtd, 0, 0, 0);
@@ -415,16 +418,16 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 
 	/* ERASE2 uses the block and page address from ERASE1 */
 	case NAND_CMD_ERASE2:
-		out_be32(&ifc->ifc_nand.nand_fir0,
-			 (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
-			 (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP1_SHIFT) |
-			 (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP2_SHIFT));
+		iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
+			    (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP1_SHIFT) |
+			    (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP2_SHIFT),
+			    &ifc->ifc_nand.nand_fir0);
 
-		out_be32(&ifc->ifc_nand.nand_fcr0,
-			 (NAND_CMD_ERASE1 << IFC_NAND_FCR0_CMD0_SHIFT) |
-			 (NAND_CMD_ERASE2 << IFC_NAND_FCR0_CMD1_SHIFT));
+		iowrite32be((NAND_CMD_ERASE1 << IFC_NAND_FCR0_CMD0_SHIFT) |
+			    (NAND_CMD_ERASE2 << IFC_NAND_FCR0_CMD1_SHIFT),
+			    &ifc->ifc_nand.nand_fcr0);
 
-		out_be32(&ifc->ifc_nand.nand_fbcr, 0);
+		iowrite32be(0, &ifc->ifc_nand.nand_fbcr);
 		ifc_nand_ctrl->read_bytes = 0;
 		fsl_ifc_run_command(mtd);
 		return;
@@ -440,26 +443,28 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 				(NAND_CMD_SEQIN << IFC_NAND_FCR0_CMD0_SHIFT) |
 				(NAND_CMD_PAGEPROG << IFC_NAND_FCR0_CMD1_SHIFT);
 
-			out_be32(&ifc->ifc_nand.nand_fir0,
-				 (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
-				 (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) |
-				 (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) |
-				 (IFC_FIR_OP_WBCD  << IFC_NAND_FIR0_OP3_SHIFT) |
-				 (IFC_FIR_OP_CW1 << IFC_NAND_FIR0_OP4_SHIFT));
+			iowrite32be(
+				(IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
+				(IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) |
+				(IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) |
+				(IFC_FIR_OP_WBCD << IFC_NAND_FIR0_OP3_SHIFT) |
+				(IFC_FIR_OP_CW1 << IFC_NAND_FIR0_OP4_SHIFT),
+				&ifc->ifc_nand.nand_fir0);
 		} else {
 			nand_fcr0 = ((NAND_CMD_PAGEPROG <<
 					IFC_NAND_FCR0_CMD1_SHIFT) |
 				    (NAND_CMD_SEQIN <<
 					IFC_NAND_FCR0_CMD2_SHIFT));
 
-			out_be32(&ifc->ifc_nand.nand_fir0,
-				 (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
-				 (IFC_FIR_OP_CMD2 << IFC_NAND_FIR0_OP1_SHIFT) |
-				 (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP2_SHIFT) |
-				 (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP3_SHIFT) |
-				 (IFC_FIR_OP_WBCD << IFC_NAND_FIR0_OP4_SHIFT));
-			out_be32(&ifc->ifc_nand.nand_fir1,
-				 (IFC_FIR_OP_CW1 << IFC_NAND_FIR1_OP5_SHIFT));
+			iowrite32be(
+				(IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
+				(IFC_FIR_OP_CMD2 << IFC_NAND_FIR0_OP1_SHIFT) |
+				(IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP2_SHIFT) |
+				(IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP3_SHIFT) |
+				(IFC_FIR_OP_WBCD << IFC_NAND_FIR0_OP4_SHIFT),
+				&ifc->ifc_nand.nand_fir0);
+			iowrite32be(IFC_FIR_OP_CW1 << IFC_NAND_FIR1_OP5_SHIFT,
+				    &ifc->ifc_nand.nand_fir1);
 
 			if (column >= mtd->writesize)
 				nand_fcr0 |=
@@ -474,7 +479,7 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 			column -= mtd->writesize;
 			ifc_nand_ctrl->oob = 1;
 		}
-		out_be32(&ifc->ifc_nand.nand_fcr0, nand_fcr0);
+		iowrite32be(nand_fcr0, &ifc->ifc_nand.nand_fcr0);
 		set_addr(mtd, column, page_addr, ifc_nand_ctrl->oob);
 		return;
 	}
@@ -482,10 +487,11 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 	/* PAGEPROG reuses all of the setup from SEQIN and adds the length */
 	case NAND_CMD_PAGEPROG: {
 		if (ifc_nand_ctrl->oob) {
-			out_be32(&ifc->ifc_nand.nand_fbcr,
-				ifc_nand_ctrl->index - ifc_nand_ctrl->column);
+			iowrite32be(ifc_nand_ctrl->index -
+				    ifc_nand_ctrl->column,
+				    &ifc->ifc_nand.nand_fbcr);
 		} else {
-			out_be32(&ifc->ifc_nand.nand_fbcr, 0);
+			iowrite32be(0, &ifc->ifc_nand.nand_fbcr);
 		}
 
 		fsl_ifc_run_command(mtd);
@@ -493,12 +499,12 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 	}
 
 	case NAND_CMD_STATUS:
-		out_be32(&ifc->ifc_nand.nand_fir0,
-				(IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
-				(IFC_FIR_OP_RB << IFC_NAND_FIR0_OP1_SHIFT));
-		out_be32(&ifc->ifc_nand.nand_fcr0,
-				NAND_CMD_STATUS << IFC_NAND_FCR0_CMD0_SHIFT);
-		out_be32(&ifc->ifc_nand.nand_fbcr, 1);
+		iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
+			    (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP1_SHIFT),
+			    &ifc->ifc_nand.nand_fir0);
+		iowrite32be(NAND_CMD_STATUS << IFC_NAND_FCR0_CMD0_SHIFT,
+			    &ifc->ifc_nand.nand_fcr0);
+		iowrite32be(1, &ifc->ifc_nand.nand_fbcr);
 		set_addr(mtd, 0, 0, 0);
 		ifc_nand_ctrl->read_bytes = 1;
 
@@ -512,10 +518,10 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 		return;
 
 	case NAND_CMD_RESET:
-		out_be32(&ifc->ifc_nand.nand_fir0,
-				IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT);
-		out_be32(&ifc->ifc_nand.nand_fcr0,
-				NAND_CMD_RESET << IFC_NAND_FCR0_CMD0_SHIFT);
+		iowrite32be(IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT,
+			    &ifc->ifc_nand.nand_fir0);
+		iowrite32be(NAND_CMD_RESET << IFC_NAND_FCR0_CMD0_SHIFT,
+			    &ifc->ifc_nand.nand_fcr0);
 		fsl_ifc_run_command(mtd);
 		return;
 
@@ -639,18 +645,18 @@ static int fsl_ifc_wait(struct mtd_info *mtd, struct nand_chip *chip)
 	u32 nand_fsr;
 
 	/* Use READ_STATUS command, but wait for the device to be ready */
-	out_be32(&ifc->ifc_nand.nand_fir0,
-		 (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
-		 (IFC_FIR_OP_RDSTAT << IFC_NAND_FIR0_OP1_SHIFT));
-	out_be32(&ifc->ifc_nand.nand_fcr0, NAND_CMD_STATUS <<
-			IFC_NAND_FCR0_CMD0_SHIFT);
-	out_be32(&ifc->ifc_nand.nand_fbcr, 1);
+	iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
+		    (IFC_FIR_OP_RDSTAT << IFC_NAND_FIR0_OP1_SHIFT),
+		    &ifc->ifc_nand.nand_fir0);
+	iowrite32be(NAND_CMD_STATUS << IFC_NAND_FCR0_CMD0_SHIFT,
+		    &ifc->ifc_nand.nand_fcr0);
+	iowrite32be(1, &ifc->ifc_nand.nand_fbcr);
 	set_addr(mtd, 0, 0, 0);
 	ifc_nand_ctrl->read_bytes = 1;
 
 	fsl_ifc_run_command(mtd);
 
-	nand_fsr = in_be32(&ifc->ifc_nand.nand_fsr);
+	nand_fsr = ioread32be(&ifc->ifc_nand.nand_fsr);
 
 	/*
 	 * The chip always seems to report that it is
@@ -744,34 +750,34 @@ static void fsl_ifc_sram_init(struct fsl_ifc_mtd *priv)
 	uint32_t cs = priv->bank;
 
 	/* Save CSOR and CSOR_ext */
-	csor = in_be32(&ifc->csor_cs[cs].csor);
-	csor_ext = in_be32(&ifc->csor_cs[cs].csor_ext);
+	csor = ioread32be(&ifc->csor_cs[cs].csor);
+	csor_ext = ioread32be(&ifc->csor_cs[cs].csor_ext);
 
 	/* chage PageSize 8K and SpareSize 1K*/
 	csor_8k = (csor & ~(CSOR_NAND_PGS_MASK)) | 0x0018C000;
-	out_be32(&ifc->csor_cs[cs].csor, csor_8k);
-	out_be32(&ifc->csor_cs[cs].csor_ext, 0x0000400);
+	iowrite32be(csor_8k, &ifc->csor_cs[cs].csor);
+	iowrite32be(0x0000400, &ifc->csor_cs[cs].csor_ext);
 
 	/* READID */
-	out_be32(&ifc->ifc_nand.nand_fir0,
-			(IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
-			(IFC_FIR_OP_UA  << IFC_NAND_FIR0_OP1_SHIFT) |
-			(IFC_FIR_OP_RB << IFC_NAND_FIR0_OP2_SHIFT));
-	out_be32(&ifc->ifc_nand.nand_fcr0,
-			NAND_CMD_READID << IFC_NAND_FCR0_CMD0_SHIFT);
-	out_be32(&ifc->ifc_nand.row3, 0x0);
+	iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) |
+		    (IFC_FIR_OP_UA  << IFC_NAND_FIR0_OP1_SHIFT) |
+		    (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP2_SHIFT),
+		    &ifc->ifc_nand.nand_fir0);
+	iowrite32be(NAND_CMD_READID << IFC_NAND_FCR0_CMD0_SHIFT,
+		    &ifc->ifc_nand.nand_fcr0);
+	iowrite32be(0x0, &ifc->ifc_nand.row3);
 
-	out_be32(&ifc->ifc_nand.nand_fbcr, 0x0);
+	iowrite32be(0x0, &ifc->ifc_nand.nand_fbcr);
 
 	/* Program ROW0/COL0 */
-	out_be32(&ifc->ifc_nand.row0, 0x0);
-	out_be32(&ifc->ifc_nand.col0, 0x0);
+	iowrite32be(0x0, &ifc->ifc_nand.row0);
+	iowrite32be(0x0, &ifc->ifc_nand.col0);
 
 	/* set the chip select for NAND Transaction */
-	out_be32(&ifc->ifc_nand.nand_csel, cs << IFC_NAND_CSEL_SHIFT);
+	iowrite32be(cs << IFC_NAND_CSEL_SHIFT, &ifc->ifc_nand.nand_csel);
 
 	/* start read seq */
-	out_be32(&ifc->ifc_nand.nandseq_strt, IFC_NAND_SEQ_STRT_FIR_STRT);
+	iowrite32be(IFC_NAND_SEQ_STRT_FIR_STRT, &ifc->ifc_nand.nandseq_strt);
 
 	/* wait for command complete flag or timeout */
 	wait_event_timeout(ctrl->nand_wait, ctrl->nand_stat,
@@ -781,8 +787,8 @@ static void fsl_ifc_sram_init(struct fsl_ifc_mtd *priv)
 		printk(KERN_ERR "fsl-ifc: Failed to Initialise SRAM\n");
 
 	/* Restore CSOR and CSOR_ext */
-	out_be32(&ifc->csor_cs[cs].csor, csor);
-	out_be32(&ifc->csor_cs[cs].csor_ext, csor_ext);
+	iowrite32be(csor, &ifc->csor_cs[cs].csor);
+	iowrite32be(csor_ext, &ifc->csor_cs[cs].csor_ext);
 }
 
 static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
@@ -799,7 +805,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 
 	/* fill in nand_chip structure */
 	/* set up function call table */
-	if ((in_be32(&ifc->cspr_cs[priv->bank].cspr)) & CSPR_PORT_SIZE_16)
+	if ((ioread32be(&ifc->cspr_cs[priv->bank].cspr)) & CSPR_PORT_SIZE_16)
 		chip->read_byte = fsl_ifc_read_byte16;
 	else
 		chip->read_byte = fsl_ifc_read_byte;
@@ -813,13 +819,13 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 	chip->bbt_td = &bbt_main_descr;
 	chip->bbt_md = &bbt_mirror_descr;
 
-	out_be32(&ifc->ifc_nand.ncfgr, 0x0);
+	iowrite32be(0x0, &ifc->ifc_nand.ncfgr);
 
 	/* set up nand options */
 	chip->bbt_options = NAND_BBT_USE_FLASH;
 
 
-	if (in_be32(&ifc->cspr_cs[priv->bank].cspr) & CSPR_PORT_SIZE_16) {
+	if (ioread32be(&ifc->cspr_cs[priv->bank].cspr) & CSPR_PORT_SIZE_16) {
 		chip->read_byte = fsl_ifc_read_byte16;
 		chip->options |= NAND_BUSWIDTH_16;
 	} else {
@@ -832,7 +838,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 	chip->ecc.read_page = fsl_ifc_read_page;
 	chip->ecc.write_page = fsl_ifc_write_page;
 
-	csor = in_be32(&ifc->csor_cs[priv->bank].csor);
+	csor = ioread32be(&ifc->csor_cs[priv->bank].csor);
 
 	/* Hardware generates ECC per 512 Bytes */
 	chip->ecc.size = 512;
@@ -884,7 +890,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 		chip->ecc.mode = NAND_ECC_SOFT;
 	}
 
-	ver = in_be32(&ifc->ifc_rev);
+	ver = ioread32be(&ifc->ifc_rev);
 	if (ver == FSL_IFC_V1_1_0)
 		fsl_ifc_sram_init(priv);
 
@@ -910,7 +916,7 @@ static int fsl_ifc_chip_remove(struct fsl_ifc_mtd *priv)
 static int match_bank(struct fsl_ifc_regs __iomem *ifc, int bank,
 		      phys_addr_t addr)
 {
-	u32 cspr = in_be32(&ifc->cspr_cs[bank].cspr);
+	u32 cspr = ioread32be(&ifc->cspr_cs[bank].cspr);
 
 	if (!(cspr & CSPR_V))
 		return 0;
@@ -997,17 +1003,16 @@ static int fsl_ifc_nand_probe(struct platform_device *dev)
 
 	dev_set_drvdata(priv->dev, priv);
 
-	out_be32(&ifc->ifc_nand.nand_evter_en,
-			IFC_NAND_EVTER_EN_OPC_EN |
-			IFC_NAND_EVTER_EN_FTOER_EN |
-			IFC_NAND_EVTER_EN_WPER_EN);
+	iowrite32be(IFC_NAND_EVTER_EN_OPC_EN |
+		    IFC_NAND_EVTER_EN_FTOER_EN |
+		    IFC_NAND_EVTER_EN_WPER_EN,
+		    &ifc->ifc_nand.nand_evter_en);
 
 	/* enable NAND Machine Interrupts */
-	out_be32(&ifc->ifc_nand.nand_evter_intr_en,
-			IFC_NAND_EVTER_INTR_OPCIR_EN |
-			IFC_NAND_EVTER_INTR_FTOERIR_EN |
-			IFC_NAND_EVTER_INTR_WPERIR_EN);
-
+	iowrite32be(IFC_NAND_EVTER_INTR_OPCIR_EN |
+		    IFC_NAND_EVTER_INTR_FTOERIR_EN |
+		    IFC_NAND_EVTER_INTR_WPERIR_EN,
+		    &ifc->ifc_nand.nand_evter_intr_en);
 	priv->mtd.name = kasprintf(GFP_KERNEL, "%x.flash", (unsigned)res.start);
 	if (!priv->mtd.name) {
 		ret = -ENOMEM;
diff --git a/drivers/mtd/nand/gpmi-nand/bch-regs.h b/drivers/mtd/nand/gpmi-nand/bch-regs.h
index a0924515c396..588f5374047c 100644
--- a/drivers/mtd/nand/gpmi-nand/bch-regs.h
+++ b/drivers/mtd/nand/gpmi-nand/bch-regs.h
@@ -61,6 +61,16 @@
 			& BM_BCH_FLASH0LAYOUT0_ECC0)		\
 	)
 
+#define MX6Q_BP_BCH_FLASH0LAYOUT0_GF_13_14	10
+#define MX6Q_BM_BCH_FLASH0LAYOUT0_GF_13_14			\
+				(0x1 << MX6Q_BP_BCH_FLASH0LAYOUT0_GF_13_14)
+#define BF_BCH_FLASH0LAYOUT0_GF(v, x)				\
+	((GPMI_IS_MX6Q(x) && ((v) == 14))			\
+		? (((1) << MX6Q_BP_BCH_FLASH0LAYOUT0_GF_13_14)	\
+			& MX6Q_BM_BCH_FLASH0LAYOUT0_GF_13_14)	\
+		: 0						\
+	)
+
 #define BP_BCH_FLASH0LAYOUT0_DATA0_SIZE		0
 #define BM_BCH_FLASH0LAYOUT0_DATA0_SIZE		\
 			(0xfff << BP_BCH_FLASH0LAYOUT0_DATA0_SIZE)
@@ -93,6 +103,16 @@
 			& BM_BCH_FLASH0LAYOUT1_ECCN)		\
 	)
 
+#define MX6Q_BP_BCH_FLASH0LAYOUT1_GF_13_14	10
+#define MX6Q_BM_BCH_FLASH0LAYOUT1_GF_13_14			\
+				(0x1 << MX6Q_BP_BCH_FLASH0LAYOUT1_GF_13_14)
+#define BF_BCH_FLASH0LAYOUT1_GF(v, x)				\
+	((GPMI_IS_MX6Q(x) && ((v) == 14))			\
+		? (((1) << MX6Q_BP_BCH_FLASH0LAYOUT1_GF_13_14)	\
+			& MX6Q_BM_BCH_FLASH0LAYOUT1_GF_13_14)	\
+		: 0						\
+	)
+
 #define BP_BCH_FLASH0LAYOUT1_DATAN_SIZE		0
 #define BM_BCH_FLASH0LAYOUT1_DATAN_SIZE		\
 			(0xfff << BP_BCH_FLASH0LAYOUT1_DATAN_SIZE)
@@ -103,4 +123,6 @@
 		? (((v) >> 2) & MX6Q_BM_BCH_FLASH0LAYOUT1_DATAN_SIZE)	\
 		: ((v) & BM_BCH_FLASH0LAYOUT1_DATAN_SIZE)		\
 	)
+
+#define HW_BCH_VERSION				0x00000160
 #endif
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
index d84699c7968e..4f8857fa48a7 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
@@ -208,6 +208,11 @@ void gpmi_dump_info(struct gpmi_nand_data *this)
 	}
 
 	/* start to print out the BCH info */
+	pr_err("Show BCH registers :\n");
+	for (i = 0; i <= HW_BCH_VERSION / 0x10 + 1; i++) {
+		reg = readl(r->bch_regs + i * 0x10);
+		pr_err("offset 0x%.3x : 0x%.8x\n", i * 0x10, reg);
+	}
 	pr_err("BCH Geometry :\n");
 	pr_err("GF length              : %u\n", geo->gf_len);
 	pr_err("ECC Strength           : %u\n", geo->ecc_strength);
@@ -232,6 +237,7 @@ int bch_set_geometry(struct gpmi_nand_data *this)
 	unsigned int metadata_size;
 	unsigned int ecc_strength;
 	unsigned int page_size;
+	unsigned int gf_len;
 	int ret;
 
 	if (common_nfc_set_geometry(this))
@@ -242,6 +248,7 @@ int bch_set_geometry(struct gpmi_nand_data *this)
 	metadata_size = bch_geo->metadata_size;
 	ecc_strength  = bch_geo->ecc_strength >> 1;
 	page_size     = bch_geo->page_size;
+	gf_len        = bch_geo->gf_len;
 
 	ret = gpmi_enable_clk(this);
 	if (ret)
@@ -263,11 +270,13 @@ int bch_set_geometry(struct gpmi_nand_data *this)
 	writel(BF_BCH_FLASH0LAYOUT0_NBLOCKS(block_count)
 			| BF_BCH_FLASH0LAYOUT0_META_SIZE(metadata_size)
 			| BF_BCH_FLASH0LAYOUT0_ECC0(ecc_strength, this)
+			| BF_BCH_FLASH0LAYOUT0_GF(gf_len, this)
 			| BF_BCH_FLASH0LAYOUT0_DATA0_SIZE(block_size, this),
 			r->bch_regs + HW_BCH_FLASH0LAYOUT0);
 
 	writel(BF_BCH_FLASH0LAYOUT1_PAGE_SIZE(page_size)
 			| BF_BCH_FLASH0LAYOUT1_ECCN(ecc_strength, this)
+			| BF_BCH_FLASH0LAYOUT1_GF(gf_len, this)
 			| BF_BCH_FLASH0LAYOUT1_DATAN_SIZE(block_size, this),
 			r->bch_regs + HW_BCH_FLASH0LAYOUT1);
 
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index e9b1c47e3cf9..717881a3d1b8 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -94,6 +94,25 @@ static inline int get_ecc_strength(struct gpmi_nand_data *this)
 	return round_down(ecc_strength, 2);
 }
 
+static inline bool gpmi_check_ecc(struct gpmi_nand_data *this)
+{
+	struct bch_geometry *geo = &this->bch_geometry;
+
+	/* Do the sanity check. */
+	if (GPMI_IS_MX23(this) || GPMI_IS_MX28(this)) {
+		/* The mx23/mx28 only support the GF13. */
+		if (geo->gf_len == 14)
+			return false;
+
+		if (geo->ecc_strength > MXS_ECC_STRENGTH_MAX)
+			return false;
+	} else if (GPMI_IS_MX6Q(this)) {
+		if (geo->ecc_strength > MX6_ECC_STRENGTH_MAX)
+			return false;
+	}
+	return true;
+}
+
 int common_nfc_set_geometry(struct gpmi_nand_data *this)
 {
 	struct bch_geometry *geo = &this->bch_geometry;
@@ -112,17 +131,24 @@ int common_nfc_set_geometry(struct gpmi_nand_data *this)
 	/* The default for the length of Galois Field. */
 	geo->gf_len = 13;
 
-	/* The default for chunk size. There is no oobsize greater then 512. */
+	/* The default for chunk size. */
 	geo->ecc_chunk_size = 512;
-	while (geo->ecc_chunk_size < mtd->oobsize)
+	while (geo->ecc_chunk_size < mtd->oobsize) {
 		geo->ecc_chunk_size *= 2; /* keep C >= O */
+		geo->gf_len = 14;
+	}
 
 	geo->ecc_chunk_count = mtd->writesize / geo->ecc_chunk_size;
 
 	/* We use the same ECC strength for all chunks. */
 	geo->ecc_strength = get_ecc_strength(this);
-	if (!geo->ecc_strength) {
-		pr_err("wrong ECC strength.\n");
+	if (!gpmi_check_ecc(this)) {
+		dev_err(this->dev,
+			"We can not support this nand chip."
+			" Its required ecc strength(%d) is beyond our"
+			" capability(%d).\n", geo->ecc_strength,
+			(GPMI_IS_MX6Q(this) ? MX6_ECC_STRENGTH_MAX
+					: MXS_ECC_STRENGTH_MAX));
 		return -EINVAL;
 	}
 
@@ -920,8 +946,7 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	dma_addr_t    auxiliary_phys;
 	unsigned int  i;
 	unsigned char *status;
-	unsigned int  failed;
-	unsigned int  corrected;
+	unsigned int  max_bitflips = 0;
 	int           ret;
 
 	pr_debug("page number is : %d\n", page);
@@ -945,35 +970,25 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 			payload_virt, payload_phys);
 	if (ret) {
 		pr_err("Error in ECC-based read: %d\n", ret);
-		goto exit_nfc;
+		return ret;
 	}
 
 	/* handle the block mark swapping */
 	block_mark_swapping(this, payload_virt, auxiliary_virt);
 
 	/* Loop over status bytes, accumulating ECC status. */
-	failed		= 0;
-	corrected	= 0;
-	status		= auxiliary_virt + nfc_geo->auxiliary_status_offset;
+	status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
 
 	for (i = 0; i < nfc_geo->ecc_chunk_count; i++, status++) {
 		if ((*status == STATUS_GOOD) || (*status == STATUS_ERASED))
 			continue;
 
 		if (*status == STATUS_UNCORRECTABLE) {
-			failed++;
+			mtd->ecc_stats.failed++;
 			continue;
 		}
-		corrected += *status;
-	}
-
-	/*
-	 * Propagate ECC status to the owning MTD only when failed or
-	 * corrected times nearly reaches our ECC correction threshold.
-	 */
-	if (failed || corrected >= (nfc_geo->ecc_strength - 1)) {
-		mtd->ecc_stats.failed    += failed;
-		mtd->ecc_stats.corrected += corrected;
+		mtd->ecc_stats.corrected += *status;
+		max_bitflips = max_t(unsigned int, max_bitflips, *status);
 	}
 
 	if (oob_required) {
@@ -995,8 +1010,8 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 			this->payload_virt, this->payload_phys,
 			nfc_geo->payload_size,
 			payload_virt, payload_phys);
-exit_nfc:
-	return ret;
+
+	return max_bitflips;
 }
 
 static int gpmi_ecc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
@@ -1668,8 +1683,8 @@ exit_nfc_init:
 	release_resources(this);
 exit_acquire_resources:
 	platform_set_drvdata(pdev, NULL);
-	kfree(this);
 	dev_err(this->dev, "driver registration failed: %d\n", ret);
+	kfree(this);
 
 	return ret;
 }
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
index 3d93a5e39090..072947731277 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
@@ -284,6 +284,10 @@ extern int gpmi_read_page(struct gpmi_nand_data *,
 #define STATUS_ERASED		0xff
 #define STATUS_UNCORRECTABLE	0xfe
 
+/* BCH's bit correction capability. */
+#define MXS_ECC_STRENGTH_MAX	20	/* mx23 and mx28 */
+#define MX6_ECC_STRENGTH_MAX	40
+
 /* Use the platform_id to distinguish different Archs. */
 #define IS_MX23			0x0
 #define IS_MX28			0x1
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index 60ac5b98b718..07e5784e5cd3 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -530,12 +530,23 @@ static void send_page_v1(struct mtd_info *mtd, unsigned int ops)
 
 static void send_read_id_v3(struct mxc_nand_host *host)
 {
+	struct nand_chip *this = &host->nand;
+
 	/* Read ID into main buffer */
 	writel(NFC_ID, NFC_V3_LAUNCH);
 
 	wait_op_done(host, true);
 
 	memcpy32_fromio(host->data_buf, host->main_area0, 16);
+
+	if (this->options & NAND_BUSWIDTH_16) {
+		/* compress the ID info */
+		host->data_buf[1] = host->data_buf[2];
+		host->data_buf[2] = host->data_buf[4];
+		host->data_buf[3] = host->data_buf[6];
+		host->data_buf[4] = host->data_buf[8];
+		host->data_buf[5] = host->data_buf[10];
+	}
 }
 
 /* Request the NANDFC to perform a read of the NAND device ID. */
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 3766682a0289..43214151b882 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -825,13 +825,8 @@ static void panic_nand_wait(struct mtd_info *mtd, struct nand_chip *chip,
 static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip)
 {
 
-	unsigned long timeo = jiffies;
 	int status, state = chip->state;
-
-	if (state == FL_ERASING)
-		timeo += (HZ * 400) / 1000;
-	else
-		timeo += (HZ * 20) / 1000;
+	unsigned long timeo = (state == FL_ERASING ? 400 : 20);
 
 	led_trigger_event(nand_led_trigger, LED_FULL);
 
@@ -849,6 +844,7 @@ static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip)
 	if (in_interrupt() || oops_in_progress)
 		panic_nand_wait(mtd, chip, timeo);
 	else {
+		timeo = jiffies + msecs_to_jiffies(timeo);
 		while (time_before(jiffies, timeo)) {
 			if (chip->dev_ready) {
 				if (chip->dev_ready(mtd))
diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index b7cfe0d37121..053c9a2d47c3 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -55,8 +55,7 @@ struct mtd_info;
 #define MODULE_AUTHOR(x)	/* x */
 #define MODULE_DESCRIPTION(x)	/* x */
 
-#define printk printf
-#define KERN_ERR		""
+#define pr_err printf
 #endif
 
 /*
@@ -507,7 +506,7 @@ int __nand_correct_data(unsigned char *buf,
 	if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
 		return 1;	/* error in ECC data; no action needed */
 
-	printk(KERN_ERR "uncorrectable error : ");
+	pr_err("%s: uncorrectable ECC error", __func__);
 	return -1;
 }
 EXPORT_SYMBOL(__nand_correct_data);
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index 8f30d385bfa3..891c52a30e6a 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -1468,12 +1468,12 @@ int do_read_error(struct nandsim *ns, int num)
 
 void do_bit_flips(struct nandsim *ns, int num)
 {
-	if (bitflips && random32() < (1 << 22)) {
+	if (bitflips && prandom_u32() < (1 << 22)) {
 		int flips = 1;
 		if (bitflips > 1)
-			flips = (random32() % (int) bitflips) + 1;
+			flips = (prandom_u32() % (int) bitflips) + 1;
 		while (flips--) {
-			int pos = random32() % (num * 8);
+			int pos = prandom_u32() % (num * 8);
 			ns->buf.byte[pos / 8] ^= (1 << (pos % 8));
 			NS_WARN("read_page: flipping bit %d in page %d "
 				"reading from %d ecc: corrected=%u failed=%u\n",
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 1d333497cfcb..8e820ddf4e08 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -22,9 +22,12 @@
 #include <linux/omap-dma.h>
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 
 #ifdef CONFIG_MTD_NAND_OMAP_BCH
 #include <linux/bch.h>
+#include <linux/platform_data/elm.h>
 #endif
 
 #include <linux/platform_data/mtd-nand-omap2.h>
@@ -117,6 +120,33 @@
 
 #define OMAP24XX_DMA_GPMC		4
 
+#define BCH8_MAX_ERROR		8	/* upto 8 bit correctable */
+#define BCH4_MAX_ERROR		4	/* upto 4 bit correctable */
+
+#define SECTOR_BYTES		512
+/* 4 bit padding to make byte aligned, 56 = 52 + 4 */
+#define BCH4_BIT_PAD		4
+#define BCH8_ECC_MAX		((SECTOR_BYTES + BCH8_ECC_OOB_BYTES) * 8)
+#define BCH4_ECC_MAX		((SECTOR_BYTES + BCH4_ECC_OOB_BYTES) * 8)
+
+/* GPMC ecc engine settings for read */
+#define BCH_WRAPMODE_1		1	/* BCH wrap mode 1 */
+#define BCH8R_ECC_SIZE0		0x1a	/* ecc_size0 = 26 */
+#define BCH8R_ECC_SIZE1		0x2	/* ecc_size1 = 2 */
+#define BCH4R_ECC_SIZE0		0xd	/* ecc_size0 = 13 */
+#define BCH4R_ECC_SIZE1		0x3	/* ecc_size1 = 3 */
+
+/* GPMC ecc engine settings for write */
+#define BCH_WRAPMODE_6		6	/* BCH wrap mode 6 */
+#define BCH_ECC_SIZE0		0x0	/* ecc_size0 = 0, no oob protection */
+#define BCH_ECC_SIZE1		0x20	/* ecc_size1 = 32 */
+
+#ifdef CONFIG_MTD_NAND_OMAP_BCH
+static u_char bch8_vector[] = {0xf3, 0xdb, 0x14, 0x16, 0x8b, 0xd2, 0xbe, 0xcc,
+	0xac, 0x6b, 0xff, 0x99, 0x7b};
+static u_char bch4_vector[] = {0x00, 0x6b, 0x31, 0xdd, 0x41, 0xbc, 0x10};
+#endif
+
 /* oob info generated runtime depending on ecc algorithm and layout selected */
 static struct nand_ecclayout omap_oobinfo;
 /* Define some generic bad / good block scan pattern which are used
@@ -156,6 +186,9 @@ struct omap_nand_info {
 #ifdef CONFIG_MTD_NAND_OMAP_BCH
 	struct bch_control             *bch;
 	struct nand_ecclayout           ecclayout;
+	bool				is_elm_used;
+	struct device			*elm_dev;
+	struct device_node		*of_node;
 #endif
 };
 
@@ -1031,6 +1064,13 @@ static int omap_dev_ready(struct mtd_info *mtd)
  * omap3_enable_hwecc_bch - Program OMAP3 GPMC to perform BCH ECC correction
  * @mtd: MTD device structure
  * @mode: Read/Write mode
+ *
+ * When using BCH, sector size is hardcoded to 512 bytes.
+ * Using wrapping mode 6 both for reading and writing if ELM module not uses
+ * for error correction.
+ * On writing,
+ * eccsize0 = 0  (no additional protected byte in spare area)
+ * eccsize1 = 32 (skip 32 nibbles = 16 bytes per sector in spare area)
  */
 static void omap3_enable_hwecc_bch(struct mtd_info *mtd, int mode)
 {
@@ -1039,32 +1079,57 @@ static void omap3_enable_hwecc_bch(struct mtd_info *mtd, int mode)
 	struct omap_nand_info *info = container_of(mtd, struct omap_nand_info,
 						   mtd);
 	struct nand_chip *chip = mtd->priv;
-	u32 val;
+	u32 val, wr_mode;
+	unsigned int ecc_size1, ecc_size0;
+
+	/* Using wrapping mode 6 for writing */
+	wr_mode = BCH_WRAPMODE_6;
 
-	nerrors = (info->nand.ecc.bytes == 13) ? 8 : 4;
-	dev_width = (chip->options & NAND_BUSWIDTH_16) ? 1 : 0;
-	nsectors = 1;
 	/*
-	 * Program GPMC to perform correction on one 512-byte sector at a time.
-	 * Using 4 sectors at a time (i.e. ecc.size = 2048) is also possible and
-	 * gives a slight (5%) performance gain (but requires additional code).
+	 * ECC engine enabled for valid ecc_size0 nibbles
+	 * and disabled for ecc_size1 nibbles.
 	 */
+	ecc_size0 = BCH_ECC_SIZE0;
+	ecc_size1 = BCH_ECC_SIZE1;
+
+	/* Perform ecc calculation on 512-byte sector */
+	nsectors = 1;
+
+	/* Update number of error correction */
+	nerrors = info->nand.ecc.strength;
+
+	/* Multi sector reading/writing for NAND flash with page size < 4096 */
+	if (info->is_elm_used && (mtd->writesize <= 4096)) {
+		if (mode == NAND_ECC_READ) {
+			/* Using wrapping mode 1 for reading */
+			wr_mode = BCH_WRAPMODE_1;
+
+			/*
+			 * ECC engine enabled for ecc_size0 nibbles
+			 * and disabled for ecc_size1 nibbles.
+			 */
+			ecc_size0 = (nerrors == 8) ?
+				BCH8R_ECC_SIZE0 : BCH4R_ECC_SIZE0;
+			ecc_size1 = (nerrors == 8) ?
+				BCH8R_ECC_SIZE1 : BCH4R_ECC_SIZE1;
+		}
+
+		/* Perform ecc calculation for one page (< 4096) */
+		nsectors = info->nand.ecc.steps;
+	}
 
 	writel(ECC1, info->reg.gpmc_ecc_control);
 
-	/*
-	 * When using BCH, sector size is hardcoded to 512 bytes.
-	 * Here we are using wrapping mode 6 both for reading and writing, with:
-	 *  size0 = 0  (no additional protected byte in spare area)
-	 *  size1 = 32 (skip 32 nibbles = 16 bytes per sector in spare area)
-	 */
-	val = (32 << ECCSIZE1_SHIFT) | (0 << ECCSIZE0_SHIFT);
+	/* Configure ecc size for BCH */
+	val = (ecc_size1 << ECCSIZE1_SHIFT) | (ecc_size0 << ECCSIZE0_SHIFT);
 	writel(val, info->reg.gpmc_ecc_size_config);
 
+	dev_width = (chip->options & NAND_BUSWIDTH_16) ? 1 : 0;
+
 	/* BCH configuration */
 	val = ((1                        << 16) | /* enable BCH */
 	       (((nerrors == 8) ? 1 : 0) << 12) | /* 8 or 4 bits */
-	       (0x06                     <<  8) | /* wrap mode = 6 */
+	       (wr_mode                  <<  8) | /* wrap mode */
 	       (dev_width                <<  7) | /* bus width */
 	       (((nsectors-1) & 0x7)     <<  4) | /* number of sectors */
 	       (info->gpmc_cs            <<  1) | /* ECC CS */
@@ -1072,7 +1137,7 @@ static void omap3_enable_hwecc_bch(struct mtd_info *mtd, int mode)
 
 	writel(val, info->reg.gpmc_ecc_config);
 
-	/* clear ecc and enable bits */
+	/* Clear ecc and enable bits */
 	writel(ECCCLEAR | ECC1, info->reg.gpmc_ecc_control);
 }
 
@@ -1162,6 +1227,298 @@ static int omap3_calculate_ecc_bch8(struct mtd_info *mtd, const u_char *dat,
 }
 
 /**
+ * omap3_calculate_ecc_bch - Generate bytes of ECC bytes
+ * @mtd:	MTD device structure
+ * @dat:	The pointer to data on which ecc is computed
+ * @ecc_code:	The ecc_code buffer
+ *
+ * Support calculating of BCH4/8 ecc vectors for the page
+ */
+static int omap3_calculate_ecc_bch(struct mtd_info *mtd, const u_char *dat,
+				    u_char *ecc_code)
+{
+	struct omap_nand_info *info = container_of(mtd, struct omap_nand_info,
+						   mtd);
+	unsigned long nsectors, bch_val1, bch_val2, bch_val3, bch_val4;
+	int i, eccbchtsel;
+
+	nsectors = ((readl(info->reg.gpmc_ecc_config) >> 4) & 0x7) + 1;
+	/*
+	 * find BCH scheme used
+	 * 0 -> BCH4
+	 * 1 -> BCH8
+	 */
+	eccbchtsel = ((readl(info->reg.gpmc_ecc_config) >> 12) & 0x3);
+
+	for (i = 0; i < nsectors; i++) {
+
+		/* Read hw-computed remainder */
+		bch_val1 = readl(info->reg.gpmc_bch_result0[i]);
+		bch_val2 = readl(info->reg.gpmc_bch_result1[i]);
+		if (eccbchtsel) {
+			bch_val3 = readl(info->reg.gpmc_bch_result2[i]);
+			bch_val4 = readl(info->reg.gpmc_bch_result3[i]);
+		}
+
+		if (eccbchtsel) {
+			/* BCH8 ecc scheme */
+			*ecc_code++ = (bch_val4 & 0xFF);
+			*ecc_code++ = ((bch_val3 >> 24) & 0xFF);
+			*ecc_code++ = ((bch_val3 >> 16) & 0xFF);
+			*ecc_code++ = ((bch_val3 >> 8) & 0xFF);
+			*ecc_code++ = (bch_val3 & 0xFF);
+			*ecc_code++ = ((bch_val2 >> 24) & 0xFF);
+			*ecc_code++ = ((bch_val2 >> 16) & 0xFF);
+			*ecc_code++ = ((bch_val2 >> 8) & 0xFF);
+			*ecc_code++ = (bch_val2 & 0xFF);
+			*ecc_code++ = ((bch_val1 >> 24) & 0xFF);
+			*ecc_code++ = ((bch_val1 >> 16) & 0xFF);
+			*ecc_code++ = ((bch_val1 >> 8) & 0xFF);
+			*ecc_code++ = (bch_val1 & 0xFF);
+			/*
+			 * Setting 14th byte to zero to handle
+			 * erased page & maintain compatibility
+			 * with RBL
+			 */
+			*ecc_code++ = 0x0;
+		} else {
+			/* BCH4 ecc scheme */
+			*ecc_code++ = ((bch_val2 >> 12) & 0xFF);
+			*ecc_code++ = ((bch_val2 >> 4) & 0xFF);
+			*ecc_code++ = ((bch_val2 & 0xF) << 4) |
+				((bch_val1 >> 28) & 0xF);
+			*ecc_code++ = ((bch_val1 >> 20) & 0xFF);
+			*ecc_code++ = ((bch_val1 >> 12) & 0xFF);
+			*ecc_code++ = ((bch_val1 >> 4) & 0xFF);
+			*ecc_code++ = ((bch_val1 & 0xF) << 4);
+			/*
+			 * Setting 8th byte to zero to handle
+			 * erased page
+			 */
+			*ecc_code++ = 0x0;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * erased_sector_bitflips - count bit flips
+ * @data:	data sector buffer
+ * @oob:	oob buffer
+ * @info:	omap_nand_info
+ *
+ * Check the bit flips in erased page falls below correctable level.
+ * If falls below, report the page as erased with correctable bit
+ * flip, else report as uncorrectable page.
+ */
+static int erased_sector_bitflips(u_char *data, u_char *oob,
+		struct omap_nand_info *info)
+{
+	int flip_bits = 0, i;
+
+	for (i = 0; i < info->nand.ecc.size; i++) {
+		flip_bits += hweight8(~data[i]);
+		if (flip_bits > info->nand.ecc.strength)
+			return 0;
+	}
+
+	for (i = 0; i < info->nand.ecc.bytes - 1; i++) {
+		flip_bits += hweight8(~oob[i]);
+		if (flip_bits > info->nand.ecc.strength)
+			return 0;
+	}
+
+	/*
+	 * Bit flips falls in correctable level.
+	 * Fill data area with 0xFF
+	 */
+	if (flip_bits) {
+		memset(data, 0xFF, info->nand.ecc.size);
+		memset(oob, 0xFF, info->nand.ecc.bytes);
+	}
+
+	return flip_bits;
+}
+
+/**
+ * omap_elm_correct_data - corrects page data area in case error reported
+ * @mtd:	MTD device structure
+ * @data:	page data
+ * @read_ecc:	ecc read from nand flash
+ * @calc_ecc:	ecc read from HW ECC registers
+ *
+ * Calculated ecc vector reported as zero in case of non-error pages.
+ * In case of error/erased pages non-zero error vector is reported.
+ * In case of non-zero ecc vector, check read_ecc at fixed offset
+ * (x = 13/7 in case of BCH8/4 == 0) to find page programmed or not.
+ * To handle bit flips in this data, count the number of 0's in
+ * read_ecc[x] and check if it greater than 4. If it is less, it is
+ * programmed page, else erased page.
+ *
+ * 1. If page is erased, check with standard ecc vector (ecc vector
+ * for erased page to find any bit flip). If check fails, bit flip
+ * is present in erased page. Count the bit flips in erased page and
+ * if it falls under correctable level, report page with 0xFF and
+ * update the correctable bit information.
+ * 2. If error is reported on programmed page, update elm error
+ * vector and correct the page with ELM error correction routine.
+ *
+ */
+static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data,
+				u_char *read_ecc, u_char *calc_ecc)
+{
+	struct omap_nand_info *info = container_of(mtd, struct omap_nand_info,
+			mtd);
+	int eccsteps = info->nand.ecc.steps;
+	int i , j, stat = 0;
+	int eccsize, eccflag, ecc_vector_size;
+	struct elm_errorvec err_vec[ERROR_VECTOR_MAX];
+	u_char *ecc_vec = calc_ecc;
+	u_char *spare_ecc = read_ecc;
+	u_char *erased_ecc_vec;
+	enum bch_ecc type;
+	bool is_error_reported = false;
+
+	/* Initialize elm error vector to zero */
+	memset(err_vec, 0, sizeof(err_vec));
+
+	if (info->nand.ecc.strength == BCH8_MAX_ERROR) {
+		type = BCH8_ECC;
+		erased_ecc_vec = bch8_vector;
+	} else {
+		type = BCH4_ECC;
+		erased_ecc_vec = bch4_vector;
+	}
+
+	ecc_vector_size = info->nand.ecc.bytes;
+
+	/*
+	 * Remove extra byte padding for BCH8 RBL
+	 * compatibility and erased page handling
+	 */
+	eccsize = ecc_vector_size - 1;
+
+	for (i = 0; i < eccsteps ; i++) {
+		eccflag = 0;	/* initialize eccflag */
+
+		/*
+		 * Check any error reported,
+		 * In case of error, non zero ecc reported.
+		 */
+
+		for (j = 0; (j < eccsize); j++) {
+			if (calc_ecc[j] != 0) {
+				eccflag = 1; /* non zero ecc, error present */
+				break;
+			}
+		}
+
+		if (eccflag == 1) {
+			/*
+			 * Set threshold to minimum of 4, half of ecc.strength/2
+			 * to allow max bit flip in byte to 4
+			 */
+			unsigned int threshold = min_t(unsigned int, 4,
+					info->nand.ecc.strength / 2);
+
+			/*
+			 * Check data area is programmed by counting
+			 * number of 0's at fixed offset in spare area.
+			 * Checking count of 0's against threshold.
+			 * In case programmed page expects at least threshold
+			 * zeros in byte.
+			 * If zeros are less than threshold for programmed page/
+			 * zeros are more than threshold erased page, either
+			 * case page reported as uncorrectable.
+			 */
+			if (hweight8(~read_ecc[eccsize]) >= threshold) {
+				/*
+				 * Update elm error vector as
+				 * data area is programmed
+				 */
+				err_vec[i].error_reported = true;
+				is_error_reported = true;
+			} else {
+				/* Error reported in erased page */
+				int bitflip_count;
+				u_char *buf = &data[info->nand.ecc.size * i];
+
+				if (memcmp(calc_ecc, erased_ecc_vec, eccsize)) {
+					bitflip_count = erased_sector_bitflips(
+							buf, read_ecc, info);
+
+					if (bitflip_count)
+						stat += bitflip_count;
+					else
+						return -EINVAL;
+				}
+			}
+		}
+
+		/* Update the ecc vector */
+		calc_ecc += ecc_vector_size;
+		read_ecc += ecc_vector_size;
+	}
+
+	/* Check if any error reported */
+	if (!is_error_reported)
+		return 0;
+
+	/* Decode BCH error using ELM module */
+	elm_decode_bch_error_page(info->elm_dev, ecc_vec, err_vec);
+
+	for (i = 0; i < eccsteps; i++) {
+		if (err_vec[i].error_reported) {
+			for (j = 0; j < err_vec[i].error_count; j++) {
+				u32 bit_pos, byte_pos, error_max, pos;
+
+				if (type == BCH8_ECC)
+					error_max = BCH8_ECC_MAX;
+				else
+					error_max = BCH4_ECC_MAX;
+
+				if (info->nand.ecc.strength == BCH8_MAX_ERROR)
+					pos = err_vec[i].error_loc[j];
+				else
+					/* Add 4 to take care 4 bit padding */
+					pos = err_vec[i].error_loc[j] +
+						BCH4_BIT_PAD;
+
+				/* Calculate bit position of error */
+				bit_pos = pos % 8;
+
+				/* Calculate byte position of error */
+				byte_pos = (error_max - pos - 1) / 8;
+
+				if (pos < error_max) {
+					if (byte_pos < 512)
+						data[byte_pos] ^= 1 << bit_pos;
+					else
+						spare_ecc[byte_pos - 512] ^=
+							1 << bit_pos;
+				}
+				/* else, not interested to correct ecc */
+			}
+		}
+
+		/* Update number of correctable errors */
+		stat += err_vec[i].error_count;
+
+		/* Update page data with sector size */
+		data += info->nand.ecc.size;
+		spare_ecc += ecc_vector_size;
+	}
+
+	for (i = 0; i < eccsteps; i++)
+		/* Return error if uncorrectable error present */
+		if (err_vec[i].error_uncorrectable)
+			return -EINVAL;
+
+	return stat;
+}
+
+/**
  * omap3_correct_data_bch - Decode received data and correct errors
  * @mtd: MTD device structure
  * @data: page data
@@ -1194,6 +1551,92 @@ static int omap3_correct_data_bch(struct mtd_info *mtd, u_char *data,
 }
 
 /**
+ * omap_write_page_bch - BCH ecc based write page function for entire page
+ * @mtd:		mtd info structure
+ * @chip:		nand chip info structure
+ * @buf:		data buffer
+ * @oob_required:	must write chip->oob_poi to OOB
+ *
+ * Custom write page method evolved to support multi sector writing in one shot
+ */
+static int omap_write_page_bch(struct mtd_info *mtd, struct nand_chip *chip,
+				  const uint8_t *buf, int oob_required)
+{
+	int i;
+	uint8_t *ecc_calc = chip->buffers->ecccalc;
+	uint32_t *eccpos = chip->ecc.layout->eccpos;
+
+	/* Enable GPMC ecc engine */
+	chip->ecc.hwctl(mtd, NAND_ECC_WRITE);
+
+	/* Write data */
+	chip->write_buf(mtd, buf, mtd->writesize);
+
+	/* Update ecc vector from GPMC result registers */
+	chip->ecc.calculate(mtd, buf, &ecc_calc[0]);
+
+	for (i = 0; i < chip->ecc.total; i++)
+		chip->oob_poi[eccpos[i]] = ecc_calc[i];
+
+	/* Write ecc vector to OOB area */
+	chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
+	return 0;
+}
+
+/**
+ * omap_read_page_bch - BCH ecc based page read function for entire page
+ * @mtd:		mtd info structure
+ * @chip:		nand chip info structure
+ * @buf:		buffer to store read data
+ * @oob_required:	caller requires OOB data read to chip->oob_poi
+ * @page:		page number to read
+ *
+ * For BCH ecc scheme, GPMC used for syndrome calculation and ELM module
+ * used for error correction.
+ * Custom method evolved to support ELM error correction & multi sector
+ * reading. On reading page data area is read along with OOB data with
+ * ecc engine enabled. ecc vector updated after read of OOB data.
+ * For non error pages ecc vector reported as zero.
+ */
+static int omap_read_page_bch(struct mtd_info *mtd, struct nand_chip *chip,
+				uint8_t *buf, int oob_required, int page)
+{
+	uint8_t *ecc_calc = chip->buffers->ecccalc;
+	uint8_t *ecc_code = chip->buffers->ecccode;
+	uint32_t *eccpos = chip->ecc.layout->eccpos;
+	uint8_t *oob = &chip->oob_poi[eccpos[0]];
+	uint32_t oob_pos = mtd->writesize + chip->ecc.layout->eccpos[0];
+	int stat;
+	unsigned int max_bitflips = 0;
+
+	/* Enable GPMC ecc engine */
+	chip->ecc.hwctl(mtd, NAND_ECC_READ);
+
+	/* Read data */
+	chip->read_buf(mtd, buf, mtd->writesize);
+
+	/* Read oob bytes */
+	chip->cmdfunc(mtd, NAND_CMD_RNDOUT, oob_pos, -1);
+	chip->read_buf(mtd, oob, chip->ecc.total);
+
+	/* Calculate ecc bytes */
+	chip->ecc.calculate(mtd, buf, ecc_calc);
+
+	memcpy(ecc_code, &chip->oob_poi[eccpos[0]], chip->ecc.total);
+
+	stat = chip->ecc.correct(mtd, buf, ecc_code, ecc_calc);
+
+	if (stat < 0) {
+		mtd->ecc_stats.failed++;
+	} else {
+		mtd->ecc_stats.corrected += stat;
+		max_bitflips = max_t(unsigned int, max_bitflips, stat);
+	}
+
+	return max_bitflips;
+}
+
+/**
  * omap3_free_bch - Release BCH ecc resources
  * @mtd: MTD device structure
  */
@@ -1218,43 +1661,86 @@ static int omap3_init_bch(struct mtd_info *mtd, int ecc_opt)
 	struct omap_nand_info *info = container_of(mtd, struct omap_nand_info,
 						   mtd);
 #ifdef CONFIG_MTD_NAND_OMAP_BCH8
-	const int hw_errors = 8;
+	const int hw_errors = BCH8_MAX_ERROR;
 #else
-	const int hw_errors = 4;
+	const int hw_errors = BCH4_MAX_ERROR;
 #endif
+	enum bch_ecc bch_type;
+	const __be32 *parp;
+	int lenp;
+	struct device_node *elm_node;
+
 	info->bch = NULL;
 
-	max_errors = (ecc_opt == OMAP_ECC_BCH8_CODE_HW) ? 8 : 4;
+	max_errors = (ecc_opt == OMAP_ECC_BCH8_CODE_HW) ?
+		BCH8_MAX_ERROR : BCH4_MAX_ERROR;
 	if (max_errors != hw_errors) {
 		pr_err("cannot configure %d-bit BCH ecc, only %d-bit supported",
 		       max_errors, hw_errors);
 		goto fail;
 	}
 
-	/* software bch library is only used to detect and locate errors */
-	info->bch = init_bch(13, max_errors, 0x201b /* hw polynomial */);
-	if (!info->bch)
-		goto fail;
+	info->nand.ecc.size = 512;
+	info->nand.ecc.hwctl = omap3_enable_hwecc_bch;
+	info->nand.ecc.mode = NAND_ECC_HW;
+	info->nand.ecc.strength = max_errors;
 
-	info->nand.ecc.size    = 512;
-	info->nand.ecc.hwctl   = omap3_enable_hwecc_bch;
-	info->nand.ecc.correct = omap3_correct_data_bch;
-	info->nand.ecc.mode    = NAND_ECC_HW;
+	if (hw_errors == BCH8_MAX_ERROR)
+		bch_type = BCH8_ECC;
+	else
+		bch_type = BCH4_ECC;
 
-	/*
-	 * The number of corrected errors in an ecc block that will trigger
-	 * block scrubbing defaults to the ecc strength (4 or 8).
-	 * Set mtd->bitflip_threshold here to define a custom threshold.
-	 */
+	/* Detect availability of ELM module */
+	parp = of_get_property(info->of_node, "elm_id", &lenp);
+	if ((parp == NULL) && (lenp != (sizeof(void *) * 2))) {
+		pr_err("Missing elm_id property, fall back to Software BCH\n");
+		info->is_elm_used = false;
+	} else {
+		struct platform_device *pdev;
 
-	if (max_errors == 8) {
-		info->nand.ecc.strength  = 8;
-		info->nand.ecc.bytes     = 13;
-		info->nand.ecc.calculate = omap3_calculate_ecc_bch8;
+		elm_node = of_find_node_by_phandle(be32_to_cpup(parp));
+		pdev = of_find_device_by_node(elm_node);
+		info->elm_dev = &pdev->dev;
+		elm_config(info->elm_dev, bch_type);
+		info->is_elm_used = true;
+	}
+
+	if (info->is_elm_used && (mtd->writesize <= 4096)) {
+
+		if (hw_errors == BCH8_MAX_ERROR)
+			info->nand.ecc.bytes = BCH8_SIZE;
+		else
+			info->nand.ecc.bytes = BCH4_SIZE;
+
+		info->nand.ecc.correct = omap_elm_correct_data;
+		info->nand.ecc.calculate = omap3_calculate_ecc_bch;
+		info->nand.ecc.read_page = omap_read_page_bch;
+		info->nand.ecc.write_page = omap_write_page_bch;
 	} else {
-		info->nand.ecc.strength  = 4;
-		info->nand.ecc.bytes     = 7;
-		info->nand.ecc.calculate = omap3_calculate_ecc_bch4;
+		/*
+		 * software bch library is only used to detect and
+		 * locate errors
+		 */
+		info->bch = init_bch(13, max_errors,
+				0x201b /* hw polynomial */);
+		if (!info->bch)
+			goto fail;
+
+		info->nand.ecc.correct = omap3_correct_data_bch;
+
+		/*
+		 * The number of corrected errors in an ecc block that will
+		 * trigger block scrubbing defaults to the ecc strength (4 or 8)
+		 * Set mtd->bitflip_threshold here to define a custom threshold.
+		 */
+
+		if (max_errors == 8) {
+			info->nand.ecc.bytes = 13;
+			info->nand.ecc.calculate = omap3_calculate_ecc_bch8;
+		} else {
+			info->nand.ecc.bytes = 7;
+			info->nand.ecc.calculate = omap3_calculate_ecc_bch4;
+		}
 	}
 
 	pr_info("enabling NAND BCH ecc with %d-bit correction\n", max_errors);
@@ -1270,7 +1756,7 @@ fail:
  */
 static int omap3_init_bch_tail(struct mtd_info *mtd)
 {
-	int i, steps;
+	int i, steps, offset;
 	struct omap_nand_info *info = container_of(mtd, struct omap_nand_info,
 						   mtd);
 	struct nand_ecclayout *layout = &info->ecclayout;
@@ -1292,11 +1778,21 @@ static int omap3_init_bch_tail(struct mtd_info *mtd)
 		goto fail;
 	}
 
+	/* ECC layout compatible with RBL for BCH8 */
+	if (info->is_elm_used && (info->nand.ecc.bytes == BCH8_SIZE))
+		offset = 2;
+	else
+		offset = mtd->oobsize - layout->eccbytes;
+
 	/* put ecc bytes at oob tail */
 	for (i = 0; i < layout->eccbytes; i++)
-		layout->eccpos[i] = mtd->oobsize-layout->eccbytes+i;
+		layout->eccpos[i] = offset + i;
+
+	if (info->is_elm_used && (info->nand.ecc.bytes == BCH8_SIZE))
+		layout->oobfree[0].offset = 2 + layout->eccbytes * steps;
+	else
+		layout->oobfree[0].offset = 2;
 
-	layout->oobfree[0].offset = 2;
 	layout->oobfree[0].length = mtd->oobsize-2-layout->eccbytes;
 	info->nand.ecc.layout = layout;
 
@@ -1360,6 +1856,9 @@ static int omap_nand_probe(struct platform_device *pdev)
 
 	info->nand.options	= pdata->devsize;
 	info->nand.options	|= NAND_SKIP_BBTSCAN;
+#ifdef CONFIG_MTD_NAND_OMAP_BCH
+	info->of_node		= pdata->of_node;
+#endif
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (res == NULL) {
diff --git a/drivers/mtd/ofpart.c b/drivers/mtd/ofpart.c
index dbd3aa574eaf..30bd907a260a 100644
--- a/drivers/mtd/ofpart.c
+++ b/drivers/mtd/ofpart.c
@@ -174,7 +174,14 @@ out:
 	return rc;
 }
 
+static void __exit ofpart_parser_exit(void)
+{
+	deregister_mtd_parser(&ofpart_parser);
+	deregister_mtd_parser(&ofoldpart_parser);
+}
+
 module_init(ofpart_parser_init);
+module_exit(ofpart_parser_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Parser for MTD partitioning information in device tree");
diff --git a/drivers/mtd/tests/mtd_nandecctest.c b/drivers/mtd/tests/mtd_nandecctest.c
index 1eee264509a8..70106607c247 100644
--- a/drivers/mtd/tests/mtd_nandecctest.c
+++ b/drivers/mtd/tests/mtd_nandecctest.c
@@ -44,7 +44,7 @@ struct nand_ecc_test {
 static void single_bit_error_data(void *error_data, void *correct_data,
 				size_t size)
 {
-	unsigned int offset = random32() % (size * BITS_PER_BYTE);
+	unsigned int offset = prandom_u32() % (size * BITS_PER_BYTE);
 
 	memcpy(error_data, correct_data, size);
 	__change_bit_le(offset, error_data);
@@ -55,9 +55,9 @@ static void double_bit_error_data(void *error_data, void *correct_data,
 {
 	unsigned int offset[2];
 
-	offset[0] = random32() % (size * BITS_PER_BYTE);
+	offset[0] = prandom_u32() % (size * BITS_PER_BYTE);
 	do {
-		offset[1] = random32() % (size * BITS_PER_BYTE);
+		offset[1] = prandom_u32() % (size * BITS_PER_BYTE);
 	} while (offset[0] == offset[1]);
 
 	memcpy(error_data, correct_data, size);
@@ -68,7 +68,7 @@ static void double_bit_error_data(void *error_data, void *correct_data,
 
 static unsigned int random_ecc_bit(size_t size)
 {
-	unsigned int offset = random32() % (3 * BITS_PER_BYTE);
+	unsigned int offset = prandom_u32() % (3 * BITS_PER_BYTE);
 
 	if (size == 256) {
 		/*
@@ -76,7 +76,7 @@ static unsigned int random_ecc_bit(size_t size)
 		 * and 17th bit) in ECC code for 256 byte data block
 		 */
 		while (offset == 16 || offset == 17)
-			offset = random32() % (3 * BITS_PER_BYTE);
+			offset = prandom_u32() % (3 * BITS_PER_BYTE);
 	}
 
 	return offset;
@@ -256,7 +256,7 @@ static int nand_ecc_test_run(const size_t size)
 		goto error;
 	}
 
-	get_random_bytes(correct_data, size);
+	prandom_bytes(correct_data, size);
 	__nand_calculate_ecc(correct_data, size, correct_ecc);
 
 	for (i = 0; i < ARRAY_SIZE(nand_ecc_test); i++) {
diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c
index e827fa8cd844..3e24b379ffa4 100644
--- a/drivers/mtd/tests/mtd_oobtest.c
+++ b/drivers/mtd/tests/mtd_oobtest.c
@@ -29,6 +29,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/random.h>
 
 static int dev = -EINVAL;
 module_param(dev, int, S_IRUGO);
@@ -46,26 +47,7 @@ static int use_offset;
 static int use_len;
 static int use_len_max;
 static int vary_offset;
-static unsigned long next = 1;
-
-static inline unsigned int simple_rand(void)
-{
-	next = next * 1103515245 + 12345;
-	return (unsigned int)((next / 65536) % 32768);
-}
-
-static inline void simple_srand(unsigned long seed)
-{
-	next = seed;
-}
-
-static void set_random_data(unsigned char *buf, size_t len)
-{
-	size_t i;
-
-	for (i = 0; i < len; ++i)
-		buf[i] = simple_rand();
-}
+static struct rnd_state rnd_state;
 
 static int erase_eraseblock(int ebnum)
 {
@@ -129,7 +111,7 @@ static int write_eraseblock(int ebnum)
 	loff_t addr = ebnum * mtd->erasesize;
 
 	for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) {
-		set_random_data(writebuf, use_len);
+		prandom_bytes_state(&rnd_state, writebuf, use_len);
 		ops.mode      = MTD_OPS_AUTO_OOB;
 		ops.len       = 0;
 		ops.retlen    = 0;
@@ -182,7 +164,7 @@ static int verify_eraseblock(int ebnum)
 	loff_t addr = ebnum * mtd->erasesize;
 
 	for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) {
-		set_random_data(writebuf, use_len);
+		prandom_bytes_state(&rnd_state, writebuf, use_len);
 		ops.mode      = MTD_OPS_AUTO_OOB;
 		ops.len       = 0;
 		ops.retlen    = 0;
@@ -273,7 +255,7 @@ static int verify_eraseblock_in_one_go(int ebnum)
 	loff_t addr = ebnum * mtd->erasesize;
 	size_t len = mtd->ecclayout->oobavail * pgcnt;
 
-	set_random_data(writebuf, len);
+	prandom_bytes_state(&rnd_state, writebuf, len);
 	ops.mode      = MTD_OPS_AUTO_OOB;
 	ops.len       = 0;
 	ops.retlen    = 0;
@@ -424,12 +406,12 @@ static int __init mtd_oobtest_init(void)
 	if (err)
 		goto out;
 
-	simple_srand(1);
+	prandom_seed_state(&rnd_state, 1);
 	err = write_whole_device();
 	if (err)
 		goto out;
 
-	simple_srand(1);
+	prandom_seed_state(&rnd_state, 1);
 	err = verify_all_eraseblocks();
 	if (err)
 		goto out;
@@ -444,13 +426,13 @@ static int __init mtd_oobtest_init(void)
 	if (err)
 		goto out;
 
-	simple_srand(3);
+	prandom_seed_state(&rnd_state, 3);
 	err = write_whole_device();
 	if (err)
 		goto out;
 
 	/* Check all eraseblocks */
-	simple_srand(3);
+	prandom_seed_state(&rnd_state, 3);
 	pr_info("verifying all eraseblocks\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
@@ -479,7 +461,7 @@ static int __init mtd_oobtest_init(void)
 	use_len = mtd->ecclayout->oobavail;
 	use_len_max = mtd->ecclayout->oobavail;
 	vary_offset = 1;
-	simple_srand(5);
+	prandom_seed_state(&rnd_state, 5);
 
 	err = write_whole_device();
 	if (err)
@@ -490,7 +472,7 @@ static int __init mtd_oobtest_init(void)
 	use_len = mtd->ecclayout->oobavail;
 	use_len_max = mtd->ecclayout->oobavail;
 	vary_offset = 1;
-	simple_srand(5);
+	prandom_seed_state(&rnd_state, 5);
 	err = verify_all_eraseblocks();
 	if (err)
 		goto out;
@@ -649,7 +631,7 @@ static int __init mtd_oobtest_init(void)
 		goto out;
 
 	/* Write all eraseblocks */
-	simple_srand(11);
+	prandom_seed_state(&rnd_state, 11);
 	pr_info("writing OOBs of whole device\n");
 	for (i = 0; i < ebcnt - 1; ++i) {
 		int cnt = 2;
@@ -659,7 +641,7 @@ static int __init mtd_oobtest_init(void)
 			continue;
 		addr = (i + 1) * mtd->erasesize - mtd->writesize;
 		for (pg = 0; pg < cnt; ++pg) {
-			set_random_data(writebuf, sz);
+			prandom_bytes_state(&rnd_state, writebuf, sz);
 			ops.mode      = MTD_OPS_AUTO_OOB;
 			ops.len       = 0;
 			ops.retlen    = 0;
@@ -680,12 +662,13 @@ static int __init mtd_oobtest_init(void)
 	pr_info("written %u eraseblocks\n", i);
 
 	/* Check all eraseblocks */
-	simple_srand(11);
+	prandom_seed_state(&rnd_state, 11);
 	pr_info("verifying all eraseblocks\n");
 	for (i = 0; i < ebcnt - 1; ++i) {
 		if (bbt[i] || bbt[i + 1])
 			continue;
-		set_random_data(writebuf, mtd->ecclayout->oobavail * 2);
+		prandom_bytes_state(&rnd_state, writebuf,
+					mtd->ecclayout->oobavail * 2);
 		addr = (i + 1) * mtd->erasesize - mtd->writesize;
 		ops.mode      = MTD_OPS_AUTO_OOB;
 		ops.len       = 0;
diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c
index f93a76f88113..0c1140b6c286 100644
--- a/drivers/mtd/tests/mtd_pagetest.c
+++ b/drivers/mtd/tests/mtd_pagetest.c
@@ -29,6 +29,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/random.h>
 
 static int dev = -EINVAL;
 module_param(dev, int, S_IRUGO);
@@ -45,26 +46,7 @@ static int bufsize;
 static int ebcnt;
 static int pgcnt;
 static int errcnt;
-static unsigned long next = 1;
-
-static inline unsigned int simple_rand(void)
-{
-	next = next * 1103515245 + 12345;
-	return (unsigned int)((next / 65536) % 32768);
-}
-
-static inline void simple_srand(unsigned long seed)
-{
-	next = seed;
-}
-
-static void set_random_data(unsigned char *buf, size_t len)
-{
-	size_t i;
-
-	for (i = 0; i < len; ++i)
-		buf[i] = simple_rand();
-}
+static struct rnd_state rnd_state;
 
 static int erase_eraseblock(int ebnum)
 {
@@ -98,7 +80,7 @@ static int write_eraseblock(int ebnum)
 	size_t written;
 	loff_t addr = ebnum * mtd->erasesize;
 
-	set_random_data(writebuf, mtd->erasesize);
+	prandom_bytes_state(&rnd_state, writebuf, mtd->erasesize);
 	cond_resched();
 	err = mtd_write(mtd, addr, mtd->erasesize, &written, writebuf);
 	if (err || written != mtd->erasesize)
@@ -124,7 +106,7 @@ static int verify_eraseblock(int ebnum)
 	for (i = 0; i < ebcnt && bbt[ebcnt - i - 1]; ++i)
 		addrn -= mtd->erasesize;
 
-	set_random_data(writebuf, mtd->erasesize);
+	prandom_bytes_state(&rnd_state, writebuf, mtd->erasesize);
 	for (j = 0; j < pgcnt - 1; ++j, addr += pgsize) {
 		/* Do a read to set the internal dataRAMs to different data */
 		err = mtd_read(mtd, addr0, bufsize, &read, twopages);
@@ -160,7 +142,8 @@ static int verify_eraseblock(int ebnum)
 	}
 	/* Check boundary between eraseblocks */
 	if (addr <= addrn - pgsize - pgsize && !bbt[ebnum + 1]) {
-		unsigned long oldnext = next;
+		struct rnd_state old_state = rnd_state;
+
 		/* Do a read to set the internal dataRAMs to different data */
 		err = mtd_read(mtd, addr0, bufsize, &read, twopages);
 		if (mtd_is_bitflip(err))
@@ -188,13 +171,13 @@ static int verify_eraseblock(int ebnum)
 			return err;
 		}
 		memcpy(boundary, writebuf + mtd->erasesize - pgsize, pgsize);
-		set_random_data(boundary + pgsize, pgsize);
+		prandom_bytes_state(&rnd_state, boundary + pgsize, pgsize);
 		if (memcmp(twopages, boundary, bufsize)) {
 			pr_err("error: verify failed at %#llx\n",
 			       (long long)addr);
 			errcnt += 1;
 		}
-		next = oldnext;
+		rnd_state = old_state;
 	}
 	return err;
 }
@@ -326,7 +309,7 @@ static int erasecrosstest(void)
 		return err;
 
 	pr_info("writing 1st page of block %d\n", ebnum);
-	set_random_data(writebuf, pgsize);
+	prandom_bytes_state(&rnd_state, writebuf, pgsize);
 	strcpy(writebuf, "There is no data like this!");
 	err = mtd_write(mtd, addr0, pgsize, &written, writebuf);
 	if (err || written != pgsize) {
@@ -359,7 +342,7 @@ static int erasecrosstest(void)
 		return err;
 
 	pr_info("writing 1st page of block %d\n", ebnum);
-	set_random_data(writebuf, pgsize);
+	prandom_bytes_state(&rnd_state, writebuf, pgsize);
 	strcpy(writebuf, "There is no data like this!");
 	err = mtd_write(mtd, addr0, pgsize, &written, writebuf);
 	if (err || written != pgsize) {
@@ -417,7 +400,7 @@ static int erasetest(void)
 		return err;
 
 	pr_info("writing 1st page of block %d\n", ebnum);
-	set_random_data(writebuf, pgsize);
+	prandom_bytes_state(&rnd_state, writebuf, pgsize);
 	err = mtd_write(mtd, addr0, pgsize, &written, writebuf);
 	if (err || written != pgsize) {
 		pr_err("error: write failed at %#llx\n",
@@ -565,7 +548,7 @@ static int __init mtd_pagetest_init(void)
 	pr_info("erased %u eraseblocks\n", i);
 
 	/* Write all eraseblocks */
-	simple_srand(1);
+	prandom_seed_state(&rnd_state, 1);
 	pr_info("writing whole device\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
@@ -580,7 +563,7 @@ static int __init mtd_pagetest_init(void)
 	pr_info("written %u eraseblocks\n", i);
 
 	/* Check all eraseblocks */
-	simple_srand(1);
+	prandom_seed_state(&rnd_state, 1);
 	pr_info("verifying all eraseblocks\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c
index 596cbea8df4c..a6ce9c1fa6c5 100644
--- a/drivers/mtd/tests/mtd_speedtest.c
+++ b/drivers/mtd/tests/mtd_speedtest.c
@@ -49,13 +49,6 @@ static int pgcnt;
 static int goodebcnt;
 static struct timeval start, finish;
 
-static void set_random_data(unsigned char *buf, size_t len)
-{
-	size_t i;
-
-	for (i = 0; i < len; ++i)
-		buf[i] = random32();
-}
 
 static int erase_eraseblock(int ebnum)
 {
@@ -396,7 +389,7 @@ static int __init mtd_speedtest_init(void)
 		goto out;
 	}
 
-	set_random_data(iobuf, mtd->erasesize);
+	prandom_bytes(iobuf, mtd->erasesize);
 
 	err = scan_for_bad_eraseblocks();
 	if (err)
diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c
index 3729f679ae5d..787f539d16ca 100644
--- a/drivers/mtd/tests/mtd_stresstest.c
+++ b/drivers/mtd/tests/mtd_stresstest.c
@@ -55,7 +55,7 @@ static int rand_eb(void)
 	unsigned int eb;
 
 again:
-	eb = random32();
+	eb = prandom_u32();
 	/* Read or write up 2 eraseblocks at a time - hence 'ebcnt - 1' */
 	eb %= (ebcnt - 1);
 	if (bbt[eb])
@@ -67,7 +67,7 @@ static int rand_offs(void)
 {
 	unsigned int offs;
 
-	offs = random32();
+	offs = prandom_u32();
 	offs %= bufsize;
 	return offs;
 }
@@ -76,7 +76,7 @@ static int rand_len(int offs)
 {
 	unsigned int len;
 
-	len = random32();
+	len = prandom_u32();
 	len %= (bufsize - offs);
 	return len;
 }
@@ -191,7 +191,7 @@ static int do_write(void)
 
 static int do_operation(void)
 {
-	if (random32() & 1)
+	if (prandom_u32() & 1)
 		return do_read();
 	else
 		return do_write();
@@ -282,8 +282,7 @@ static int __init mtd_stresstest_init(void)
 	}
 	for (i = 0; i < ebcnt; i++)
 		offsets[i] = mtd->erasesize;
-	for (i = 0; i < bufsize; i++)
-		writebuf[i] = random32();
+	prandom_bytes(writebuf, bufsize);
 
 	err = scan_for_bad_eraseblocks();
 	if (err)
diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c
index c880c2229c59..aade56f27945 100644
--- a/drivers/mtd/tests/mtd_subpagetest.c
+++ b/drivers/mtd/tests/mtd_subpagetest.c
@@ -28,6 +28,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/random.h>
 
 static int dev = -EINVAL;
 module_param(dev, int, S_IRUGO);
@@ -43,26 +44,7 @@ static int bufsize;
 static int ebcnt;
 static int pgcnt;
 static int errcnt;
-static unsigned long next = 1;
-
-static inline unsigned int simple_rand(void)
-{
-	next = next * 1103515245 + 12345;
-	return (unsigned int)((next / 65536) % 32768);
-}
-
-static inline void simple_srand(unsigned long seed)
-{
-	next = seed;
-}
-
-static void set_random_data(unsigned char *buf, size_t len)
-{
-	size_t i;
-
-	for (i = 0; i < len; ++i)
-		buf[i] = simple_rand();
-}
+static struct rnd_state rnd_state;
 
 static inline void clear_data(unsigned char *buf, size_t len)
 {
@@ -119,7 +101,7 @@ static int write_eraseblock(int ebnum)
 	int err = 0;
 	loff_t addr = ebnum * mtd->erasesize;
 
-	set_random_data(writebuf, subpgsize);
+	prandom_bytes_state(&rnd_state, writebuf, subpgsize);
 	err = mtd_write(mtd, addr, subpgsize, &written, writebuf);
 	if (unlikely(err || written != subpgsize)) {
 		pr_err("error: write failed at %#llx\n",
@@ -133,7 +115,7 @@ static int write_eraseblock(int ebnum)
 
 	addr += subpgsize;
 
-	set_random_data(writebuf, subpgsize);
+	prandom_bytes_state(&rnd_state, writebuf, subpgsize);
 	err = mtd_write(mtd, addr, subpgsize, &written, writebuf);
 	if (unlikely(err || written != subpgsize)) {
 		pr_err("error: write failed at %#llx\n",
@@ -157,7 +139,7 @@ static int write_eraseblock2(int ebnum)
 	for (k = 1; k < 33; ++k) {
 		if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize)
 			break;
-		set_random_data(writebuf, subpgsize * k);
+		prandom_bytes_state(&rnd_state, writebuf, subpgsize * k);
 		err = mtd_write(mtd, addr, subpgsize * k, &written, writebuf);
 		if (unlikely(err || written != subpgsize * k)) {
 			pr_err("error: write failed at %#llx\n",
@@ -193,7 +175,7 @@ static int verify_eraseblock(int ebnum)
 	int err = 0;
 	loff_t addr = ebnum * mtd->erasesize;
 
-	set_random_data(writebuf, subpgsize);
+	prandom_bytes_state(&rnd_state, writebuf, subpgsize);
 	clear_data(readbuf, subpgsize);
 	err = mtd_read(mtd, addr, subpgsize, &read, readbuf);
 	if (unlikely(err || read != subpgsize)) {
@@ -220,7 +202,7 @@ static int verify_eraseblock(int ebnum)
 
 	addr += subpgsize;
 
-	set_random_data(writebuf, subpgsize);
+	prandom_bytes_state(&rnd_state, writebuf, subpgsize);
 	clear_data(readbuf, subpgsize);
 	err = mtd_read(mtd, addr, subpgsize, &read, readbuf);
 	if (unlikely(err || read != subpgsize)) {
@@ -257,7 +239,7 @@ static int verify_eraseblock2(int ebnum)
 	for (k = 1; k < 33; ++k) {
 		if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize)
 			break;
-		set_random_data(writebuf, subpgsize * k);
+		prandom_bytes_state(&rnd_state, writebuf, subpgsize * k);
 		clear_data(readbuf, subpgsize * k);
 		err = mtd_read(mtd, addr, subpgsize * k, &read, readbuf);
 		if (unlikely(err || read != subpgsize * k)) {
@@ -430,7 +412,7 @@ static int __init mtd_subpagetest_init(void)
 		goto out;
 
 	pr_info("writing whole device\n");
-	simple_srand(1);
+	prandom_seed_state(&rnd_state, 1);
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
 			continue;
@@ -443,7 +425,7 @@ static int __init mtd_subpagetest_init(void)
 	}
 	pr_info("written %u eraseblocks\n", i);
 
-	simple_srand(1);
+	prandom_seed_state(&rnd_state, 1);
 	pr_info("verifying all eraseblocks\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
@@ -466,7 +448,7 @@ static int __init mtd_subpagetest_init(void)
 		goto out;
 
 	/* Write all eraseblocks */
-	simple_srand(3);
+	prandom_seed_state(&rnd_state, 3);
 	pr_info("writing whole device\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
@@ -481,7 +463,7 @@ static int __init mtd_subpagetest_init(void)
 	pr_info("written %u eraseblocks\n", i);
 
 	/* Check all eraseblocks */
-	simple_srand(3);
+	prandom_seed_state(&rnd_state, 3);
 	pr_info("verifying all eraseblocks\n");
 	for (i = 0; i < ebcnt; ++i) {
 		if (bbt[i])
diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c
index c4cde1e9eddb..3a9f6a6a79f9 100644
--- a/drivers/mtd/tests/mtd_torturetest.c
+++ b/drivers/mtd/tests/mtd_torturetest.c
@@ -208,7 +208,7 @@ static inline int write_pattern(int ebnum, void *buf)
 static int __init tort_init(void)
 {
 	int err = 0, i, infinite = !cycles_count;
-	int bad_ebs[ebcnt];
+	int *bad_ebs;
 
 	printk(KERN_INFO "\n");
 	printk(KERN_INFO "=================================================\n");
@@ -250,28 +250,24 @@ static int __init tort_init(void)
 
 	err = -ENOMEM;
 	patt_5A5 = kmalloc(mtd->erasesize, GFP_KERNEL);
-	if (!patt_5A5) {
-		pr_err("error: cannot allocate memory\n");
+	if (!patt_5A5)
 		goto out_mtd;
-	}
 
 	patt_A5A = kmalloc(mtd->erasesize, GFP_KERNEL);
-	if (!patt_A5A) {
-		pr_err("error: cannot allocate memory\n");
+	if (!patt_A5A)
 		goto out_patt_5A5;
-	}
 
 	patt_FF = kmalloc(mtd->erasesize, GFP_KERNEL);
-	if (!patt_FF) {
-		pr_err("error: cannot allocate memory\n");
+	if (!patt_FF)
 		goto out_patt_A5A;
-	}
 
 	check_buf = kmalloc(mtd->erasesize, GFP_KERNEL);
-	if (!check_buf) {
-		pr_err("error: cannot allocate memory\n");
+	if (!check_buf)
 		goto out_patt_FF;
-	}
+
+	bad_ebs = kcalloc(ebcnt, sizeof(*bad_ebs), GFP_KERNEL);
+	if (!bad_ebs)
+		goto out_check_buf;
 
 	err = 0;
 
@@ -290,7 +286,6 @@ static int __init tort_init(void)
 	/*
 	 * Check if there is a bad eraseblock among those we are going to test.
 	 */
-	memset(&bad_ebs[0], 0, sizeof(int) * ebcnt);
 	if (mtd_can_have_bb(mtd)) {
 		for (i = eb; i < eb + ebcnt; i++) {
 			err = mtd_block_isbad(mtd, (loff_t)i * mtd->erasesize);
@@ -394,6 +389,8 @@ out:
 
 	pr_info("finished after %u erase cycles\n",
 	       erase_cycles);
+	kfree(bad_ebs);
+out_check_buf:
 	kfree(check_buf);
 out_patt_FF:
 	kfree(patt_FF);
diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h
index 33f8f3b2c9b2..cba89fcd1587 100644
--- a/drivers/mtd/ubi/debug.h
+++ b/drivers/mtd/ubi/debug.h
@@ -86,7 +86,7 @@ static inline int ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi)
 static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi)
 {
 	if (ubi->dbg.emulate_bitflips)
-		return !(random32() % 200);
+		return !(prandom_u32() % 200);
 	return 0;
 }
 
@@ -100,7 +100,7 @@ static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi)
 static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi)
 {
 	if (ubi->dbg.emulate_io_failures)
-		return !(random32() % 500);
+		return !(prandom_u32() % 500);
 	return 0;
 }
 
@@ -114,7 +114,7 @@ static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi)
 static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi)
 {
 	if (ubi->dbg.emulate_io_failures)
-		return !(random32() % 400);
+		return !(prandom_u32() % 400);
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index a7efec293037..9b017d9c58e9 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -381,7 +381,7 @@ static void b44_set_flow_ctrl(struct b44 *bp, u32 local, u32 remote)
 }
 
 #ifdef CONFIG_BCM47XX
-#include <asm/mach-bcm47xx/nvram.h>
+#include <bcm47xx_nvram.h>
 static void b44_wap54g10_workaround(struct b44 *bp)
 {
 	char buf[20];
@@ -393,7 +393,7 @@ static void b44_wap54g10_workaround(struct b44 *bp)
 	 * see https://dev.openwrt.org/ticket/146
 	 * check and reset bit "isolate"
 	 */
-	if (nvram_getenv("boardnum", buf, sizeof(buf)) < 0)
+	if (bcm47xx_nvram_getenv("boardnum", buf, sizeof(buf)) < 0)
 		return;
 	if (simple_strtoul(buf, NULL, 0) == 2) {
 		err = __b44_readphy(bp, 0, MII_BMCR, &val);
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index bf985c04524e..639049d7e92d 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -15,7 +15,7 @@
 #include <linux/mii.h>
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
-#include <asm/mach-bcm47xx/nvram.h>
+#include <bcm47xx_nvram.h>
 
 static const struct bcma_device_id bgmac_bcma_tbl[] = {
 	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_4706_MAC_GBIT, BCMA_ANY_REV, BCMA_ANY_CLASS),
@@ -908,7 +908,7 @@ static void bgmac_chip_reset(struct bgmac *bgmac)
 			     BGMAC_CHIPCTL_1_IF_TYPE_RMII;
 		char buf[2];
 
-		if (nvram_getenv("et_swtype", buf, 1) > 0) {
+		if (bcm47xx_nvram_getenv("et_swtype", buf, 1) > 0) {
 			if (kstrtou8(buf, 0, &et_swtype))
 				bgmac_err(bgmac, "Failed to parse et_swtype (%s)\n",
 					  buf);
@@ -1386,7 +1386,7 @@ static int bgmac_probe(struct bcma_device *core)
 	}
 
 	bgmac->int_mask = BGMAC_IS_ERRMASK | BGMAC_IS_RX | BGMAC_IS_TX_MASK;
-	if (nvram_getenv("et0_no_txint", NULL, 0) == 0)
+	if (bcm47xx_nvram_getenv("et0_no_txint", NULL, 0) == 0)
 		bgmac->int_mask &= ~BGMAC_IS_TX_MASK;
 
 	/* TODO: reset the external phy. Specs are needed */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index f4d2e9e3c6d5..c3f1afd86906 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -2197,13 +2197,13 @@ static int ixgbe_get_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
 	union ixgbe_atr_input *mask = &adapter->fdir_mask;
 	struct ethtool_rx_flow_spec *fsp =
 		(struct ethtool_rx_flow_spec *)&cmd->fs;
-	struct hlist_node *node, *node2;
+	struct hlist_node *node2;
 	struct ixgbe_fdir_filter *rule = NULL;
 
 	/* report total rule count */
 	cmd->data = (1024 << adapter->fdir_pballoc) - 2;
 
-	hlist_for_each_entry_safe(rule, node, node2,
+	hlist_for_each_entry_safe(rule, node2,
 				  &adapter->fdir_filter_list, fdir_node) {
 		if (fsp->location <= rule->sw_idx)
 			break;
@@ -2264,14 +2264,14 @@ static int ixgbe_get_ethtool_fdir_all(struct ixgbe_adapter *adapter,
 				      struct ethtool_rxnfc *cmd,
 				      u32 *rule_locs)
 {
-	struct hlist_node *node, *node2;
+	struct hlist_node *node2;
 	struct ixgbe_fdir_filter *rule;
 	int cnt = 0;
 
 	/* report total rule count */
 	cmd->data = (1024 << adapter->fdir_pballoc) - 2;
 
-	hlist_for_each_entry_safe(rule, node, node2,
+	hlist_for_each_entry_safe(rule, node2,
 				  &adapter->fdir_filter_list, fdir_node) {
 		if (cnt == cmd->rule_cnt)
 			return -EMSGSIZE;
@@ -2358,19 +2358,19 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
 					   u16 sw_idx)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
-	struct hlist_node *node, *node2, *parent;
-	struct ixgbe_fdir_filter *rule;
+	struct hlist_node *node2;
+	struct ixgbe_fdir_filter *rule, *parent;
 	int err = -EINVAL;
 
 	parent = NULL;
 	rule = NULL;
 
-	hlist_for_each_entry_safe(rule, node, node2,
+	hlist_for_each_entry_safe(rule, node2,
 				  &adapter->fdir_filter_list, fdir_node) {
 		/* hash found, or no matching entry */
 		if (rule->sw_idx >= sw_idx)
 			break;
-		parent = node;
+		parent = rule;
 	}
 
 	/* if there is an old rule occupying our place remove it */
@@ -2399,7 +2399,7 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
 
 	/* add filter to the list */
 	if (parent)
-		hlist_add_after(parent, &input->fdir_node);
+		hlist_add_after(&parent->fdir_node, &input->fdir_node);
 	else
 		hlist_add_head(&input->fdir_node,
 			       &adapter->fdir_filter_list);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 68478d6dfa2d..db5611ae407e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -3891,7 +3891,7 @@ static void ixgbe_configure_pb(struct ixgbe_adapter *adapter)
 static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
-	struct hlist_node *node, *node2;
+	struct hlist_node *node2;
 	struct ixgbe_fdir_filter *filter;
 
 	spin_lock(&adapter->fdir_perfect_lock);
@@ -3899,7 +3899,7 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter)
 	if (!hlist_empty(&adapter->fdir_filter_list))
 		ixgbe_fdir_set_input_mask_82599(hw, &adapter->fdir_mask);
 
-	hlist_for_each_entry_safe(filter, node, node2,
+	hlist_for_each_entry_safe(filter, node2,
 				  &adapter->fdir_filter_list, fdir_node) {
 		ixgbe_fdir_write_perfect_filter_82599(hw,
 				&filter->filter,
@@ -4356,12 +4356,12 @@ static void ixgbe_clean_all_tx_rings(struct ixgbe_adapter *adapter)
 
 static void ixgbe_fdir_filter_exit(struct ixgbe_adapter *adapter)
 {
-	struct hlist_node *node, *node2;
+	struct hlist_node *node2;
 	struct ixgbe_fdir_filter *filter;
 
 	spin_lock(&adapter->fdir_perfect_lock);
 
-	hlist_for_each_entry_safe(filter, node, node2,
+	hlist_for_each_entry_safe(filter, node2,
 				  &adapter->fdir_filter_list, fdir_node) {
 		hlist_del(&filter->fdir_node);
 		kfree(filter);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 5385474bb526..bb4d8d99f36d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -225,11 +225,10 @@ static inline struct mlx4_en_filter *
 mlx4_en_filter_find(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip,
 		    __be16 src_port, __be16 dst_port)
 {
-	struct hlist_node *elem;
 	struct mlx4_en_filter *filter;
 	struct mlx4_en_filter *ret = NULL;
 
-	hlist_for_each_entry(filter, elem,
+	hlist_for_each_entry(filter,
 			     filter_hash_bucket(priv, src_ip, dst_ip,
 						src_port, dst_port),
 			     filter_chain) {
@@ -574,13 +573,13 @@ static void mlx4_en_put_qp(struct mlx4_en_priv *priv)
 
 	if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) {
 		struct mlx4_mac_entry *entry;
-		struct hlist_node *n, *tmp;
+		struct hlist_node *tmp;
 		struct hlist_head *bucket;
 		unsigned int mac_hash;
 
 		mac_hash = priv->dev->dev_addr[MLX4_EN_MAC_HASH_IDX];
 		bucket = &priv->mac_hash[mac_hash];
-		hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) {
+		hlist_for_each_entry_safe(entry, tmp, bucket, hlist) {
 			if (ether_addr_equal_64bits(entry->mac,
 						    priv->dev->dev_addr)) {
 				en_dbg(DRV, priv, "Releasing qp: port %d, MAC %pM, qpn %d\n",
@@ -609,11 +608,11 @@ static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn,
 		struct hlist_head *bucket;
 		unsigned int mac_hash;
 		struct mlx4_mac_entry *entry;
-		struct hlist_node *n, *tmp;
+		struct hlist_node *tmp;
 		u64 prev_mac_u64 = mlx4_en_mac_to_u64(prev_mac);
 
 		bucket = &priv->mac_hash[prev_mac[MLX4_EN_MAC_HASH_IDX]];
-		hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) {
+		hlist_for_each_entry_safe(entry, tmp, bucket, hlist) {
 			if (ether_addr_equal_64bits(entry->mac, prev_mac)) {
 				mlx4_en_uc_steer_release(priv, entry->mac,
 							 qpn, entry->reg_id);
@@ -1019,7 +1018,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv,
 {
 	struct netdev_hw_addr *ha;
 	struct mlx4_mac_entry *entry;
-	struct hlist_node *n, *tmp;
+	struct hlist_node *tmp;
 	bool found;
 	u64 mac;
 	int err = 0;
@@ -1035,7 +1034,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv,
 	/* find what to remove */
 	for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) {
 		bucket = &priv->mac_hash[i];
-		hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) {
+		hlist_for_each_entry_safe(entry, tmp, bucket, hlist) {
 			found = false;
 			netdev_for_each_uc_addr(ha, dev) {
 				if (ether_addr_equal_64bits(entry->mac,
@@ -1078,7 +1077,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv,
 	netdev_for_each_uc_addr(ha, dev) {
 		found = false;
 		bucket = &priv->mac_hash[ha->addr[MLX4_EN_MAC_HASH_IDX]];
-		hlist_for_each_entry(entry, n, bucket, hlist) {
+		hlist_for_each_entry(entry, bucket, hlist) {
 			if (ether_addr_equal_64bits(entry->mac, ha->addr)) {
 				found = true;
 				break;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index ce38654bbdd0..c7f856308e1a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -35,6 +35,7 @@
 #include <linux/slab.h>
 #include <linux/mlx4/qp.h>
 #include <linux/skbuff.h>
+#include <linux/rculist.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
 #include <linux/vmalloc.h>
@@ -617,7 +618,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 
 			if (is_multicast_ether_addr(ethh->h_dest)) {
 				struct mlx4_mac_entry *entry;
-				struct hlist_node *n;
 				struct hlist_head *bucket;
 				unsigned int mac_hash;
 
@@ -625,7 +625,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 				mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX];
 				bucket = &priv->mac_hash[mac_hash];
 				rcu_read_lock();
-				hlist_for_each_entry_rcu(entry, n, bucket, hlist) {
+				hlist_for_each_entry_rcu(entry, bucket, hlist) {
 					if (ether_addr_equal_64bits(entry->mac,
 								    ethh->h_source)) {
 						rcu_read_unlock();
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index 325e11e1ce0f..f89cc7a3fe6c 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
@@ -576,7 +576,7 @@ void qlcnic_free_mac_list(struct qlcnic_adapter *adapter)
 void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter)
 {
 	struct qlcnic_filter *tmp_fil;
-	struct hlist_node *tmp_hnode, *n;
+	struct hlist_node *n;
 	struct hlist_head *head;
 	int i;
 	unsigned long time;
@@ -584,7 +584,7 @@ void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter)
 
 	for (i = 0; i < adapter->fhash.fbucket_size; i++) {
 		head = &(adapter->fhash.fhead[i]);
-		hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) {
+		hlist_for_each_entry_safe(tmp_fil, n, head, fnode) {
 			cmd =  tmp_fil->vlan_id ? QLCNIC_MAC_VLAN_DEL :
 						  QLCNIC_MAC_DEL;
 			time = tmp_fil->ftime;
@@ -604,7 +604,7 @@ void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter)
 	for (i = 0; i < adapter->rx_fhash.fbucket_size; i++) {
 		head = &(adapter->rx_fhash.fhead[i]);
 
-		hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode)
+		hlist_for_each_entry_safe(tmp_fil, n, head, fnode)
 		{
 			time = tmp_fil->ftime;
 			if (jiffies > (QLCNIC_FILTER_AGE * HZ + time)) {
@@ -621,14 +621,14 @@ void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter)
 void qlcnic_delete_lb_filters(struct qlcnic_adapter *adapter)
 {
 	struct qlcnic_filter *tmp_fil;
-	struct hlist_node *tmp_hnode, *n;
+	struct hlist_node *n;
 	struct hlist_head *head;
 	int i;
 	u8 cmd;
 
 	for (i = 0; i < adapter->fhash.fbucket_size; i++) {
 		head = &(adapter->fhash.fhead[i]);
-		hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) {
+		hlist_for_each_entry_safe(tmp_fil, n, head, fnode) {
 			cmd =  tmp_fil->vlan_id ? QLCNIC_MAC_VLAN_DEL :
 						  QLCNIC_MAC_DEL;
 			qlcnic_sre_macaddr_change(adapter,
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index 6387e0cc3ea9..0e630061bff3 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -162,7 +162,7 @@ void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb,
 {
 	struct ethhdr *phdr = (struct ethhdr *)(skb->data);
 	struct qlcnic_filter *fil, *tmp_fil;
-	struct hlist_node *tmp_hnode, *n;
+	struct hlist_node *n;
 	struct hlist_head *head;
 	unsigned long time;
 	u64 src_addr = 0;
@@ -179,7 +179,7 @@ void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb,
 			 (adapter->fhash.fbucket_size - 1);
 		head = &(adapter->rx_fhash.fhead[hindex]);
 
-		hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) {
+		hlist_for_each_entry_safe(tmp_fil, n, head, fnode) {
 			if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) &&
 			    tmp_fil->vlan_id == vlan_id) {
 				time = tmp_fil->ftime;
@@ -205,7 +205,7 @@ void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb,
 			 (adapter->fhash.fbucket_size - 1);
 		head = &(adapter->rx_fhash.fhead[hindex]);
 		spin_lock(&adapter->rx_mac_learn_lock);
-		hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) {
+		hlist_for_each_entry_safe(tmp_fil, n, head, fnode) {
 			if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) &&
 			    tmp_fil->vlan_id == vlan_id) {
 				found = 1;
@@ -272,7 +272,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter,
 			       struct sk_buff *skb)
 {
 	struct qlcnic_filter *fil, *tmp_fil;
-	struct hlist_node *tmp_hnode, *n;
+	struct hlist_node *n;
 	struct hlist_head *head;
 	struct net_device *netdev = adapter->netdev;
 	struct ethhdr *phdr = (struct ethhdr *)(skb->data);
@@ -294,7 +294,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter,
 	hindex = qlcnic_mac_hash(src_addr) & (adapter->fhash.fbucket_size - 1);
 	head = &(adapter->fhash.fhead[hindex]);
 
-	hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) {
+	hlist_for_each_entry_safe(tmp_fil, n, head, fnode) {
 		if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) &&
 		    tmp_fil->vlan_id == vlan_id) {
 			if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime))
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index 289b4eefb42f..1df0ff3839e8 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -614,10 +614,9 @@ struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb)
 {
 	unsigned int hash = vnet_hashfn(skb->data);
 	struct hlist_head *hp = &vp->port_hash[hash];
-	struct hlist_node *n;
 	struct vnet_port *port;
 
-	hlist_for_each_entry(port, n, hp, hash) {
+	hlist_for_each_entry(port, hp, hash) {
 		if (ether_addr_equal(port->raddr, skb->data))
 			return port;
 	}
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index defcd8a85744..417b2af1aa80 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -55,9 +55,8 @@ static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port,
 					       const unsigned char *addr)
 {
 	struct macvlan_dev *vlan;
-	struct hlist_node *n;
 
-	hlist_for_each_entry_rcu(vlan, n, &port->vlan_hash[addr[5]], hlist) {
+	hlist_for_each_entry_rcu(vlan, &port->vlan_hash[addr[5]], hlist) {
 		if (ether_addr_equal_64bits(vlan->dev->dev_addr, addr))
 			return vlan;
 	}
@@ -149,7 +148,6 @@ static void macvlan_broadcast(struct sk_buff *skb,
 {
 	const struct ethhdr *eth = eth_hdr(skb);
 	const struct macvlan_dev *vlan;
-	struct hlist_node *n;
 	struct sk_buff *nskb;
 	unsigned int i;
 	int err;
@@ -159,7 +157,7 @@ static void macvlan_broadcast(struct sk_buff *skb,
 		return;
 
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++) {
-		hlist_for_each_entry_rcu(vlan, n, &port->vlan_hash[i], hlist) {
+		hlist_for_each_entry_rcu(vlan, &port->vlan_hash[i], hlist) {
 			if (vlan->dev == src || !(vlan->mode & mode))
 				continue;
 
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 97243011d319..a449439bd653 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -279,28 +279,17 @@ static int macvtap_receive(struct sk_buff *skb)
 static int macvtap_get_minor(struct macvlan_dev *vlan)
 {
 	int retval = -ENOMEM;
-	int id;
 
 	mutex_lock(&minor_lock);
-	if (idr_pre_get(&minor_idr, GFP_KERNEL) == 0)
-		goto exit;
-
-	retval = idr_get_new_above(&minor_idr, vlan, 1, &id);
-	if (retval < 0) {
-		if (retval == -EAGAIN)
-			retval = -ENOMEM;
-		goto exit;
-	}
-	if (id < MACVTAP_NUM_DEVS) {
-		vlan->minor = id;
-	} else {
+	retval = idr_alloc(&minor_idr, vlan, 1, MACVTAP_NUM_DEVS, GFP_KERNEL);
+	if (retval >= 0) {
+		vlan->minor = retval;
+	} else if (retval == -ENOSPC) {
 		printk(KERN_ERR "too many macvtap devices\n");
 		retval = -EINVAL;
-		idr_remove(&minor_idr, id);
 	}
-exit:
 	mutex_unlock(&minor_lock);
-	return retval;
+	return retval < 0 ? retval : 0;
 }
 
 static void macvtap_free_minor(struct macvlan_dev *vlan)
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 3db9131e9229..72ff14b811c6 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -2953,46 +2953,21 @@ static void __exit ppp_cleanup(void)
  * by holding all_ppp_mutex
  */
 
-static int __unit_alloc(struct idr *p, void *ptr, int n)
-{
-	int unit, err;
-
-again:
-	if (!idr_pre_get(p, GFP_KERNEL)) {
-		pr_err("PPP: No free memory for idr\n");
-		return -ENOMEM;
-	}
-
-	err = idr_get_new_above(p, ptr, n, &unit);
-	if (err < 0) {
-		if (err == -EAGAIN)
-			goto again;
-		return err;
-	}
-
-	return unit;
-}
-
 /* associate pointer with specified number */
 static int unit_set(struct idr *p, void *ptr, int n)
 {
 	int unit;
 
-	unit = __unit_alloc(p, ptr, n);
-	if (unit < 0)
-		return unit;
-	else if (unit != n) {
-		idr_remove(p, unit);
-		return -EINVAL;
-	}
-
+	unit = idr_alloc(p, ptr, n, n + 1, GFP_KERNEL);
+	if (unit == -ENOSPC)
+		unit = -EINVAL;
 	return unit;
 }
 
 /* get new free unit number and associate pointer with it */
 static int unit_get(struct idr *p, void *ptr)
 {
-	return __unit_alloc(p, ptr, 0);
+	return idr_alloc(p, ptr, 0, 0, GFP_KERNEL);
 }
 
 /* put unit number back to a pool */
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b6f45c5d84d5..2c6a22e278ea 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -197,9 +197,8 @@ static inline u32 tun_hashfn(u32 rxhash)
 static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash)
 {
 	struct tun_flow_entry *e;
-	struct hlist_node *n;
 
-	hlist_for_each_entry_rcu(e, n, head, hash_link) {
+	hlist_for_each_entry_rcu(e, head, hash_link) {
 		if (e->rxhash == rxhash)
 			return e;
 	}
@@ -241,9 +240,9 @@ static void tun_flow_flush(struct tun_struct *tun)
 	spin_lock_bh(&tun->lock);
 	for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
 		struct tun_flow_entry *e;
-		struct hlist_node *h, *n;
+		struct hlist_node *n;
 
-		hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link)
+		hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link)
 			tun_flow_delete(tun, e);
 	}
 	spin_unlock_bh(&tun->lock);
@@ -256,9 +255,9 @@ static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index)
 	spin_lock_bh(&tun->lock);
 	for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
 		struct tun_flow_entry *e;
-		struct hlist_node *h, *n;
+		struct hlist_node *n;
 
-		hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) {
+		hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
 			if (e->queue_index == queue_index)
 				tun_flow_delete(tun, e);
 		}
@@ -279,9 +278,9 @@ static void tun_flow_cleanup(unsigned long data)
 	spin_lock_bh(&tun->lock);
 	for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
 		struct tun_flow_entry *e;
-		struct hlist_node *h, *n;
+		struct hlist_node *n;
 
-		hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) {
+		hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
 			unsigned long this_timer;
 			count++;
 			this_timer = e->updated + delay;
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index f736823f8437..f10e58ac9c1b 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -145,9 +145,8 @@ static inline struct hlist_head *vni_head(struct net *net, u32 id)
 static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id)
 {
 	struct vxlan_dev *vxlan;
-	struct hlist_node *node;
 
-	hlist_for_each_entry_rcu(vxlan, node, vni_head(net, id), hlist) {
+	hlist_for_each_entry_rcu(vxlan, vni_head(net, id), hlist) {
 		if (vxlan->vni == id)
 			return vxlan;
 	}
@@ -292,9 +291,8 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
 {
 	struct hlist_head *head = vxlan_fdb_head(vxlan, mac);
 	struct vxlan_fdb *f;
-	struct hlist_node *node;
 
-	hlist_for_each_entry_rcu(f, node, head, hlist) {
+	hlist_for_each_entry_rcu(f, head, hlist) {
 		if (compare_ether_addr(mac, f->eth_addr) == 0)
 			return f;
 	}
@@ -422,10 +420,9 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 
 	for (h = 0; h < FDB_HASH_SIZE; ++h) {
 		struct vxlan_fdb *f;
-		struct hlist_node *n;
 		int err;
 
-		hlist_for_each_entry_rcu(f, n, &vxlan->fdb_head[h], hlist) {
+		hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
 			if (idx < cb->args[0])
 				goto skip;
 
@@ -483,11 +480,10 @@ static bool vxlan_group_used(struct vxlan_net *vn,
 			     const struct vxlan_dev *this)
 {
 	const struct vxlan_dev *vxlan;
-	struct hlist_node *node;
 	unsigned h;
 
 	for (h = 0; h < VNI_HASH_SIZE; ++h)
-		hlist_for_each_entry(vxlan, node, &vn->vni_list[h], hlist) {
+		hlist_for_each_entry(vxlan, &vn->vni_list[h], hlist) {
 			if (vxlan == this)
 				continue;
 
diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c
index 48273dd05b63..4941f201d6c8 100644
--- a/drivers/net/wireless/zd1201.c
+++ b/drivers/net/wireless/zd1201.c
@@ -309,7 +309,6 @@ static void zd1201_usbrx(struct urb *urb)
 	if (data[urb->actual_length-1] == ZD1201_PACKET_RXDATA) {
 		int datalen = urb->actual_length-1;
 		unsigned short len, fc, seq;
-		struct hlist_node *node;
 
 		len = ntohs(*(__be16 *)&data[datalen-2]);
 		if (len>datalen)
@@ -362,7 +361,7 @@ static void zd1201_usbrx(struct urb *urb)
 				hlist_add_head(&frag->fnode, &zd->fraglist);
 				goto resubmit;
 			}
-			hlist_for_each_entry(frag, node, &zd->fraglist, fnode)
+			hlist_for_each_entry(frag, &zd->fraglist, fnode)
 				if (frag->seq == (seq&IEEE80211_SCTL_SEQ))
 					break;
 			if (!frag)
@@ -1831,14 +1830,14 @@ err_zd:
 static void zd1201_disconnect(struct usb_interface *interface)
 {
 	struct zd1201 *zd = usb_get_intfdata(interface);
-	struct hlist_node *node, *node2;
+	struct hlist_node *node2;
 	struct zd1201_frag *frag;
 
 	if (!zd)
 		return;
 	usb_set_intfdata(interface, NULL);
 
-	hlist_for_each_entry_safe(frag, node, node2, &zd->fraglist, fnode) {
+	hlist_for_each_entry_safe(frag, node2, &zd->fraglist, fnode) {
 		hlist_del_init(&frag->fnode);
 		kfree_skb(frag->skb);
 		kfree(frag);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 924e4665bd57..b099e0025d2b 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -842,9 +842,8 @@ static struct pci_cap_saved_state *pci_find_saved_cap(
 	struct pci_dev *pci_dev, char cap)
 {
 	struct pci_cap_saved_state *tmp;
-	struct hlist_node *pos;
 
-	hlist_for_each_entry(tmp, pos, &pci_dev->saved_cap_space, next) {
+	hlist_for_each_entry(tmp, &pci_dev->saved_cap_space, next) {
 		if (tmp->cap.cap_nr == cap)
 			return tmp;
 	}
@@ -1041,7 +1040,6 @@ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev)
 	struct pci_saved_state *state;
 	struct pci_cap_saved_state *tmp;
 	struct pci_cap_saved_data *cap;
-	struct hlist_node *pos;
 	size_t size;
 
 	if (!dev->state_saved)
@@ -1049,7 +1047,7 @@ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev)
 
 	size = sizeof(*state) + sizeof(struct pci_cap_saved_data);
 
-	hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next)
+	hlist_for_each_entry(tmp, &dev->saved_cap_space, next)
 		size += sizeof(struct pci_cap_saved_data) + tmp->cap.size;
 
 	state = kzalloc(size, GFP_KERNEL);
@@ -1060,7 +1058,7 @@ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev)
 	       sizeof(state->config_space));
 
 	cap = state->cap;
-	hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) {
+	hlist_for_each_entry(tmp, &dev->saved_cap_space, next) {
 		size_t len = sizeof(struct pci_cap_saved_data) + tmp->cap.size;
 		memcpy(cap, &tmp->cap, len);
 		cap = (struct pci_cap_saved_data *)((u8 *)cap + len);
@@ -2038,9 +2036,9 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev)
 void pci_free_cap_save_buffers(struct pci_dev *dev)
 {
 	struct pci_cap_saved_state *tmp;
-	struct hlist_node *pos, *n;
+	struct hlist_node *n;
 
-	hlist_for_each_entry_safe(tmp, pos, n, &dev->saved_cap_space, next)
+	hlist_for_each_entry_safe(tmp, n, &dev->saved_cap_space, next)
 		kfree(tmp);
 }
 
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 7ab0b2fba503..3338437b559b 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -79,6 +79,17 @@ config ASUS_LAPTOP
 
 	  If you have an ACPI-compatible ASUS laptop, say Y or M here.
 
+config CHROMEOS_LAPTOP
+	tristate "Chrome OS Laptop"
+	depends on I2C
+	depends on DMI
+	---help---
+	  This driver instantiates i2c and smbus devices such as
+	  light sensors and touchpads.
+
+	  If you have a supported Chromebook, choose Y or M here.
+	  The module will be called chromeos_laptop.
+
 config DELL_LAPTOP
 	tristate "Dell Laptop Extras"
 	depends on X86
@@ -288,9 +299,11 @@ config IDEAPAD_LAPTOP
 	depends on ACPI
 	depends on RFKILL && INPUT
 	depends on SERIO_I8042
+	depends on BACKLIGHT_CLASS_DEVICE
 	select INPUT_SPARSEKMAP
 	help
-	  This is a driver for the rfkill switches on Lenovo IdeaPad netbooks.
+	  This is a driver for Lenovo IdeaPad netbooks contains drivers for
+	  rfkill switch, hotkey, fan control and backlight control.
 
 config THINKPAD_ACPI
 	tristate "ThinkPad ACPI Laptop Extras"
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index bf7e4f935b17..ace2b38942fe 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -50,3 +50,4 @@ obj-$(CONFIG_INTEL_MID_POWER_BUTTON)	+= intel_mid_powerbtn.o
 obj-$(CONFIG_INTEL_OAKTRAIL)	+= intel_oaktrail.o
 obj-$(CONFIG_SAMSUNG_Q10)	+= samsung-q10.o
 obj-$(CONFIG_APPLE_GMUX)	+= apple-gmux.o
+obj-$(CONFIG_CHROMEOS_LAPTOP)	+= chromeos_laptop.o
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index afed7018a2b5..c9076bdaf2c1 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -511,6 +511,24 @@ static struct dmi_system_id acer_quirks[] = {
 		},
 		.driver_data = &quirk_fujitsu_amilo_li_1718,
 	},
+	{
+		.callback = dmi_matched,
+		.ident = "Lenovo Ideapad S205-10382JG",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "10382JG"),
+		},
+		.driver_data = &quirk_lenovo_ideapad_s205,
+	},
+	{
+		.callback = dmi_matched,
+		.ident = "Lenovo Ideapad S205-1038DPG",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "1038DPG"),
+		},
+		.driver_data = &quirk_lenovo_ideapad_s205,
+	},
 	{}
 };
 
@@ -1204,6 +1222,9 @@ static acpi_status WMID_set_capabilities(void)
 			devices = *((u32 *) obj->buffer.pointer);
 		} else if (obj->type == ACPI_TYPE_INTEGER) {
 			devices = (u32) obj->integer.value;
+		} else {
+			kfree(out.pointer);
+			return AE_ERROR;
 		}
 	} else {
 		kfree(out.pointer);
diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index d9f9a0dbc6f3..0eea09c1c134 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
@@ -128,10 +128,12 @@ MODULE_PARM_DESC(als_status, "Set the ALS status on boot "
 /*
  * Some events we use, same for all Asus
  */
-#define ATKD_BR_UP	0x10	/* (event & ~ATKD_BR_UP) = brightness level */
-#define ATKD_BR_DOWN	0x20	/* (event & ~ATKD_BR_DOWN) = britghness level */
-#define ATKD_BR_MIN	ATKD_BR_UP
-#define ATKD_BR_MAX	(ATKD_BR_DOWN | 0xF)	/* 0x2f */
+#define ATKD_BRNUP_MIN		0x10
+#define ATKD_BRNUP_MAX		0x1f
+#define ATKD_BRNDOWN_MIN	0x20
+#define ATKD_BRNDOWN_MAX	0x2f
+#define ATKD_BRNDOWN		0x20
+#define ATKD_BRNUP		0x2f
 #define ATKD_LCD_ON	0x33
 #define ATKD_LCD_OFF	0x34
 
@@ -301,40 +303,65 @@ static const struct key_entry asus_keymap[] = {
 	{KE_KEY, 0x17, { KEY_ZOOM } },
 	{KE_KEY, 0x1f, { KEY_BATTERY } },
 	/* End of Lenovo SL Specific keycodes */
+	{KE_KEY, ATKD_BRNDOWN, { KEY_BRIGHTNESSDOWN } },
+	{KE_KEY, ATKD_BRNUP, { KEY_BRIGHTNESSUP } },
 	{KE_KEY, 0x30, { KEY_VOLUMEUP } },
 	{KE_KEY, 0x31, { KEY_VOLUMEDOWN } },
 	{KE_KEY, 0x32, { KEY_MUTE } },
-	{KE_KEY, 0x33, { KEY_SWITCHVIDEOMODE } },
-	{KE_KEY, 0x34, { KEY_SWITCHVIDEOMODE } },
+	{KE_KEY, 0x33, { KEY_DISPLAYTOGGLE } }, /* LCD on */
+	{KE_KEY, 0x34, { KEY_DISPLAY_OFF } }, /* LCD off */
 	{KE_KEY, 0x40, { KEY_PREVIOUSSONG } },
 	{KE_KEY, 0x41, { KEY_NEXTSONG } },
-	{KE_KEY, 0x43, { KEY_STOPCD } },
+	{KE_KEY, 0x43, { KEY_STOPCD } }, /* Stop/Eject */
 	{KE_KEY, 0x45, { KEY_PLAYPAUSE } },
-	{KE_KEY, 0x4c, { KEY_MEDIA } },
+	{KE_KEY, 0x4c, { KEY_MEDIA } }, /* WMP Key */
 	{KE_KEY, 0x50, { KEY_EMAIL } },
 	{KE_KEY, 0x51, { KEY_WWW } },
 	{KE_KEY, 0x55, { KEY_CALC } },
+	{KE_IGNORE, 0x57, },  /* Battery mode */
+	{KE_IGNORE, 0x58, },  /* AC mode */
 	{KE_KEY, 0x5C, { KEY_SCREENLOCK } },  /* Screenlock */
-	{KE_KEY, 0x5D, { KEY_WLAN } },
-	{KE_KEY, 0x5E, { KEY_WLAN } },
-	{KE_KEY, 0x5F, { KEY_WLAN } },
-	{KE_KEY, 0x60, { KEY_SWITCHVIDEOMODE } },
-	{KE_KEY, 0x61, { KEY_SWITCHVIDEOMODE } },
-	{KE_KEY, 0x62, { KEY_SWITCHVIDEOMODE } },
-	{KE_KEY, 0x63, { KEY_SWITCHVIDEOMODE } },
-	{KE_KEY, 0x6B, { KEY_F13 } }, /* Lock Touchpad */
+	{KE_KEY, 0x5D, { KEY_WLAN } }, /* WLAN Toggle */
+	{KE_KEY, 0x5E, { KEY_WLAN } }, /* WLAN Enable */
+	{KE_KEY, 0x5F, { KEY_WLAN } }, /* WLAN Disable */
+	{KE_KEY, 0x60, { KEY_TOUCHPAD_ON } },
+	{KE_KEY, 0x61, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD only */
+	{KE_KEY, 0x62, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT only */
+	{KE_KEY, 0x63, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT */
+	{KE_KEY, 0x64, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV */
+	{KE_KEY, 0x65, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV */
+	{KE_KEY, 0x66, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV */
+	{KE_KEY, 0x67, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV */
+	{KE_KEY, 0x6B, { KEY_TOUCHPAD_TOGGLE } }, /* Lock Touchpad */
 	{KE_KEY, 0x6C, { KEY_SLEEP } }, /* Suspend */
 	{KE_KEY, 0x6D, { KEY_SLEEP } }, /* Hibernate */
-	{KE_KEY, 0x7E, { KEY_BLUETOOTH } },
-	{KE_KEY, 0x7D, { KEY_BLUETOOTH } },
+	{KE_IGNORE, 0x6E, },  /* Low Battery notification */
+	{KE_KEY, 0x7D, { KEY_BLUETOOTH } }, /* Bluetooth Enable */
+	{KE_KEY, 0x7E, { KEY_BLUETOOTH } }, /* Bluetooth Disable */
 	{KE_KEY, 0x82, { KEY_CAMERA } },
-	{KE_KEY, 0x88, { KEY_WLAN  } },
-	{KE_KEY, 0x8A, { KEY_PROG1 } },
+	{KE_KEY, 0x88, { KEY_RFKILL  } }, /* Radio Toggle Key */
+	{KE_KEY, 0x8A, { KEY_PROG1 } }, /* Color enhancement mode */
+	{KE_KEY, 0x8C, { KEY_SWITCHVIDEOMODE } }, /* SDSP DVI only */
+	{KE_KEY, 0x8D, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + DVI */
+	{KE_KEY, 0x8E, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + DVI */
+	{KE_KEY, 0x8F, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV + DVI */
+	{KE_KEY, 0x90, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + DVI */
+	{KE_KEY, 0x91, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV + DVI */
+	{KE_KEY, 0x92, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV + DVI */
+	{KE_KEY, 0x93, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + DVI */
 	{KE_KEY, 0x95, { KEY_MEDIA } },
 	{KE_KEY, 0x99, { KEY_PHONE } },
-	{KE_KEY, 0xc4, { KEY_KBDILLUMUP } },
-	{KE_KEY, 0xc5, { KEY_KBDILLUMDOWN } },
-	{KE_KEY, 0xb5, { KEY_CALC } },
+	{KE_KEY, 0xA0, { KEY_SWITCHVIDEOMODE } }, /* SDSP HDMI only */
+	{KE_KEY, 0xA1, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + HDMI */
+	{KE_KEY, 0xA2, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + HDMI */
+	{KE_KEY, 0xA3, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV + HDMI */
+	{KE_KEY, 0xA4, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + HDMI */
+	{KE_KEY, 0xA5, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV + HDMI */
+	{KE_KEY, 0xA6, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV + HDMI */
+	{KE_KEY, 0xA7, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + HDMI */
+	{KE_KEY, 0xB5, { KEY_CALC } },
+	{KE_KEY, 0xC4, { KEY_KBDILLUMUP } },
+	{KE_KEY, 0xC5, { KEY_KBDILLUMDOWN } },
 	{KE_END, 0},
 };
 
@@ -1521,15 +1548,19 @@ static void asus_acpi_notify(struct acpi_device *device, u32 event)
 					dev_name(&asus->device->dev), event,
 					count);
 
-	/* Brightness events are special */
-	if (event >= ATKD_BR_MIN && event <= ATKD_BR_MAX) {
+	if (event >= ATKD_BRNUP_MIN && event <= ATKD_BRNUP_MAX)
+		event = ATKD_BRNUP;
+	else if (event >= ATKD_BRNDOWN_MIN &&
+		 event <= ATKD_BRNDOWN_MAX)
+		event = ATKD_BRNDOWN;
 
-		/* Ignore them completely if the acpi video driver is used */
+	/* Brightness events are special */
+	if (event == ATKD_BRNDOWN || event == ATKD_BRNUP) {
 		if (asus->backlight_device != NULL) {
 			/* Update the backlight device. */
 			asus_backlight_notify(asus);
+			return ;
 		}
-		return ;
 	}
 
 	/* Accelerometer "coarse orientation change" event */
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index be790402e0f1..210b5b872125 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -59,6 +59,17 @@ static struct quirk_entry quirk_asus_unknown = {
 	.wapf = 0,
 };
 
+/*
+ * For those machines that need software to control bt/wifi status
+ * and can't adjust brightness through ACPI interface
+ * and have duplicate events(ACPI and WMI) for display toggle
+ */
+static struct quirk_entry quirk_asus_x55u = {
+	.wapf = 4,
+	.wmi_backlight_power = true,
+	.no_display_toggle = true,
+};
+
 static struct quirk_entry quirk_asus_x401u = {
 	.wapf = 4,
 };
@@ -77,6 +88,15 @@ static struct dmi_system_id asus_quirks[] = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "X401U"),
 		},
+		.driver_data = &quirk_asus_x55u,
+	},
+	{
+		.callback = dmi_matched,
+		.ident = "ASUSTeK COMPUTER INC. X401A",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X401A"),
+		},
 		.driver_data = &quirk_asus_x401u,
 	},
 	{
@@ -95,6 +115,15 @@ static struct dmi_system_id asus_quirks[] = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "X501U"),
 		},
+		.driver_data = &quirk_asus_x55u,
+	},
+	{
+		.callback = dmi_matched,
+		.ident = "ASUSTeK COMPUTER INC. X501A",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X501A"),
+		},
 		.driver_data = &quirk_asus_x401u,
 	},
 	{
@@ -131,7 +160,7 @@ static struct dmi_system_id asus_quirks[] = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "X55U"),
 		},
-		.driver_data = &quirk_asus_x401u,
+		.driver_data = &quirk_asus_x55u,
 	},
 	{
 		.callback = dmi_matched,
@@ -161,6 +190,8 @@ static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver)
 }
 
 static const struct key_entry asus_nb_wmi_keymap[] = {
+	{ KE_KEY, ASUS_WMI_BRN_DOWN, { KEY_BRIGHTNESSDOWN } },
+	{ KE_KEY, ASUS_WMI_BRN_UP, { KEY_BRIGHTNESSUP } },
 	{ KE_KEY, 0x30, { KEY_VOLUMEUP } },
 	{ KE_KEY, 0x31, { KEY_VOLUMEDOWN } },
 	{ KE_KEY, 0x32, { KEY_MUTE } },
@@ -168,9 +199,9 @@ static const struct key_entry asus_nb_wmi_keymap[] = {
 	{ KE_KEY, 0x34, { KEY_DISPLAY_OFF } }, /* LCD off */
 	{ KE_KEY, 0x40, { KEY_PREVIOUSSONG } },
 	{ KE_KEY, 0x41, { KEY_NEXTSONG } },
-	{ KE_KEY, 0x43, { KEY_STOPCD } },
+	{ KE_KEY, 0x43, { KEY_STOPCD } }, /* Stop/Eject */
 	{ KE_KEY, 0x45, { KEY_PLAYPAUSE } },
-	{ KE_KEY, 0x4c, { KEY_MEDIA } },
+	{ KE_KEY, 0x4c, { KEY_MEDIA } }, /* WMP Key */
 	{ KE_KEY, 0x50, { KEY_EMAIL } },
 	{ KE_KEY, 0x51, { KEY_WWW } },
 	{ KE_KEY, 0x55, { KEY_CALC } },
@@ -180,25 +211,42 @@ static const struct key_entry asus_nb_wmi_keymap[] = {
 	{ KE_KEY, 0x5D, { KEY_WLAN } }, /* Wireless console Toggle */
 	{ KE_KEY, 0x5E, { KEY_WLAN } }, /* Wireless console Enable */
 	{ KE_KEY, 0x5F, { KEY_WLAN } }, /* Wireless console Disable */
-	{ KE_KEY, 0x60, { KEY_SWITCHVIDEOMODE } },
-	{ KE_KEY, 0x61, { KEY_SWITCHVIDEOMODE } },
-	{ KE_KEY, 0x62, { KEY_SWITCHVIDEOMODE } },
-	{ KE_KEY, 0x63, { KEY_SWITCHVIDEOMODE } },
+	{ KE_KEY, 0x60, { KEY_TOUCHPAD_ON } },
+	{ KE_KEY, 0x61, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD only */
+	{ KE_KEY, 0x62, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT only */
+	{ KE_KEY, 0x63, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT */
+	{ KE_KEY, 0x64, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV */
+	{ KE_KEY, 0x65, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV */
+	{ KE_KEY, 0x66, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV */
+	{ KE_KEY, 0x67, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV */
 	{ KE_KEY, 0x6B, { KEY_TOUCHPAD_TOGGLE } },
-	{ KE_KEY, 0x7D, { KEY_BLUETOOTH } },
-	{ KE_KEY, 0x7E, { KEY_BLUETOOTH } },
+	{ KE_IGNORE, 0x6E, },  /* Low Battery notification */
+	{ KE_KEY, 0x7D, { KEY_BLUETOOTH } }, /* Bluetooth Enable */
+	{ KE_KEY, 0x7E, { KEY_BLUETOOTH } }, /* Bluetooth Disable */
 	{ KE_KEY, 0x82, { KEY_CAMERA } },
-	{ KE_KEY, 0x88, { KEY_RFKILL  } },
-	{ KE_KEY, 0x8A, { KEY_PROG1 } },
+	{ KE_KEY, 0x88, { KEY_RFKILL  } }, /* Radio Toggle Key */
+	{ KE_KEY, 0x8A, { KEY_PROG1 } }, /* Color enhancement mode */
+	{ KE_KEY, 0x8C, { KEY_SWITCHVIDEOMODE } }, /* SDSP DVI only */
+	{ KE_KEY, 0x8D, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + DVI */
+	{ KE_KEY, 0x8E, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + DVI */
+	{ KE_KEY, 0x8F, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV + DVI */
+	{ KE_KEY, 0x90, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + DVI */
+	{ KE_KEY, 0x91, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV + DVI */
+	{ KE_KEY, 0x92, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV + DVI */
+	{ KE_KEY, 0x93, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + DVI */
 	{ KE_KEY, 0x95, { KEY_MEDIA } },
 	{ KE_KEY, 0x99, { KEY_PHONE } },
 	{ KE_KEY, 0xA0, { KEY_SWITCHVIDEOMODE } }, /* SDSP HDMI only */
 	{ KE_KEY, 0xA1, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + HDMI */
 	{ KE_KEY, 0xA2, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + HDMI */
 	{ KE_KEY, 0xA3, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV + HDMI */
-	{ KE_KEY, 0xb5, { KEY_CALC } },
-	{ KE_KEY, 0xc4, { KEY_KBDILLUMUP } },
-	{ KE_KEY, 0xc5, { KEY_KBDILLUMDOWN } },
+	{ KE_KEY, 0xA4, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + HDMI */
+	{ KE_KEY, 0xA5, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV + HDMI */
+	{ KE_KEY, 0xA6, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV + HDMI */
+	{ KE_KEY, 0xA7, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + HDMI */
+	{ KE_KEY, 0xB5, { KEY_CALC } },
+	{ KE_KEY, 0xC4, { KEY_KBDILLUMUP } },
+	{ KE_KEY, 0xC5, { KEY_KBDILLUMDOWN } },
 	{ KE_END, 0},
 };
 
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index f80ae4d10f68..c11b2426dac1 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -187,6 +187,8 @@ struct asus_wmi {
 	struct device *hwmon_device;
 	struct platform_device *platform_device;
 
+	struct led_classdev wlan_led;
+	int wlan_led_wk;
 	struct led_classdev tpd_led;
 	int tpd_led_wk;
 	struct led_classdev kbd_led;
@@ -194,6 +196,7 @@ struct asus_wmi {
 	struct workqueue_struct *led_workqueue;
 	struct work_struct tpd_led_work;
 	struct work_struct kbd_led_work;
+	struct work_struct wlan_led_work;
 
 	struct asus_rfkill wlan;
 	struct asus_rfkill bluetooth;
@@ -456,12 +459,65 @@ static enum led_brightness kbd_led_get(struct led_classdev *led_cdev)
 	return value;
 }
 
+static int wlan_led_unknown_state(struct asus_wmi *asus)
+{
+	u32 result;
+
+	asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WIRELESS_LED, &result);
+
+	return result & ASUS_WMI_DSTS_UNKNOWN_BIT;
+}
+
+static int wlan_led_presence(struct asus_wmi *asus)
+{
+	u32 result;
+
+	asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WIRELESS_LED, &result);
+
+	return result & ASUS_WMI_DSTS_PRESENCE_BIT;
+}
+
+static void wlan_led_update(struct work_struct *work)
+{
+	int ctrl_param;
+	struct asus_wmi *asus;
+
+	asus = container_of(work, struct asus_wmi, wlan_led_work);
+
+	ctrl_param = asus->wlan_led_wk;
+	asus_wmi_set_devstate(ASUS_WMI_DEVID_WIRELESS_LED, ctrl_param, NULL);
+}
+
+static void wlan_led_set(struct led_classdev *led_cdev,
+			 enum led_brightness value)
+{
+	struct asus_wmi *asus;
+
+	asus = container_of(led_cdev, struct asus_wmi, wlan_led);
+
+	asus->wlan_led_wk = !!value;
+	queue_work(asus->led_workqueue, &asus->wlan_led_work);
+}
+
+static enum led_brightness wlan_led_get(struct led_classdev *led_cdev)
+{
+	struct asus_wmi *asus;
+	u32 result;
+
+	asus = container_of(led_cdev, struct asus_wmi, wlan_led);
+	asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WIRELESS_LED, &result);
+
+	return result & ASUS_WMI_DSTS_BRIGHTNESS_MASK;
+}
+
 static void asus_wmi_led_exit(struct asus_wmi *asus)
 {
 	if (!IS_ERR_OR_NULL(asus->kbd_led.dev))
 		led_classdev_unregister(&asus->kbd_led);
 	if (!IS_ERR_OR_NULL(asus->tpd_led.dev))
 		led_classdev_unregister(&asus->tpd_led);
+	if (!IS_ERR_OR_NULL(asus->wlan_led.dev))
+		led_classdev_unregister(&asus->wlan_led);
 	if (asus->led_workqueue)
 		destroy_workqueue(asus->led_workqueue);
 }
@@ -498,6 +554,23 @@ static int asus_wmi_led_init(struct asus_wmi *asus)
 
 		rv = led_classdev_register(&asus->platform_device->dev,
 					   &asus->kbd_led);
+		if (rv)
+			goto error;
+	}
+
+	if (wlan_led_presence(asus)) {
+		INIT_WORK(&asus->wlan_led_work, wlan_led_update);
+
+		asus->wlan_led.name = "asus::wlan";
+		asus->wlan_led.brightness_set = wlan_led_set;
+		if (!wlan_led_unknown_state(asus))
+			asus->wlan_led.brightness_get = wlan_led_get;
+		asus->wlan_led.flags = LED_CORE_SUSPENDRESUME;
+		asus->wlan_led.max_brightness = 1;
+		asus->wlan_led.default_trigger = "asus-wlan";
+
+		rv = led_classdev_register(&asus->platform_device->dev,
+					   &asus->wlan_led);
 	}
 
 error:
@@ -813,6 +886,9 @@ static int asus_new_rfkill(struct asus_wmi *asus,
 	if (!*rfkill)
 		return -EINVAL;
 
+	if (dev_id == ASUS_WMI_DEVID_WLAN)
+		rfkill_set_led_trigger_name(*rfkill, "asus-wlan");
+
 	rfkill_init_sw_state(*rfkill, !result);
 	result = rfkill_register(*rfkill);
 	if (result) {
@@ -1265,6 +1341,18 @@ static void asus_wmi_backlight_exit(struct asus_wmi *asus)
 	asus->backlight_device = NULL;
 }
 
+static int is_display_toggle(int code)
+{
+	/* display toggle keys */
+	if ((code >= 0x61 && code <= 0x67) ||
+	    (code >= 0x8c && code <= 0x93) ||
+	    (code >= 0xa0 && code <= 0xa7) ||
+	    (code >= 0xd0 && code <= 0xd5))
+		return 1;
+
+	return 0;
+}
+
 static void asus_wmi_notify(u32 value, void *context)
 {
 	struct asus_wmi *asus = context;
@@ -1298,16 +1386,24 @@ static void asus_wmi_notify(u32 value, void *context)
 	}
 
 	if (code >= NOTIFY_BRNUP_MIN && code <= NOTIFY_BRNUP_MAX)
-		code = NOTIFY_BRNUP_MIN;
+		code = ASUS_WMI_BRN_UP;
 	else if (code >= NOTIFY_BRNDOWN_MIN &&
 		 code <= NOTIFY_BRNDOWN_MAX)
-		code = NOTIFY_BRNDOWN_MIN;
+		code = ASUS_WMI_BRN_DOWN;
 
-	if (code == NOTIFY_BRNUP_MIN || code == NOTIFY_BRNDOWN_MIN) {
-		if (!acpi_video_backlight_support())
+	if (code == ASUS_WMI_BRN_DOWN || code == ASUS_WMI_BRN_UP) {
+		if (!acpi_video_backlight_support()) {
 			asus_wmi_backlight_notify(asus, orig_code);
-	} else if (!sparse_keymap_report_event(asus->inputdev, code,
-					       key_value, autorelease))
+			goto exit;
+		}
+	}
+
+	if (is_display_toggle(code) &&
+	    asus->driver->quirks->no_display_toggle)
+		goto exit;
+
+	if (!sparse_keymap_report_event(asus->inputdev, code,
+					key_value, autorelease))
 		pr_info("Unknown key %x pressed\n", code);
 
 exit:
diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h
index 4c9bd38bb0a2..4da4c8bafe70 100644
--- a/drivers/platform/x86/asus-wmi.h
+++ b/drivers/platform/x86/asus-wmi.h
@@ -30,6 +30,8 @@
 #include <linux/platform_device.h>
 
 #define ASUS_WMI_KEY_IGNORE (-1)
+#define ASUS_WMI_BRN_DOWN	0x20
+#define ASUS_WMI_BRN_UP		0x2f
 
 struct module;
 struct key_entry;
@@ -41,6 +43,13 @@ struct quirk_entry {
 	bool store_backlight_power;
 	bool wmi_backlight_power;
 	int wapf;
+	/*
+	 * For machines with AMD graphic chips, it will send out WMI event
+	 * and ACPI interrupt at the same time while hitting the hotkey.
+	 * To simplify the problem, we just have to ignore the WMI event,
+	 * and let the ACPI interrupt to send out the key event.
+	 */
+	int no_display_toggle;
 };
 
 struct asus_wmi_driver {
diff --git a/drivers/platform/x86/chromeos_laptop.c b/drivers/platform/x86/chromeos_laptop.c
new file mode 100644
index 000000000000..93d66809355a
--- /dev/null
+++ b/drivers/platform/x86/chromeos_laptop.c
@@ -0,0 +1,371 @@
+/*
+ *  chromeos_laptop.c - Driver to instantiate Chromebook i2c/smbus devices.
+ *
+ *  Author : Benson Leung <bleung@chromium.org>
+ *
+ *  Copyright (C) 2012 Google, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/dmi.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+
+#define ATMEL_TP_I2C_ADDR	0x4b
+#define ATMEL_TP_I2C_BL_ADDR	0x25
+#define ATMEL_TS_I2C_ADDR	0x4a
+#define ATMEL_TS_I2C_BL_ADDR	0x26
+#define CYAPA_TP_I2C_ADDR	0x67
+#define ISL_ALS_I2C_ADDR	0x44
+#define TAOS_ALS_I2C_ADDR	0x29
+
+static struct i2c_client *als;
+static struct i2c_client *tp;
+static struct i2c_client *ts;
+
+const char *i2c_adapter_names[] = {
+	"SMBus I801 adapter",
+	"i915 gmbus vga",
+	"i915 gmbus panel",
+};
+
+/* Keep this enum consistent with i2c_adapter_names */
+enum i2c_adapter_type {
+	I2C_ADAPTER_SMBUS = 0,
+	I2C_ADAPTER_VGADDC,
+	I2C_ADAPTER_PANEL,
+};
+
+static struct i2c_board_info __initdata cyapa_device = {
+	I2C_BOARD_INFO("cyapa", CYAPA_TP_I2C_ADDR),
+	.flags		= I2C_CLIENT_WAKE,
+};
+
+static struct i2c_board_info __initdata isl_als_device = {
+	I2C_BOARD_INFO("isl29018", ISL_ALS_I2C_ADDR),
+};
+
+static struct i2c_board_info __initdata tsl2583_als_device = {
+	I2C_BOARD_INFO("tsl2583", TAOS_ALS_I2C_ADDR),
+};
+
+static struct i2c_board_info __initdata tsl2563_als_device = {
+	I2C_BOARD_INFO("tsl2563", TAOS_ALS_I2C_ADDR),
+};
+
+static struct i2c_board_info __initdata atmel_224s_tp_device = {
+	I2C_BOARD_INFO("atmel_mxt_tp", ATMEL_TP_I2C_ADDR),
+	.platform_data = NULL,
+	.flags		= I2C_CLIENT_WAKE,
+};
+
+static struct i2c_board_info __initdata atmel_1664s_device = {
+	I2C_BOARD_INFO("atmel_mxt_ts", ATMEL_TS_I2C_ADDR),
+	.platform_data = NULL,
+	.flags		= I2C_CLIENT_WAKE,
+};
+
+static struct i2c_client __init *__add_probed_i2c_device(
+		const char *name,
+		int bus,
+		struct i2c_board_info *info,
+		const unsigned short *addrs)
+{
+	const struct dmi_device *dmi_dev;
+	const struct dmi_dev_onboard *dev_data;
+	struct i2c_adapter *adapter;
+	struct i2c_client *client;
+
+	if (bus < 0)
+		return NULL;
+	/*
+	 * If a name is specified, look for irq platform information stashed
+	 * in DMI_DEV_TYPE_DEV_ONBOARD by the Chrome OS custom system firmware.
+	 */
+	if (name) {
+		dmi_dev = dmi_find_device(DMI_DEV_TYPE_DEV_ONBOARD, name, NULL);
+		if (!dmi_dev) {
+			pr_err("%s failed to dmi find device %s.\n",
+			       __func__,
+			       name);
+			return NULL;
+		}
+		dev_data = (struct dmi_dev_onboard *)dmi_dev->device_data;
+		if (!dev_data) {
+			pr_err("%s failed to get data from dmi for %s.\n",
+			       __func__, name);
+			return NULL;
+		}
+		info->irq = dev_data->instance;
+	}
+
+	adapter = i2c_get_adapter(bus);
+	if (!adapter) {
+		pr_err("%s failed to get i2c adapter %d.\n", __func__, bus);
+		return NULL;
+	}
+
+	/* add the i2c device */
+	client = i2c_new_probed_device(adapter, info, addrs, NULL);
+	if (!client)
+		pr_err("%s failed to register device %d-%02x\n",
+		       __func__, bus, info->addr);
+	else
+		pr_debug("%s added i2c device %d-%02x\n",
+			 __func__, bus, info->addr);
+
+	i2c_put_adapter(adapter);
+	return client;
+}
+
+static int __init __find_i2c_adap(struct device *dev, void *data)
+{
+	const char *name = data;
+	static const char *prefix = "i2c-";
+	struct i2c_adapter *adapter;
+	if (strncmp(dev_name(dev), prefix, strlen(prefix)) != 0)
+		return 0;
+	adapter = to_i2c_adapter(dev);
+	return (strncmp(adapter->name, name, strlen(name)) == 0);
+}
+
+static int __init find_i2c_adapter_num(enum i2c_adapter_type type)
+{
+	struct device *dev = NULL;
+	struct i2c_adapter *adapter;
+	const char *name = i2c_adapter_names[type];
+	/* find the adapter by name */
+	dev = bus_find_device(&i2c_bus_type, NULL, (void *)name,
+			      __find_i2c_adap);
+	if (!dev) {
+		pr_err("%s: i2c adapter %s not found on system.\n", __func__,
+		       name);
+		return -ENODEV;
+	}
+	adapter = to_i2c_adapter(dev);
+	return adapter->nr;
+}
+
+/*
+ * Takes a list of addresses in addrs as such :
+ * { addr1, ... , addrn, I2C_CLIENT_END };
+ * add_probed_i2c_device will use i2c_new_probed_device
+ * and probe for devices at all of the addresses listed.
+ * Returns NULL if no devices found.
+ * See Documentation/i2c/instantiating-devices for more information.
+ */
+static __init struct i2c_client *add_probed_i2c_device(
+		const char *name,
+		enum i2c_adapter_type type,
+		struct i2c_board_info *info,
+		const unsigned short *addrs)
+{
+	return __add_probed_i2c_device(name,
+				       find_i2c_adapter_num(type),
+				       info,
+				       addrs);
+}
+
+/*
+ * Probes for a device at a single address, the one provided by
+ * info->addr.
+ * Returns NULL if no device found.
+ */
+static __init struct i2c_client *add_i2c_device(const char *name,
+						enum i2c_adapter_type type,
+						struct i2c_board_info *info)
+{
+	const unsigned short addr_list[] = { info->addr, I2C_CLIENT_END };
+	return __add_probed_i2c_device(name,
+				       find_i2c_adapter_num(type),
+				       info,
+				       addr_list);
+}
+
+
+static struct i2c_client __init *add_smbus_device(const char *name,
+						  struct i2c_board_info *info)
+{
+	return add_i2c_device(name, I2C_ADAPTER_SMBUS, info);
+}
+
+static int __init setup_cyapa_smbus_tp(const struct dmi_system_id *id)
+{
+	/* add cyapa touchpad on smbus */
+	tp = add_smbus_device("trackpad", &cyapa_device);
+	return 0;
+}
+
+static int __init setup_atmel_224s_tp(const struct dmi_system_id *id)
+{
+	const unsigned short addr_list[] = { ATMEL_TP_I2C_BL_ADDR,
+					     ATMEL_TP_I2C_ADDR,
+					     I2C_CLIENT_END };
+
+	/* add atmel mxt touchpad on VGA DDC GMBus */
+	tp = add_probed_i2c_device("trackpad", I2C_ADAPTER_VGADDC,
+				   &atmel_224s_tp_device, addr_list);
+	return 0;
+}
+
+static int __init setup_atmel_1664s_ts(const struct dmi_system_id *id)
+{
+	const unsigned short addr_list[] = { ATMEL_TS_I2C_BL_ADDR,
+					     ATMEL_TS_I2C_ADDR,
+					     I2C_CLIENT_END };
+
+	/* add atmel mxt touch device on PANEL GMBus */
+	ts = add_probed_i2c_device("touchscreen", I2C_ADAPTER_PANEL,
+				   &atmel_1664s_device, addr_list);
+	return 0;
+}
+
+
+static int __init setup_isl29018_als(const struct dmi_system_id *id)
+{
+	/* add isl29018 light sensor */
+	als = add_smbus_device("lightsensor", &isl_als_device);
+	return 0;
+}
+
+static int __init setup_isl29023_als(const struct dmi_system_id *id)
+{
+	/* add isl29023 light sensor on Panel GMBus */
+	als = add_i2c_device("lightsensor", I2C_ADAPTER_PANEL,
+			     &isl_als_device);
+	return 0;
+}
+
+static int __init setup_tsl2583_als(const struct dmi_system_id *id)
+{
+	/* add tsl2583 light sensor on smbus */
+	als = add_smbus_device(NULL, &tsl2583_als_device);
+	return 0;
+}
+
+static int __init setup_tsl2563_als(const struct dmi_system_id *id)
+{
+	/* add tsl2563 light sensor on smbus */
+	als = add_smbus_device(NULL, &tsl2563_als_device);
+	return 0;
+}
+
+static struct dmi_system_id __initdata chromeos_laptop_dmi_table[] = {
+	{
+		.ident = "Samsung Series 5 550 - Touchpad",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Lumpy"),
+		},
+		.callback = setup_cyapa_smbus_tp,
+	},
+	{
+		.ident = "Chromebook Pixel - Touchscreen",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Link"),
+		},
+		.callback = setup_atmel_1664s_ts,
+	},
+	{
+		.ident = "Chromebook Pixel - Touchpad",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Link"),
+		},
+		.callback = setup_atmel_224s_tp,
+	},
+	{
+		.ident = "Samsung Series 5 550 - Light Sensor",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Lumpy"),
+		},
+		.callback = setup_isl29018_als,
+	},
+	{
+		.ident = "Chromebook Pixel - Light Sensor",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Link"),
+		},
+		.callback = setup_isl29023_als,
+	},
+	{
+		.ident = "Acer C7 Chromebook - Touchpad",
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "Parrot"),
+		},
+		.callback = setup_cyapa_smbus_tp,
+	},
+	{
+		.ident = "HP Pavilion 14 Chromebook - Touchpad",
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "Butterfly"),
+		},
+		.callback = setup_cyapa_smbus_tp,
+	},
+	{
+		.ident = "Samsung Series 5 - Light Sensor",
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "Alex"),
+		},
+		.callback = setup_tsl2583_als,
+	},
+	{
+		.ident = "Cr-48 - Light Sensor",
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "Mario"),
+		},
+		.callback = setup_tsl2563_als,
+	},
+	{
+		.ident = "Acer AC700 - Light Sensor",
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "ZGB"),
+		},
+		.callback = setup_tsl2563_als,
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(dmi, chromeos_laptop_dmi_table);
+
+static int __init chromeos_laptop_init(void)
+{
+	if (!dmi_check_system(chromeos_laptop_dmi_table)) {
+		pr_debug("%s unsupported system.\n", __func__);
+		return -ENODEV;
+	}
+	return 0;
+}
+
+static void __exit chromeos_laptop_exit(void)
+{
+	if (als)
+		i2c_unregister_device(als);
+	if (tp)
+		i2c_unregister_device(tp);
+	if (ts)
+		i2c_unregister_device(ts);
+}
+
+module_init(chromeos_laptop_init);
+module_exit(chromeos_laptop_exit);
+
+MODULE_DESCRIPTION("Chrome OS Laptop driver");
+MODULE_AUTHOR("Benson Leung <bleung@chromium.org>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/eeepc-wmi.c b/drivers/platform/x86/eeepc-wmi.c
index 60cb76a5b513..af67e6e56ebb 100644
--- a/drivers/platform/x86/eeepc-wmi.c
+++ b/drivers/platform/x86/eeepc-wmi.c
@@ -63,6 +63,8 @@ MODULE_PARM_DESC(hotplug_wireless,
 #define HOME_RELEASE	0xe5
 
 static const struct key_entry eeepc_wmi_keymap[] = {
+	{ KE_KEY, ASUS_WMI_BRN_DOWN, { KEY_BRIGHTNESSDOWN } },
+	{ KE_KEY, ASUS_WMI_BRN_UP, { KEY_BRIGHTNESSUP } },
 	/* Sleep already handled via generic ACPI code */
 	{ KE_KEY, 0x30, { KEY_VOLUMEUP } },
 	{ KE_KEY, 0x31, { KEY_VOLUMEDOWN } },
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index 1dde7accf27c..45cacf79f3a7 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c
@@ -60,6 +60,7 @@ enum hp_wmi_radio {
 	HPWMI_WIFI = 0,
 	HPWMI_BLUETOOTH = 1,
 	HPWMI_WWAN = 2,
+	HPWMI_GPS = 3,
 };
 
 enum hp_wmi_event_ids {
@@ -72,10 +73,6 @@ enum hp_wmi_event_ids {
 	HPWMI_LOCK_SWITCH = 7,
 };
 
-static int hp_wmi_bios_setup(struct platform_device *device);
-static int __exit hp_wmi_bios_remove(struct platform_device *device);
-static int hp_wmi_resume_handler(struct device *device);
-
 struct bios_args {
 	u32 signature;
 	u32 command;
@@ -137,6 +134,7 @@ static const struct key_entry hp_wmi_keymap[] = {
 	{ KE_KEY, 0x2142, { KEY_MEDIA } },
 	{ KE_KEY, 0x213b, { KEY_INFO } },
 	{ KE_KEY, 0x2169, { KEY_DIRECTION } },
+	{ KE_KEY, 0x216a, { KEY_SETUP } },
 	{ KE_KEY, 0x231b, { KEY_HELP } },
 	{ KE_END, 0 }
 };
@@ -147,6 +145,7 @@ static struct platform_device *hp_wmi_platform_dev;
 static struct rfkill *wifi_rfkill;
 static struct rfkill *bluetooth_rfkill;
 static struct rfkill *wwan_rfkill;
+static struct rfkill *gps_rfkill;
 
 struct rfkill2_device {
 	u8 id;
@@ -157,21 +156,6 @@ struct rfkill2_device {
 static int rfkill2_count;
 static struct rfkill2_device rfkill2[HPWMI_MAX_RFKILL2_DEVICES];
 
-static const struct dev_pm_ops hp_wmi_pm_ops = {
-	.resume  = hp_wmi_resume_handler,
-	.restore  = hp_wmi_resume_handler,
-};
-
-static struct platform_driver hp_wmi_driver = {
-	.driver = {
-		.name = "hp-wmi",
-		.owner = THIS_MODULE,
-		.pm = &hp_wmi_pm_ops,
-	},
-	.probe = hp_wmi_bios_setup,
-	.remove = hp_wmi_bios_remove,
-};
-
 /*
  * hp_wmi_perform_query
  *
@@ -543,6 +527,10 @@ static void hp_wmi_notify(u32 value, void *context)
 			rfkill_set_states(wwan_rfkill,
 					  hp_wmi_get_sw_state(HPWMI_WWAN),
 					  hp_wmi_get_hw_state(HPWMI_WWAN));
+		if (gps_rfkill)
+			rfkill_set_states(gps_rfkill,
+					  hp_wmi_get_sw_state(HPWMI_GPS),
+					  hp_wmi_get_hw_state(HPWMI_GPS));
 		break;
 	case HPWMI_CPU_BATTERY_THROTTLE:
 		pr_info("Unimplemented CPU throttle because of 3 Cell battery event detected\n");
@@ -670,7 +658,7 @@ static int hp_wmi_rfkill_setup(struct platform_device *device)
 					   (void *) HPWMI_WWAN);
 		if (!wwan_rfkill) {
 			err = -ENOMEM;
-			goto register_bluetooth_error;
+			goto register_gps_error;
 		}
 		rfkill_init_sw_state(wwan_rfkill,
 				     hp_wmi_get_sw_state(HPWMI_WWAN));
@@ -681,10 +669,33 @@ static int hp_wmi_rfkill_setup(struct platform_device *device)
 			goto register_wwan_err;
 	}
 
+	if (wireless & 0x8) {
+		gps_rfkill = rfkill_alloc("hp-gps", &device->dev,
+						RFKILL_TYPE_GPS,
+						&hp_wmi_rfkill_ops,
+						(void *) HPWMI_GPS);
+		if (!gps_rfkill) {
+			err = -ENOMEM;
+			goto register_bluetooth_error;
+		}
+		rfkill_init_sw_state(gps_rfkill,
+				     hp_wmi_get_sw_state(HPWMI_GPS));
+		rfkill_set_hw_state(bluetooth_rfkill,
+				    hp_wmi_get_hw_state(HPWMI_GPS));
+		err = rfkill_register(gps_rfkill);
+		if (err)
+			goto register_gps_error;
+	}
+
 	return 0;
 register_wwan_err:
 	rfkill_destroy(wwan_rfkill);
 	wwan_rfkill = NULL;
+	if (gps_rfkill)
+		rfkill_unregister(gps_rfkill);
+register_gps_error:
+	rfkill_destroy(gps_rfkill);
+	gps_rfkill = NULL;
 	if (bluetooth_rfkill)
 		rfkill_unregister(bluetooth_rfkill);
 register_bluetooth_error:
@@ -729,6 +740,10 @@ static int hp_wmi_rfkill2_setup(struct platform_device *device)
 			type = RFKILL_TYPE_WWAN;
 			name = "hp-wwan";
 			break;
+		case HPWMI_GPS:
+			type = RFKILL_TYPE_GPS;
+			name = "hp-gps";
+			break;
 		default:
 			pr_warn("unknown device type 0x%x\n",
 				state.device[i].radio_type);
@@ -778,7 +793,7 @@ fail:
 	return err;
 }
 
-static int hp_wmi_bios_setup(struct platform_device *device)
+static int __init hp_wmi_bios_setup(struct platform_device *device)
 {
 	int err;
 
@@ -786,6 +801,7 @@ static int hp_wmi_bios_setup(struct platform_device *device)
 	wifi_rfkill = NULL;
 	bluetooth_rfkill = NULL;
 	wwan_rfkill = NULL;
+	gps_rfkill = NULL;
 	rfkill2_count = 0;
 
 	if (hp_wmi_rfkill_setup(device))
@@ -835,6 +851,10 @@ static int __exit hp_wmi_bios_remove(struct platform_device *device)
 		rfkill_unregister(wwan_rfkill);
 		rfkill_destroy(wwan_rfkill);
 	}
+	if (gps_rfkill) {
+		rfkill_unregister(gps_rfkill);
+		rfkill_destroy(gps_rfkill);
+	}
 
 	return 0;
 }
@@ -870,51 +890,70 @@ static int hp_wmi_resume_handler(struct device *device)
 		rfkill_set_states(wwan_rfkill,
 				  hp_wmi_get_sw_state(HPWMI_WWAN),
 				  hp_wmi_get_hw_state(HPWMI_WWAN));
+	if (gps_rfkill)
+		rfkill_set_states(gps_rfkill,
+				  hp_wmi_get_sw_state(HPWMI_GPS),
+				  hp_wmi_get_hw_state(HPWMI_GPS));
 
 	return 0;
 }
 
+static const struct dev_pm_ops hp_wmi_pm_ops = {
+	.resume  = hp_wmi_resume_handler,
+	.restore  = hp_wmi_resume_handler,
+};
+
+static struct platform_driver hp_wmi_driver = {
+	.driver = {
+		.name = "hp-wmi",
+		.owner = THIS_MODULE,
+		.pm = &hp_wmi_pm_ops,
+	},
+	.remove = __exit_p(hp_wmi_bios_remove),
+};
+
 static int __init hp_wmi_init(void)
 {
 	int err;
 	int event_capable = wmi_has_guid(HPWMI_EVENT_GUID);
 	int bios_capable = wmi_has_guid(HPWMI_BIOS_GUID);
 
+	if (!bios_capable && !event_capable)
+		return -ENODEV;
+
 	if (event_capable) {
 		err = hp_wmi_input_setup();
 		if (err)
 			return err;
+		
+		//Enable magic for hotkeys that run on the SMBus
+		ec_write(0xe6,0x6e);
 	}
 
 	if (bios_capable) {
-		err = platform_driver_register(&hp_wmi_driver);
-		if (err)
-			goto err_driver_reg;
-		hp_wmi_platform_dev = platform_device_alloc("hp-wmi", -1);
-		if (!hp_wmi_platform_dev) {
-			err = -ENOMEM;
-			goto err_device_alloc;
+		hp_wmi_platform_dev =
+			platform_device_register_simple("hp-wmi", -1, NULL, 0);
+		if (IS_ERR(hp_wmi_platform_dev)) {
+			err = PTR_ERR(hp_wmi_platform_dev);
+			goto err_destroy_input;
 		}
-		err = platform_device_add(hp_wmi_platform_dev);
+
+		err = platform_driver_probe(&hp_wmi_driver, hp_wmi_bios_setup);
 		if (err)
-			goto err_device_add;
+			goto err_unregister_device;
 	}
 
-	if (!bios_capable && !event_capable)
-		return -ENODEV;
-
 	return 0;
 
-err_device_add:
-	platform_device_put(hp_wmi_platform_dev);
-err_device_alloc:
-	platform_driver_unregister(&hp_wmi_driver);
-err_driver_reg:
+err_unregister_device:
+	platform_device_unregister(hp_wmi_platform_dev);
+err_destroy_input:
 	if (event_capable)
 		hp_wmi_input_destroy();
 
 	return err;
 }
+module_init(hp_wmi_init);
 
 static void __exit hp_wmi_exit(void)
 {
@@ -926,6 +965,4 @@ static void __exit hp_wmi_exit(void)
 		platform_driver_unregister(&hp_wmi_driver);
 	}
 }
-
-module_init(hp_wmi_init);
 module_exit(hp_wmi_exit);
diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c
index 2111dbb7e1e3..6b2293875672 100644
--- a/drivers/platform/x86/msi-laptop.c
+++ b/drivers/platform/x86/msi-laptop.c
@@ -82,8 +82,19 @@
 #define MSI_STANDARD_EC_SCM_LOAD_ADDRESS	0x2d
 #define MSI_STANDARD_EC_SCM_LOAD_MASK		(1 << 0)
 
-#define MSI_STANDARD_EC_TOUCHPAD_ADDRESS	0xe4
+#define MSI_STANDARD_EC_FUNCTIONS_ADDRESS	0xe4
+/* Power LED is orange - Turbo mode */
+#define MSI_STANDARD_EC_TURBO_MASK		(1 << 1)
+/* Power LED is green - ECO mode */
+#define MSI_STANDARD_EC_ECO_MASK		(1 << 3)
+/* Touchpad is turned on */
 #define MSI_STANDARD_EC_TOUCHPAD_MASK		(1 << 4)
+/* If this bit != bit 1, turbo mode can't be toggled */
+#define MSI_STANDARD_EC_TURBO_COOLDOWN_MASK	(1 << 7)
+
+#define MSI_STANDARD_EC_FAN_ADDRESS		0x33
+/* If zero, fan rotates at maximal speed */
+#define MSI_STANDARD_EC_AUTOFAN_MASK		(1 << 0)
 
 #ifdef CONFIG_PM_SLEEP
 static int msi_laptop_resume(struct device *device);
@@ -108,23 +119,38 @@ static const struct key_entry msi_laptop_keymap[] = {
 
 static struct input_dev *msi_laptop_input_dev;
 
-static bool old_ec_model;
 static int wlan_s, bluetooth_s, threeg_s;
 static int threeg_exists;
-
-/* Some MSI 3G netbook only have one fn key to control Wlan/Bluetooth/3G,
- * those netbook will load the SCM (windows app) to disable the original
- * Wlan/Bluetooth control by BIOS when user press fn key, then control
- * Wlan/Bluetooth/3G by SCM (software control by OS). Without SCM, user
- * cann't on/off 3G module on those 3G netbook.
- * On Linux, msi-laptop driver will do the same thing to disable the
- * original BIOS control, then might need use HAL or other userland
- * application to do the software control that simulate with SCM.
- * e.g. MSI N034 netbook
- */
-static bool load_scm_model;
 static struct rfkill *rfk_wlan, *rfk_bluetooth, *rfk_threeg;
 
+/* MSI laptop quirks */
+struct quirk_entry {
+	bool old_ec_model;
+
+	/* Some MSI 3G netbook only have one fn key to control
+	 * Wlan/Bluetooth/3G, those netbook will load the SCM (windows app) to
+	 * disable the original Wlan/Bluetooth control by BIOS when user press
+	 * fn key, then control Wlan/Bluetooth/3G by SCM (software control by
+	 * OS). Without SCM, user cann't on/off 3G module on those 3G netbook.
+	 * On Linux, msi-laptop driver will do the same thing to disable the
+	 * original BIOS control, then might need use HAL or other userland
+	 * application to do the software control that simulate with SCM.
+	 * e.g. MSI N034 netbook
+	 */
+	bool load_scm_model;
+
+	/* Some MSI laptops need delay before reading from EC */
+	bool ec_delay;
+
+	/* Some MSI Wind netbooks (e.g. MSI Wind U100) need loading SCM to get
+	 * some features working (e.g. ECO mode), but we cannot change
+	 * Wlan/Bluetooth state in software and we can only read its state.
+	 */
+	bool ec_read_only;
+};
+
+static struct quirk_entry *quirks;
+
 /* Hardware access */
 
 static int set_lcd_level(int level)
@@ -195,10 +221,13 @@ static ssize_t set_device_state(const char *buf, size_t count, u8 mask)
 	if (sscanf(buf, "%i", &status) != 1 || (status < 0 || status > 1))
 		return -EINVAL;
 
+	if (quirks->ec_read_only)
+		return -EOPNOTSUPP;
+
 	/* read current device state */
 	result = ec_read(MSI_STANDARD_EC_COMMAND_ADDRESS, &rdata);
 	if (result < 0)
-		return -EINVAL;
+		return result;
 
 	if (!!(rdata & mask) != status) {
 		/* reverse device bit */
@@ -209,7 +238,7 @@ static ssize_t set_device_state(const char *buf, size_t count, u8 mask)
 
 		result = ec_write(MSI_STANDARD_EC_COMMAND_ADDRESS, wdata);
 		if (result < 0)
-			return -EINVAL;
+			return result;
 	}
 
 	return count;
@@ -222,7 +251,7 @@ static int get_wireless_state(int *wlan, int *bluetooth)
 
 	result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1);
 	if (result < 0)
-		return -1;
+		return result;
 
 	if (wlan)
 		*wlan = !!(rdata & 8);
@@ -240,7 +269,7 @@ static int get_wireless_state_ec_standard(void)
 
 	result = ec_read(MSI_STANDARD_EC_COMMAND_ADDRESS, &rdata);
 	if (result < 0)
-		return -1;
+		return result;
 
 	wlan_s = !!(rdata & MSI_STANDARD_EC_WLAN_MASK);
 
@@ -258,7 +287,7 @@ static int get_threeg_exists(void)
 
 	result = ec_read(MSI_STANDARD_EC_DEVICES_EXISTS_ADDRESS, &rdata);
 	if (result < 0)
-		return -1;
+		return result;
 
 	threeg_exists = !!(rdata & MSI_STANDARD_EC_3G_MASK);
 
@@ -291,9 +320,9 @@ static ssize_t show_wlan(struct device *dev,
 	struct device_attribute *attr, char *buf)
 {
 
-	int ret, enabled;
+	int ret, enabled = 0;
 
-	if (old_ec_model) {
+	if (quirks->old_ec_model) {
 		ret = get_wireless_state(&enabled, NULL);
 	} else {
 		ret = get_wireless_state_ec_standard();
@@ -315,9 +344,9 @@ static ssize_t show_bluetooth(struct device *dev,
 	struct device_attribute *attr, char *buf)
 {
 
-	int ret, enabled;
+	int ret, enabled = 0;
 
-	if (old_ec_model) {
+	if (quirks->old_ec_model) {
 		ret = get_wireless_state(NULL, &enabled);
 	} else {
 		ret = get_wireless_state_ec_standard();
@@ -342,8 +371,8 @@ static ssize_t show_threeg(struct device *dev,
 	int ret;
 
 	/* old msi ec not support 3G */
-	if (old_ec_model)
-		return -1;
+	if (quirks->old_ec_model)
+		return -ENODEV;
 
 	ret = get_wireless_state_ec_standard();
 	if (ret < 0)
@@ -417,18 +446,119 @@ static ssize_t store_auto_brightness(struct device *dev,
 	return count;
 }
 
+static ssize_t show_touchpad(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+
+	u8 rdata;
+	int result;
+
+	result = ec_read(MSI_STANDARD_EC_FUNCTIONS_ADDRESS, &rdata);
+	if (result < 0)
+		return result;
+
+	return sprintf(buf, "%i\n", !!(rdata & MSI_STANDARD_EC_TOUCHPAD_MASK));
+}
+
+static ssize_t show_turbo(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+
+	u8 rdata;
+	int result;
+
+	result = ec_read(MSI_STANDARD_EC_FUNCTIONS_ADDRESS, &rdata);
+	if (result < 0)
+		return result;
+
+	return sprintf(buf, "%i\n", !!(rdata & MSI_STANDARD_EC_TURBO_MASK));
+}
+
+static ssize_t show_eco(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+
+	u8 rdata;
+	int result;
+
+	result = ec_read(MSI_STANDARD_EC_FUNCTIONS_ADDRESS, &rdata);
+	if (result < 0)
+		return result;
+
+	return sprintf(buf, "%i\n", !!(rdata & MSI_STANDARD_EC_ECO_MASK));
+}
+
+static ssize_t show_turbo_cooldown(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+
+	u8 rdata;
+	int result;
+
+	result = ec_read(MSI_STANDARD_EC_FUNCTIONS_ADDRESS, &rdata);
+	if (result < 0)
+		return result;
+
+	return sprintf(buf, "%i\n", (!!(rdata & MSI_STANDARD_EC_TURBO_MASK)) |
+		(!!(rdata & MSI_STANDARD_EC_TURBO_COOLDOWN_MASK) << 1));
+}
+
+static ssize_t show_auto_fan(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+
+	u8 rdata;
+	int result;
+
+	result = ec_read(MSI_STANDARD_EC_FAN_ADDRESS, &rdata);
+	if (result < 0)
+		return result;
+
+	return sprintf(buf, "%i\n", !!(rdata & MSI_STANDARD_EC_AUTOFAN_MASK));
+}
+
+static ssize_t store_auto_fan(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+
+	int enable, result;
+
+	if (sscanf(buf, "%i", &enable) != 1 || (enable != (enable & 1)))
+		return -EINVAL;
+
+	result = ec_write(MSI_STANDARD_EC_FAN_ADDRESS, enable);
+	if (result < 0)
+		return result;
+
+	return count;
+}
+
 static DEVICE_ATTR(lcd_level, 0644, show_lcd_level, store_lcd_level);
 static DEVICE_ATTR(auto_brightness, 0644, show_auto_brightness,
 		   store_auto_brightness);
 static DEVICE_ATTR(bluetooth, 0444, show_bluetooth, NULL);
 static DEVICE_ATTR(wlan, 0444, show_wlan, NULL);
 static DEVICE_ATTR(threeg, 0444, show_threeg, NULL);
+static DEVICE_ATTR(touchpad, 0444, show_touchpad, NULL);
+static DEVICE_ATTR(turbo_mode, 0444, show_turbo, NULL);
+static DEVICE_ATTR(eco_mode, 0444, show_eco, NULL);
+static DEVICE_ATTR(turbo_cooldown, 0444, show_turbo_cooldown, NULL);
+static DEVICE_ATTR(auto_fan, 0644, show_auto_fan, store_auto_fan);
 
 static struct attribute *msipf_attributes[] = {
-	&dev_attr_lcd_level.attr,
-	&dev_attr_auto_brightness.attr,
 	&dev_attr_bluetooth.attr,
 	&dev_attr_wlan.attr,
+	&dev_attr_touchpad.attr,
+	&dev_attr_turbo_mode.attr,
+	&dev_attr_eco_mode.attr,
+	&dev_attr_turbo_cooldown.attr,
+	&dev_attr_auto_fan.attr,
+	NULL
+};
+
+static struct attribute *msipf_old_attributes[] = {
+	&dev_attr_lcd_level.attr,
+	&dev_attr_auto_brightness.attr,
 	NULL
 };
 
@@ -436,6 +566,10 @@ static struct attribute_group msipf_attribute_group = {
 	.attrs = msipf_attributes
 };
 
+static struct attribute_group msipf_old_attribute_group = {
+	.attrs = msipf_old_attributes
+};
+
 static struct platform_driver msipf_driver = {
 	.driver = {
 		.name = "msi-laptop-pf",
@@ -448,9 +582,26 @@ static struct platform_device *msipf_device;
 
 /* Initialization */
 
-static int dmi_check_cb(const struct dmi_system_id *id)
+static struct quirk_entry quirk_old_ec_model = {
+	.old_ec_model = true,
+};
+
+static struct quirk_entry quirk_load_scm_model = {
+	.load_scm_model = true,
+	.ec_delay = true,
+};
+
+static struct quirk_entry quirk_load_scm_ro_model = {
+	.load_scm_model = true,
+	.ec_read_only = true,
+};
+
+static int dmi_check_cb(const struct dmi_system_id *dmi)
 {
-	pr_info("Identified laptop model '%s'\n", id->ident);
+	pr_info("Identified laptop model '%s'\n", dmi->ident);
+
+	quirks = dmi->driver_data;
+
 	return 1;
 }
 
@@ -464,6 +615,7 @@ static struct dmi_system_id __initdata msi_dmi_table[] = {
 			DMI_MATCH(DMI_CHASSIS_VENDOR,
 				  "MICRO-STAR INT'L CO.,LTD")
 		},
+		.driver_data = &quirk_old_ec_model,
 		.callback = dmi_check_cb
 	},
 	{
@@ -474,6 +626,7 @@ static struct dmi_system_id __initdata msi_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_VERSION, "0581"),
 			DMI_MATCH(DMI_BOARD_NAME, "MS-1058")
 		},
+		.driver_data = &quirk_old_ec_model,
 		.callback = dmi_check_cb
 	},
 	{
@@ -484,6 +637,7 @@ static struct dmi_system_id __initdata msi_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_VENDOR, "MSI"),
 			DMI_MATCH(DMI_BOARD_NAME, "MS-1412")
 		},
+		.driver_data = &quirk_old_ec_model,
 		.callback = dmi_check_cb
 	},
 	{
@@ -495,12 +649,9 @@ static struct dmi_system_id __initdata msi_dmi_table[] = {
 			DMI_MATCH(DMI_CHASSIS_VENDOR,
 				  "MICRO-STAR INT'L CO.,LTD")
 		},
+		.driver_data = &quirk_old_ec_model,
 		.callback = dmi_check_cb
 	},
-	{ }
-};
-
-static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = {
 	{
 		.ident = "MSI N034",
 		.matches = {
@@ -510,6 +661,7 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = {
 			DMI_MATCH(DMI_CHASSIS_VENDOR,
 			"MICRO-STAR INTERNATIONAL CO., LTD")
 		},
+		.driver_data = &quirk_load_scm_model,
 		.callback = dmi_check_cb
 	},
 	{
@@ -521,6 +673,7 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = {
 			DMI_MATCH(DMI_CHASSIS_VENDOR,
 			"MICRO-STAR INTERNATIONAL CO., LTD")
 		},
+		.driver_data = &quirk_load_scm_model,
 		.callback = dmi_check_cb
 	},
 	{
@@ -530,6 +683,7 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = {
 				"MICRO-STAR INTERNATIONAL CO., LTD"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "MS-N014"),
 		},
+		.driver_data = &quirk_load_scm_model,
 		.callback = dmi_check_cb
 	},
 	{
@@ -539,6 +693,7 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = {
 				"Micro-Star International"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "CR620"),
 		},
+		.driver_data = &quirk_load_scm_model,
 		.callback = dmi_check_cb
 	},
 	{
@@ -548,6 +703,17 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = {
 				"Micro-Star International Co., Ltd."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "U270 series"),
 		},
+		.driver_data = &quirk_load_scm_model,
+		.callback = dmi_check_cb
+	},
+	{
+		.ident = "MSI U90/U100",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR,
+				"MICRO-STAR INTERNATIONAL CO., LTD"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "U90/U100"),
+		},
+		.driver_data = &quirk_load_scm_ro_model,
 		.callback = dmi_check_cb
 	},
 	{ }
@@ -560,32 +726,26 @@ static int rfkill_bluetooth_set(void *data, bool blocked)
 	 * blocked == false is on
 	 * blocked == true is off
 	 */
-	if (blocked)
-		set_device_state("0", 0, MSI_STANDARD_EC_BLUETOOTH_MASK);
-	else
-		set_device_state("1", 0, MSI_STANDARD_EC_BLUETOOTH_MASK);
+	int result = set_device_state(blocked ? "0" : "1", 0,
+			MSI_STANDARD_EC_BLUETOOTH_MASK);
 
-	return 0;
+	return min(result, 0);
 }
 
 static int rfkill_wlan_set(void *data, bool blocked)
 {
-	if (blocked)
-		set_device_state("0", 0, MSI_STANDARD_EC_WLAN_MASK);
-	else
-		set_device_state("1", 0, MSI_STANDARD_EC_WLAN_MASK);
+	int result = set_device_state(blocked ? "0" : "1", 0,
+			MSI_STANDARD_EC_WLAN_MASK);
 
-	return 0;
+	return min(result, 0);
 }
 
 static int rfkill_threeg_set(void *data, bool blocked)
 {
-	if (blocked)
-		set_device_state("0", 0, MSI_STANDARD_EC_3G_MASK);
-	else
-		set_device_state("1", 0, MSI_STANDARD_EC_3G_MASK);
+	int result = set_device_state(blocked ? "0" : "1", 0,
+			MSI_STANDARD_EC_3G_MASK);
 
-	return 0;
+	return min(result, 0);
 }
 
 static const struct rfkill_ops rfkill_bluetooth_ops = {
@@ -618,25 +778,34 @@ static void rfkill_cleanup(void)
 	}
 }
 
+static bool msi_rfkill_set_state(struct rfkill *rfkill, bool blocked)
+{
+	if (quirks->ec_read_only)
+		return rfkill_set_hw_state(rfkill, blocked);
+	else
+		return rfkill_set_sw_state(rfkill, blocked);
+}
+
 static void msi_update_rfkill(struct work_struct *ignored)
 {
 	get_wireless_state_ec_standard();
 
 	if (rfk_wlan)
-		rfkill_set_sw_state(rfk_wlan, !wlan_s);
+		msi_rfkill_set_state(rfk_wlan, !wlan_s);
 	if (rfk_bluetooth)
-		rfkill_set_sw_state(rfk_bluetooth, !bluetooth_s);
+		msi_rfkill_set_state(rfk_bluetooth, !bluetooth_s);
 	if (rfk_threeg)
-		rfkill_set_sw_state(rfk_threeg, !threeg_s);
+		msi_rfkill_set_state(rfk_threeg, !threeg_s);
 }
-static DECLARE_DELAYED_WORK(msi_rfkill_work, msi_update_rfkill);
+static DECLARE_DELAYED_WORK(msi_rfkill_dwork, msi_update_rfkill);
+static DECLARE_WORK(msi_rfkill_work, msi_update_rfkill);
 
 static void msi_send_touchpad_key(struct work_struct *ignored)
 {
 	u8 rdata;
 	int result;
 
-	result = ec_read(MSI_STANDARD_EC_TOUCHPAD_ADDRESS, &rdata);
+	result = ec_read(MSI_STANDARD_EC_FUNCTIONS_ADDRESS, &rdata);
 	if (result < 0)
 		return;
 
@@ -644,7 +813,8 @@ static void msi_send_touchpad_key(struct work_struct *ignored)
 		(rdata & MSI_STANDARD_EC_TOUCHPAD_MASK) ?
 		KEY_TOUCHPAD_ON : KEY_TOUCHPAD_OFF, 1, true);
 }
-static DECLARE_DELAYED_WORK(msi_touchpad_work, msi_send_touchpad_key);
+static DECLARE_DELAYED_WORK(msi_touchpad_dwork, msi_send_touchpad_key);
+static DECLARE_WORK(msi_touchpad_work, msi_send_touchpad_key);
 
 static bool msi_laptop_i8042_filter(unsigned char data, unsigned char str,
 				struct serio *port)
@@ -662,14 +832,20 @@ static bool msi_laptop_i8042_filter(unsigned char data, unsigned char str,
 		extended = false;
 		switch (data) {
 		case 0xE4:
-			schedule_delayed_work(&msi_touchpad_work,
-				round_jiffies_relative(0.5 * HZ));
+			if (quirks->ec_delay) {
+				schedule_delayed_work(&msi_touchpad_dwork,
+					round_jiffies_relative(0.5 * HZ));
+			} else
+				schedule_work(&msi_touchpad_work);
 			break;
 		case 0x54:
 		case 0x62:
 		case 0x76:
-			schedule_delayed_work(&msi_rfkill_work,
-				round_jiffies_relative(0.5 * HZ));
+			if (quirks->ec_delay) {
+				schedule_delayed_work(&msi_rfkill_dwork,
+					round_jiffies_relative(0.5 * HZ));
+			} else
+				schedule_work(&msi_rfkill_work);
 			break;
 		}
 	}
@@ -736,8 +912,11 @@ static int rfkill_init(struct platform_device *sdev)
 	}
 
 	/* schedule to run rfkill state initial */
-	schedule_delayed_work(&msi_rfkill_init,
-				round_jiffies_relative(1 * HZ));
+	if (quirks->ec_delay) {
+		schedule_delayed_work(&msi_rfkill_init,
+			round_jiffies_relative(1 * HZ));
+	} else
+		schedule_work(&msi_rfkill_work);
 
 	return 0;
 
@@ -761,7 +940,7 @@ static int msi_laptop_resume(struct device *device)
 	u8 data;
 	int result;
 
-	if (!load_scm_model)
+	if (!quirks->load_scm_model)
 		return 0;
 
 	/* set load SCM to disable hardware control by fn key */
@@ -819,13 +998,15 @@ static int __init load_scm_model_init(struct platform_device *sdev)
 	u8 data;
 	int result;
 
-	/* allow userland write sysfs file  */
-	dev_attr_bluetooth.store = store_bluetooth;
-	dev_attr_wlan.store = store_wlan;
-	dev_attr_threeg.store = store_threeg;
-	dev_attr_bluetooth.attr.mode |= S_IWUSR;
-	dev_attr_wlan.attr.mode |= S_IWUSR;
-	dev_attr_threeg.attr.mode |= S_IWUSR;
+	if (!quirks->ec_read_only) {
+		/* allow userland write sysfs file  */
+		dev_attr_bluetooth.store = store_bluetooth;
+		dev_attr_wlan.store = store_wlan;
+		dev_attr_threeg.store = store_threeg;
+		dev_attr_bluetooth.attr.mode |= S_IWUSR;
+		dev_attr_wlan.attr.mode |= S_IWUSR;
+		dev_attr_threeg.attr.mode |= S_IWUSR;
+	}
 
 	/* disable hardware control by fn key */
 	result = ec_read(MSI_STANDARD_EC_SCM_LOAD_ADDRESS, &data);
@@ -874,21 +1055,22 @@ static int __init msi_init(void)
 	if (acpi_disabled)
 		return -ENODEV;
 
-	if (force || dmi_check_system(msi_dmi_table))
-		old_ec_model = 1;
+	dmi_check_system(msi_dmi_table);
+	if (!quirks)
+		/* quirks may be NULL if no match in DMI table */
+		quirks = &quirk_load_scm_model;
+	if (force)
+		quirks = &quirk_old_ec_model;
 
-	if (!old_ec_model)
+	if (!quirks->old_ec_model)
 		get_threeg_exists();
 
-	if (!old_ec_model && dmi_check_system(msi_load_scm_models_dmi_table))
-		load_scm_model = 1;
-
 	if (auto_brightness < 0 || auto_brightness > 2)
 		return -EINVAL;
 
 	/* Register backlight stuff */
 
-	if (acpi_video_backlight_support()) {
+	if (!quirks->old_ec_model || acpi_video_backlight_support()) {
 		pr_info("Brightness ignored, must be controlled by ACPI video driver\n");
 	} else {
 		struct backlight_properties props;
@@ -918,7 +1100,7 @@ static int __init msi_init(void)
 	if (ret)
 		goto fail_platform_device1;
 
-	if (load_scm_model && (load_scm_model_init(msipf_device) < 0)) {
+	if (quirks->load_scm_model && (load_scm_model_init(msipf_device) < 0)) {
 		ret = -EINVAL;
 		goto fail_platform_device1;
 	}
@@ -928,20 +1110,25 @@ static int __init msi_init(void)
 	if (ret)
 		goto fail_platform_device2;
 
-	if (!old_ec_model) {
+	if (!quirks->old_ec_model) {
 		if (threeg_exists)
 			ret = device_create_file(&msipf_device->dev,
 						&dev_attr_threeg);
 		if (ret)
 			goto fail_platform_device2;
-	}
+	} else {
+		ret = sysfs_create_group(&msipf_device->dev.kobj,
+					 &msipf_old_attribute_group);
+		if (ret)
+			goto fail_platform_device2;
 
-	/* Disable automatic brightness control by default because
-	 * this module was probably loaded to do brightness control in
-	 * software. */
+		/* Disable automatic brightness control by default because
+		 * this module was probably loaded to do brightness control in
+		 * software. */
 
-	if (auto_brightness != 2)
-		set_auto_brightness(auto_brightness);
+		if (auto_brightness != 2)
+			set_auto_brightness(auto_brightness);
+	}
 
 	pr_info("driver " MSI_DRIVER_VERSION " successfully loaded\n");
 
@@ -949,9 +1136,10 @@ static int __init msi_init(void)
 
 fail_platform_device2:
 
-	if (load_scm_model) {
+	if (quirks->load_scm_model) {
 		i8042_remove_filter(msi_laptop_i8042_filter);
-		cancel_delayed_work_sync(&msi_rfkill_work);
+		cancel_delayed_work_sync(&msi_rfkill_dwork);
+		cancel_work_sync(&msi_rfkill_work);
 		rfkill_cleanup();
 	}
 	platform_device_del(msipf_device);
@@ -973,23 +1161,26 @@ fail_backlight:
 
 static void __exit msi_cleanup(void)
 {
-	if (load_scm_model) {
+	if (quirks->load_scm_model) {
 		i8042_remove_filter(msi_laptop_i8042_filter);
 		msi_laptop_input_destroy();
-		cancel_delayed_work_sync(&msi_rfkill_work);
+		cancel_delayed_work_sync(&msi_rfkill_dwork);
+		cancel_work_sync(&msi_rfkill_work);
 		rfkill_cleanup();
 	}
 
 	sysfs_remove_group(&msipf_device->dev.kobj, &msipf_attribute_group);
-	if (!old_ec_model && threeg_exists)
+	if (!quirks->old_ec_model && threeg_exists)
 		device_remove_file(&msipf_device->dev, &dev_attr_threeg);
 	platform_device_unregister(msipf_device);
 	platform_driver_unregister(&msipf_driver);
 	backlight_device_unregister(msibl_device);
 
-	/* Enable automatic brightness control again */
-	if (auto_brightness != 2)
-		set_auto_brightness(1);
+	if (quirks->old_ec_model) {
+		/* Enable automatic brightness control again */
+		if (auto_brightness != 2)
+			set_auto_brightness(1);
+	}
 
 	pr_info("driver unloaded\n");
 }
@@ -1011,3 +1202,4 @@ MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnMS-N051:*");
 MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnMS-N014:*");
 MODULE_ALIAS("dmi:*:svnMicro-StarInternational*:pnCR620:*");
 MODULE_ALIAS("dmi:*:svnMicro-StarInternational*:pnU270series:*");
+MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnU90/U100:*");
diff --git a/drivers/platform/x86/msi-wmi.c b/drivers/platform/x86/msi-wmi.c
index 2264331bd48e..70222f265f68 100644
--- a/drivers/platform/x86/msi-wmi.c
+++ b/drivers/platform/x86/msi-wmi.c
@@ -34,29 +34,65 @@ MODULE_AUTHOR("Thomas Renninger <trenn@suse.de>");
 MODULE_DESCRIPTION("MSI laptop WMI hotkeys driver");
 MODULE_LICENSE("GPL");
 
-MODULE_ALIAS("wmi:551A1F84-FBDD-4125-91DB-3EA8F44F1D45");
-MODULE_ALIAS("wmi:B6F3EEF2-3D2F-49DC-9DE3-85BCE18C62F2");
-
 #define DRV_NAME "msi-wmi"
 
 #define MSIWMI_BIOS_GUID "551A1F84-FBDD-4125-91DB-3EA8F44F1D45"
-#define MSIWMI_EVENT_GUID "B6F3EEF2-3D2F-49DC-9DE3-85BCE18C62F2"
-
-#define SCANCODE_BASE 0xD0
-#define MSI_WMI_BRIGHTNESSUP   SCANCODE_BASE
-#define MSI_WMI_BRIGHTNESSDOWN (SCANCODE_BASE + 1)
-#define MSI_WMI_VOLUMEUP       (SCANCODE_BASE + 2)
-#define MSI_WMI_VOLUMEDOWN     (SCANCODE_BASE + 3)
-#define MSI_WMI_MUTE           (SCANCODE_BASE + 4)
+#define MSIWMI_MSI_EVENT_GUID "B6F3EEF2-3D2F-49DC-9DE3-85BCE18C62F2"
+#define MSIWMI_WIND_EVENT_GUID "5B3CC38A-40D9-7245-8AE6-1145B751BE3F"
+
+MODULE_ALIAS("wmi:" MSIWMI_BIOS_GUID);
+MODULE_ALIAS("wmi:" MSIWMI_MSI_EVENT_GUID);
+MODULE_ALIAS("wmi:" MSIWMI_WIND_EVENT_GUID);
+
+enum msi_scancodes {
+	/* Generic MSI keys (not present on MSI Wind) */
+	MSI_KEY_BRIGHTNESSUP	= 0xD0,
+	MSI_KEY_BRIGHTNESSDOWN,
+	MSI_KEY_VOLUMEUP,
+	MSI_KEY_VOLUMEDOWN,
+	MSI_KEY_MUTE,
+	/* MSI Wind keys */
+	WIND_KEY_TOUCHPAD	= 0x08,	/* Fn+F3 touchpad toggle */
+	WIND_KEY_BLUETOOTH	= 0x56,	/* Fn+F11 Bluetooth toggle */
+	WIND_KEY_CAMERA,		/* Fn+F6 webcam toggle */
+	WIND_KEY_WLAN		= 0x5f,	/* Fn+F11 Wi-Fi toggle */
+	WIND_KEY_TURBO,			/* Fn+F10 turbo mode toggle */
+	WIND_KEY_ECO		= 0x69,	/* Fn+F10 ECO mode toggle */
+};
 static struct key_entry msi_wmi_keymap[] = {
-	{ KE_KEY, MSI_WMI_BRIGHTNESSUP,   {KEY_BRIGHTNESSUP} },
-	{ KE_KEY, MSI_WMI_BRIGHTNESSDOWN, {KEY_BRIGHTNESSDOWN} },
-	{ KE_KEY, MSI_WMI_VOLUMEUP,       {KEY_VOLUMEUP} },
-	{ KE_KEY, MSI_WMI_VOLUMEDOWN,     {KEY_VOLUMEDOWN} },
-	{ KE_KEY, MSI_WMI_MUTE,           {KEY_MUTE} },
-	{ KE_END, 0}
+	{ KE_KEY, MSI_KEY_BRIGHTNESSUP,		{KEY_BRIGHTNESSUP} },
+	{ KE_KEY, MSI_KEY_BRIGHTNESSDOWN,	{KEY_BRIGHTNESSDOWN} },
+	{ KE_KEY, MSI_KEY_VOLUMEUP,		{KEY_VOLUMEUP} },
+	{ KE_KEY, MSI_KEY_VOLUMEDOWN,		{KEY_VOLUMEDOWN} },
+	{ KE_KEY, MSI_KEY_MUTE,			{KEY_MUTE} },
+
+	/* These keys work without WMI. Ignore them to avoid double keycodes */
+	{ KE_IGNORE, WIND_KEY_TOUCHPAD,		{KEY_TOUCHPAD_TOGGLE} },
+	{ KE_IGNORE, WIND_KEY_BLUETOOTH,	{KEY_BLUETOOTH} },
+	{ KE_IGNORE, WIND_KEY_CAMERA,		{KEY_CAMERA} },
+	{ KE_IGNORE, WIND_KEY_WLAN,		{KEY_WLAN} },
+
+	/* These are unknown WMI events found on MSI Wind */
+	{ KE_IGNORE, 0x00 },
+	{ KE_IGNORE, 0x62 },
+	{ KE_IGNORE, 0x63 },
+
+	/* These are MSI Wind keys that should be handled via WMI */
+	{ KE_KEY, WIND_KEY_TURBO,		{KEY_PROG1} },
+	{ KE_KEY, WIND_KEY_ECO,			{KEY_PROG2} },
+
+	{ KE_END, 0 }
+};
+
+static ktime_t last_pressed;
+
+static const struct {
+	const char *guid;
+	bool quirk_last_pressed;
+} *event_wmi, event_wmis[] = {
+	{ MSIWMI_MSI_EVENT_GUID, true },
+	{ MSIWMI_WIND_EVENT_GUID, false },
 };
-static ktime_t last_pressed[ARRAY_SIZE(msi_wmi_keymap) - 1];
 
 static struct backlight_device *backlight;
 
@@ -149,7 +185,6 @@ static void msi_wmi_notify(u32 value, void *context)
 	struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL };
 	static struct key_entry *key;
 	union acpi_object *obj;
-	ktime_t cur;
 	acpi_status status;
 
 	status = wmi_get_event_data(value, &response);
@@ -165,39 +200,67 @@ static void msi_wmi_notify(u32 value, void *context)
 		pr_debug("Eventcode: 0x%x\n", eventcode);
 		key = sparse_keymap_entry_from_scancode(msi_wmi_input_dev,
 				eventcode);
-		if (key) {
-			ktime_t diff;
-			cur = ktime_get_real();
-			diff = ktime_sub(cur, last_pressed[key->code -
-					SCANCODE_BASE]);
-			/* Ignore event if the same event happened in a 50 ms
+		if (!key) {
+			pr_info("Unknown key pressed - %x\n", eventcode);
+			goto msi_wmi_notify_exit;
+		}
+
+		if (event_wmi->quirk_last_pressed) {
+			ktime_t cur = ktime_get_real();
+			ktime_t diff = ktime_sub(cur, last_pressed);
+			/* Ignore event if any event happened in a 50 ms
 			   timeframe -> Key press may result in 10-20 GPEs */
 			if (ktime_to_us(diff) < 1000 * 50) {
 				pr_debug("Suppressed key event 0x%X - "
 					 "Last press was %lld us ago\n",
 					 key->code, ktime_to_us(diff));
-				return;
-			}
-			last_pressed[key->code - SCANCODE_BASE] = cur;
-
-			if (key->type == KE_KEY &&
-			/* Brightness is served via acpi video driver */
-			(!acpi_video_backlight_support() ||
-			(key->code != MSI_WMI_BRIGHTNESSUP &&
-			key->code != MSI_WMI_BRIGHTNESSDOWN))) {
-				pr_debug("Send key: 0x%X - "
-					 "Input layer keycode: %d\n",
-					 key->code, key->keycode);
-				sparse_keymap_report_entry(msi_wmi_input_dev,
-						key, 1, true);
+				goto msi_wmi_notify_exit;
 			}
-		} else
-			pr_info("Unknown key pressed - %x\n", eventcode);
+			last_pressed = cur;
+		}
+
+		if (key->type == KE_KEY &&
+		/* Brightness is served via acpi video driver */
+		(backlight ||
+		(key->code != MSI_KEY_BRIGHTNESSUP &&
+		key->code != MSI_KEY_BRIGHTNESSDOWN))) {
+			pr_debug("Send key: 0x%X - Input layer keycode: %d\n",
+				 key->code, key->keycode);
+			sparse_keymap_report_entry(msi_wmi_input_dev, key, 1,
+						   true);
+		}
 	} else
 		pr_info("Unknown event received\n");
+
+msi_wmi_notify_exit:
 	kfree(response.pointer);
 }
 
+static int __init msi_wmi_backlight_setup(void)
+{
+	int err;
+	struct backlight_properties props;
+
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.type = BACKLIGHT_PLATFORM;
+	props.max_brightness = ARRAY_SIZE(backlight_map) - 1;
+	backlight = backlight_device_register(DRV_NAME, NULL, NULL,
+					      &msi_backlight_ops,
+					      &props);
+	if (IS_ERR(backlight))
+		return PTR_ERR(backlight);
+
+	err = bl_get(NULL);
+	if (err < 0) {
+		backlight_device_unregister(backlight);
+		return err;
+	}
+
+	backlight->props.brightness = err;
+
+	return 0;
+}
+
 static int __init msi_wmi_input_setup(void)
 {
 	int err;
@@ -219,7 +282,7 @@ static int __init msi_wmi_input_setup(void)
 	if (err)
 		goto err_free_keymap;
 
-	memset(last_pressed, 0, sizeof(last_pressed));
+	last_pressed = ktime_set(0, 0);
 
 	return 0;
 
@@ -233,61 +296,66 @@ err_free_dev:
 static int __init msi_wmi_init(void)
 {
 	int err;
+	int i;
 
-	if (!wmi_has_guid(MSIWMI_EVENT_GUID)) {
-		pr_err("This machine doesn't have MSI-hotkeys through WMI\n");
-		return -ENODEV;
-	}
-	err = wmi_install_notify_handler(MSIWMI_EVENT_GUID,
-			msi_wmi_notify, NULL);
-	if (ACPI_FAILURE(err))
-		return -EINVAL;
+	for (i = 0; i < ARRAY_SIZE(event_wmis); i++) {
+		if (!wmi_has_guid(event_wmis[i].guid))
+			continue;
 
-	err = msi_wmi_input_setup();
-	if (err)
-		goto err_uninstall_notifier;
-
-	if (!acpi_video_backlight_support()) {
-		struct backlight_properties props;
-		memset(&props, 0, sizeof(struct backlight_properties));
-		props.type = BACKLIGHT_PLATFORM;
-		props.max_brightness = ARRAY_SIZE(backlight_map) - 1;
-		backlight = backlight_device_register(DRV_NAME, NULL, NULL,
-						      &msi_backlight_ops,
-						      &props);
-		if (IS_ERR(backlight)) {
-			err = PTR_ERR(backlight);
+		err = msi_wmi_input_setup();
+		if (err) {
+			pr_err("Unable to setup input device\n");
+			return err;
+		}
+
+		err = wmi_install_notify_handler(event_wmis[i].guid,
+			msi_wmi_notify, NULL);
+		if (ACPI_FAILURE(err)) {
+			pr_err("Unable to setup WMI notify handler\n");
 			goto err_free_input;
 		}
 
-		err = bl_get(NULL);
-		if (err < 0)
-			goto err_free_backlight;
+		pr_debug("Event handler installed\n");
+		event_wmi = &event_wmis[i];
+		break;
+	}
 
-		backlight->props.brightness = err;
+	if (wmi_has_guid(MSIWMI_BIOS_GUID) && !acpi_video_backlight_support()) {
+		err = msi_wmi_backlight_setup();
+		if (err) {
+			pr_err("Unable to setup backlight device\n");
+			goto err_uninstall_handler;
+		}
+		pr_debug("Backlight device created\n");
+	}
+
+	if (!event_wmi && !backlight) {
+		pr_err("This machine doesn't have neither MSI-hotkeys nor backlight through WMI\n");
+		return -ENODEV;
 	}
-	pr_debug("Event handler installed\n");
 
 	return 0;
 
-err_free_backlight:
-	backlight_device_unregister(backlight);
+err_uninstall_handler:
+	if (event_wmi)
+		wmi_remove_notify_handler(event_wmi->guid);
 err_free_input:
-	sparse_keymap_free(msi_wmi_input_dev);
-	input_unregister_device(msi_wmi_input_dev);
-err_uninstall_notifier:
-	wmi_remove_notify_handler(MSIWMI_EVENT_GUID);
+	if (event_wmi) {
+		sparse_keymap_free(msi_wmi_input_dev);
+		input_unregister_device(msi_wmi_input_dev);
+	}
 	return err;
 }
 
 static void __exit msi_wmi_exit(void)
 {
-	if (wmi_has_guid(MSIWMI_EVENT_GUID)) {
-		wmi_remove_notify_handler(MSIWMI_EVENT_GUID);
+	if (event_wmi) {
+		wmi_remove_notify_handler(event_wmi->guid);
 		sparse_keymap_free(msi_wmi_input_dev);
 		input_unregister_device(msi_wmi_input_dev);
-		backlight_device_unregister(backlight);
 	}
+	if (backlight)
+		backlight_device_unregister(backlight);
 }
 
 module_init(msi_wmi_init);
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 8da21876a794..14d4dced1def 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -158,6 +158,11 @@ static void sony_nc_thermal_cleanup(struct platform_device *pd);
 static int sony_nc_lid_resume_setup(struct platform_device *pd);
 static void sony_nc_lid_resume_cleanup(struct platform_device *pd);
 
+static int sony_nc_gfx_switch_setup(struct platform_device *pd,
+		unsigned int handle);
+static void sony_nc_gfx_switch_cleanup(struct platform_device *pd);
+static int __sony_nc_gfx_switch_status_get(void);
+
 static int sony_nc_highspeed_charging_setup(struct platform_device *pd);
 static void sony_nc_highspeed_charging_cleanup(struct platform_device *pd);
 
@@ -1241,17 +1246,13 @@ static void sony_nc_notify(struct acpi_device *device, u32 event)
 			/* Hybrid GFX switching */
 			sony_call_snc_handle(handle, 0x0000, &result);
 			dprintk("GFX switch event received (reason: %s)\n",
-					(result & 0x01) ?
-					"switch change" : "unknown");
-
-			/* verify the switch state
-			 * 1: discrete GFX
-			 * 0: integrated GFX
-			 */
-			sony_call_snc_handle(handle, 0x0100, &result);
+					(result == 0x1) ? "switch change" :
+					(result == 0x2) ? "output switch" :
+					(result == 0x3) ? "output switch" :
+					"");
 
 			ev_type = GFX_SWITCH;
-			real_ev = result & 0xff;
+			real_ev = __sony_nc_gfx_switch_status_get();
 			break;
 
 		default:
@@ -1350,6 +1351,13 @@ static void sony_nc_function_setup(struct acpi_device *device,
 				pr_err("couldn't set up thermal profile function (%d)\n",
 						result);
 			break;
+		case 0x0128:
+		case 0x0146:
+			result = sony_nc_gfx_switch_setup(pf_device, handle);
+			if (result)
+				pr_err("couldn't set up GFX Switch status (%d)\n",
+						result);
+			break;
 		case 0x0131:
 			result = sony_nc_highspeed_charging_setup(pf_device);
 			if (result)
@@ -1365,6 +1373,8 @@ static void sony_nc_function_setup(struct acpi_device *device,
 			break;
 		case 0x0137:
 		case 0x0143:
+		case 0x014b:
+		case 0x014c:
 			result = sony_nc_kbd_backlight_setup(pf_device, handle);
 			if (result)
 				pr_err("couldn't set up keyboard backlight function (%d)\n",
@@ -1414,6 +1424,10 @@ static void sony_nc_function_cleanup(struct platform_device *pd)
 		case 0x0122:
 			sony_nc_thermal_cleanup(pd);
 			break;
+		case 0x0128:
+		case 0x0146:
+			sony_nc_gfx_switch_cleanup(pd);
+			break;
 		case 0x0131:
 			sony_nc_highspeed_charging_cleanup(pd);
 			break;
@@ -1423,6 +1437,8 @@ static void sony_nc_function_cleanup(struct platform_device *pd)
 			break;
 		case 0x0137:
 		case 0x0143:
+		case 0x014b:
+		case 0x014c:
 			sony_nc_kbd_backlight_cleanup(pd);
 			break;
 		default:
@@ -1467,6 +1483,8 @@ static void sony_nc_function_resume(void)
 			break;
 		case 0x0137:
 		case 0x0143:
+		case 0x014b:
+		case 0x014c:
 			sony_nc_kbd_backlight_resume();
 			break;
 		default:
@@ -1534,7 +1552,7 @@ static int sony_nc_rfkill_set(void *data, bool blocked)
 	int argument = sony_rfkill_address[(long) data] + 0x100;
 
 	if (!blocked)
-		argument |= 0x030000;
+		argument |= 0x070000;
 
 	return sony_call_snc_handle(sony_rfkill_handle, argument, &result);
 }
@@ -2333,7 +2351,7 @@ static int sony_nc_lid_resume_setup(struct platform_device *pd)
 	return 0;
 
 liderror:
-	for (; i > 0; i--)
+	for (i--; i >= 0; i--)
 		device_remove_file(&pd->dev, &lid_ctl->attrs[i]);
 
 	kfree(lid_ctl);
@@ -2355,6 +2373,97 @@ static void sony_nc_lid_resume_cleanup(struct platform_device *pd)
 	}
 }
 
+/* GFX Switch position */
+enum gfx_switch {
+	SPEED,
+	STAMINA,
+	AUTO
+};
+struct snc_gfx_switch_control {
+	struct device_attribute attr;
+	unsigned int handle;
+};
+static struct snc_gfx_switch_control *gfxs_ctl;
+
+/* returns 0 for speed, 1 for stamina */
+static int __sony_nc_gfx_switch_status_get(void)
+{
+	unsigned int result;
+
+	if (sony_call_snc_handle(gfxs_ctl->handle, 0x0100, &result))
+		return -EIO;
+
+	switch (gfxs_ctl->handle) {
+	case 0x0146:
+		/* 1: discrete GFX (speed)
+		 * 0: integrated GFX (stamina)
+		 */
+		return result & 0x1 ? SPEED : STAMINA;
+		break;
+	case 0x0128:
+		/* it's a more elaborated bitmask, for now:
+		 * 2: integrated GFX (stamina)
+		 * 0: discrete GFX (speed)
+		 */
+		dprintk("GFX Status: 0x%x\n", result);
+		return result & 0x80 ? AUTO :
+			result & 0x02 ? STAMINA : SPEED;
+		break;
+	}
+	return -EINVAL;
+}
+
+static ssize_t sony_nc_gfx_switch_status_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buffer)
+{
+	int pos = __sony_nc_gfx_switch_status_get();
+
+	if (pos < 0)
+		return pos;
+
+	return snprintf(buffer, PAGE_SIZE, "%s\n", pos ? "speed" : "stamina");
+}
+
+static int sony_nc_gfx_switch_setup(struct platform_device *pd,
+		unsigned int handle)
+{
+	unsigned int result;
+
+	gfxs_ctl = kzalloc(sizeof(struct snc_gfx_switch_control), GFP_KERNEL);
+	if (!gfxs_ctl)
+		return -ENOMEM;
+
+	gfxs_ctl->handle = handle;
+
+	sysfs_attr_init(&gfxs_ctl->attr.attr);
+	gfxs_ctl->attr.attr.name = "gfx_switch_status";
+	gfxs_ctl->attr.attr.mode = S_IRUGO;
+	gfxs_ctl->attr.show = sony_nc_gfx_switch_status_show;
+
+	result = device_create_file(&pd->dev, &gfxs_ctl->attr);
+	if (result)
+		goto gfxerror;
+
+	return 0;
+
+gfxerror:
+	kfree(gfxs_ctl);
+	gfxs_ctl = NULL;
+
+	return result;
+}
+
+static void sony_nc_gfx_switch_cleanup(struct platform_device *pd)
+{
+	if (gfxs_ctl) {
+		device_remove_file(&pd->dev, &gfxs_ctl->attr);
+
+		kfree(gfxs_ctl);
+		gfxs_ctl = NULL;
+	}
+}
+
 /* High speed charging function */
 static struct device_attribute *hsc_handle;
 
@@ -2533,6 +2642,8 @@ static void sony_nc_backlight_ng_read_limits(int handle,
 		lvl_table_len = 9;
 		break;
 	case 0x143:
+	case 0x14b:
+	case 0x14c:
 		lvl_table_len = 16;
 		break;
 	}
@@ -2584,6 +2695,18 @@ static void sony_nc_backlight_setup(void)
 		sony_nc_backlight_ng_read_limits(0x143, &sony_bl_props);
 		max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset;
 
+	} else if (sony_find_snc_handle(0x14b) >= 0) {
+		ops = &sony_backlight_ng_ops;
+		sony_bl_props.cmd_base = 0x3000;
+		sony_nc_backlight_ng_read_limits(0x14b, &sony_bl_props);
+		max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset;
+
+	} else if (sony_find_snc_handle(0x14c) >= 0) {
+		ops = &sony_backlight_ng_ops;
+		sony_bl_props.cmd_base = 0x3000;
+		sony_nc_backlight_ng_read_limits(0x14c, &sony_bl_props);
+		max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset;
+
 	} else if (ACPI_SUCCESS(acpi_get_handle(sony_nc_acpi_handle, "GBRT",
 						&unused))) {
 		ops = &sony_backlight_ops;
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index f4f8408f3b5b..9a907567f41e 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -209,9 +209,8 @@ enum tpacpi_hkey_event_t {
 	TP_HKEY_EV_ALARM_SENSOR_XHOT	= 0x6022, /* sensor critically hot */
 	TP_HKEY_EV_THM_TABLE_CHANGED	= 0x6030, /* thermal table changed */
 
-	TP_HKEY_EV_UNK_6040		= 0x6040, /* Related to AC change?
-						     some sort of APM hint,
-						     W520 */
+	/* AC-related events */
+	TP_HKEY_EV_AC_CHANGED		= 0x6040, /* AC status changed */
 
 	/* Misc */
 	TP_HKEY_EV_RFKILL_CHANGED	= 0x7000, /* rfkill switch changed */
@@ -3629,6 +3628,12 @@ static bool hotkey_notify_6xxx(const u32 hkey,
 			 "a sensor reports something is extremely hot!\n");
 		/* recommended action: immediate sleep/hibernate */
 		break;
+	case TP_HKEY_EV_AC_CHANGED:
+		/* X120e, X121e, X220, X220i, X220t, X230, T420, T420s, W520:
+		 * AC status changed; can be triggered by plugging or
+		 * unplugging AC adapter, docking or undocking. */
+
+		/* fallthrough */
 
 	case TP_HKEY_EV_KEY_NUMLOCK:
 	case TP_HKEY_EV_KEY_FN:
@@ -8574,7 +8579,8 @@ static bool __pure __init tpacpi_is_valid_fw_id(const char* const s,
 	return s && strlen(s) >= 8 &&
 		tpacpi_is_fw_digit(s[0]) &&
 		tpacpi_is_fw_digit(s[1]) &&
-		s[2] == t && s[3] == 'T' &&
+		s[2] == t &&
+		(s[3] == 'T' || s[3] == 'N') &&
 		tpacpi_is_fw_digit(s[4]) &&
 		tpacpi_is_fw_digit(s[5]);
 }
@@ -8607,7 +8613,8 @@ static int __must_check __init get_thinkpad_model_data(
 		return -ENOMEM;
 
 	/* Really ancient ThinkPad 240X will fail this, which is fine */
-	if (!tpacpi_is_valid_fw_id(tp->bios_version_str, 'E'))
+	if (!(tpacpi_is_valid_fw_id(tp->bios_version_str, 'E') ||
+	      tpacpi_is_valid_fw_id(tp->bios_version_str, 'C')))
 		return 0;
 
 	tp->bios_model = tp->bios_version_str[0]
diff --git a/drivers/power/bq2415x_charger.c b/drivers/power/bq2415x_charger.c
index ca91396fc48e..0727f9256138 100644
--- a/drivers/power/bq2415x_charger.c
+++ b/drivers/power/bq2415x_charger.c
@@ -1515,16 +1515,11 @@ static int bq2415x_probe(struct i2c_client *client,
 	}
 
 	/* Get new ID for the new device */
-	ret = idr_pre_get(&bq2415x_id, GFP_KERNEL);
-	if (ret == 0)
-		return -ENOMEM;
-
 	mutex_lock(&bq2415x_id_mutex);
-	ret = idr_get_new(&bq2415x_id, client, &num);
+	num = idr_alloc(&bq2415x_id, client, 0, 0, GFP_KERNEL);
 	mutex_unlock(&bq2415x_id_mutex);
-
-	if (ret < 0)
-		return ret;
+	if (num < 0)
+		return num;
 
 	name = kasprintf(GFP_KERNEL, "%s-%d", id->name, num);
 	if (!name) {
diff --git a/drivers/power/bq27x00_battery.c b/drivers/power/bq27x00_battery.c
index 8ccf5d7d0add..26037ca7efb4 100644
--- a/drivers/power/bq27x00_battery.c
+++ b/drivers/power/bq27x00_battery.c
@@ -791,14 +791,11 @@ static int bq27x00_battery_probe(struct i2c_client *client,
 	int retval = 0;
 
 	/* Get new ID for the new battery device */
-	retval = idr_pre_get(&battery_id, GFP_KERNEL);
-	if (retval == 0)
-		return -ENOMEM;
 	mutex_lock(&battery_mutex);
-	retval = idr_get_new(&battery_id, client, &num);
+	num = idr_alloc(&battery_id, client, 0, 0, GFP_KERNEL);
 	mutex_unlock(&battery_mutex);
-	if (retval < 0)
-		return retval;
+	if (num < 0)
+		return num;
 
 	name = kasprintf(GFP_KERNEL, "%s-%d", id->name, num);
 	if (!name) {
diff --git a/drivers/power/ds2782_battery.c b/drivers/power/ds2782_battery.c
index e7301b3ed623..c09e7726c96c 100644
--- a/drivers/power/ds2782_battery.c
+++ b/drivers/power/ds2782_battery.c
@@ -395,17 +395,12 @@ static int ds278x_battery_probe(struct i2c_client *client,
 	}
 
 	/* Get an ID for this battery */
-	ret = idr_pre_get(&battery_id, GFP_KERNEL);
-	if (ret == 0) {
-		ret = -ENOMEM;
-		goto fail_id;
-	}
-
 	mutex_lock(&battery_lock);
-	ret = idr_get_new(&battery_id, client, &num);
+	ret = idr_alloc(&battery_id, client, 0, 0, GFP_KERNEL);
 	mutex_unlock(&battery_lock);
 	if (ret < 0)
 		goto fail_id;
+	num = ret;
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info) {
diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c
index 2bf0c1b608dd..d3db26e46489 100644
--- a/drivers/pps/clients/pps-gpio.c
+++ b/drivers/pps/clients/pps-gpio.c
@@ -128,7 +128,8 @@ static int pps_gpio_probe(struct platform_device *pdev)
 	}
 
 	/* allocate space for device info */
-	data = kzalloc(sizeof(struct pps_gpio_device_data), GFP_KERNEL);
+	data = devm_kzalloc(&pdev->dev, sizeof(struct pps_gpio_device_data),
+			    GFP_KERNEL);
 	if (data == NULL) {
 		err = -ENOMEM;
 		goto return_error;
@@ -150,7 +151,6 @@ static int pps_gpio_probe(struct platform_device *pdev)
 		pps_default_params |= PPS_CAPTURECLEAR | PPS_OFFSETCLEAR;
 	data->pps = pps_register_source(&data->info, pps_default_params);
 	if (data->pps == NULL) {
-		kfree(data);
 		pr_err("failed to register IRQ %d as PPS source\n", irq);
 		err = -EINVAL;
 		goto return_error;
@@ -164,7 +164,6 @@ static int pps_gpio_probe(struct platform_device *pdev)
 			get_irqf_trigger_flags(pdata), data->info.name, data);
 	if (ret) {
 		pps_unregister_source(data->pps);
-		kfree(data);
 		pr_err("failed to acquire IRQ %d\n", irq);
 		err = -EINVAL;
 		goto return_error;
@@ -190,7 +189,6 @@ static int pps_gpio_remove(struct platform_device *pdev)
 	gpio_free(pdata->gpio_pin);
 	pps_unregister_source(data->pps);
 	pr_info("removed IRQ %d as PPS source\n", data->irq);
-	kfree(data);
 	return 0;
 }
 
diff --git a/drivers/pps/kapi.c b/drivers/pps/kapi.c
index f197e8ea185c..cdad4d95b20e 100644
--- a/drivers/pps/kapi.c
+++ b/drivers/pps/kapi.c
@@ -102,7 +102,7 @@ struct pps_device *pps_register_source(struct pps_source_info *info,
 		goto pps_register_source_exit;
 	}
 
-	/* These initializations must be done before calling idr_get_new()
+	/* These initializations must be done before calling idr_alloc()
 	 * in order to avoid reces into pps_event().
 	 */
 	pps->params.api_version = PPS_API_VERS;
diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c
index 6437703eb10f..7173e3ad475d 100644
--- a/drivers/pps/pps.c
+++ b/drivers/pps/pps.c
@@ -295,29 +295,21 @@ int pps_register_cdev(struct pps_device *pps)
 	dev_t devt;
 
 	mutex_lock(&pps_idr_lock);
-	/* Get new ID for the new PPS source */
-	if (idr_pre_get(&pps_idr, GFP_KERNEL) == 0) {
-		mutex_unlock(&pps_idr_lock);
-		return -ENOMEM;
-	}
-
-	/* Now really allocate the PPS source.
-	 * After idr_get_new() calling the new source will be freely available
-	 * into the kernel.
+	/*
+	 * Get new ID for the new PPS source.  After idr_alloc() calling
+	 * the new source will be freely available into the kernel.
 	 */
-	err = idr_get_new(&pps_idr, pps, &pps->id);
-	mutex_unlock(&pps_idr_lock);
-
-	if (err < 0)
-		return err;
-
-	pps->id &= MAX_IDR_MASK;
-	if (pps->id >= PPS_MAX_SOURCES) {
-		pr_err("%s: too many PPS sources in the system\n",
-					pps->info.name);
-		err = -EBUSY;
-		goto free_idr;
+	err = idr_alloc(&pps_idr, pps, 0, PPS_MAX_SOURCES, GFP_KERNEL);
+	if (err < 0) {
+		if (err == -ENOSPC) {
+			pr_err("%s: too many PPS sources in the system\n",
+			       pps->info.name);
+			err = -EBUSY;
+		}
+		goto out_unlock;
 	}
+	pps->id = err;
+	mutex_unlock(&pps_idr_lock);
 
 	devt = MKDEV(MAJOR(pps_devt), pps->id);
 
@@ -351,8 +343,8 @@ del_cdev:
 free_idr:
 	mutex_lock(&pps_idr_lock);
 	idr_remove(&pps_idr, pps->id);
+out_unlock:
 	mutex_unlock(&pps_idr_lock);
-
 	return err;
 }
 
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index dd3bfaf1ad40..29387df4bfc9 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -199,11 +199,6 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i)
 	/* actual size of vring (in bytes) */
 	size = PAGE_ALIGN(vring_size(rvring->len, rvring->align));
 
-	if (!idr_pre_get(&rproc->notifyids, GFP_KERNEL)) {
-		dev_err(dev, "idr_pre_get failed\n");
-		return -ENOMEM;
-	}
-
 	/*
 	 * Allocate non-cacheable memory for the vring. In the future
 	 * this call will also configure the IOMMU for us
@@ -221,12 +216,13 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i)
 	 * TODO: let the rproc know the notifyid of this vring
 	 * TODO: support predefined notifyids (via resource table)
 	 */
-	ret = idr_get_new(&rproc->notifyids, rvring, &notifyid);
+	ret = idr_alloc(&rproc->notifyids, rvring, 0, 0, GFP_KERNEL);
 	if (ret) {
-		dev_err(dev, "idr_get_new failed: %d\n", ret);
+		dev_err(dev, "idr_alloc failed: %d\n", ret);
 		dma_free_coherent(dev->parent, size, va, dma);
 		return ret;
 	}
+	notifyid = ret;
 
 	/* Store largest notifyid */
 	rproc->max_notifyid = max(rproc->max_notifyid, notifyid);
@@ -1180,7 +1176,6 @@ static void rproc_type_release(struct device *dev)
 
 	rproc_delete_debug_dir(rproc);
 
-	idr_remove_all(&rproc->notifyids);
 	idr_destroy(&rproc->notifyids);
 
 	if (rproc->index >= 0)
diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index d85446021ddb..a59684b5fc68 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -213,13 +213,10 @@ static struct rpmsg_endpoint *__rpmsg_create_ept(struct virtproc_info *vrp,
 		struct rpmsg_channel *rpdev, rpmsg_rx_cb_t cb,
 		void *priv, u32 addr)
 {
-	int err, tmpaddr, request;
+	int id_min, id_max, id;
 	struct rpmsg_endpoint *ept;
 	struct device *dev = rpdev ? &rpdev->dev : &vrp->vdev->dev;
 
-	if (!idr_pre_get(&vrp->endpoints, GFP_KERNEL))
-		return NULL;
-
 	ept = kzalloc(sizeof(*ept), GFP_KERNEL);
 	if (!ept) {
 		dev_err(dev, "failed to kzalloc a new ept\n");
@@ -234,31 +231,28 @@ static struct rpmsg_endpoint *__rpmsg_create_ept(struct virtproc_info *vrp,
 	ept->priv = priv;
 
 	/* do we need to allocate a local address ? */
-	request = addr == RPMSG_ADDR_ANY ? RPMSG_RESERVED_ADDRESSES : addr;
+	if (addr == RPMSG_ADDR_ANY) {
+		id_min = RPMSG_RESERVED_ADDRESSES;
+		id_max = 0;
+	} else {
+		id_min = addr;
+		id_max = addr + 1;
+	}
 
 	mutex_lock(&vrp->endpoints_lock);
 
 	/* bind the endpoint to an rpmsg address (and allocate one if needed) */
-	err = idr_get_new_above(&vrp->endpoints, ept, request, &tmpaddr);
-	if (err) {
-		dev_err(dev, "idr_get_new_above failed: %d\n", err);
+	id = idr_alloc(&vrp->endpoints, ept, id_min, id_max, GFP_KERNEL);
+	if (id < 0) {
+		dev_err(dev, "idr_alloc failed: %d\n", id);
 		goto free_ept;
 	}
-
-	/* make sure the user's address request is fulfilled, if relevant */
-	if (addr != RPMSG_ADDR_ANY && tmpaddr != addr) {
-		dev_err(dev, "address 0x%x already in use\n", addr);
-		goto rem_idr;
-	}
-
-	ept->addr = tmpaddr;
+	ept->addr = id;
 
 	mutex_unlock(&vrp->endpoints_lock);
 
 	return ept;
 
-rem_idr:
-	idr_remove(&vrp->endpoints, request);
 free_ept:
 	mutex_unlock(&vrp->endpoints_lock);
 	kref_put(&ept->refcount, __ept_release);
@@ -1036,7 +1030,6 @@ static void rpmsg_remove(struct virtio_device *vdev)
 	if (vrp->ns_ept)
 		__rpmsg_destroy_ept(vrp, vrp->ns_ept);
 
-	idr_remove_all(&vrp->endpoints);
 	idr_destroy(&vrp->endpoints);
 
 	vdev->config->del_vqs(vrp->vdev);
diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c
index b2a8ed99b2bf..98f0d3c30738 100644
--- a/drivers/rtc/rtc-stmp3xxx.c
+++ b/drivers/rtc/rtc-stmp3xxx.c
@@ -27,6 +27,8 @@
 #include <linux/slab.h>
 #include <linux/of_device.h>
 #include <linux/of.h>
+#include <linux/stmp_device.h>
+#include <linux/stmp3xxx_rtc_wdt.h>
 
 #include <mach/common.h>
 
@@ -36,6 +38,7 @@
 #define STMP3XXX_RTC_CTRL_ALARM_IRQ_EN		0x00000001
 #define STMP3XXX_RTC_CTRL_ONEMSEC_IRQ_EN	0x00000002
 #define STMP3XXX_RTC_CTRL_ALARM_IRQ		0x00000004
+#define STMP3XXX_RTC_CTRL_WATCHDOGEN		0x00000010
 
 #define STMP3XXX_RTC_STAT			0x10
 #define STMP3XXX_RTC_STAT_STALE_SHIFT		16
@@ -45,6 +48,8 @@
 
 #define STMP3XXX_RTC_ALARM			0x40
 
+#define STMP3XXX_RTC_WATCHDOG			0x50
+
 #define STMP3XXX_RTC_PERSISTENT0		0x60
 #define STMP3XXX_RTC_PERSISTENT0_SET		0x64
 #define STMP3XXX_RTC_PERSISTENT0_CLR		0x68
@@ -52,12 +57,70 @@
 #define STMP3XXX_RTC_PERSISTENT0_ALARM_EN	0x00000004
 #define STMP3XXX_RTC_PERSISTENT0_ALARM_WAKE	0x00000080
 
+#define STMP3XXX_RTC_PERSISTENT1		0x70
+/* missing bitmask in headers */
+#define STMP3XXX_RTC_PERSISTENT1_FORCE_UPDATER	0x80000000
+
 struct stmp3xxx_rtc_data {
 	struct rtc_device *rtc;
 	void __iomem *io;
 	int irq_alarm;
 };
 
+#if IS_ENABLED(CONFIG_STMP3XXX_RTC_WATCHDOG)
+/**
+ * stmp3xxx_wdt_set_timeout - configure the watchdog inside the STMP3xxx RTC
+ * @dev: the parent device of the watchdog (= the RTC)
+ * @timeout: the desired value for the timeout register of the watchdog.
+ *           0 disables the watchdog
+ *
+ * The watchdog needs one register and two bits which are in the RTC domain.
+ * To handle the resource conflict, the RTC driver will create another
+ * platform_device for the watchdog driver as a child of the RTC device.
+ * The watchdog driver is passed the below accessor function via platform_data
+ * to configure the watchdog. Locking is not needed because accessing SET/CLR
+ * registers is atomic.
+ */
+
+static void stmp3xxx_wdt_set_timeout(struct device *dev, u32 timeout)
+{
+	struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
+
+	if (timeout) {
+		writel(timeout, rtc_data->io + STMP3XXX_RTC_WATCHDOG);
+		writel(STMP3XXX_RTC_CTRL_WATCHDOGEN,
+		       rtc_data->io + STMP3XXX_RTC_CTRL + STMP_OFFSET_REG_SET);
+		writel(STMP3XXX_RTC_PERSISTENT1_FORCE_UPDATER,
+		       rtc_data->io + STMP3XXX_RTC_PERSISTENT1 + STMP_OFFSET_REG_SET);
+	} else {
+		writel(STMP3XXX_RTC_CTRL_WATCHDOGEN,
+		       rtc_data->io + STMP3XXX_RTC_CTRL + STMP_OFFSET_REG_CLR);
+		writel(STMP3XXX_RTC_PERSISTENT1_FORCE_UPDATER,
+		       rtc_data->io + STMP3XXX_RTC_PERSISTENT1 + STMP_OFFSET_REG_CLR);
+	}
+}
+
+static struct stmp3xxx_wdt_pdata wdt_pdata = {
+	.wdt_set_timeout = stmp3xxx_wdt_set_timeout,
+};
+
+static void stmp3xxx_wdt_register(struct platform_device *rtc_pdev)
+{
+	struct platform_device *wdt_pdev =
+		platform_device_alloc("stmp3xxx_rtc_wdt", rtc_pdev->id);
+
+	if (wdt_pdev) {
+		wdt_pdev->dev.parent = &rtc_pdev->dev;
+		wdt_pdev->dev.platform_data = &wdt_pdata;
+		platform_device_add(wdt_pdev);
+	}
+}
+#else
+static void stmp3xxx_wdt_register(struct platform_device *rtc_pdev)
+{
+}
+#endif /* CONFIG_STMP3XXX_RTC_WATCHDOG */
+
 static void stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data)
 {
 	/*
@@ -233,6 +296,7 @@ static int stmp3xxx_rtc_probe(struct platform_device *pdev)
 		goto out_irq_alarm;
 	}
 
+	stmp3xxx_wdt_register(pdev);
 	return 0;
 
 out_irq_alarm:
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 33f26bfa62f2..6999fd919e94 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -1573,7 +1573,10 @@ static void dasd_eckd_do_validate_server(struct work_struct *work)
 {
 	struct dasd_device *device = container_of(work, struct dasd_device,
 						  kick_validate);
-	if (dasd_eckd_validate_server(device, DASD_CQR_FLAGS_FAILFAST)
+	unsigned long flags = 0;
+
+	set_bit(DASD_CQR_FLAGS_FAILFAST, &flags);
+	if (dasd_eckd_validate_server(device, flags)
 	    == -EAGAIN) {
 		/* schedule worker again if failed */
 		schedule_work(&device->kick_validate);
@@ -4157,6 +4160,7 @@ static int dasd_eckd_restore_device(struct dasd_device *device)
 	int rc;
 	struct dasd_uid temp_uid;
 	unsigned long flags;
+	unsigned long cqr_flags = 0;
 
 	private = (struct dasd_eckd_private *) device->private;
 
@@ -4178,7 +4182,9 @@ static int dasd_eckd_restore_device(struct dasd_device *device)
 	rc = dasd_alias_make_device_known_to_lcu(device);
 	if (rc)
 		return rc;
-	dasd_eckd_validate_server(device, DASD_CQR_FLAGS_FAILFAST);
+
+	set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr_flags);
+	dasd_eckd_validate_server(device, cqr_flags);
 
 	/* RE-Read Configuration Data */
 	dasd_eckd_read_conf(device);
diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c
index 749b72739c4a..ccaae9d63d27 100644
--- a/drivers/s390/cio/qdio_debug.c
+++ b/drivers/s390/cio/qdio_debug.c
@@ -232,7 +232,8 @@ static struct file_operations debugfs_perf_fops = {
 	.llseek  = seq_lseek,
 	.release = single_release,
 };
-static void setup_debugfs_entry(struct qdio_q *q, struct ccw_device *cdev)
+
+static void setup_debugfs_entry(struct qdio_q *q)
 {
 	char name[QDIO_DEBUGFS_NAME_LEN];
 
@@ -263,12 +264,12 @@ void qdio_setup_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev)
 		irq_ptr->debugfs_perf = NULL;
 
 	for_each_input_queue(irq_ptr, q, i)
-		setup_debugfs_entry(q, cdev);
+		setup_debugfs_entry(q);
 	for_each_output_queue(irq_ptr, q, i)
-		setup_debugfs_entry(q, cdev);
+		setup_debugfs_entry(q);
 }
 
-void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev)
+void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr)
 {
 	struct qdio_q *q;
 	int i;
diff --git a/drivers/s390/cio/qdio_debug.h b/drivers/s390/cio/qdio_debug.h
index 7f8b973da298..647b422bb22a 100644
--- a/drivers/s390/cio/qdio_debug.h
+++ b/drivers/s390/cio/qdio_debug.h
@@ -85,8 +85,7 @@ void qdio_allocate_dbf(struct qdio_initialize *init_data,
 		       struct qdio_irq *irq_ptr);
 void qdio_setup_debug_entries(struct qdio_irq *irq_ptr,
 			      struct ccw_device *cdev);
-void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr,
-				 struct ccw_device *cdev);
+void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr);
 int qdio_debug_init(void);
 void qdio_debug_exit(void);
 
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index abc550e5dd35..843051bc20f1 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -1226,7 +1226,7 @@ int qdio_shutdown(struct ccw_device *cdev, int how)
 
 	tiqdio_remove_input_queues(irq_ptr);
 	qdio_shutdown_queues(cdev);
-	qdio_shutdown_debug_entries(irq_ptr, cdev);
+	qdio_shutdown_debug_entries(irq_ptr);
 
 	/* cleanup subchannel */
 	spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 742f5d7eb0f5..a6f7190c09a4 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -12,13 +12,13 @@
  *----------------------------------------------------------------------------*/
 
 #ifndef AAC_DRIVER_BUILD
-# define AAC_DRIVER_BUILD 29801
+# define AAC_DRIVER_BUILD 30000
 # define AAC_DRIVER_BRANCH "-ms"
 #endif
 #define MAXIMUM_NUM_CONTAINERS	32
 
 #define AAC_NUM_MGT_FIB         8
-#define AAC_NUM_IO_FIB		(512 - AAC_NUM_MGT_FIB)
+#define AAC_NUM_IO_FIB		(1024 - AAC_NUM_MGT_FIB)
 #define AAC_NUM_FIB		(AAC_NUM_IO_FIB + AAC_NUM_MGT_FIB)
 
 #define AAC_MAX_LUN		(8)
@@ -36,6 +36,10 @@
 #define CONTAINER_TO_ID(cont)		(cont)
 #define CONTAINER_TO_LUN(cont)		(0)
 
+#define PMC_DEVICE_S7	0x28c
+#define PMC_DEVICE_S8	0x28d
+#define PMC_DEVICE_S9	0x28f
+
 #define aac_phys_to_logical(x)  ((x)+1)
 #define aac_logical_to_phys(x)  ((x)?(x)-1:0)
 
diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c
index 8e5d3be16127..3f759957f4b4 100644
--- a/drivers/scsi/aacraid/comminit.c
+++ b/drivers/scsi/aacraid/comminit.c
@@ -404,7 +404,13 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
 		dev->max_fib_size = status[1] & 0xFFE0;
 		host->sg_tablesize = status[2] >> 16;
 		dev->sg_tablesize = status[2] & 0xFFFF;
-		host->can_queue = (status[3] & 0xFFFF) - AAC_NUM_MGT_FIB;
+		if (dev->pdev->device == PMC_DEVICE_S7 ||
+		    dev->pdev->device == PMC_DEVICE_S8 ||
+		    dev->pdev->device == PMC_DEVICE_S9)
+			host->can_queue = ((status[3] >> 16) ? (status[3] >> 16) :
+				(status[3] & 0xFFFF)) - AAC_NUM_MGT_FIB;
+		else
+			host->can_queue = (status[3] & 0xFFFF) - AAC_NUM_MGT_FIB;
 		dev->max_num_aif = status[4] & 0xFFFF;
 		/*
 		 *	NOTE:
@@ -452,6 +458,9 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
 		}
 	}
 
+	if (host->can_queue > AAC_NUM_IO_FIB)
+		host->can_queue = AAC_NUM_IO_FIB;
+
 	/*
 	 *	Ok now init the communication subsystem
 	 */
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
index 3b021ec63255..e2e349204e7d 100644
--- a/drivers/scsi/aacraid/src.c
+++ b/drivers/scsi/aacraid/src.c
@@ -407,7 +407,7 @@ static int aac_src_deliver_message(struct fib *fib)
 		fib->hw_fib_va->header.StructType = FIB_MAGIC2;
 		fib->hw_fib_va->header.SenderFibAddress = (u32)address;
 		fib->hw_fib_va->header.u.TimeStamp = 0;
-		BUG_ON((u32)(address >> 32) != 0L);
+		BUG_ON(upper_32_bits(address) != 0L);
 		address |= fibsize;
 	} else {
 		/* Calculate the amount to the fibsize bits */
@@ -431,7 +431,7 @@ static int aac_src_deliver_message(struct fib *fib)
 		address |= fibsize;
 	}
 
-	src_writel(dev, MUnit.IQ_H, (address >> 32) & 0xffffffff);
+	src_writel(dev, MUnit.IQ_H, upper_32_bits(address) & 0xffffffff);
 	src_writel(dev, MUnit.IQ_L, address & 0xffffffff);
 
 	return 0;
diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c
index e6bf12675db8..a5f7690e819e 100644
--- a/drivers/scsi/bfa/bfad.c
+++ b/drivers/scsi/bfa/bfad.c
@@ -1034,7 +1034,7 @@ bfad_start_ops(struct bfad_s *bfad) {
 			sizeof(driver_info.host_os_patch) - 1);
 
 	strncpy(driver_info.os_device_name, bfad->pci_name,
-		sizeof(driver_info.os_device_name - 1));
+		sizeof(driver_info.os_device_name) - 1);
 
 	/* FCS driver info init */
 	spin_lock_irqsave(&bfad->bfad_lock, flags);
diff --git a/drivers/scsi/bfa/bfad_im.c b/drivers/scsi/bfa/bfad_im.c
index 8f92732655c7..5864f987f206 100644
--- a/drivers/scsi/bfa/bfad_im.c
+++ b/drivers/scsi/bfa/bfad_im.c
@@ -523,20 +523,13 @@ bfad_im_scsi_host_alloc(struct bfad_s *bfad, struct bfad_im_port_s *im_port,
 	int error = 1;
 
 	mutex_lock(&bfad_mutex);
-	if (!idr_pre_get(&bfad_im_port_index, GFP_KERNEL)) {
+	error = idr_alloc(&bfad_im_port_index, im_port, 0, 0, GFP_KERNEL);
+	if (error < 0) {
 		mutex_unlock(&bfad_mutex);
-		printk(KERN_WARNING "idr_pre_get failure\n");
+		printk(KERN_WARNING "idr_alloc failure\n");
 		goto out;
 	}
-
-	error = idr_get_new(&bfad_im_port_index, im_port,
-					 &im_port->idr_id);
-	if (error) {
-		mutex_unlock(&bfad_mutex);
-		printk(KERN_WARNING "idr_get_new failure\n");
-		goto out;
-	}
-
+	im_port->idr_id = error;
 	mutex_unlock(&bfad_mutex);
 
 	im_port->shost = bfad_scsi_host_alloc(im_port, bfad);
diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h
index 3486845ba301..50fcd018d14b 100644
--- a/drivers/scsi/bnx2fc/bnx2fc.h
+++ b/drivers/scsi/bnx2fc/bnx2fc.h
@@ -64,7 +64,7 @@
 #include "bnx2fc_constants.h"
 
 #define BNX2FC_NAME		"bnx2fc"
-#define BNX2FC_VERSION		"1.0.12"
+#define BNX2FC_VERSION		"1.0.13"
 
 #define PFX			"bnx2fc: "
 
@@ -156,6 +156,18 @@
 #define BNX2FC_RELOGIN_WAIT_TIME	200
 #define BNX2FC_RELOGIN_WAIT_CNT		10
 
+#define BNX2FC_STATS(hba, stat, cnt)					\
+	do {								\
+		u32 val;						\
+									\
+		val = fw_stats->stat.cnt;				\
+		if (hba->prev_stats.stat.cnt <= val)			\
+			val -= hba->prev_stats.stat.cnt;		\
+		else							\
+			val += (0xfffffff - hba->prev_stats.stat.cnt);	\
+		hba->bfw_stats.cnt += val;				\
+	} while (0)
+
 /* bnx2fc driver uses only one instance of fcoe_percpu_s */
 extern struct fcoe_percpu_s bnx2fc_global;
 
@@ -167,6 +179,14 @@ struct bnx2fc_percpu_s {
 	spinlock_t fp_work_lock;
 };
 
+struct bnx2fc_fw_stats {
+	u64	fc_crc_cnt;
+	u64	fcoe_tx_pkt_cnt;
+	u64	fcoe_rx_pkt_cnt;
+	u64	fcoe_tx_byte_cnt;
+	u64	fcoe_rx_byte_cnt;
+};
+
 struct bnx2fc_hba {
 	struct list_head list;
 	struct cnic_dev *cnic;
@@ -207,6 +227,8 @@ struct bnx2fc_hba {
 	struct bnx2fc_rport **tgt_ofld_list;
 
 	/* statistics */
+	struct bnx2fc_fw_stats bfw_stats;
+	struct fcoe_statistics_params prev_stats;
 	struct fcoe_statistics_params *stats_buffer;
 	dma_addr_t stats_buf_dma;
 	struct completion stat_req_done;
@@ -280,6 +302,7 @@ struct bnx2fc_rport {
 #define BNX2FC_FLAG_UPLD_REQ_COMPL	0x7
 #define BNX2FC_FLAG_EXPL_LOGO		0x8
 #define BNX2FC_FLAG_DISABLE_FAILED	0x9
+#define BNX2FC_FLAG_ENABLED		0xa
 
 	u8 src_addr[ETH_ALEN];
 	u32 max_sqes;
@@ -468,6 +491,8 @@ int bnx2fc_send_fw_fcoe_init_msg(struct bnx2fc_hba *hba);
 int bnx2fc_send_fw_fcoe_destroy_msg(struct bnx2fc_hba *hba);
 int bnx2fc_send_session_ofld_req(struct fcoe_port *port,
 					struct bnx2fc_rport *tgt);
+int bnx2fc_send_session_enable_req(struct fcoe_port *port,
+					struct bnx2fc_rport *tgt);
 int bnx2fc_send_session_disable_req(struct fcoe_port *port,
 				    struct bnx2fc_rport *tgt);
 int bnx2fc_send_session_destroy_req(struct bnx2fc_hba *hba,
diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index 70ecd953a579..2daf4b0da434 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -22,7 +22,7 @@ DEFINE_PER_CPU(struct bnx2fc_percpu_s, bnx2fc_percpu);
 
 #define DRV_MODULE_NAME		"bnx2fc"
 #define DRV_MODULE_VERSION	BNX2FC_VERSION
-#define DRV_MODULE_RELDATE	"Jun 04, 2012"
+#define DRV_MODULE_RELDATE	"Dec 21, 2012"
 
 
 static char version[] =
@@ -62,6 +62,10 @@ static int bnx2fc_destroy(struct net_device *net_device);
 static int bnx2fc_enable(struct net_device *netdev);
 static int bnx2fc_disable(struct net_device *netdev);
 
+/* fcoe_syfs control interface handlers */
+static int bnx2fc_ctlr_alloc(struct net_device *netdev);
+static int bnx2fc_ctlr_enabled(struct fcoe_ctlr_device *cdev);
+
 static void bnx2fc_recv_frame(struct sk_buff *skb);
 
 static void bnx2fc_start_disc(struct bnx2fc_interface *interface);
@@ -89,7 +93,6 @@ static void bnx2fc_port_shutdown(struct fc_lport *lport);
 static void bnx2fc_stop(struct bnx2fc_interface *interface);
 static int __init bnx2fc_mod_init(void);
 static void __exit bnx2fc_mod_exit(void);
-static void bnx2fc_ctlr_get_lesb(struct fcoe_ctlr_device *ctlr_dev);
 
 unsigned int bnx2fc_debug_level;
 module_param_named(debug_logging, bnx2fc_debug_level, int, S_IRUGO|S_IWUSR);
@@ -107,44 +110,6 @@ static inline struct net_device *bnx2fc_netdev(const struct fc_lport *lport)
 		((struct fcoe_port *)lport_priv(lport))->priv)->netdev;
 }
 
-/**
- * bnx2fc_get_lesb() - Fill the FCoE Link Error Status Block
- * @lport: the local port
- * @fc_lesb: the link error status block
- */
-static void bnx2fc_get_lesb(struct fc_lport *lport,
-			    struct fc_els_lesb *fc_lesb)
-{
-	struct net_device *netdev = bnx2fc_netdev(lport);
-
-	__fcoe_get_lesb(lport, fc_lesb, netdev);
-}
-
-static void bnx2fc_ctlr_get_lesb(struct fcoe_ctlr_device *ctlr_dev)
-{
-	struct fcoe_ctlr *fip = fcoe_ctlr_device_priv(ctlr_dev);
-	struct net_device *netdev = bnx2fc_netdev(fip->lp);
-	struct fcoe_fc_els_lesb *fcoe_lesb;
-	struct fc_els_lesb fc_lesb;
-
-	__fcoe_get_lesb(fip->lp, &fc_lesb, netdev);
-	fcoe_lesb = (struct fcoe_fc_els_lesb *)(&fc_lesb);
-
-	ctlr_dev->lesb.lesb_link_fail =
-		ntohl(fcoe_lesb->lesb_link_fail);
-	ctlr_dev->lesb.lesb_vlink_fail =
-		ntohl(fcoe_lesb->lesb_vlink_fail);
-	ctlr_dev->lesb.lesb_miss_fka =
-		ntohl(fcoe_lesb->lesb_miss_fka);
-	ctlr_dev->lesb.lesb_symb_err =
-		ntohl(fcoe_lesb->lesb_symb_err);
-	ctlr_dev->lesb.lesb_err_block =
-		ntohl(fcoe_lesb->lesb_err_block);
-	ctlr_dev->lesb.lesb_fcs_error =
-		ntohl(fcoe_lesb->lesb_fcs_error);
-}
-EXPORT_SYMBOL(bnx2fc_ctlr_get_lesb);
-
 static void bnx2fc_fcf_get_vlan_id(struct fcoe_fcf_device *fcf_dev)
 {
 	struct fcoe_ctlr_device *ctlr_dev =
@@ -687,11 +652,16 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost)
 		BNX2FC_HBA_DBG(lport, "FW stat req timed out\n");
 		return bnx2fc_stats;
 	}
-	bnx2fc_stats->invalid_crc_count += fw_stats->rx_stat2.fc_crc_cnt;
-	bnx2fc_stats->tx_frames += fw_stats->tx_stat.fcoe_tx_pkt_cnt;
-	bnx2fc_stats->tx_words += (fw_stats->tx_stat.fcoe_tx_byte_cnt) / 4;
-	bnx2fc_stats->rx_frames += fw_stats->rx_stat0.fcoe_rx_pkt_cnt;
-	bnx2fc_stats->rx_words += (fw_stats->rx_stat0.fcoe_rx_byte_cnt) / 4;
+	BNX2FC_STATS(hba, rx_stat2, fc_crc_cnt);
+	bnx2fc_stats->invalid_crc_count += hba->bfw_stats.fc_crc_cnt;
+	BNX2FC_STATS(hba, tx_stat, fcoe_tx_pkt_cnt);
+	bnx2fc_stats->tx_frames += hba->bfw_stats.fcoe_tx_pkt_cnt;
+	BNX2FC_STATS(hba, tx_stat, fcoe_tx_byte_cnt);
+	bnx2fc_stats->tx_words += ((hba->bfw_stats.fcoe_tx_byte_cnt) / 4);
+	BNX2FC_STATS(hba, rx_stat0, fcoe_rx_pkt_cnt);
+	bnx2fc_stats->rx_frames += hba->bfw_stats.fcoe_rx_pkt_cnt;
+	BNX2FC_STATS(hba, rx_stat0, fcoe_rx_byte_cnt);
+	bnx2fc_stats->rx_words += ((hba->bfw_stats.fcoe_rx_byte_cnt) / 4);
 
 	bnx2fc_stats->dumped_frames = 0;
 	bnx2fc_stats->lip_count = 0;
@@ -700,6 +670,8 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost)
 	bnx2fc_stats->loss_of_signal_count = 0;
 	bnx2fc_stats->prim_seq_protocol_err_count = 0;
 
+	memcpy(&hba->prev_stats, hba->stats_buffer,
+	       sizeof(struct fcoe_statistics_params));
 	return bnx2fc_stats;
 }
 
@@ -734,35 +706,6 @@ static int bnx2fc_shost_config(struct fc_lport *lport, struct device *dev)
 	return 0;
 }
 
-static void bnx2fc_link_speed_update(struct fc_lport *lport)
-{
-	struct fcoe_port *port = lport_priv(lport);
-	struct bnx2fc_interface *interface = port->priv;
-	struct net_device *netdev = interface->netdev;
-	struct ethtool_cmd ecmd;
-
-	if (!__ethtool_get_settings(netdev, &ecmd)) {
-		lport->link_supported_speeds &=
-			~(FC_PORTSPEED_1GBIT | FC_PORTSPEED_10GBIT);
-		if (ecmd.supported & (SUPPORTED_1000baseT_Half |
-				      SUPPORTED_1000baseT_Full))
-			lport->link_supported_speeds |= FC_PORTSPEED_1GBIT;
-		if (ecmd.supported & SUPPORTED_10000baseT_Full)
-			lport->link_supported_speeds |= FC_PORTSPEED_10GBIT;
-
-		switch (ethtool_cmd_speed(&ecmd)) {
-		case SPEED_1000:
-			lport->link_speed = FC_PORTSPEED_1GBIT;
-			break;
-		case SPEED_2500:
-			lport->link_speed = FC_PORTSPEED_2GBIT;
-			break;
-		case SPEED_10000:
-			lport->link_speed = FC_PORTSPEED_10GBIT;
-			break;
-		}
-	}
-}
 static int bnx2fc_link_ok(struct fc_lport *lport)
 {
 	struct fcoe_port *port = lport_priv(lport);
@@ -820,7 +763,7 @@ static int bnx2fc_net_config(struct fc_lport *lport, struct net_device *netdev)
 	port->fcoe_pending_queue_active = 0;
 	setup_timer(&port->timer, fcoe_queue_timer, (unsigned long) lport);
 
-	bnx2fc_link_speed_update(lport);
+	fcoe_link_speed_update(lport);
 
 	if (!lport->vport) {
 		if (fcoe_get_wwn(netdev, &wwnn, NETDEV_FCOE_WWNN))
@@ -864,6 +807,7 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event,
 				     u16 vlan_id)
 {
 	struct bnx2fc_hba *hba = (struct bnx2fc_hba *)context;
+	struct fcoe_ctlr_device *cdev;
 	struct fc_lport *lport;
 	struct fc_lport *vport;
 	struct bnx2fc_interface *interface, *tmp;
@@ -923,30 +867,47 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event,
 		BNX2FC_HBA_DBG(lport, "netevent handler - event=%s %ld\n",
 				interface->netdev->name, event);
 
-		bnx2fc_link_speed_update(lport);
+		fcoe_link_speed_update(lport);
+
+		cdev = fcoe_ctlr_to_ctlr_dev(ctlr);
 
 		if (link_possible && !bnx2fc_link_ok(lport)) {
-			/* Reset max recv frame size to default */
-			fc_set_mfs(lport, BNX2FC_MFS);
-			/*
-			 * ctlr link up will only be handled during
-			 * enable to avoid sending discovery solicitation
-			 * on a stale vlan
-			 */
-			if (interface->enabled)
-				fcoe_ctlr_link_up(ctlr);
+			switch (cdev->enabled) {
+			case FCOE_CTLR_DISABLED:
+				pr_info("Link up while interface is disabled.\n");
+				break;
+			case FCOE_CTLR_ENABLED:
+			case FCOE_CTLR_UNUSED:
+				/* Reset max recv frame size to default */
+				fc_set_mfs(lport, BNX2FC_MFS);
+				/*
+				 * ctlr link up will only be handled during
+				 * enable to avoid sending discovery
+				 * solicitation on a stale vlan
+				 */
+				if (interface->enabled)
+					fcoe_ctlr_link_up(ctlr);
+			};
 		} else if (fcoe_ctlr_link_down(ctlr)) {
-			mutex_lock(&lport->lp_mutex);
-			list_for_each_entry(vport, &lport->vports, list)
-				fc_host_port_type(vport->host) =
-							FC_PORTTYPE_UNKNOWN;
-			mutex_unlock(&lport->lp_mutex);
-			fc_host_port_type(lport->host) = FC_PORTTYPE_UNKNOWN;
-			per_cpu_ptr(lport->stats,
-				    get_cpu())->LinkFailureCount++;
-			put_cpu();
-			fcoe_clean_pending_queue(lport);
-			wait_for_upload = 1;
+			switch (cdev->enabled) {
+			case FCOE_CTLR_DISABLED:
+				pr_info("Link down while interface is disabled.\n");
+				break;
+			case FCOE_CTLR_ENABLED:
+			case FCOE_CTLR_UNUSED:
+				mutex_lock(&lport->lp_mutex);
+				list_for_each_entry(vport, &lport->vports, list)
+					fc_host_port_type(vport->host) =
+					FC_PORTTYPE_UNKNOWN;
+				mutex_unlock(&lport->lp_mutex);
+				fc_host_port_type(lport->host) =
+					FC_PORTTYPE_UNKNOWN;
+				per_cpu_ptr(lport->stats,
+					    get_cpu())->LinkFailureCount++;
+				put_cpu();
+				fcoe_clean_pending_queue(lport);
+				wait_for_upload = 1;
+			};
 		}
 	}
 	mutex_unlock(&bnx2fc_dev_lock);
@@ -1477,6 +1438,7 @@ static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface,
 	port = lport_priv(lport);
 	port->lport = lport;
 	port->priv = interface;
+	port->get_netdev = bnx2fc_netdev;
 	INIT_WORK(&port->destroy_work, bnx2fc_destroy_work);
 
 	/* Configure fcoe_port */
@@ -1996,7 +1958,9 @@ static void bnx2fc_ulp_init(struct cnic_dev *dev)
 		set_bit(BNX2FC_CNIC_REGISTERED, &hba->reg_with_cnic);
 }
 
-
+/**
+ * Deperecated: Use bnx2fc_enabled()
+ */
 static int bnx2fc_disable(struct net_device *netdev)
 {
 	struct bnx2fc_interface *interface;
@@ -2022,7 +1986,9 @@ static int bnx2fc_disable(struct net_device *netdev)
 	return rc;
 }
 
-
+/**
+ * Deprecated: Use bnx2fc_enabled()
+ */
 static int bnx2fc_enable(struct net_device *netdev)
 {
 	struct bnx2fc_interface *interface;
@@ -2048,17 +2014,57 @@ static int bnx2fc_enable(struct net_device *netdev)
 }
 
 /**
- * bnx2fc_create - Create bnx2fc FCoE interface
+ * bnx2fc_ctlr_enabled() - Enable or disable an FCoE Controller
+ * @cdev: The FCoE Controller that is being enabled or disabled
+ *
+ * fcoe_sysfs will ensure that the state of 'enabled' has
+ * changed, so no checking is necessary here. This routine simply
+ * calls fcoe_enable or fcoe_disable, both of which are deprecated.
+ * When those routines are removed the functionality can be merged
+ * here.
+ */
+static int bnx2fc_ctlr_enabled(struct fcoe_ctlr_device *cdev)
+{
+	struct fcoe_ctlr *ctlr = fcoe_ctlr_device_priv(cdev);
+	struct fc_lport *lport = ctlr->lp;
+	struct net_device *netdev = bnx2fc_netdev(lport);
+
+	switch (cdev->enabled) {
+	case FCOE_CTLR_ENABLED:
+		return bnx2fc_enable(netdev);
+	case FCOE_CTLR_DISABLED:
+		return bnx2fc_disable(netdev);
+	case FCOE_CTLR_UNUSED:
+	default:
+		return -ENOTSUPP;
+	};
+}
+
+enum bnx2fc_create_link_state {
+	BNX2FC_CREATE_LINK_DOWN,
+	BNX2FC_CREATE_LINK_UP,
+};
+
+/**
+ * _bnx2fc_create() - Create bnx2fc FCoE interface
+ * @netdev  :   The net_device object the Ethernet interface to create on
+ * @fip_mode:   The FIP mode for this creation
+ * @link_state: The ctlr link state on creation
  *
- * @buffer: The name of Ethernet interface to create on
- * @kp:     The associated kernel param
+ * Called from either the libfcoe 'create' module parameter
+ * via fcoe_create or from fcoe_syfs's ctlr_create file.
  *
- * Called from sysfs.
+ * libfcoe's 'create' module parameter is deprecated so some
+ * consolidation of code can be done when that interface is
+ * removed.
  *
  * Returns: 0 for success
  */
-static int bnx2fc_create(struct net_device *netdev, enum fip_state fip_mode)
+static int _bnx2fc_create(struct net_device *netdev,
+			  enum fip_state fip_mode,
+			  enum bnx2fc_create_link_state link_state)
 {
+	struct fcoe_ctlr_device *cdev;
 	struct fcoe_ctlr *ctlr;
 	struct bnx2fc_interface *interface;
 	struct bnx2fc_hba *hba;
@@ -2153,7 +2159,15 @@ static int bnx2fc_create(struct net_device *netdev, enum fip_state fip_mode)
 	/* Make this master N_port */
 	ctlr->lp = lport;
 
-	if (!bnx2fc_link_ok(lport)) {
+	cdev = fcoe_ctlr_to_ctlr_dev(ctlr);
+
+	if (link_state == BNX2FC_CREATE_LINK_UP)
+		cdev->enabled = FCOE_CTLR_ENABLED;
+	else
+		cdev->enabled = FCOE_CTLR_DISABLED;
+
+	if (link_state == BNX2FC_CREATE_LINK_UP &&
+	    !bnx2fc_link_ok(lport)) {
 		fcoe_ctlr_link_up(ctlr);
 		fc_host_port_type(lport->host) = FC_PORTTYPE_NPORT;
 		set_bit(ADAPTER_STATE_READY, &interface->hba->adapter_state);
@@ -2161,7 +2175,10 @@ static int bnx2fc_create(struct net_device *netdev, enum fip_state fip_mode)
 
 	BNX2FC_HBA_DBG(lport, "create: START DISC\n");
 	bnx2fc_start_disc(interface);
-	interface->enabled = true;
+
+	if (link_state == BNX2FC_CREATE_LINK_UP)
+		interface->enabled = true;
+
 	/*
 	 * Release from kref_init in bnx2fc_interface_setup, on success
 	 * lport should be holding a reference taken in bnx2fc_if_create
@@ -2187,6 +2204,37 @@ mod_err:
 }
 
 /**
+ * bnx2fc_create() - Create a bnx2fc interface
+ * @netdev  : The net_device object the Ethernet interface to create on
+ * @fip_mode: The FIP mode for this creation
+ *
+ * Called from fcoe transport
+ *
+ * Returns: 0 for success
+ */
+static int bnx2fc_create(struct net_device *netdev, enum fip_state fip_mode)
+{
+	return _bnx2fc_create(netdev, fip_mode, BNX2FC_CREATE_LINK_UP);
+}
+
+/**
+ * bnx2fc_ctlr_alloc() - Allocate a bnx2fc interface from fcoe_sysfs
+ * @netdev: The net_device to be used by the allocated FCoE Controller
+ *
+ * This routine is called from fcoe_sysfs. It will start the fcoe_ctlr
+ * in a link_down state. The allows the user an opportunity to configure
+ * the FCoE Controller from sysfs before enabling the FCoE Controller.
+ *
+ * Creating in with this routine starts the FCoE Controller in Fabric
+ * mode. The user can change to VN2VN or another mode before enabling.
+ */
+static int bnx2fc_ctlr_alloc(struct net_device *netdev)
+{
+	return _bnx2fc_create(netdev, FIP_MODE_FABRIC,
+			      BNX2FC_CREATE_LINK_DOWN);
+}
+
+/**
  * bnx2fc_find_hba_for_cnic - maps cnic instance to bnx2fc hba instance
  *
  * @cnic:	Pointer to cnic device instance
@@ -2311,6 +2359,7 @@ static struct fcoe_transport bnx2fc_transport = {
 	.name = {"bnx2fc"},
 	.attached = false,
 	.list = LIST_HEAD_INIT(bnx2fc_transport.list),
+	.alloc = bnx2fc_ctlr_alloc,
 	.match = bnx2fc_match,
 	.create = bnx2fc_create,
 	.destroy = bnx2fc_destroy,
@@ -2555,13 +2604,13 @@ module_init(bnx2fc_mod_init);
 module_exit(bnx2fc_mod_exit);
 
 static struct fcoe_sysfs_function_template bnx2fc_fcoe_sysfs_templ = {
-	.get_fcoe_ctlr_mode = fcoe_ctlr_get_fip_mode,
-	.get_fcoe_ctlr_link_fail = bnx2fc_ctlr_get_lesb,
-	.get_fcoe_ctlr_vlink_fail = bnx2fc_ctlr_get_lesb,
-	.get_fcoe_ctlr_miss_fka = bnx2fc_ctlr_get_lesb,
-	.get_fcoe_ctlr_symb_err = bnx2fc_ctlr_get_lesb,
-	.get_fcoe_ctlr_err_block = bnx2fc_ctlr_get_lesb,
-	.get_fcoe_ctlr_fcs_error = bnx2fc_ctlr_get_lesb,
+	.set_fcoe_ctlr_enabled = bnx2fc_ctlr_enabled,
+	.get_fcoe_ctlr_link_fail = fcoe_ctlr_get_lesb,
+	.get_fcoe_ctlr_vlink_fail = fcoe_ctlr_get_lesb,
+	.get_fcoe_ctlr_miss_fka = fcoe_ctlr_get_lesb,
+	.get_fcoe_ctlr_symb_err = fcoe_ctlr_get_lesb,
+	.get_fcoe_ctlr_err_block = fcoe_ctlr_get_lesb,
+	.get_fcoe_ctlr_fcs_error = fcoe_ctlr_get_lesb,
 
 	.get_fcoe_fcf_selected = fcoe_fcf_get_selected,
 	.get_fcoe_fcf_vlan_id = bnx2fc_fcf_get_vlan_id,
@@ -2660,7 +2709,7 @@ static struct scsi_host_template bnx2fc_shost_template = {
 	.can_queue		= BNX2FC_CAN_QUEUE,
 	.use_clustering		= ENABLE_CLUSTERING,
 	.sg_tablesize		= BNX2FC_MAX_BDS_PER_CMD,
-	.max_sectors		= 512,
+	.max_sectors		= 1024,
 };
 
 static struct libfc_function_template bnx2fc_libfc_fcn_templ = {
@@ -2668,7 +2717,7 @@ static struct libfc_function_template bnx2fc_libfc_fcn_templ = {
 	.elsct_send		= bnx2fc_elsct_send,
 	.fcp_abort_io		= bnx2fc_abort_io,
 	.fcp_cleanup		= bnx2fc_cleanup,
-	.get_lesb		= bnx2fc_get_lesb,
+	.get_lesb		= fcoe_get_lesb,
 	.rport_event_callback	= bnx2fc_rport_event_handler,
 };
 
diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c
index ef60afa94d0e..85ea98a80f40 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c
@@ -347,7 +347,7 @@ int bnx2fc_send_session_ofld_req(struct fcoe_port *port,
  * @port:		port structure pointer
  * @tgt:		bnx2fc_rport structure pointer
  */
-static int bnx2fc_send_session_enable_req(struct fcoe_port *port,
+int bnx2fc_send_session_enable_req(struct fcoe_port *port,
 					struct bnx2fc_rport *tgt)
 {
 	struct kwqe *kwqe_arr[2];
@@ -759,8 +759,6 @@ static void bnx2fc_process_unsol_compl(struct bnx2fc_rport *tgt, u16 wqe)
 		case FCOE_ERROR_CODE_DATA_SOFN_SEQ_ACTIVE_RESET:
 			BNX2FC_TGT_DBG(tgt, "REC TOV popped for xid - 0x%x\n",
 				   xid);
-			memset(&io_req->err_entry, 0,
-			       sizeof(struct fcoe_err_report_entry));
 			memcpy(&io_req->err_entry, err_entry,
 			       sizeof(struct fcoe_err_report_entry));
 			if (!test_bit(BNX2FC_FLAG_SRR_SENT,
@@ -847,8 +845,6 @@ ret_err_rqe:
 			goto ret_warn_rqe;
 		}
 
-		memset(&io_req->err_entry, 0,
-		       sizeof(struct fcoe_err_report_entry));
 		memcpy(&io_req->err_entry, err_entry,
 		       sizeof(struct fcoe_err_report_entry));
 
@@ -1124,7 +1120,6 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba,
 	struct bnx2fc_interface		*interface;
 	u32				conn_id;
 	u32				context_id;
-	int				rc;
 
 	conn_id = ofld_kcqe->fcoe_conn_id;
 	context_id = ofld_kcqe->fcoe_conn_context_id;
@@ -1153,17 +1148,10 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba,
 				"resources\n");
 			set_bit(BNX2FC_FLAG_CTX_ALLOC_FAILURE, &tgt->flags);
 		}
-		goto ofld_cmpl_err;
 	} else {
-
-		/* now enable the session */
-		rc = bnx2fc_send_session_enable_req(port, tgt);
-		if (rc) {
-			printk(KERN_ERR PFX "enable session failed\n");
-			goto ofld_cmpl_err;
-		}
+		/* FW offload request successfully completed */
+		set_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags);
 	}
-	return;
 ofld_cmpl_err:
 	set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags);
 	wake_up_interruptible(&tgt->ofld_wait);
@@ -1210,15 +1198,9 @@ static void bnx2fc_process_enable_conn_cmpl(struct bnx2fc_hba *hba,
 		printk(KERN_ERR PFX "bnx2fc-enbl_cmpl: HBA mis-match\n");
 		goto enbl_cmpl_err;
 	}
-	if (ofld_kcqe->completion_status)
-		goto enbl_cmpl_err;
-	else {
+	if (!ofld_kcqe->completion_status)
 		/* enable successful - rport ready for issuing IOs */
-		set_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags);
-		set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags);
-		wake_up_interruptible(&tgt->ofld_wait);
-	}
-	return;
+		set_bit(BNX2FC_FLAG_ENABLED, &tgt->flags);
 
 enbl_cmpl_err:
 	set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags);
@@ -1251,6 +1233,7 @@ static void bnx2fc_process_conn_disable_cmpl(struct bnx2fc_hba *hba,
 		/* disable successful */
 		BNX2FC_TGT_DBG(tgt, "disable successful\n");
 		clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags);
+		clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags);
 		set_bit(BNX2FC_FLAG_DISABLED, &tgt->flags);
 		set_bit(BNX2FC_FLAG_UPLD_REQ_COMPL, &tgt->flags);
 		wake_up_interruptible(&tgt->upld_wait);
diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c
index 8d4626c07a12..60798e829de6 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_io.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_io.c
@@ -654,7 +654,7 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req)
 	mp_req->mp_resp_bd = dma_alloc_coherent(&hba->pcidev->dev, sz,
 						 &mp_req->mp_resp_bd_dma,
 						 GFP_ATOMIC);
-	if (!mp_req->mp_req_bd) {
+	if (!mp_req->mp_resp_bd) {
 		printk(KERN_ERR PFX "unable to alloc MP resp bd\n");
 		bnx2fc_free_mp_resc(io_req);
 		return FAILED;
@@ -685,8 +685,8 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req)
 static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags)
 {
 	struct fc_lport *lport;
-	struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device));
-	struct fc_rport_libfc_priv *rp = rport->dd_data;
+	struct fc_rport *rport;
+	struct fc_rport_libfc_priv *rp;
 	struct fcoe_port *port;
 	struct bnx2fc_interface *interface;
 	struct bnx2fc_rport *tgt;
@@ -704,6 +704,7 @@ static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags)
 	unsigned long start = jiffies;
 
 	lport = shost_priv(host);
+	rport = starget_to_rport(scsi_target(sc_cmd->device));
 	port = lport_priv(lport);
 	interface = port->priv;
 
@@ -712,6 +713,7 @@ static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags)
 		rc = FAILED;
 		goto tmf_err;
 	}
+	rp = rport->dd_data;
 
 	rc = fc_block_scsi_eh(sc_cmd);
 	if (rc)
diff --git a/drivers/scsi/bnx2fc/bnx2fc_tgt.c b/drivers/scsi/bnx2fc/bnx2fc_tgt.c
index b9d0d9cb17f9..c57a3bb8a9fb 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_tgt.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_tgt.c
@@ -33,6 +33,7 @@ static void bnx2fc_upld_timer(unsigned long data)
 	BNX2FC_TGT_DBG(tgt, "upld_timer - Upload compl not received!!\n");
 	/* fake upload completion */
 	clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags);
+	clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags);
 	set_bit(BNX2FC_FLAG_UPLD_REQ_COMPL, &tgt->flags);
 	wake_up_interruptible(&tgt->upld_wait);
 }
@@ -55,10 +56,25 @@ static void bnx2fc_ofld_timer(unsigned long data)
 	 * resources are freed up in bnx2fc_offload_session
 	 */
 	clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags);
+	clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags);
 	set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags);
 	wake_up_interruptible(&tgt->ofld_wait);
 }
 
+static void bnx2fc_ofld_wait(struct bnx2fc_rport *tgt)
+{
+	setup_timer(&tgt->ofld_timer, bnx2fc_ofld_timer, (unsigned long)tgt);
+	mod_timer(&tgt->ofld_timer, jiffies + BNX2FC_FW_TIMEOUT);
+
+	wait_event_interruptible(tgt->ofld_wait,
+				 (test_bit(
+				  BNX2FC_FLAG_OFLD_REQ_CMPL,
+				  &tgt->flags)));
+	if (signal_pending(current))
+		flush_signals(current);
+	del_timer_sync(&tgt->ofld_timer);
+}
+
 static void bnx2fc_offload_session(struct fcoe_port *port,
 					struct bnx2fc_rport *tgt,
 					struct fc_rport_priv *rdata)
@@ -103,17 +119,7 @@ retry_ofld:
 	 * wait for the session is offloaded and enabled. 3 Secs
 	 * should be ample time for this process to complete.
 	 */
-	setup_timer(&tgt->ofld_timer, bnx2fc_ofld_timer, (unsigned long)tgt);
-	mod_timer(&tgt->ofld_timer, jiffies + BNX2FC_FW_TIMEOUT);
-
-	wait_event_interruptible(tgt->ofld_wait,
-				 (test_bit(
-				  BNX2FC_FLAG_OFLD_REQ_CMPL,
-				  &tgt->flags)));
-	if (signal_pending(current))
-		flush_signals(current);
-
-	del_timer_sync(&tgt->ofld_timer);
+	bnx2fc_ofld_wait(tgt);
 
 	if (!(test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags))) {
 		if (test_and_clear_bit(BNX2FC_FLAG_CTX_ALLOC_FAILURE,
@@ -131,14 +137,23 @@ retry_ofld:
 	}
 	if (bnx2fc_map_doorbell(tgt)) {
 		printk(KERN_ERR PFX "map doorbell failed - no mem\n");
-		/* upload will take care of cleaning up sess resc */
-		lport->tt.rport_logoff(rdata);
+		goto ofld_err;
 	}
+	clear_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags);
+	rval = bnx2fc_send_session_enable_req(port, tgt);
+	if (rval) {
+		pr_err(PFX "enable session failed\n");
+		goto ofld_err;
+	}
+	bnx2fc_ofld_wait(tgt);
+	if (!(test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags)))
+		goto ofld_err;
 	return;
 
 ofld_err:
 	/* couldn't offload the session. log off from this rport */
 	BNX2FC_TGT_DBG(tgt, "bnx2fc_offload_session - offload error\n");
+	clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags);
 	/* Free session resources */
 	bnx2fc_free_session_resc(hba, tgt);
 tgt_init_err:
@@ -259,6 +274,19 @@ void bnx2fc_flush_active_ios(struct bnx2fc_rport *tgt)
 	spin_unlock_bh(&tgt->tgt_lock);
 }
 
+static void bnx2fc_upld_wait(struct bnx2fc_rport *tgt)
+{
+	setup_timer(&tgt->upld_timer, bnx2fc_upld_timer, (unsigned long)tgt);
+	mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT);
+	wait_event_interruptible(tgt->upld_wait,
+				 (test_bit(
+				  BNX2FC_FLAG_UPLD_REQ_COMPL,
+				  &tgt->flags)));
+	if (signal_pending(current))
+		flush_signals(current);
+	del_timer_sync(&tgt->upld_timer);
+}
+
 static void bnx2fc_upload_session(struct fcoe_port *port,
 					struct bnx2fc_rport *tgt)
 {
@@ -279,19 +307,8 @@ static void bnx2fc_upload_session(struct fcoe_port *port,
 	 * wait for upload to complete. 3 Secs
 	 * should be sufficient time for this process to complete.
 	 */
-	setup_timer(&tgt->upld_timer, bnx2fc_upld_timer, (unsigned long)tgt);
-	mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT);
-
 	BNX2FC_TGT_DBG(tgt, "waiting for disable compl\n");
-	wait_event_interruptible(tgt->upld_wait,
-				 (test_bit(
-				  BNX2FC_FLAG_UPLD_REQ_COMPL,
-				  &tgt->flags)));
-
-	if (signal_pending(current))
-		flush_signals(current);
-
-	del_timer_sync(&tgt->upld_timer);
+	bnx2fc_upld_wait(tgt);
 
 	/*
 	 * traverse thru the active_q and tmf_q and cleanup
@@ -308,24 +325,13 @@ static void bnx2fc_upload_session(struct fcoe_port *port,
 		bnx2fc_send_session_destroy_req(hba, tgt);
 
 		/* wait for destroy to complete */
-		setup_timer(&tgt->upld_timer,
-			    bnx2fc_upld_timer, (unsigned long)tgt);
-		mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT);
-
-		wait_event_interruptible(tgt->upld_wait,
-					 (test_bit(
-					  BNX2FC_FLAG_UPLD_REQ_COMPL,
-					  &tgt->flags)));
+		bnx2fc_upld_wait(tgt);
 
 		if (!(test_bit(BNX2FC_FLAG_DESTROYED, &tgt->flags)))
 			printk(KERN_ERR PFX "ERROR!! destroy timed out\n");
 
 		BNX2FC_TGT_DBG(tgt, "destroy wait complete flags = 0x%lx\n",
 			tgt->flags);
-		if (signal_pending(current))
-			flush_signals(current);
-
-		del_timer_sync(&tgt->upld_timer);
 
 	} else if (test_bit(BNX2FC_FLAG_DISABLE_FAILED, &tgt->flags)) {
 		printk(KERN_ERR PFX "ERROR!! DISABLE req failed, destroy"
@@ -381,7 +387,9 @@ static int bnx2fc_init_tgt(struct bnx2fc_rport *tgt,
 	tgt->rq_cons_idx = 0;
 	atomic_set(&tgt->num_active_ios, 0);
 
-	if (rdata->flags & FC_RP_FLAGS_RETRY) {
+	if (rdata->flags & FC_RP_FLAGS_RETRY &&
+	    rdata->ids.roles & FC_RPORT_ROLE_FCP_TARGET &&
+	    !(rdata->ids.roles & FC_RPORT_ROLE_FCP_INITIATOR)) {
 		tgt->dev_type = TYPE_TAPE;
 		tgt->io_timeout = 0; /* use default ULP timeout */
 	} else {
@@ -479,7 +487,7 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport,
 		tgt = (struct bnx2fc_rport *)&rp[1];
 
 		/* This can happen when ADISC finds the same target */
-		if (test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags)) {
+		if (test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags)) {
 			BNX2FC_TGT_DBG(tgt, "already offloaded\n");
 			mutex_unlock(&hba->hba_mutex);
 			return;
@@ -494,11 +502,8 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport,
 		BNX2FC_TGT_DBG(tgt, "OFFLOAD num_ofld_sess = %d\n",
 			hba->num_ofld_sess);
 
-		if (test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags)) {
-			/*
-			 * Session is offloaded and enabled. Map
-			 * doorbell register for this target
-			 */
+		if (test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags)) {
+			/* Session is offloaded and enabled.  */
 			BNX2FC_TGT_DBG(tgt, "sess offloaded\n");
 			/* This counter is protected with hba mutex */
 			hba->num_ofld_sess++;
@@ -535,7 +540,7 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport,
 		 */
 		tgt = (struct bnx2fc_rport *)&rp[1];
 
-		if (!(test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags))) {
+		if (!(test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags))) {
 			mutex_unlock(&hba->hba_mutex);
 			break;
 		}
diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
index 91eec60252ee..a28b03e5a5f6 100644
--- a/drivers/scsi/bnx2i/bnx2i_hwi.c
+++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
@@ -1317,7 +1317,7 @@ int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba)
 		(1ULL << ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_LUN));
 	if (error_mask1) {
 		iscsi_init2.error_bit_map[0] = error_mask1;
-		mask64 &= (u32)(~mask64);
+		mask64 ^= (u32)(mask64);
 		mask64 |= error_mask1;
 	} else
 		iscsi_init2.error_bit_map[0] = (u32) mask64;
diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c
index a15474eef5f7..2a323742ce04 100644
--- a/drivers/scsi/ch.c
+++ b/drivers/scsi/ch.c
@@ -895,7 +895,7 @@ static int ch_probe(struct device *dev)
 {
 	struct scsi_device *sd = to_scsi_device(dev);
 	struct device *class_dev;
-	int minor, ret = -ENOMEM;
+	int ret;
 	scsi_changer *ch;
 
 	if (sd->type != TYPE_MEDIUM_CHANGER)
@@ -905,22 +905,19 @@ static int ch_probe(struct device *dev)
 	if (NULL == ch)
 		return -ENOMEM;
 
-	if (!idr_pre_get(&ch_index_idr, GFP_KERNEL))
-		goto free_ch;
-
+	idr_preload(GFP_KERNEL);
 	spin_lock(&ch_index_lock);
-	ret = idr_get_new(&ch_index_idr, ch, &minor);
+	ret = idr_alloc(&ch_index_idr, ch, 0, CH_MAX_DEVS + 1, GFP_NOWAIT);
 	spin_unlock(&ch_index_lock);
+	idr_preload_end();
 
-	if (ret)
+	if (ret < 0) {
+		if (ret == -ENOSPC)
+			ret = -ENODEV;
 		goto free_ch;
-
-	if (minor > CH_MAX_DEVS) {
-		ret = -ENODEV;
-		goto remove_idr;
 	}
 
-	ch->minor = minor;
+	ch->minor = ret;
 	sprintf(ch->name,"ch%d",ch->minor);
 
 	class_dev = device_create(ch_sysfs_class, dev,
@@ -944,7 +941,7 @@ static int ch_probe(struct device *dev)
 
 	return 0;
 remove_idr:
-	idr_remove(&ch_index_idr, minor);
+	idr_remove(&ch_index_idr, ch->minor);
 free_ch:
 	kfree(ch);
 	return ret;
diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c
index 8ecdb94a59f4..bdd78fb4fc70 100644
--- a/drivers/scsi/csiostor/csio_hw.c
+++ b/drivers/scsi/csiostor/csio_hw.c
@@ -2131,13 +2131,16 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path)
 		value_to_add = 4 - (cf->size % 4);
 
 	cfg_data = kzalloc(cf->size+value_to_add, GFP_KERNEL);
-	if (cfg_data == NULL)
-		return -ENOMEM;
+	if (cfg_data == NULL) {
+		ret = -ENOMEM;
+		goto leave;
+	}
 
 	memcpy((void *)cfg_data, (const void *)cf->data, cf->size);
-
-	if (csio_hw_check_fwconfig(hw, fw_cfg_param) != 0)
-		return -EINVAL;
+	if (csio_hw_check_fwconfig(hw, fw_cfg_param) != 0) {
+		ret = -EINVAL;
+		goto leave;
+	}
 
 	mtype = FW_PARAMS_PARAM_Y_GET(*fw_cfg_param);
 	maddr = FW_PARAMS_PARAM_Z_GET(*fw_cfg_param) << 16;
@@ -2149,9 +2152,9 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path)
 		strncpy(path, "/lib/firmware/" CSIO_CF_FNAME, 64);
 	}
 
+leave:
 	kfree(cfg_data);
 	release_firmware(cf);
-
 	return ret;
 }
 
diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c
index c323b2030afa..0604b5ff3638 100644
--- a/drivers/scsi/csiostor/csio_init.c
+++ b/drivers/scsi/csiostor/csio_init.c
@@ -60,13 +60,6 @@ static struct scsi_transport_template *csio_fcoe_transport_vport;
 /*
  * debugfs support
  */
-static int
-csio_mem_open(struct inode *inode, struct file *file)
-{
-	file->private_data = inode->i_private;
-	return 0;
-}
-
 static ssize_t
 csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
@@ -110,7 +103,7 @@ csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 
 static const struct file_operations csio_mem_debugfs_fops = {
 	.owner   = THIS_MODULE,
-	.open    = csio_mem_open,
+	.open    = simple_open,
 	.read    = csio_mem_read,
 	.llseek  = default_llseek,
 };
diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
index f924b3c3720e..3fecf35ba292 100644
--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
+++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
@@ -1564,6 +1564,7 @@ static int t4_uld_state_change(void *handle, enum cxgb4_state state)
 		break;
 	case CXGB4_STATE_DETACH:
 		pr_info("cdev 0x%p, DETACH.\n", cdev);
+		cxgbi_device_unregister(cdev);
 		break;
 	default:
 		pr_info("cdev 0x%p, unknown state %d.\n", cdev, state);
diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c
index 865c64fa923c..fed486bfd3f4 100644
--- a/drivers/scsi/dc395x.c
+++ b/drivers/scsi/dc395x.c
@@ -3747,13 +3747,13 @@ static struct DeviceCtlBlk *device_alloc(struct AdapterCtlBlk *acb,
 	dcb->max_command = 1;
 	dcb->target_id = target;
 	dcb->target_lun = lun;
+	dcb->dev_mode = eeprom->target[target].cfg0;
 #ifndef DC395x_NO_DISCONNECT
 	dcb->identify_msg =
 	    IDENTIFY(dcb->dev_mode & NTC_DO_DISCONNECT, lun);
 #else
 	dcb->identify_msg = IDENTIFY(0, lun);
 #endif
-	dcb->dev_mode = eeprom->target[target].cfg0;
 	dcb->inquiry7 = 0;
 	dcb->sync_mode = 0;
 	dcb->min_nego_period = clock_period[period_index];
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 666b7ac4475f..b5d92fc93c70 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -82,11 +82,11 @@ static int fcoe_rcv(struct sk_buff *, struct net_device *,
 		    struct packet_type *, struct net_device *);
 static int fcoe_percpu_receive_thread(void *);
 static void fcoe_percpu_clean(struct fc_lport *);
-static int fcoe_link_speed_update(struct fc_lport *);
 static int fcoe_link_ok(struct fc_lport *);
 
 static struct fc_lport *fcoe_hostlist_lookup(const struct net_device *);
 static int fcoe_hostlist_add(const struct fc_lport *);
+static void fcoe_hostlist_del(const struct fc_lport *);
 
 static int fcoe_device_notification(struct notifier_block *, ulong, void *);
 static void fcoe_dev_setup(void);
@@ -117,6 +117,11 @@ static int fcoe_destroy(struct net_device *netdev);
 static int fcoe_enable(struct net_device *netdev);
 static int fcoe_disable(struct net_device *netdev);
 
+/* fcoe_syfs control interface handlers */
+static int fcoe_ctlr_alloc(struct net_device *netdev);
+static int fcoe_ctlr_enabled(struct fcoe_ctlr_device *cdev);
+
+
 static struct fc_seq *fcoe_elsct_send(struct fc_lport *,
 				      u32 did, struct fc_frame *,
 				      unsigned int op,
@@ -126,8 +131,6 @@ static struct fc_seq *fcoe_elsct_send(struct fc_lport *,
 				      void *, u32 timeout);
 static void fcoe_recv_frame(struct sk_buff *skb);
 
-static void fcoe_get_lesb(struct fc_lport *, struct fc_els_lesb *);
-
 /* notification function for packets from net device */
 static struct notifier_block fcoe_notifier = {
 	.notifier_call = fcoe_device_notification,
@@ -151,11 +154,11 @@ static int fcoe_vport_create(struct fc_vport *, bool disabled);
 static int fcoe_vport_disable(struct fc_vport *, bool disable);
 static void fcoe_set_vport_symbolic_name(struct fc_vport *);
 static void fcoe_set_port_id(struct fc_lport *, u32, struct fc_frame *);
-static void fcoe_ctlr_get_lesb(struct fcoe_ctlr_device *);
 static void fcoe_fcf_get_vlan_id(struct fcoe_fcf_device *);
 
 static struct fcoe_sysfs_function_template fcoe_sysfs_templ = {
-	.get_fcoe_ctlr_mode = fcoe_ctlr_get_fip_mode,
+	.set_fcoe_ctlr_mode = fcoe_ctlr_set_fip_mode,
+	.set_fcoe_ctlr_enabled = fcoe_ctlr_enabled,
 	.get_fcoe_ctlr_link_fail = fcoe_ctlr_get_lesb,
 	.get_fcoe_ctlr_vlink_fail = fcoe_ctlr_get_lesb,
 	.get_fcoe_ctlr_miss_fka = fcoe_ctlr_get_lesb,
@@ -1112,10 +1115,17 @@ static struct fc_lport *fcoe_if_create(struct fcoe_interface *fcoe,
 	port = lport_priv(lport);
 	port->lport = lport;
 	port->priv = fcoe;
+	port->get_netdev = fcoe_netdev;
 	port->max_queue_depth = FCOE_MAX_QUEUE_DEPTH;
 	port->min_queue_depth = FCOE_MIN_QUEUE_DEPTH;
 	INIT_WORK(&port->destroy_work, fcoe_destroy_work);
 
+	/*
+	 * Need to add the lport to the hostlist
+	 * so we catch NETDEV_CHANGE events.
+	 */
+	fcoe_hostlist_add(lport);
+
 	/* configure a fc_lport including the exchange manager */
 	rc = fcoe_lport_config(lport);
 	if (rc) {
@@ -1187,6 +1197,7 @@ static struct fc_lport *fcoe_if_create(struct fcoe_interface *fcoe,
 out_lp_destroy:
 	fc_exch_mgr_free(lport);
 out_host_put:
+	fcoe_hostlist_del(lport);
 	scsi_host_put(lport->host);
 out:
 	return ERR_PTR(rc);
@@ -1964,6 +1975,7 @@ static int fcoe_dcb_app_notification(struct notifier_block *notifier,
 static int fcoe_device_notification(struct notifier_block *notifier,
 				    ulong event, void *ptr)
 {
+	struct fcoe_ctlr_device *cdev;
 	struct fc_lport *lport = NULL;
 	struct net_device *netdev = ptr;
 	struct fcoe_ctlr *ctlr;
@@ -2020,13 +2032,29 @@ static int fcoe_device_notification(struct notifier_block *notifier,
 
 	fcoe_link_speed_update(lport);
 
-	if (link_possible && !fcoe_link_ok(lport))
-		fcoe_ctlr_link_up(ctlr);
-	else if (fcoe_ctlr_link_down(ctlr)) {
-		stats = per_cpu_ptr(lport->stats, get_cpu());
-		stats->LinkFailureCount++;
-		put_cpu();
-		fcoe_clean_pending_queue(lport);
+	cdev = fcoe_ctlr_to_ctlr_dev(ctlr);
+
+	if (link_possible && !fcoe_link_ok(lport)) {
+		switch (cdev->enabled) {
+		case FCOE_CTLR_DISABLED:
+			pr_info("Link up while interface is disabled.\n");
+			break;
+		case FCOE_CTLR_ENABLED:
+		case FCOE_CTLR_UNUSED:
+			fcoe_ctlr_link_up(ctlr);
+		};
+	} else if (fcoe_ctlr_link_down(ctlr)) {
+		switch (cdev->enabled) {
+		case FCOE_CTLR_DISABLED:
+			pr_info("Link down while interface is disabled.\n");
+			break;
+		case FCOE_CTLR_ENABLED:
+		case FCOE_CTLR_UNUSED:
+			stats = per_cpu_ptr(lport->stats, get_cpu());
+			stats->LinkFailureCount++;
+			put_cpu();
+			fcoe_clean_pending_queue(lport);
+		};
 	}
 out:
 	return rc;
@@ -2039,6 +2067,8 @@ out:
  * Called from fcoe transport.
  *
  * Returns: 0 for success
+ *
+ * Deprecated: use fcoe_ctlr_enabled()
  */
 static int fcoe_disable(struct net_device *netdev)
 {
@@ -2098,6 +2128,33 @@ out:
 }
 
 /**
+ * fcoe_ctlr_enabled() - Enable or disable an FCoE Controller
+ * @cdev: The FCoE Controller that is being enabled or disabled
+ *
+ * fcoe_sysfs will ensure that the state of 'enabled' has
+ * changed, so no checking is necessary here. This routine simply
+ * calls fcoe_enable or fcoe_disable, both of which are deprecated.
+ * When those routines are removed the functionality can be merged
+ * here.
+ */
+static int fcoe_ctlr_enabled(struct fcoe_ctlr_device *cdev)
+{
+	struct fcoe_ctlr *ctlr = fcoe_ctlr_device_priv(cdev);
+	struct fc_lport *lport = ctlr->lp;
+	struct net_device *netdev = fcoe_netdev(lport);
+
+	switch (cdev->enabled) {
+	case FCOE_CTLR_ENABLED:
+		return fcoe_enable(netdev);
+	case FCOE_CTLR_DISABLED:
+		return fcoe_disable(netdev);
+	case FCOE_CTLR_UNUSED:
+	default:
+		return -ENOTSUPP;
+	};
+}
+
+/**
  * fcoe_destroy() - Destroy a FCoE interface
  * @netdev  : The net_device object the Ethernet interface to create on
  *
@@ -2139,8 +2196,31 @@ static void fcoe_destroy_work(struct work_struct *work)
 {
 	struct fcoe_port *port;
 	struct fcoe_interface *fcoe;
+	struct Scsi_Host *shost;
+	struct fc_host_attrs *fc_host;
+	unsigned long flags;
+	struct fc_vport *vport;
+	struct fc_vport *next_vport;
 
 	port = container_of(work, struct fcoe_port, destroy_work);
+	shost = port->lport->host;
+	fc_host = shost_to_fc_host(shost);
+
+	/* Loop through all the vports and mark them for deletion */
+	spin_lock_irqsave(shost->host_lock, flags);
+	list_for_each_entry_safe(vport, next_vport, &fc_host->vports, peers) {
+		if (vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING)) {
+			continue;
+		} else {
+			vport->flags |= FC_VPORT_DELETING;
+			queue_work(fc_host_work_q(shost),
+				   &vport->vport_delete_work);
+		}
+	}
+	spin_unlock_irqrestore(shost->host_lock, flags);
+
+	flush_workqueue(fc_host_work_q(shost));
+
 	mutex_lock(&fcoe_config_mutex);
 
 	fcoe = port->priv;
@@ -2204,16 +2284,26 @@ static void fcoe_dcb_create(struct fcoe_interface *fcoe)
 #endif
 }
 
+enum fcoe_create_link_state {
+	FCOE_CREATE_LINK_DOWN,
+	FCOE_CREATE_LINK_UP,
+};
+
 /**
- * fcoe_create() - Create a fcoe interface
- * @netdev  : The net_device object the Ethernet interface to create on
- * @fip_mode: The FIP mode for this creation
+ * _fcoe_create() - (internal) Create a fcoe interface
+ * @netdev  :   The net_device object the Ethernet interface to create on
+ * @fip_mode:   The FIP mode for this creation
+ * @link_state: The ctlr link state on creation
  *
- * Called from fcoe transport
+ * Called from either the libfcoe 'create' module parameter
+ * via fcoe_create or from fcoe_syfs's ctlr_create file.
  *
- * Returns: 0 for success
+ * libfcoe's 'create' module parameter is deprecated so some
+ * consolidation of code can be done when that interface is
+ * removed.
  */
-static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode)
+static int _fcoe_create(struct net_device *netdev, enum fip_state fip_mode,
+			enum fcoe_create_link_state link_state)
 {
 	int rc = 0;
 	struct fcoe_ctlr_device *ctlr_dev;
@@ -2254,13 +2344,29 @@ static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode)
 	/* setup DCB priority attributes. */
 	fcoe_dcb_create(fcoe);
 
-	/* add to lports list */
-	fcoe_hostlist_add(lport);
-
 	/* start FIP Discovery and FLOGI */
 	lport->boot_time = jiffies;
 	fc_fabric_login(lport);
-	if (!fcoe_link_ok(lport)) {
+
+	/*
+	 * If the fcoe_ctlr_device is to be set to DISABLED
+	 * it must be done after the lport is added to the
+	 * hostlist, but before the rtnl_lock is released.
+	 * This is because the rtnl_lock protects the
+	 * hostlist that fcoe_device_notification uses. If
+	 * the FCoE Controller is intended to be created
+	 * DISABLED then 'enabled' needs to be considered
+	 * handling link events. 'enabled' must be set
+	 * before the lport can be found in the hostlist
+	 * when a link up event is received.
+	 */
+	if (link_state == FCOE_CREATE_LINK_UP)
+		ctlr_dev->enabled = FCOE_CTLR_ENABLED;
+	else
+		ctlr_dev->enabled = FCOE_CTLR_DISABLED;
+
+	if (link_state == FCOE_CREATE_LINK_UP &&
+	    !fcoe_link_ok(lport)) {
 		rtnl_unlock();
 		fcoe_ctlr_link_up(ctlr);
 		mutex_unlock(&fcoe_config_mutex);
@@ -2275,37 +2381,34 @@ out_nortnl:
 }
 
 /**
- * fcoe_link_speed_update() - Update the supported and actual link speeds
- * @lport: The local port to update speeds for
+ * fcoe_create() - Create a fcoe interface
+ * @netdev  : The net_device object the Ethernet interface to create on
+ * @fip_mode: The FIP mode for this creation
+ *
+ * Called from fcoe transport
+ *
+ * Returns: 0 for success
+ */
+static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode)
+{
+	return _fcoe_create(netdev, fip_mode, FCOE_CREATE_LINK_UP);
+}
+
+/**
+ * fcoe_ctlr_alloc() - Allocate a fcoe interface from fcoe_sysfs
+ * @netdev: The net_device to be used by the allocated FCoE Controller
  *
- * Returns: 0 if the ethtool query was successful
- *          -1 if the ethtool query failed
+ * This routine is called from fcoe_sysfs. It will start the fcoe_ctlr
+ * in a link_down state. The allows the user an opportunity to configure
+ * the FCoE Controller from sysfs before enabling the FCoE Controller.
+ *
+ * Creating in with this routine starts the FCoE Controller in Fabric
+ * mode. The user can change to VN2VN or another mode before enabling.
  */
-static int fcoe_link_speed_update(struct fc_lport *lport)
+static int fcoe_ctlr_alloc(struct net_device *netdev)
 {
-	struct net_device *netdev = fcoe_netdev(lport);
-	struct ethtool_cmd ecmd;
-
-	if (!__ethtool_get_settings(netdev, &ecmd)) {
-		lport->link_supported_speeds &=
-			~(FC_PORTSPEED_1GBIT | FC_PORTSPEED_10GBIT);
-		if (ecmd.supported & (SUPPORTED_1000baseT_Half |
-				      SUPPORTED_1000baseT_Full))
-			lport->link_supported_speeds |= FC_PORTSPEED_1GBIT;
-		if (ecmd.supported & SUPPORTED_10000baseT_Full)
-			lport->link_supported_speeds |=
-				FC_PORTSPEED_10GBIT;
-		switch (ethtool_cmd_speed(&ecmd)) {
-		case SPEED_1000:
-			lport->link_speed = FC_PORTSPEED_1GBIT;
-			break;
-		case SPEED_10000:
-			lport->link_speed = FC_PORTSPEED_10GBIT;
-			break;
-		}
-		return 0;
-	}
-	return -1;
+	return _fcoe_create(netdev, FIP_MODE_FABRIC,
+			    FCOE_CREATE_LINK_DOWN);
 }
 
 /**
@@ -2375,10 +2478,13 @@ static int fcoe_reset(struct Scsi_Host *shost)
 	struct fcoe_port *port = lport_priv(lport);
 	struct fcoe_interface *fcoe = port->priv;
 	struct fcoe_ctlr *ctlr = fcoe_to_ctlr(fcoe);
+	struct fcoe_ctlr_device *cdev = fcoe_ctlr_to_ctlr_dev(ctlr);
 
 	fcoe_ctlr_link_down(ctlr);
 	fcoe_clean_pending_queue(ctlr->lp);
-	if (!fcoe_link_ok(ctlr->lp))
+
+	if (cdev->enabled != FCOE_CTLR_DISABLED &&
+	    !fcoe_link_ok(ctlr->lp))
 		fcoe_ctlr_link_up(ctlr);
 	return 0;
 }
@@ -2445,12 +2551,31 @@ static int fcoe_hostlist_add(const struct fc_lport *lport)
 	return 0;
 }
 
+/**
+ * fcoe_hostlist_del() - Remove the FCoE interface identified by a local
+ *			 port to the hostlist
+ * @lport: The local port that identifies the FCoE interface to be added
+ *
+ * Locking: must be called with the RTNL mutex held
+ *
+ */
+static void fcoe_hostlist_del(const struct fc_lport *lport)
+{
+	struct fcoe_interface *fcoe;
+	struct fcoe_port *port;
+
+	port = lport_priv(lport);
+	fcoe = port->priv;
+	list_del(&fcoe->list);
+	return;
+}
 
 static struct fcoe_transport fcoe_sw_transport = {
 	.name = {FCOE_TRANSPORT_DEFAULT},
 	.attached = false,
 	.list = LIST_HEAD_INIT(fcoe_sw_transport.list),
 	.match = fcoe_match,
+	.alloc = fcoe_ctlr_alloc,
 	.create = fcoe_create,
 	.destroy = fcoe_destroy,
 	.enable = fcoe_enable,
@@ -2534,9 +2659,9 @@ static void __exit fcoe_exit(void)
 	/* releases the associated fcoe hosts */
 	rtnl_lock();
 	list_for_each_entry_safe(fcoe, tmp, &fcoe_hostlist, list) {
-		list_del(&fcoe->list);
 		ctlr = fcoe_to_ctlr(fcoe);
 		port = lport_priv(ctlr->lp);
+		fcoe_hostlist_del(port->lport);
 		queue_work(fcoe_wq, &port->destroy_work);
 	}
 	rtnl_unlock();
@@ -2776,43 +2901,6 @@ static void fcoe_set_vport_symbolic_name(struct fc_vport *vport)
 			     NULL, NULL, 3 * lport->r_a_tov);
 }
 
-/**
- * fcoe_get_lesb() - Fill the FCoE Link Error Status Block
- * @lport: the local port
- * @fc_lesb: the link error status block
- */
-static void fcoe_get_lesb(struct fc_lport *lport,
-			 struct fc_els_lesb *fc_lesb)
-{
-	struct net_device *netdev = fcoe_netdev(lport);
-
-	__fcoe_get_lesb(lport, fc_lesb, netdev);
-}
-
-static void fcoe_ctlr_get_lesb(struct fcoe_ctlr_device *ctlr_dev)
-{
-	struct fcoe_ctlr *fip = fcoe_ctlr_device_priv(ctlr_dev);
-	struct net_device *netdev = fcoe_netdev(fip->lp);
-	struct fcoe_fc_els_lesb *fcoe_lesb;
-	struct fc_els_lesb fc_lesb;
-
-	__fcoe_get_lesb(fip->lp, &fc_lesb, netdev);
-	fcoe_lesb = (struct fcoe_fc_els_lesb *)(&fc_lesb);
-
-	ctlr_dev->lesb.lesb_link_fail =
-		ntohl(fcoe_lesb->lesb_link_fail);
-	ctlr_dev->lesb.lesb_vlink_fail =
-		ntohl(fcoe_lesb->lesb_vlink_fail);
-	ctlr_dev->lesb.lesb_miss_fka =
-		ntohl(fcoe_lesb->lesb_miss_fka);
-	ctlr_dev->lesb.lesb_symb_err =
-		ntohl(fcoe_lesb->lesb_symb_err);
-	ctlr_dev->lesb.lesb_err_block =
-		ntohl(fcoe_lesb->lesb_err_block);
-	ctlr_dev->lesb.lesb_fcs_error =
-		ntohl(fcoe_lesb->lesb_fcs_error);
-}
-
 static void fcoe_fcf_get_vlan_id(struct fcoe_fcf_device *fcf_dev)
 {
 	struct fcoe_ctlr_device *ctlr_dev =
diff --git a/drivers/scsi/fcoe/fcoe.h b/drivers/scsi/fcoe/fcoe.h
index b42dc32cb5eb..2b53672bf932 100644
--- a/drivers/scsi/fcoe/fcoe.h
+++ b/drivers/scsi/fcoe/fcoe.h
@@ -55,12 +55,12 @@ do {                                                            	\
 
 #define FCOE_DBG(fmt, args...)						\
 	FCOE_CHECK_LOGGING(FCOE_LOGGING,				\
-			   printk(KERN_INFO "fcoe: " fmt, ##args);)
+			   pr_info("fcoe: " fmt, ##args);)
 
 #define FCOE_NETDEV_DBG(netdev, fmt, args...)			\
 	FCOE_CHECK_LOGGING(FCOE_NETDEV_LOGGING,			\
-			   printk(KERN_INFO "fcoe: %s: " fmt,	\
-				  netdev->name, ##args);)
+			   pr_info("fcoe: %s: " fmt,		\
+				   netdev->name, ##args);)
 
 /**
  * struct fcoe_interface - A FCoE interface
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index 4a909d7cfde1..08c3bc398da2 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -1291,8 +1291,16 @@ static void fcoe_ctlr_recv_clr_vlink(struct fcoe_ctlr *fip,
 
 	LIBFCOE_FIP_DBG(fip, "Clear Virtual Link received\n");
 
-	if (!fcf || !lport->port_id)
+	if (!fcf || !lport->port_id) {
+		/*
+		 * We are yet to select best FCF, but we got CVL in the
+		 * meantime. reset the ctlr and let it rediscover the FCF
+		 */
+		mutex_lock(&fip->ctlr_mutex);
+		fcoe_ctlr_reset(fip);
+		mutex_unlock(&fip->ctlr_mutex);
 		return;
+	}
 
 	/*
 	 * mask of required descriptors.  Validating each one clears its bit.
@@ -1551,15 +1559,6 @@ static struct fcoe_fcf *fcoe_ctlr_select(struct fcoe_ctlr *fip)
 				fcf->fabric_name, fcf->vfid, fcf->fcf_mac,
 				fcf->fc_map, fcoe_ctlr_mtu_valid(fcf),
 				fcf->flogi_sent, fcf->pri);
-		if (fcf->fabric_name != first->fabric_name ||
-		    fcf->vfid != first->vfid ||
-		    fcf->fc_map != first->fc_map) {
-			LIBFCOE_FIP_DBG(fip, "Conflicting fabric, VFID, "
-					"or FC-MAP\n");
-			return NULL;
-		}
-		if (fcf->flogi_sent)
-			continue;
 		if (!fcoe_ctlr_fcf_usable(fcf)) {
 			LIBFCOE_FIP_DBG(fip, "FCF for fab %16.16llx "
 					"map %x %svalid %savailable\n",
@@ -1569,6 +1568,15 @@ static struct fcoe_fcf *fcoe_ctlr_select(struct fcoe_ctlr *fip)
 					"" : "un");
 			continue;
 		}
+		if (fcf->fabric_name != first->fabric_name ||
+		    fcf->vfid != first->vfid ||
+		    fcf->fc_map != first->fc_map) {
+			LIBFCOE_FIP_DBG(fip, "Conflicting fabric, VFID, "
+					"or FC-MAP\n");
+			return NULL;
+		}
+		if (fcf->flogi_sent)
+			continue;
 		if (!best || fcf->pri < best->pri || best->flogi_sent)
 			best = fcf;
 	}
@@ -2864,22 +2872,21 @@ void fcoe_fcf_get_selected(struct fcoe_fcf_device *fcf_dev)
 }
 EXPORT_SYMBOL(fcoe_fcf_get_selected);
 
-void fcoe_ctlr_get_fip_mode(struct fcoe_ctlr_device *ctlr_dev)
+void fcoe_ctlr_set_fip_mode(struct fcoe_ctlr_device *ctlr_dev)
 {
 	struct fcoe_ctlr *ctlr = fcoe_ctlr_device_priv(ctlr_dev);
 
 	mutex_lock(&ctlr->ctlr_mutex);
-	switch (ctlr->mode) {
-	case FIP_MODE_FABRIC:
-		ctlr_dev->mode = FIP_CONN_TYPE_FABRIC;
-		break;
-	case FIP_MODE_VN2VN:
-		ctlr_dev->mode = FIP_CONN_TYPE_VN2VN;
+	switch (ctlr_dev->mode) {
+	case FIP_CONN_TYPE_VN2VN:
+		ctlr->mode = FIP_MODE_VN2VN;
 		break;
+	case FIP_CONN_TYPE_FABRIC:
 	default:
-		ctlr_dev->mode = FIP_CONN_TYPE_UNKNOWN;
+		ctlr->mode = FIP_MODE_FABRIC;
 		break;
 	}
+
 	mutex_unlock(&ctlr->ctlr_mutex);
 }
-EXPORT_SYMBOL(fcoe_ctlr_get_fip_mode);
+EXPORT_SYMBOL(fcoe_ctlr_set_fip_mode);
diff --git a/drivers/scsi/fcoe/fcoe_sysfs.c b/drivers/scsi/fcoe/fcoe_sysfs.c
index 5e751689a089..8c05ae017f5b 100644
--- a/drivers/scsi/fcoe/fcoe_sysfs.c
+++ b/drivers/scsi/fcoe/fcoe_sysfs.c
@@ -21,8 +21,17 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/etherdevice.h>
+#include <linux/ctype.h>
 
 #include <scsi/fcoe_sysfs.h>
+#include <scsi/libfcoe.h>
+
+/*
+ * OK to include local libfcoe.h for debug_logging, but cannot include
+ * <scsi/libfcoe.h> otherwise non-netdev based fcoe solutions would have
+ * have to include more than fcoe_sysfs.h.
+ */
+#include "libfcoe.h"
 
 static atomic_t ctlr_num;
 static atomic_t fcf_num;
@@ -71,6 +80,8 @@ MODULE_PARM_DESC(fcf_dev_loss_tmo,
 	((x)->lesb.lesb_err_block)
 #define fcoe_ctlr_fcs_error(x)			\
 	((x)->lesb.lesb_fcs_error)
+#define fcoe_ctlr_enabled(x)			\
+	((x)->enabled)
 #define fcoe_fcf_state(x)			\
 	((x)->state)
 #define fcoe_fcf_fabric_name(x)			\
@@ -210,25 +221,34 @@ static ssize_t show_fcoe_fcf_device_##field(struct device *dev,	\
 #define fcoe_enum_name_search(title, table_type, table)			\
 static const char *get_fcoe_##title##_name(enum table_type table_key)	\
 {									\
-	int i;								\
-	char *name = NULL;						\
-									\
-	for (i = 0; i < ARRAY_SIZE(table); i++) {			\
-		if (table[i].value == table_key) {			\
-			name = table[i].name;				\
-			break;						\
-		}							\
-	}								\
-	return name;							\
+	if (table_key < 0 || table_key >= ARRAY_SIZE(table))		\
+		return NULL;						\
+	return table[table_key];					\
+}
+
+static char *fip_conn_type_names[] = {
+	[ FIP_CONN_TYPE_UNKNOWN ] = "Unknown",
+	[ FIP_CONN_TYPE_FABRIC ]  = "Fabric",
+	[ FIP_CONN_TYPE_VN2VN ]   = "VN2VN",
+};
+fcoe_enum_name_search(ctlr_mode, fip_conn_type, fip_conn_type_names)
+
+static enum fip_conn_type fcoe_parse_mode(const char *buf)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(fip_conn_type_names); i++) {
+		if (strcasecmp(buf, fip_conn_type_names[i]) == 0)
+			return i;
+	}
+
+	return FIP_CONN_TYPE_UNKNOWN;
 }
 
-static struct {
-	enum fcf_state value;
-	char           *name;
-} fcf_state_names[] = {
-	{ FCOE_FCF_STATE_UNKNOWN,      "Unknown" },
-	{ FCOE_FCF_STATE_DISCONNECTED, "Disconnected" },
-	{ FCOE_FCF_STATE_CONNECTED,    "Connected" },
+static char *fcf_state_names[] = {
+	[ FCOE_FCF_STATE_UNKNOWN ]      = "Unknown",
+	[ FCOE_FCF_STATE_DISCONNECTED ] = "Disconnected",
+	[ FCOE_FCF_STATE_CONNECTED ]    = "Connected",
 };
 fcoe_enum_name_search(fcf_state, fcf_state, fcf_state_names)
 #define FCOE_FCF_STATE_MAX_NAMELEN 50
@@ -246,17 +266,7 @@ static ssize_t show_fcf_state(struct device *dev,
 }
 static FCOE_DEVICE_ATTR(fcf, state, S_IRUGO, show_fcf_state, NULL);
 
-static struct {
-	enum fip_conn_type value;
-	char               *name;
-} fip_conn_type_names[] = {
-	{ FIP_CONN_TYPE_UNKNOWN, "Unknown" },
-	{ FIP_CONN_TYPE_FABRIC, "Fabric" },
-	{ FIP_CONN_TYPE_VN2VN, "VN2VN" },
-};
-fcoe_enum_name_search(ctlr_mode, fip_conn_type, fip_conn_type_names)
-#define FCOE_CTLR_MODE_MAX_NAMELEN 50
-
+#define FCOE_MAX_MODENAME_LEN 20
 static ssize_t show_ctlr_mode(struct device *dev,
 			      struct device_attribute *attr,
 			      char *buf)
@@ -264,17 +274,116 @@ static ssize_t show_ctlr_mode(struct device *dev,
 	struct fcoe_ctlr_device *ctlr = dev_to_ctlr(dev);
 	const char *name;
 
-	if (ctlr->f->get_fcoe_ctlr_mode)
-		ctlr->f->get_fcoe_ctlr_mode(ctlr);
-
 	name = get_fcoe_ctlr_mode_name(ctlr->mode);
 	if (!name)
 		return -EINVAL;
-	return snprintf(buf, FCOE_CTLR_MODE_MAX_NAMELEN,
+	return snprintf(buf, FCOE_MAX_MODENAME_LEN,
+			"%s\n", name);
+}
+
+static ssize_t store_ctlr_mode(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
+{
+	struct fcoe_ctlr_device *ctlr = dev_to_ctlr(dev);
+	char mode[FCOE_MAX_MODENAME_LEN + 1];
+
+	if (count > FCOE_MAX_MODENAME_LEN)
+		return -EINVAL;
+
+	strncpy(mode, buf, count);
+
+	if (mode[count - 1] == '\n')
+		mode[count - 1] = '\0';
+	else
+		mode[count] = '\0';
+
+	switch (ctlr->enabled) {
+	case FCOE_CTLR_ENABLED:
+		LIBFCOE_SYSFS_DBG(ctlr, "Cannot change mode when enabled.");
+		return -EBUSY;
+	case FCOE_CTLR_DISABLED:
+		if (!ctlr->f->set_fcoe_ctlr_mode) {
+			LIBFCOE_SYSFS_DBG(ctlr,
+					  "Mode change not supported by LLD.");
+			return -ENOTSUPP;
+		}
+
+		ctlr->mode = fcoe_parse_mode(mode);
+		if (ctlr->mode == FIP_CONN_TYPE_UNKNOWN) {
+			LIBFCOE_SYSFS_DBG(ctlr,
+					  "Unknown mode %s provided.", buf);
+			return -EINVAL;
+		}
+
+		ctlr->f->set_fcoe_ctlr_mode(ctlr);
+		LIBFCOE_SYSFS_DBG(ctlr, "Mode changed to %s.", buf);
+
+		return count;
+	case FCOE_CTLR_UNUSED:
+	default:
+		LIBFCOE_SYSFS_DBG(ctlr, "Mode change not supported.");
+		return -ENOTSUPP;
+	};
+}
+
+static FCOE_DEVICE_ATTR(ctlr, mode, S_IRUGO | S_IWUSR,
+			show_ctlr_mode, store_ctlr_mode);
+
+static ssize_t store_ctlr_enabled(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct fcoe_ctlr_device *ctlr = dev_to_ctlr(dev);
+	int rc;
+
+	switch (ctlr->enabled) {
+	case FCOE_CTLR_ENABLED:
+		if (*buf == '1')
+			return count;
+		ctlr->enabled = FCOE_CTLR_DISABLED;
+		break;
+	case FCOE_CTLR_DISABLED:
+		if (*buf == '0')
+			return count;
+		ctlr->enabled = FCOE_CTLR_ENABLED;
+		break;
+	case FCOE_CTLR_UNUSED:
+		return -ENOTSUPP;
+	};
+
+	rc = ctlr->f->set_fcoe_ctlr_enabled(ctlr);
+	if (rc)
+		return rc;
+
+	return count;
+}
+
+static char *ctlr_enabled_state_names[] = {
+	[ FCOE_CTLR_ENABLED ]  = "1",
+	[ FCOE_CTLR_DISABLED ] = "0",
+};
+fcoe_enum_name_search(ctlr_enabled_state, ctlr_enabled_state,
+		      ctlr_enabled_state_names)
+#define FCOE_CTLR_ENABLED_MAX_NAMELEN 50
+
+static ssize_t show_ctlr_enabled_state(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct fcoe_ctlr_device *ctlr = dev_to_ctlr(dev);
+	const char *name;
+
+	name = get_fcoe_ctlr_enabled_state_name(ctlr->enabled);
+	if (!name)
+		return -EINVAL;
+	return snprintf(buf, FCOE_CTLR_ENABLED_MAX_NAMELEN,
 			"%s\n", name);
 }
-static FCOE_DEVICE_ATTR(ctlr, mode, S_IRUGO,
-			show_ctlr_mode, NULL);
+
+static FCOE_DEVICE_ATTR(ctlr, enabled, S_IRUGO | S_IWUSR,
+			show_ctlr_enabled_state,
+			store_ctlr_enabled);
 
 static ssize_t
 store_private_fcoe_ctlr_fcf_dev_loss_tmo(struct device *dev,
@@ -359,6 +468,7 @@ static struct attribute_group fcoe_ctlr_lesb_attr_group = {
 
 static struct attribute *fcoe_ctlr_attrs[] = {
 	&device_attr_fcoe_ctlr_fcf_dev_loss_tmo.attr,
+	&device_attr_fcoe_ctlr_enabled.attr,
 	&device_attr_fcoe_ctlr_mode.attr,
 	NULL,
 };
@@ -443,9 +553,16 @@ struct device_type fcoe_fcf_device_type = {
 	.release = fcoe_fcf_device_release,
 };
 
+struct bus_attribute fcoe_bus_attr_group[] = {
+	__ATTR(ctlr_create, S_IWUSR, NULL, fcoe_ctlr_create_store),
+	__ATTR(ctlr_destroy, S_IWUSR, NULL, fcoe_ctlr_destroy_store),
+	__ATTR_NULL
+};
+
 struct bus_type fcoe_bus_type = {
 	.name = "fcoe",
 	.match = &fcoe_bus_match,
+	.bus_attrs = fcoe_bus_attr_group,
 };
 
 /**
@@ -566,6 +683,7 @@ struct fcoe_ctlr_device *fcoe_ctlr_device_add(struct device *parent,
 
 	ctlr->id = atomic_inc_return(&ctlr_num) - 1;
 	ctlr->f = f;
+	ctlr->mode = FIP_CONN_TYPE_FABRIC;
 	INIT_LIST_HEAD(&ctlr->fcfs);
 	mutex_init(&ctlr->lock);
 	ctlr->dev.parent = parent;
diff --git a/drivers/scsi/fcoe/fcoe_transport.c b/drivers/scsi/fcoe/fcoe_transport.c
index ac76d8a042d7..f3a5a53e8631 100644
--- a/drivers/scsi/fcoe/fcoe_transport.c
+++ b/drivers/scsi/fcoe/fcoe_transport.c
@@ -83,6 +83,50 @@ static struct notifier_block libfcoe_notifier = {
 	.notifier_call = libfcoe_device_notification,
 };
 
+/**
+ * fcoe_link_speed_update() - Update the supported and actual link speeds
+ * @lport: The local port to update speeds for
+ *
+ * Returns: 0 if the ethtool query was successful
+ *          -1 if the ethtool query failed
+ */
+int fcoe_link_speed_update(struct fc_lport *lport)
+{
+	struct net_device *netdev = fcoe_get_netdev(lport);
+	struct ethtool_cmd ecmd;
+
+	if (!__ethtool_get_settings(netdev, &ecmd)) {
+		lport->link_supported_speeds &=
+			~(FC_PORTSPEED_1GBIT | FC_PORTSPEED_10GBIT);
+		if (ecmd.supported & (SUPPORTED_1000baseT_Half |
+				      SUPPORTED_1000baseT_Full))
+			lport->link_supported_speeds |= FC_PORTSPEED_1GBIT;
+		if (ecmd.supported & SUPPORTED_10000baseT_Full)
+			lport->link_supported_speeds |=
+				FC_PORTSPEED_10GBIT;
+		switch (ethtool_cmd_speed(&ecmd)) {
+		case SPEED_1000:
+			lport->link_speed = FC_PORTSPEED_1GBIT;
+			break;
+		case SPEED_10000:
+			lport->link_speed = FC_PORTSPEED_10GBIT;
+			break;
+		}
+		return 0;
+	}
+	return -1;
+}
+EXPORT_SYMBOL_GPL(fcoe_link_speed_update);
+
+/**
+ * __fcoe_get_lesb() - Get the Link Error Status Block (LESB) for a given lport
+ * @lport: The local port to update speeds for
+ * @fc_lesb: Pointer to the LESB to be filled up
+ * @netdev: Pointer to the netdev that is associated with the lport
+ *
+ * Note, the Link Error Status Block (LESB) for FCoE is defined in FC-BB-6
+ * Clause 7.11 in v1.04.
+ */
 void __fcoe_get_lesb(struct fc_lport *lport,
 		     struct fc_els_lesb *fc_lesb,
 		     struct net_device *netdev)
@@ -112,6 +156,51 @@ void __fcoe_get_lesb(struct fc_lport *lport,
 }
 EXPORT_SYMBOL_GPL(__fcoe_get_lesb);
 
+/**
+ * fcoe_get_lesb() - Fill the FCoE Link Error Status Block
+ * @lport: the local port
+ * @fc_lesb: the link error status block
+ */
+void fcoe_get_lesb(struct fc_lport *lport,
+			 struct fc_els_lesb *fc_lesb)
+{
+	struct net_device *netdev = fcoe_get_netdev(lport);
+
+	__fcoe_get_lesb(lport, fc_lesb, netdev);
+}
+EXPORT_SYMBOL_GPL(fcoe_get_lesb);
+
+/**
+ * fcoe_ctlr_get_lesb() - Get the Link Error Status Block (LESB) for a given
+ * fcoe controller device
+ * @ctlr_dev: The given fcoe controller device
+ *
+ */
+void fcoe_ctlr_get_lesb(struct fcoe_ctlr_device *ctlr_dev)
+{
+	struct fcoe_ctlr *fip = fcoe_ctlr_device_priv(ctlr_dev);
+	struct net_device *netdev = fcoe_get_netdev(fip->lp);
+	struct fcoe_fc_els_lesb *fcoe_lesb;
+	struct fc_els_lesb fc_lesb;
+
+	__fcoe_get_lesb(fip->lp, &fc_lesb, netdev);
+	fcoe_lesb = (struct fcoe_fc_els_lesb *)(&fc_lesb);
+
+	ctlr_dev->lesb.lesb_link_fail =
+		ntohl(fcoe_lesb->lesb_link_fail);
+	ctlr_dev->lesb.lesb_vlink_fail =
+		ntohl(fcoe_lesb->lesb_vlink_fail);
+	ctlr_dev->lesb.lesb_miss_fka =
+		ntohl(fcoe_lesb->lesb_miss_fka);
+	ctlr_dev->lesb.lesb_symb_err =
+		ntohl(fcoe_lesb->lesb_symb_err);
+	ctlr_dev->lesb.lesb_err_block =
+		ntohl(fcoe_lesb->lesb_err_block);
+	ctlr_dev->lesb.lesb_fcs_error =
+		ntohl(fcoe_lesb->lesb_fcs_error);
+}
+EXPORT_SYMBOL_GPL(fcoe_ctlr_get_lesb);
+
 void fcoe_wwn_to_str(u64 wwn, char *buf, int len)
 {
 	u8 wwpn[8];
@@ -627,6 +716,110 @@ static int libfcoe_device_notification(struct notifier_block *notifier,
 	return NOTIFY_OK;
 }
 
+ssize_t fcoe_ctlr_create_store(struct bus_type *bus,
+			       const char *buf, size_t count)
+{
+	struct net_device *netdev = NULL;
+	struct fcoe_transport *ft = NULL;
+	struct fcoe_ctlr_device *ctlr_dev = NULL;
+	int rc = 0;
+	int err;
+
+	mutex_lock(&ft_mutex);
+
+	netdev = fcoe_if_to_netdev(buf);
+	if (!netdev) {
+		LIBFCOE_TRANSPORT_DBG("Invalid device %s.\n", buf);
+		rc = -ENODEV;
+		goto out_nodev;
+	}
+
+	ft = fcoe_netdev_map_lookup(netdev);
+	if (ft) {
+		LIBFCOE_TRANSPORT_DBG("transport %s already has existing "
+				      "FCoE instance on %s.\n",
+				      ft->name, netdev->name);
+		rc = -EEXIST;
+		goto out_putdev;
+	}
+
+	ft = fcoe_transport_lookup(netdev);
+	if (!ft) {
+		LIBFCOE_TRANSPORT_DBG("no FCoE transport found for %s.\n",
+				      netdev->name);
+		rc = -ENODEV;
+		goto out_putdev;
+	}
+
+	/* pass to transport create */
+	err = ft->alloc ? ft->alloc(netdev) : -ENODEV;
+	if (err) {
+		fcoe_del_netdev_mapping(netdev);
+		rc = -ENOMEM;
+		goto out_putdev;
+	}
+
+	err = fcoe_add_netdev_mapping(netdev, ft);
+	if (err) {
+		LIBFCOE_TRANSPORT_DBG("failed to add new netdev mapping "
+				      "for FCoE transport %s for %s.\n",
+				      ft->name, netdev->name);
+		rc = -ENODEV;
+		goto out_putdev;
+	}
+
+	LIBFCOE_TRANSPORT_DBG("transport %s %s to create fcoe on %s.\n",
+			      ft->name, (ctlr_dev) ? "succeeded" : "failed",
+			      netdev->name);
+
+out_putdev:
+	dev_put(netdev);
+out_nodev:
+	mutex_unlock(&ft_mutex);
+	if (rc)
+		return rc;
+	return count;
+}
+
+ssize_t fcoe_ctlr_destroy_store(struct bus_type *bus,
+				const char *buf, size_t count)
+{
+	int rc = -ENODEV;
+	struct net_device *netdev = NULL;
+	struct fcoe_transport *ft = NULL;
+
+	mutex_lock(&ft_mutex);
+
+	netdev = fcoe_if_to_netdev(buf);
+	if (!netdev) {
+		LIBFCOE_TRANSPORT_DBG("invalid device %s.\n", buf);
+		goto out_nodev;
+	}
+
+	ft = fcoe_netdev_map_lookup(netdev);
+	if (!ft) {
+		LIBFCOE_TRANSPORT_DBG("no FCoE transport found for %s.\n",
+				      netdev->name);
+		goto out_putdev;
+	}
+
+	/* pass to transport destroy */
+	rc = ft->destroy(netdev);
+	if (rc)
+		goto out_putdev;
+
+	fcoe_del_netdev_mapping(netdev);
+	LIBFCOE_TRANSPORT_DBG("transport %s %s to destroy fcoe on %s.\n",
+			      ft->name, (rc) ? "failed" : "succeeded",
+			      netdev->name);
+	rc = count; /* required for successful return */
+out_putdev:
+	dev_put(netdev);
+out_nodev:
+	mutex_unlock(&ft_mutex);
+	return rc;
+}
+EXPORT_SYMBOL(fcoe_ctlr_destroy_store);
 
 /**
  * fcoe_transport_create() - Create a fcoe interface
@@ -769,11 +962,7 @@ out_putdev:
 	dev_put(netdev);
 out_nodev:
 	mutex_unlock(&ft_mutex);
-
-	if (rc == -ERESTARTSYS)
-		return restart_syscall();
-	else
-		return rc;
+	return rc;
 }
 
 /**
diff --git a/drivers/scsi/fcoe/libfcoe.h b/drivers/scsi/fcoe/libfcoe.h
index 6af5fc3a17d8..d3bb16d11401 100644
--- a/drivers/scsi/fcoe/libfcoe.h
+++ b/drivers/scsi/fcoe/libfcoe.h
@@ -2,9 +2,10 @@
 #define _FCOE_LIBFCOE_H_
 
 extern unsigned int libfcoe_debug_logging;
-#define LIBFCOE_LOGGING	    0x01 /* General logging, not categorized */
-#define LIBFCOE_FIP_LOGGING 0x02 /* FIP logging */
-#define LIBFCOE_TRANSPORT_LOGGING	0x04 /* FCoE transport logging */
+#define LIBFCOE_LOGGING	          0x01 /* General logging, not categorized */
+#define LIBFCOE_FIP_LOGGING       0x02 /* FIP logging */
+#define LIBFCOE_TRANSPORT_LOGGING 0x04 /* FCoE transport logging */
+#define LIBFCOE_SYSFS_LOGGING     0x08 /* fcoe_sysfs logging */
 
 #define LIBFCOE_CHECK_LOGGING(LEVEL, CMD)		\
 do {							\
@@ -16,16 +17,19 @@ do {							\
 
 #define LIBFCOE_DBG(fmt, args...)					\
 	LIBFCOE_CHECK_LOGGING(LIBFCOE_LOGGING,				\
-			      printk(KERN_INFO "libfcoe: " fmt, ##args);)
+			      pr_info("libfcoe: " fmt, ##args);)
 
 #define LIBFCOE_FIP_DBG(fip, fmt, args...)				\
 	LIBFCOE_CHECK_LOGGING(LIBFCOE_FIP_LOGGING,			\
-			      printk(KERN_INFO "host%d: fip: " fmt,	\
-				     (fip)->lp->host->host_no, ##args);)
+			      pr_info("host%d: fip: " fmt,		\
+				      (fip)->lp->host->host_no, ##args);)
 
 #define LIBFCOE_TRANSPORT_DBG(fmt, args...)				\
 	LIBFCOE_CHECK_LOGGING(LIBFCOE_TRANSPORT_LOGGING,		\
-			      printk(KERN_INFO "%s: " fmt,		\
-				     __func__, ##args);)
+			      pr_info("%s: " fmt, __func__, ##args);)
+
+#define LIBFCOE_SYSFS_DBG(cdev, fmt, args...)				\
+	LIBFCOE_CHECK_LOGGING(LIBFCOE_SYSFS_LOGGING,			\
+			      pr_info("ctlr_%d: " fmt, cdev->id, ##args);)
 
 #endif /* _FCOE_LIBFCOE_H_ */
diff --git a/drivers/scsi/fnic/Makefile b/drivers/scsi/fnic/Makefile
index 37c3440bc17c..383598fadf04 100644
--- a/drivers/scsi/fnic/Makefile
+++ b/drivers/scsi/fnic/Makefile
@@ -7,6 +7,8 @@ fnic-y	:= \
 	fnic_res.o \
 	fnic_fcs.o \
 	fnic_scsi.o \
+	fnic_trace.o \
+	fnic_debugfs.o \
 	vnic_cq.o \
 	vnic_dev.o \
 	vnic_intr.o \
diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
index 95a5ba29320d..98436c363035 100644
--- a/drivers/scsi/fnic/fnic.h
+++ b/drivers/scsi/fnic/fnic.h
@@ -26,6 +26,7 @@
 #include <scsi/libfcoe.h>
 #include "fnic_io.h"
 #include "fnic_res.h"
+#include "fnic_trace.h"
 #include "vnic_dev.h"
 #include "vnic_wq.h"
 #include "vnic_rq.h"
@@ -56,6 +57,34 @@
 #define FNIC_NO_TAG             -1
 
 /*
+ * Command flags to identify the type of command and for other future
+ * use.
+ */
+#define FNIC_NO_FLAGS                   0
+#define FNIC_IO_INITIALIZED             BIT(0)
+#define FNIC_IO_ISSUED                  BIT(1)
+#define FNIC_IO_DONE                    BIT(2)
+#define FNIC_IO_REQ_NULL                BIT(3)
+#define FNIC_IO_ABTS_PENDING            BIT(4)
+#define FNIC_IO_ABORTED                 BIT(5)
+#define FNIC_IO_ABTS_ISSUED             BIT(6)
+#define FNIC_IO_TERM_ISSUED             BIT(7)
+#define FNIC_IO_INTERNAL_TERM_ISSUED    BIT(8)
+#define FNIC_IO_ABT_TERM_DONE           BIT(9)
+#define FNIC_IO_ABT_TERM_REQ_NULL       BIT(10)
+#define FNIC_IO_ABT_TERM_TIMED_OUT      BIT(11)
+#define FNIC_DEVICE_RESET               BIT(12)  /* Device reset request */
+#define FNIC_DEV_RST_ISSUED             BIT(13)
+#define FNIC_DEV_RST_TIMED_OUT          BIT(14)
+#define FNIC_DEV_RST_ABTS_ISSUED        BIT(15)
+#define FNIC_DEV_RST_TERM_ISSUED        BIT(16)
+#define FNIC_DEV_RST_DONE               BIT(17)
+#define FNIC_DEV_RST_REQ_NULL           BIT(18)
+#define FNIC_DEV_RST_ABTS_DONE          BIT(19)
+#define FNIC_DEV_RST_TERM_DONE          BIT(20)
+#define FNIC_DEV_RST_ABTS_PENDING       BIT(21)
+
+/*
  * Usage of the scsi_cmnd scratchpad.
  * These fields are locked by the hashed io_req_lock.
  */
@@ -64,6 +93,7 @@
 #define CMD_ABTS_STATUS(Cmnd)	((Cmnd)->SCp.Message)
 #define CMD_LR_STATUS(Cmnd)	((Cmnd)->SCp.have_data_in)
 #define CMD_TAG(Cmnd)           ((Cmnd)->SCp.sent_command)
+#define CMD_FLAGS(Cmnd)         ((Cmnd)->SCp.Status)
 
 #define FCPIO_INVALID_CODE 0x100 /* hdr_status value unused by firmware */
 
@@ -71,9 +101,28 @@
 #define FNIC_HOST_RESET_TIMEOUT	     10000	/* mSec */
 #define FNIC_RMDEVICE_TIMEOUT        1000       /* mSec */
 #define FNIC_HOST_RESET_SETTLE_TIME  30         /* Sec */
+#define FNIC_ABT_TERM_DELAY_TIMEOUT  500        /* mSec */
 
 #define FNIC_MAX_FCP_TARGET     256
 
+/**
+ * state_flags to identify host state along along with fnic's state
+ **/
+#define __FNIC_FLAGS_FWRESET		BIT(0) /* fwreset in progress */
+#define __FNIC_FLAGS_BLOCK_IO		BIT(1) /* IOs are blocked */
+
+#define FNIC_FLAGS_NONE			(0)
+#define FNIC_FLAGS_FWRESET		(__FNIC_FLAGS_FWRESET | \
+					__FNIC_FLAGS_BLOCK_IO)
+
+#define FNIC_FLAGS_IO_BLOCKED		(__FNIC_FLAGS_BLOCK_IO)
+
+#define fnic_set_state_flags(fnicp, st_flags)	\
+	__fnic_set_state_flags(fnicp, st_flags, 0)
+
+#define fnic_clear_state_flags(fnicp, st_flags)  \
+	__fnic_set_state_flags(fnicp, st_flags, 1)
+
 extern unsigned int fnic_log_level;
 
 #define FNIC_MAIN_LOGGING 0x01
@@ -170,6 +219,9 @@ struct fnic {
 
 	struct completion *remove_wait; /* device remove thread blocks */
 
+	atomic_t in_flight;		/* io counter */
+	u32 _reserved;			/* fill hole */
+	unsigned long state_flags;	/* protected by host lock */
 	enum fnic_state state;
 	spinlock_t fnic_lock;
 
@@ -267,4 +319,12 @@ const char *fnic_state_to_str(unsigned int state);
 void fnic_log_q_error(struct fnic *fnic);
 void fnic_handle_link_event(struct fnic *fnic);
 
+int fnic_is_abts_pending(struct fnic *, struct scsi_cmnd *);
+
+static inline int
+fnic_chk_state_flags_locked(struct fnic *fnic, unsigned long st_flags)
+{
+	return ((fnic->state_flags & st_flags) == st_flags);
+}
+void __fnic_set_state_flags(struct fnic *, unsigned long, unsigned long);
 #endif /* _FNIC_H_ */
diff --git a/drivers/scsi/fnic/fnic_debugfs.c b/drivers/scsi/fnic/fnic_debugfs.c
new file mode 100644
index 000000000000..adc1f7f471f5
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_debugfs.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright 2012 Cisco Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/debugfs.h>
+#include "fnic.h"
+
+static struct dentry *fnic_trace_debugfs_root;
+static struct dentry *fnic_trace_debugfs_file;
+static struct dentry *fnic_trace_enable;
+
+/*
+ * fnic_trace_ctrl_open - Open the trace_enable file
+ * @inode: The inode pointer.
+ * @file: The file pointer to attach the trace enable/disable flag.
+ *
+ * Description:
+ * This routine opens a debugsfs file trace_enable.
+ *
+ * Returns:
+ * This function returns zero if successful.
+ */
+static int fnic_trace_ctrl_open(struct inode *inode, struct file *filp)
+{
+	filp->private_data = inode->i_private;
+	return 0;
+}
+
+/*
+ * fnic_trace_ctrl_read - Read a trace_enable debugfs file
+ * @filp: The file pointer to read from.
+ * @ubuf: The buffer to copy the data to.
+ * @cnt: The number of bytes to read.
+ * @ppos: The position in the file to start reading from.
+ *
+ * Description:
+ * This routine reads value of variable fnic_tracing_enabled
+ * and stores into local @buf. It will start reading file at @ppos and
+ * copy up to @cnt of data to @ubuf from @buf.
+ *
+ * Returns:
+ * This function returns the amount of data that was read.
+ */
+static ssize_t fnic_trace_ctrl_read(struct file *filp,
+				  char __user *ubuf,
+				  size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	int len;
+	len = sprintf(buf, "%u\n", fnic_tracing_enabled);
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
+}
+
+/*
+ * fnic_trace_ctrl_write - Write to trace_enable debugfs file
+ * @filp: The file pointer to write from.
+ * @ubuf: The buffer to copy the data from.
+ * @cnt: The number of bytes to write.
+ * @ppos: The position in the file to start writing to.
+ *
+ * Description:
+ * This routine writes data from user buffer @ubuf to buffer @buf and
+ * sets fnic_tracing_enabled value as per user input.
+ *
+ * Returns:
+ * This function returns the amount of data that was written.
+ */
+static ssize_t fnic_trace_ctrl_write(struct file *filp,
+				  const char __user *ubuf,
+				  size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	unsigned long val;
+	int ret;
+
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	fnic_tracing_enabled = val;
+	(*ppos)++;
+
+	return cnt;
+}
+
+/*
+ * fnic_trace_debugfs_open - Open the fnic trace log
+ * @inode: The inode pointer
+ * @file: The file pointer to attach the log output
+ *
+ * Description:
+ * This routine is the entry point for the debugfs open file operation.
+ * It allocates the necessary buffer for the log, fills the buffer from
+ * the in-memory log and then returns a pointer to that log in
+ * the private_data field in @file.
+ *
+ * Returns:
+ * This function returns zero if successful. On error it will return
+ * a negative error value.
+ */
+static int fnic_trace_debugfs_open(struct inode *inode,
+				  struct file *file)
+{
+	fnic_dbgfs_t *fnic_dbg_prt;
+	fnic_dbg_prt = kzalloc(sizeof(fnic_dbgfs_t), GFP_KERNEL);
+	if (!fnic_dbg_prt)
+		return -ENOMEM;
+
+	fnic_dbg_prt->buffer = vmalloc((3*(trace_max_pages * PAGE_SIZE)));
+	if (!fnic_dbg_prt->buffer) {
+		kfree(fnic_dbg_prt);
+		return -ENOMEM;
+	}
+	memset((void *)fnic_dbg_prt->buffer, 0,
+			  (3*(trace_max_pages * PAGE_SIZE)));
+	fnic_dbg_prt->buffer_len = fnic_get_trace_data(fnic_dbg_prt);
+	file->private_data = fnic_dbg_prt;
+	return 0;
+}
+
+/*
+ * fnic_trace_debugfs_lseek - Seek through a debugfs file
+ * @file: The file pointer to seek through.
+ * @offset: The offset to seek to or the amount to seek by.
+ * @howto: Indicates how to seek.
+ *
+ * Description:
+ * This routine is the entry point for the debugfs lseek file operation.
+ * The @howto parameter indicates whether @offset is the offset to directly
+ * seek to, or if it is a value to seek forward or reverse by. This function
+ * figures out what the new offset of the debugfs file will be and assigns
+ * that value to the f_pos field of @file.
+ *
+ * Returns:
+ * This function returns the new offset if successful and returns a negative
+ * error if unable to process the seek.
+ */
+static loff_t fnic_trace_debugfs_lseek(struct file *file,
+					loff_t offset,
+					int howto)
+{
+	fnic_dbgfs_t *fnic_dbg_prt = file->private_data;
+	loff_t pos = -1;
+
+	switch (howto) {
+	case 0:
+		pos = offset;
+		break;
+	case 1:
+		pos = file->f_pos + offset;
+		break;
+	case 2:
+		pos = fnic_dbg_prt->buffer_len - offset;
+	}
+	return (pos < 0 || pos > fnic_dbg_prt->buffer_len) ?
+			  -EINVAL : (file->f_pos = pos);
+}
+
+/*
+ * fnic_trace_debugfs_read - Read a debugfs file
+ * @file: The file pointer to read from.
+ * @ubuf: The buffer to copy the data to.
+ * @nbytes: The number of bytes to read.
+ * @pos: The position in the file to start reading from.
+ *
+ * Description:
+ * This routine reads data from the buffer indicated in the private_data
+ * field of @file. It will start reading at @pos and copy up to @nbytes of
+ * data to @ubuf.
+ *
+ * Returns:
+ * This function returns the amount of data that was read (this could be
+ * less than @nbytes if the end of the file was reached).
+ */
+static ssize_t fnic_trace_debugfs_read(struct file *file,
+					char __user *ubuf,
+					size_t nbytes,
+					loff_t *pos)
+{
+	fnic_dbgfs_t *fnic_dbg_prt = file->private_data;
+	int rc = 0;
+	rc = simple_read_from_buffer(ubuf, nbytes, pos,
+				  fnic_dbg_prt->buffer,
+				  fnic_dbg_prt->buffer_len);
+	return rc;
+}
+
+/*
+ * fnic_trace_debugfs_release - Release the buffer used to store
+ * debugfs file data
+ * @inode: The inode pointer
+ * @file: The file pointer that contains the buffer to release
+ *
+ * Description:
+ * This routine frees the buffer that was allocated when the debugfs
+ * file was opened.
+ *
+ * Returns:
+ * This function returns zero.
+ */
+static int fnic_trace_debugfs_release(struct inode *inode,
+					  struct file *file)
+{
+	fnic_dbgfs_t *fnic_dbg_prt = file->private_data;
+
+	vfree(fnic_dbg_prt->buffer);
+	kfree(fnic_dbg_prt);
+	return 0;
+}
+
+static const struct file_operations fnic_trace_ctrl_fops = {
+	.owner = THIS_MODULE,
+	.open = fnic_trace_ctrl_open,
+	.read = fnic_trace_ctrl_read,
+	.write = fnic_trace_ctrl_write,
+};
+
+static const struct file_operations fnic_trace_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = fnic_trace_debugfs_open,
+	.llseek = fnic_trace_debugfs_lseek,
+	.read = fnic_trace_debugfs_read,
+	.release = fnic_trace_debugfs_release,
+};
+
+/*
+ * fnic_trace_debugfs_init - Initialize debugfs for fnic trace logging
+ *
+ * Description:
+ * When Debugfs is configured this routine sets up the fnic debugfs
+ * file system. If not already created, this routine will create the
+ * fnic directory. It will create file trace to log fnic trace buffer
+ * output into debugfs and it will also create file trace_enable to
+ * control enable/disable of trace logging into trace buffer.
+ */
+int fnic_trace_debugfs_init(void)
+{
+	int rc = -1;
+	fnic_trace_debugfs_root = debugfs_create_dir("fnic", NULL);
+	if (!fnic_trace_debugfs_root) {
+		printk(KERN_DEBUG "Cannot create debugfs root\n");
+		return rc;
+	}
+	fnic_trace_enable = debugfs_create_file("tracing_enable",
+					  S_IFREG|S_IRUGO|S_IWUSR,
+					  fnic_trace_debugfs_root,
+					  NULL, &fnic_trace_ctrl_fops);
+
+	if (!fnic_trace_enable) {
+		printk(KERN_DEBUG "Cannot create trace_enable file"
+				  " under debugfs");
+		return rc;
+	}
+
+	fnic_trace_debugfs_file = debugfs_create_file("trace",
+						  S_IFREG|S_IRUGO|S_IWUSR,
+						  fnic_trace_debugfs_root,
+						  NULL,
+						  &fnic_trace_debugfs_fops);
+
+	if (!fnic_trace_debugfs_file) {
+		printk(KERN_DEBUG "Cannot create trace file under debugfs");
+		return rc;
+	}
+	rc = 0;
+	return rc;
+}
+
+/*
+ * fnic_trace_debugfs_terminate - Tear down debugfs infrastructure
+ *
+ * Description:
+ * When Debugfs is configured this routine removes debugfs file system
+ * elements that are specific to fnic trace logging.
+ */
+void fnic_trace_debugfs_terminate(void)
+{
+	if (fnic_trace_debugfs_file) {
+		debugfs_remove(fnic_trace_debugfs_file);
+		fnic_trace_debugfs_file = NULL;
+	}
+	if (fnic_trace_enable) {
+		debugfs_remove(fnic_trace_enable);
+		fnic_trace_enable = NULL;
+	}
+	if (fnic_trace_debugfs_root) {
+		debugfs_remove(fnic_trace_debugfs_root);
+		fnic_trace_debugfs_root = NULL;
+	}
+}
diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c
index 3c53c3478ee7..483eb9dbe663 100644
--- a/drivers/scsi/fnic/fnic_fcs.c
+++ b/drivers/scsi/fnic/fnic_fcs.c
@@ -495,7 +495,8 @@ void fnic_eth_send(struct fcoe_ctlr *fip, struct sk_buff *skb)
 	}
 
 	fnic_queue_wq_eth_desc(wq, skb, pa, skb->len,
-			       fnic->vlan_hw_insert, fnic->vlan_id, 1);
+			       0 /* hw inserts cos value */,
+			       fnic->vlan_id, 1);
 	spin_unlock_irqrestore(&fnic->wq_lock[0], flags);
 }
 
@@ -563,7 +564,8 @@ static int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp)
 	}
 
 	fnic_queue_wq_desc(wq, skb, pa, tot_len, fr_eof(fp),
-			   fnic->vlan_hw_insert, fnic->vlan_id, 1, 1, 1);
+			   0 /* hw inserts cos value */,
+			   fnic->vlan_id, 1, 1, 1);
 fnic_send_frame_end:
 	spin_unlock_irqrestore(&fnic->wq_lock[0], flags);
 
diff --git a/drivers/scsi/fnic/fnic_io.h b/drivers/scsi/fnic/fnic_io.h
index f0b896988cd5..c35b8f1889ea 100644
--- a/drivers/scsi/fnic/fnic_io.h
+++ b/drivers/scsi/fnic/fnic_io.h
@@ -21,7 +21,7 @@
 #include <scsi/fc/fc_fcp.h>
 
 #define FNIC_DFLT_SG_DESC_CNT  32
-#define FNIC_MAX_SG_DESC_CNT        1024    /* Maximum descriptors per sgl */
+#define FNIC_MAX_SG_DESC_CNT        256     /* Maximum descriptors per sgl */
 #define FNIC_SG_DESC_ALIGN          16      /* Descriptor address alignment */
 
 struct host_sg_desc {
@@ -45,7 +45,8 @@ enum fnic_sgl_list_type {
 };
 
 enum fnic_ioreq_state {
-	FNIC_IOREQ_CMD_PENDING = 0,
+	FNIC_IOREQ_NOT_INITED = 0,
+	FNIC_IOREQ_CMD_PENDING,
 	FNIC_IOREQ_ABTS_PENDING,
 	FNIC_IOREQ_ABTS_COMPLETE,
 	FNIC_IOREQ_CMD_COMPLETE,
@@ -60,6 +61,7 @@ struct fnic_io_req {
 	u8 sgl_type; /* device DMA descriptor list type */
 	u8 io_completed:1; /* set to 1 when fw completes IO */
 	u32 port_id; /* remote port DID */
+	unsigned long start_time; /* in jiffies */
 	struct completion *abts_done; /* completion for abts */
 	struct completion *dr_done; /* completion for device reset */
 };
diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c
index fbf3ac6e0c55..d601ac543c52 100644
--- a/drivers/scsi/fnic/fnic_main.c
+++ b/drivers/scsi/fnic/fnic_main.c
@@ -68,6 +68,10 @@ unsigned int fnic_log_level;
 module_param(fnic_log_level, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(fnic_log_level, "bit mask of fnic logging levels");
 
+unsigned int fnic_trace_max_pages = 16;
+module_param(fnic_trace_max_pages, uint, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(fnic_trace_max_pages, "Total allocated memory pages "
+					"for fnic trace buffer");
 
 static struct libfc_function_template fnic_transport_template = {
 	.frame_send = fnic_send,
@@ -624,6 +628,9 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 	fnic->state = FNIC_IN_FC_MODE;
 
+	atomic_set(&fnic->in_flight, 0);
+	fnic->state_flags = FNIC_FLAGS_NONE;
+
 	/* Enable hardware stripping of vlan header on ingress */
 	fnic_set_nic_config(fnic, 0, 0, 0, 0, 0, 0, 1);
 
@@ -858,6 +865,14 @@ static int __init fnic_init_module(void)
 
 	printk(KERN_INFO PFX "%s, ver %s\n", DRV_DESCRIPTION, DRV_VERSION);
 
+	/* Allocate memory for trace buffer */
+	err = fnic_trace_buf_init();
+	if (err < 0) {
+		printk(KERN_ERR PFX "Trace buffer initialization Failed "
+				  "Fnic Tracing utility is disabled\n");
+		fnic_trace_free();
+	}
+
 	/* Create a cache for allocation of default size sgls */
 	len = sizeof(struct fnic_dflt_sgl_list);
 	fnic_sgl_cache[FNIC_SGL_CACHE_DFLT] = kmem_cache_create
@@ -928,6 +943,7 @@ err_create_fnic_ioreq_slab:
 err_create_fnic_sgl_slab_max:
 	kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]);
 err_create_fnic_sgl_slab_dflt:
+	fnic_trace_free();
 	return err;
 }
 
@@ -939,6 +955,7 @@ static void __exit fnic_cleanup_module(void)
 	kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]);
 	kmem_cache_destroy(fnic_io_req_cache);
 	fc_release_transport(fnic_fc_transport);
+	fnic_trace_free();
 }
 
 module_init(fnic_init_module);
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index c40ce52ed7c6..be99e7549d89 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -47,6 +47,7 @@ const char *fnic_state_str[] = {
 };
 
 static const char *fnic_ioreq_state_str[] = {
+	[FNIC_IOREQ_NOT_INITED] = "FNIC_IOREQ_NOT_INITED",
 	[FNIC_IOREQ_CMD_PENDING] = "FNIC_IOREQ_CMD_PENDING",
 	[FNIC_IOREQ_ABTS_PENDING] = "FNIC_IOREQ_ABTS_PENDING",
 	[FNIC_IOREQ_ABTS_COMPLETE] = "FNIC_IOREQ_ABTS_COMPLETE",
@@ -165,6 +166,33 @@ static int free_wq_copy_descs(struct fnic *fnic, struct vnic_wq_copy *wq)
 }
 
 
+/**
+ * __fnic_set_state_flags
+ * Sets/Clears bits in fnic's state_flags
+ **/
+void
+__fnic_set_state_flags(struct fnic *fnic, unsigned long st_flags,
+			unsigned long clearbits)
+{
+	struct Scsi_Host *host = fnic->lport->host;
+	int sh_locked = spin_is_locked(host->host_lock);
+	unsigned long flags = 0;
+
+	if (!sh_locked)
+		spin_lock_irqsave(host->host_lock, flags);
+
+	if (clearbits)
+		fnic->state_flags &= ~st_flags;
+	else
+		fnic->state_flags |= st_flags;
+
+	if (!sh_locked)
+		spin_unlock_irqrestore(host->host_lock, flags);
+
+	return;
+}
+
+
 /*
  * fnic_fw_reset_handler
  * Routine to send reset msg to fw
@@ -175,9 +203,16 @@ int fnic_fw_reset_handler(struct fnic *fnic)
 	int ret = 0;
 	unsigned long flags;
 
+	/* indicate fwreset to io path */
+	fnic_set_state_flags(fnic, FNIC_FLAGS_FWRESET);
+
 	skb_queue_purge(&fnic->frame_queue);
 	skb_queue_purge(&fnic->tx_queue);
 
+	/* wait for io cmpl */
+	while (atomic_read(&fnic->in_flight))
+		schedule_timeout(msecs_to_jiffies(1));
+
 	spin_lock_irqsave(&fnic->wq_copy_lock[0], flags);
 
 	if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0])
@@ -193,9 +228,12 @@ int fnic_fw_reset_handler(struct fnic *fnic)
 	if (!ret)
 		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
 			      "Issued fw reset\n");
-	else
+	else {
+		fnic_clear_state_flags(fnic, FNIC_FLAGS_FWRESET);
 		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
 			      "Failed to issue fw reset\n");
+	}
+
 	return ret;
 }
 
@@ -312,6 +350,8 @@ static inline int fnic_queue_wq_copy_desc(struct fnic *fnic,
 
 	if (unlikely(!vnic_wq_copy_desc_avail(wq))) {
 		spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags);
+		FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+			  "fnic_queue_wq_copy_desc failure - no descriptors\n");
 		return SCSI_MLQUEUE_HOST_BUSY;
 	}
 
@@ -351,16 +391,20 @@ static inline int fnic_queue_wq_copy_desc(struct fnic *fnic,
  */
 static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 {
-	struct fc_lport *lp;
+	struct fc_lport *lp = shost_priv(sc->device->host);
 	struct fc_rport *rport;
-	struct fnic_io_req *io_req;
-	struct fnic *fnic;
+	struct fnic_io_req *io_req = NULL;
+	struct fnic *fnic = lport_priv(lp);
 	struct vnic_wq_copy *wq;
 	int ret;
-	int sg_count;
+	u64 cmd_trace;
+	int sg_count = 0;
 	unsigned long flags;
 	unsigned long ptr;
 
+	if (unlikely(fnic_chk_state_flags_locked(fnic, FNIC_FLAGS_IO_BLOCKED)))
+		return SCSI_MLQUEUE_HOST_BUSY;
+
 	rport = starget_to_rport(scsi_target(sc->device));
 	ret = fc_remote_port_chkready(rport);
 	if (ret) {
@@ -369,20 +413,21 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_
 		return 0;
 	}
 
-	lp = shost_priv(sc->device->host);
 	if (lp->state != LPORT_ST_READY || !(lp->link_up))
 		return SCSI_MLQUEUE_HOST_BUSY;
 
+	atomic_inc(&fnic->in_flight);
+
 	/*
 	 * Release host lock, use driver resource specific locks from here.
 	 * Don't re-enable interrupts in case they were disabled prior to the
 	 * caller disabling them.
 	 */
 	spin_unlock(lp->host->host_lock);
+	CMD_STATE(sc) = FNIC_IOREQ_NOT_INITED;
+	CMD_FLAGS(sc) = FNIC_NO_FLAGS;
 
 	/* Get a new io_req for this SCSI IO */
-	fnic = lport_priv(lp);
-
 	io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC);
 	if (!io_req) {
 		ret = SCSI_MLQUEUE_HOST_BUSY;
@@ -393,6 +438,9 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_
 	/* Map the data buffer */
 	sg_count = scsi_dma_map(sc);
 	if (sg_count < 0) {
+		FNIC_TRACE(fnic_queuecommand, sc->device->host->host_no,
+			  sc->request->tag, sc, 0, sc->cmnd[0],
+			  sg_count, CMD_STATE(sc));
 		mempool_free(io_req, fnic->io_req_pool);
 		goto out;
 	}
@@ -427,8 +475,10 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_
 
 	/* initialize rest of io_req */
 	io_req->port_id = rport->port_id;
+	io_req->start_time = jiffies;
 	CMD_STATE(sc) = FNIC_IOREQ_CMD_PENDING;
 	CMD_SP(sc) = (char *)io_req;
+	CMD_FLAGS(sc) |= FNIC_IO_INITIALIZED;
 	sc->scsi_done = done;
 
 	/* create copy wq desc and enqueue it */
@@ -440,7 +490,9 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_
 		 * refetch the pointer under the lock.
 		 */
 		spinlock_t *io_lock = fnic_io_lock_hash(fnic, sc);
-
+		FNIC_TRACE(fnic_queuecommand, sc->device->host->host_no,
+			  sc->request->tag, sc, 0, 0, 0,
+			  (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
 		spin_lock_irqsave(io_lock, flags);
 		io_req = (struct fnic_io_req *)CMD_SP(sc);
 		CMD_SP(sc) = NULL;
@@ -450,8 +502,21 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_
 			fnic_release_ioreq_buf(fnic, io_req, sc);
 			mempool_free(io_req, fnic->io_req_pool);
 		}
+	} else {
+		/* REVISIT: Use per IO lock in the final code */
+		CMD_FLAGS(sc) |= FNIC_IO_ISSUED;
 	}
 out:
+	cmd_trace = ((u64)sc->cmnd[0] << 56 | (u64)sc->cmnd[7] << 40 |
+			(u64)sc->cmnd[8] << 32 | (u64)sc->cmnd[2] << 24 |
+			(u64)sc->cmnd[3] << 16 | (u64)sc->cmnd[4] << 8 |
+			sc->cmnd[5]);
+
+	FNIC_TRACE(fnic_queuecommand, sc->device->host->host_no,
+		  sc->request->tag, sc, io_req,
+		  sg_count, cmd_trace,
+		  (((u64)CMD_FLAGS(sc) >> 32) | CMD_STATE(sc)));
+	atomic_dec(&fnic->in_flight);
 	/* acquire host lock before returning to SCSI */
 	spin_lock(lp->host->host_lock);
 	return ret;
@@ -529,6 +594,8 @@ static int fnic_fcpio_fw_reset_cmpl_handler(struct fnic *fnic,
 	fnic_flush_tx(fnic);
 
  reset_cmpl_handler_end:
+	fnic_clear_state_flags(fnic, FNIC_FLAGS_FWRESET);
+
 	return ret;
 }
 
@@ -622,6 +689,7 @@ static inline void fnic_fcpio_ack_handler(struct fnic *fnic,
 	struct vnic_wq_copy *wq;
 	u16 request_out = desc->u.ack.request_out;
 	unsigned long flags;
+	u64 *ox_id_tag = (u64 *)(void *)desc;
 
 	/* mark the ack state */
 	wq = &fnic->wq_copy[cq_index - fnic->raw_wq_count - fnic->rq_count];
@@ -632,6 +700,9 @@ static inline void fnic_fcpio_ack_handler(struct fnic *fnic,
 		fnic->fw_ack_recd[0] = 1;
 	}
 	spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags);
+	FNIC_TRACE(fnic_fcpio_ack_handler,
+		  fnic->lport->host->host_no, 0, 0, ox_id_tag[2], ox_id_tag[3],
+		  ox_id_tag[4], ox_id_tag[5]);
 }
 
 /*
@@ -651,27 +722,53 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
 	struct scsi_cmnd *sc;
 	unsigned long flags;
 	spinlock_t *io_lock;
+	u64 cmd_trace;
+	unsigned long start_time;
 
 	/* Decode the cmpl description to get the io_req id */
 	fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag);
 	fcpio_tag_id_dec(&tag, &id);
+	icmnd_cmpl = &desc->u.icmnd_cmpl;
 
-	if (id >= FNIC_MAX_IO_REQ)
+	if (id >= FNIC_MAX_IO_REQ) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			"Tag out of range tag %x hdr status = %s\n",
+			     id, fnic_fcpio_status_to_str(hdr_status));
 		return;
+	}
 
 	sc = scsi_host_find_tag(fnic->lport->host, id);
 	WARN_ON_ONCE(!sc);
-	if (!sc)
+	if (!sc) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			  "icmnd_cmpl sc is null - "
+			  "hdr status = %s tag = 0x%x desc = 0x%p\n",
+			  fnic_fcpio_status_to_str(hdr_status), id, desc);
+		FNIC_TRACE(fnic_fcpio_icmnd_cmpl_handler,
+			  fnic->lport->host->host_no, id,
+			  ((u64)icmnd_cmpl->_resvd0[1] << 16 |
+			  (u64)icmnd_cmpl->_resvd0[0]),
+			  ((u64)hdr_status << 16 |
+			  (u64)icmnd_cmpl->scsi_status << 8 |
+			  (u64)icmnd_cmpl->flags), desc,
+			  (u64)icmnd_cmpl->residual, 0);
 		return;
+	}
 
 	io_lock = fnic_io_lock_hash(fnic, sc);
 	spin_lock_irqsave(io_lock, flags);
 	io_req = (struct fnic_io_req *)CMD_SP(sc);
 	WARN_ON_ONCE(!io_req);
 	if (!io_req) {
+		CMD_FLAGS(sc) |= FNIC_IO_REQ_NULL;
 		spin_unlock_irqrestore(io_lock, flags);
+		shost_printk(KERN_ERR, fnic->lport->host,
+			  "icmnd_cmpl io_req is null - "
+			  "hdr status = %s tag = 0x%x sc 0x%p\n",
+			  fnic_fcpio_status_to_str(hdr_status), id, sc);
 		return;
 	}
+	start_time = io_req->start_time;
 
 	/* firmware completed the io */
 	io_req->io_completed = 1;
@@ -682,6 +779,28 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
 	 */
 	if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
 		spin_unlock_irqrestore(io_lock, flags);
+		CMD_FLAGS(sc) |= FNIC_IO_ABTS_PENDING;
+		switch (hdr_status) {
+		case FCPIO_SUCCESS:
+			CMD_FLAGS(sc) |= FNIC_IO_DONE;
+			FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+				  "icmnd_cmpl ABTS pending hdr status = %s "
+				  "sc  0x%p scsi_status %x  residual %d\n",
+				  fnic_fcpio_status_to_str(hdr_status), sc,
+				  icmnd_cmpl->scsi_status,
+				  icmnd_cmpl->residual);
+			break;
+		case FCPIO_ABORTED:
+			CMD_FLAGS(sc) |= FNIC_IO_ABORTED;
+			break;
+		default:
+			FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+					  "icmnd_cmpl abts pending "
+					  "hdr status = %s tag = 0x%x sc = 0x%p\n",
+					  fnic_fcpio_status_to_str(hdr_status),
+					  id, sc);
+			break;
+		}
 		return;
 	}
 
@@ -765,6 +884,7 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
 
 	/* Break link with the SCSI command */
 	CMD_SP(sc) = NULL;
+	CMD_FLAGS(sc) |= FNIC_IO_DONE;
 
 	spin_unlock_irqrestore(io_lock, flags);
 
@@ -772,6 +892,20 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
 
 	mempool_free(io_req, fnic->io_req_pool);
 
+	cmd_trace = ((u64)hdr_status << 56) |
+		  (u64)icmnd_cmpl->scsi_status << 48 |
+		  (u64)icmnd_cmpl->flags << 40 | (u64)sc->cmnd[0] << 32 |
+		  (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 |
+		  (u64)sc->cmnd[4] << 8 | sc->cmnd[5];
+
+	FNIC_TRACE(fnic_fcpio_icmnd_cmpl_handler,
+		  sc->device->host->host_no, id, sc,
+		  ((u64)icmnd_cmpl->_resvd0[1] << 56 |
+		  (u64)icmnd_cmpl->_resvd0[0] << 48 |
+		  jiffies_to_msecs(jiffies - start_time)),
+		  desc, cmd_trace,
+		  (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
+
 	if (sc->sc_data_direction == DMA_FROM_DEVICE) {
 		fnic->lport->host_stats.fcp_input_requests++;
 		fnic->fcp_input_bytes += xfer_len;
@@ -784,7 +918,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
 	/* Call SCSI completion function to complete the IO */
 	if (sc->scsi_done)
 		sc->scsi_done(sc);
-
 }
 
 /* fnic_fcpio_itmf_cmpl_handler
@@ -801,28 +934,54 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic,
 	struct fnic_io_req *io_req;
 	unsigned long flags;
 	spinlock_t *io_lock;
+	unsigned long start_time;
 
 	fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag);
 	fcpio_tag_id_dec(&tag, &id);
 
-	if ((id & FNIC_TAG_MASK) >= FNIC_MAX_IO_REQ)
+	if ((id & FNIC_TAG_MASK) >= FNIC_MAX_IO_REQ) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+		"Tag out of range tag %x hdr status = %s\n",
+		id, fnic_fcpio_status_to_str(hdr_status));
 		return;
+	}
 
 	sc = scsi_host_find_tag(fnic->lport->host, id & FNIC_TAG_MASK);
 	WARN_ON_ONCE(!sc);
-	if (!sc)
+	if (!sc) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			  "itmf_cmpl sc is null - hdr status = %s tag = 0x%x\n",
+			  fnic_fcpio_status_to_str(hdr_status), id);
 		return;
-
+	}
 	io_lock = fnic_io_lock_hash(fnic, sc);
 	spin_lock_irqsave(io_lock, flags);
 	io_req = (struct fnic_io_req *)CMD_SP(sc);
 	WARN_ON_ONCE(!io_req);
 	if (!io_req) {
 		spin_unlock_irqrestore(io_lock, flags);
+		CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL;
+		shost_printk(KERN_ERR, fnic->lport->host,
+			  "itmf_cmpl io_req is null - "
+			  "hdr status = %s tag = 0x%x sc 0x%p\n",
+			  fnic_fcpio_status_to_str(hdr_status), id, sc);
 		return;
 	}
+	start_time = io_req->start_time;
 
-	if (id & FNIC_TAG_ABORT) {
+	if ((id & FNIC_TAG_ABORT) && (id & FNIC_TAG_DEV_RST)) {
+		/* Abort and terminate completion of device reset req */
+		/* REVISIT : Add asserts about various flags */
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			      "dev reset abts cmpl recd. id %x status %s\n",
+			      id, fnic_fcpio_status_to_str(hdr_status));
+		CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
+		CMD_ABTS_STATUS(sc) = hdr_status;
+		CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE;
+		if (io_req->abts_done)
+			complete(io_req->abts_done);
+		spin_unlock_irqrestore(io_lock, flags);
+	} else if (id & FNIC_TAG_ABORT) {
 		/* Completion of abort cmd */
 		if (CMD_STATE(sc) != FNIC_IOREQ_ABTS_PENDING) {
 			/* This is a late completion. Ignore it */
@@ -832,6 +991,7 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic,
 		CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
 		CMD_ABTS_STATUS(sc) = hdr_status;
 
+		CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE;
 		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
 			      "abts cmpl recd. id %d status %s\n",
 			      (int)(id & FNIC_TAG_MASK),
@@ -855,14 +1015,58 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic,
 
 			fnic_release_ioreq_buf(fnic, io_req, sc);
 			mempool_free(io_req, fnic->io_req_pool);
-			if (sc->scsi_done)
+			if (sc->scsi_done) {
+				FNIC_TRACE(fnic_fcpio_itmf_cmpl_handler,
+					sc->device->host->host_no, id,
+					sc,
+					jiffies_to_msecs(jiffies - start_time),
+					desc,
+					(((u64)hdr_status << 40) |
+					(u64)sc->cmnd[0] << 32 |
+					(u64)sc->cmnd[2] << 24 |
+					(u64)sc->cmnd[3] << 16 |
+					(u64)sc->cmnd[4] << 8 | sc->cmnd[5]),
+					(((u64)CMD_FLAGS(sc) << 32) |
+					CMD_STATE(sc)));
 				sc->scsi_done(sc);
+			}
 		}
 
 	} else if (id & FNIC_TAG_DEV_RST) {
 		/* Completion of device reset */
 		CMD_LR_STATUS(sc) = hdr_status;
+		if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
+			spin_unlock_irqrestore(io_lock, flags);
+			CMD_FLAGS(sc) |= FNIC_DEV_RST_ABTS_PENDING;
+			FNIC_TRACE(fnic_fcpio_itmf_cmpl_handler,
+				  sc->device->host->host_no, id, sc,
+				  jiffies_to_msecs(jiffies - start_time),
+				  desc, 0,
+				  (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
+			FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+				"Terminate pending "
+				"dev reset cmpl recd. id %d status %s\n",
+				(int)(id & FNIC_TAG_MASK),
+				fnic_fcpio_status_to_str(hdr_status));
+			return;
+		}
+		if (CMD_FLAGS(sc) & FNIC_DEV_RST_TIMED_OUT) {
+			/* Need to wait for terminate completion */
+			spin_unlock_irqrestore(io_lock, flags);
+			FNIC_TRACE(fnic_fcpio_itmf_cmpl_handler,
+				  sc->device->host->host_no, id, sc,
+				  jiffies_to_msecs(jiffies - start_time),
+				  desc, 0,
+				  (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
+			FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+				"dev reset cmpl recd after time out. "
+				"id %d status %s\n",
+				(int)(id & FNIC_TAG_MASK),
+				fnic_fcpio_status_to_str(hdr_status));
+			return;
+		}
 		CMD_STATE(sc) = FNIC_IOREQ_CMD_COMPLETE;
+		CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE;
 		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
 			      "dev reset cmpl recd. id %d status %s\n",
 			      (int)(id & FNIC_TAG_MASK),
@@ -889,7 +1093,6 @@ static int fnic_fcpio_cmpl_handler(struct vnic_dev *vdev,
 				   struct fcpio_fw_req *desc)
 {
 	struct fnic *fnic = vnic_dev_priv(vdev);
-	int ret = 0;
 
 	switch (desc->hdr.type) {
 	case FCPIO_ACK: /* fw copied copy wq desc to its queue */
@@ -906,11 +1109,11 @@ static int fnic_fcpio_cmpl_handler(struct vnic_dev *vdev,
 
 	case FCPIO_FLOGI_REG_CMPL: /* fw completed flogi_reg */
 	case FCPIO_FLOGI_FIP_REG_CMPL: /* fw completed flogi_fip_reg */
-		ret = fnic_fcpio_flogi_reg_cmpl_handler(fnic, desc);
+		fnic_fcpio_flogi_reg_cmpl_handler(fnic, desc);
 		break;
 
 	case FCPIO_RESET_CMPL: /* fw completed reset */
-		ret = fnic_fcpio_fw_reset_cmpl_handler(fnic, desc);
+		fnic_fcpio_fw_reset_cmpl_handler(fnic, desc);
 		break;
 
 	default:
@@ -920,7 +1123,7 @@ static int fnic_fcpio_cmpl_handler(struct vnic_dev *vdev,
 		break;
 	}
 
-	return ret;
+	return 0;
 }
 
 /*
@@ -950,6 +1153,7 @@ static void fnic_cleanup_io(struct fnic *fnic, int exclude_id)
 	unsigned long flags = 0;
 	struct scsi_cmnd *sc;
 	spinlock_t *io_lock;
+	unsigned long start_time = 0;
 
 	for (i = 0; i < FNIC_MAX_IO_REQ; i++) {
 		if (i == exclude_id)
@@ -962,6 +1166,23 @@ static void fnic_cleanup_io(struct fnic *fnic, int exclude_id)
 		io_lock = fnic_io_lock_hash(fnic, sc);
 		spin_lock_irqsave(io_lock, flags);
 		io_req = (struct fnic_io_req *)CMD_SP(sc);
+		if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
+			!(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) {
+			/*
+			 * We will be here only when FW completes reset
+			 * without sending completions for outstanding ios.
+			 */
+			CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE;
+			if (io_req && io_req->dr_done)
+				complete(io_req->dr_done);
+			else if (io_req && io_req->abts_done)
+				complete(io_req->abts_done);
+			spin_unlock_irqrestore(io_lock, flags);
+			continue;
+		} else if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
+			spin_unlock_irqrestore(io_lock, flags);
+			continue;
+		}
 		if (!io_req) {
 			spin_unlock_irqrestore(io_lock, flags);
 			goto cleanup_scsi_cmd;
@@ -975,6 +1196,7 @@ static void fnic_cleanup_io(struct fnic *fnic, int exclude_id)
 		 * If there is a scsi_cmnd associated with this io_req, then
 		 * free the corresponding state
 		 */
+		start_time = io_req->start_time;
 		fnic_release_ioreq_buf(fnic, io_req, sc);
 		mempool_free(io_req, fnic->io_req_pool);
 
@@ -984,8 +1206,18 @@ cleanup_scsi_cmd:
 			      " DID_TRANSPORT_DISRUPTED\n");
 
 		/* Complete the command to SCSI */
-		if (sc->scsi_done)
+		if (sc->scsi_done) {
+			FNIC_TRACE(fnic_cleanup_io,
+				  sc->device->host->host_no, i, sc,
+				  jiffies_to_msecs(jiffies - start_time),
+				  0, ((u64)sc->cmnd[0] << 32 |
+				  (u64)sc->cmnd[2] << 24 |
+				  (u64)sc->cmnd[3] << 16 |
+				  (u64)sc->cmnd[4] << 8 | sc->cmnd[5]),
+				  (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
+
 			sc->scsi_done(sc);
+		}
 	}
 }
 
@@ -998,6 +1230,7 @@ void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq,
 	struct scsi_cmnd *sc;
 	unsigned long flags;
 	spinlock_t *io_lock;
+	unsigned long start_time = 0;
 
 	/* get the tag reference */
 	fcpio_tag_id_dec(&desc->hdr.tag, &id);
@@ -1027,6 +1260,7 @@ void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq,
 
 	spin_unlock_irqrestore(io_lock, flags);
 
+	start_time = io_req->start_time;
 	fnic_release_ioreq_buf(fnic, io_req, sc);
 	mempool_free(io_req, fnic->io_req_pool);
 
@@ -1035,8 +1269,17 @@ wq_copy_cleanup_scsi_cmd:
 	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "wq_copy_cleanup_handler:"
 		      " DID_NO_CONNECT\n");
 
-	if (sc->scsi_done)
+	if (sc->scsi_done) {
+		FNIC_TRACE(fnic_wq_copy_cleanup_handler,
+			  sc->device->host->host_no, id, sc,
+			  jiffies_to_msecs(jiffies - start_time),
+			  0, ((u64)sc->cmnd[0] << 32 |
+			  (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 |
+			  (u64)sc->cmnd[4] << 8 | sc->cmnd[5]),
+			  (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
+
 		sc->scsi_done(sc);
+	}
 }
 
 static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag,
@@ -1044,8 +1287,18 @@ static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag,
 					  struct fnic_io_req *io_req)
 {
 	struct vnic_wq_copy *wq = &fnic->wq_copy[0];
+	struct Scsi_Host *host = fnic->lport->host;
 	unsigned long flags;
 
+	spin_lock_irqsave(host->host_lock, flags);
+	if (unlikely(fnic_chk_state_flags_locked(fnic,
+						FNIC_FLAGS_IO_BLOCKED))) {
+		spin_unlock_irqrestore(host->host_lock, flags);
+		return 1;
+	} else
+		atomic_inc(&fnic->in_flight);
+	spin_unlock_irqrestore(host->host_lock, flags);
+
 	spin_lock_irqsave(&fnic->wq_copy_lock[0], flags);
 
 	if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0])
@@ -1053,6 +1306,9 @@ static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag,
 
 	if (!vnic_wq_copy_desc_avail(wq)) {
 		spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags);
+		atomic_dec(&fnic->in_flight);
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			"fnic_queue_abort_io_req: failure: no descriptors\n");
 		return 1;
 	}
 	fnic_queue_wq_copy_desc_itmf(wq, tag | FNIC_TAG_ABORT,
@@ -1060,12 +1316,15 @@ static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag,
 				     fnic->config.ra_tov, fnic->config.ed_tov);
 
 	spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags);
+	atomic_dec(&fnic->in_flight);
+
 	return 0;
 }
 
-void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
+static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
 {
 	int tag;
+	int abt_tag;
 	struct fnic_io_req *io_req;
 	spinlock_t *io_lock;
 	unsigned long flags;
@@ -1075,13 +1334,14 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
 
 	FNIC_SCSI_DBG(KERN_DEBUG,
 		      fnic->lport->host,
-		      "fnic_rport_reset_exch called portid 0x%06x\n",
+		      "fnic_rport_exch_reset called portid 0x%06x\n",
 		      port_id);
 
 	if (fnic->in_remove)
 		return;
 
 	for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
+		abt_tag = tag;
 		sc = scsi_host_find_tag(fnic->lport->host, tag);
 		if (!sc)
 			continue;
@@ -1096,6 +1356,15 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
 			continue;
 		}
 
+		if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
+			(!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
+			FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			"fnic_rport_exch_reset dev rst not pending sc 0x%p\n",
+			sc);
+			spin_unlock_irqrestore(io_lock, flags);
+			continue;
+		}
+
 		/*
 		 * Found IO that is still pending with firmware and
 		 * belongs to rport that went away
@@ -1104,9 +1373,29 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
 			spin_unlock_irqrestore(io_lock, flags);
 			continue;
 		}
+		if (io_req->abts_done) {
+			shost_printk(KERN_ERR, fnic->lport->host,
+			"fnic_rport_exch_reset: io_req->abts_done is set "
+			"state is %s\n",
+			fnic_ioreq_state_to_str(CMD_STATE(sc)));
+		}
+
+		if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) {
+			shost_printk(KERN_ERR, fnic->lport->host,
+				  "rport_exch_reset "
+				  "IO not yet issued %p tag 0x%x flags "
+				  "%x state %d\n",
+				  sc, tag, CMD_FLAGS(sc), CMD_STATE(sc));
+		}
 		old_ioreq_state = CMD_STATE(sc);
 		CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
 		CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
+		if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
+			abt_tag = (tag | FNIC_TAG_DEV_RST);
+			FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			"fnic_rport_exch_reset dev rst sc 0x%p\n",
+			sc);
+		}
 
 		BUG_ON(io_req->abts_done);
 
@@ -1118,7 +1407,7 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
 		/* Now queue the abort command to firmware */
 		int_to_scsilun(sc->device->lun, &fc_lun);
 
-		if (fnic_queue_abort_io_req(fnic, tag,
+		if (fnic_queue_abort_io_req(fnic, abt_tag,
 					    FCPIO_ITMF_ABT_TASK_TERM,
 					    fc_lun.scsi_lun, io_req)) {
 			/*
@@ -1127,12 +1416,17 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
 			 * aborted later by scsi_eh, or cleaned up during
 			 * lun reset
 			 */
-			io_lock = fnic_io_lock_hash(fnic, sc);
-
 			spin_lock_irqsave(io_lock, flags);
 			if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
 				CMD_STATE(sc) = old_ioreq_state;
 			spin_unlock_irqrestore(io_lock, flags);
+		} else {
+			spin_lock_irqsave(io_lock, flags);
+			if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
+				CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
+			else
+				CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
+			spin_unlock_irqrestore(io_lock, flags);
 		}
 	}
 
@@ -1141,6 +1435,7 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
 void fnic_terminate_rport_io(struct fc_rport *rport)
 {
 	int tag;
+	int abt_tag;
 	struct fnic_io_req *io_req;
 	spinlock_t *io_lock;
 	unsigned long flags;
@@ -1154,14 +1449,15 @@ void fnic_terminate_rport_io(struct fc_rport *rport)
 
 	FNIC_SCSI_DBG(KERN_DEBUG,
 		      fnic->lport->host, "fnic_terminate_rport_io called"
-		      " wwpn 0x%llx, wwnn0x%llx, portid 0x%06x\n",
-		      rport->port_name, rport->node_name,
+		      " wwpn 0x%llx, wwnn0x%llx, rport 0x%p, portid 0x%06x\n",
+		      rport->port_name, rport->node_name, rport,
 		      rport->port_id);
 
 	if (fnic->in_remove)
 		return;
 
 	for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
+		abt_tag = tag;
 		sc = scsi_host_find_tag(fnic->lport->host, tag);
 		if (!sc)
 			continue;
@@ -1180,6 +1476,14 @@ void fnic_terminate_rport_io(struct fc_rport *rport)
 			continue;
 		}
 
+		if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
+			(!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
+			FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			"fnic_terminate_rport_io dev rst not pending sc 0x%p\n",
+			sc);
+			spin_unlock_irqrestore(io_lock, flags);
+			continue;
+		}
 		/*
 		 * Found IO that is still pending with firmware and
 		 * belongs to rport that went away
@@ -1188,9 +1492,27 @@ void fnic_terminate_rport_io(struct fc_rport *rport)
 			spin_unlock_irqrestore(io_lock, flags);
 			continue;
 		}
+		if (io_req->abts_done) {
+			shost_printk(KERN_ERR, fnic->lport->host,
+			"fnic_terminate_rport_io: io_req->abts_done is set "
+			"state is %s\n",
+			fnic_ioreq_state_to_str(CMD_STATE(sc)));
+		}
+		if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) {
+			FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+				  "fnic_terminate_rport_io "
+				  "IO not yet issued %p tag 0x%x flags "
+				  "%x state %d\n",
+				  sc, tag, CMD_FLAGS(sc), CMD_STATE(sc));
+		}
 		old_ioreq_state = CMD_STATE(sc);
 		CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
 		CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
+		if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
+			abt_tag = (tag | FNIC_TAG_DEV_RST);
+			FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			"fnic_terminate_rport_io dev rst sc 0x%p\n", sc);
+		}
 
 		BUG_ON(io_req->abts_done);
 
@@ -1203,7 +1525,7 @@ void fnic_terminate_rport_io(struct fc_rport *rport)
 		/* Now queue the abort command to firmware */
 		int_to_scsilun(sc->device->lun, &fc_lun);
 
-		if (fnic_queue_abort_io_req(fnic, tag,
+		if (fnic_queue_abort_io_req(fnic, abt_tag,
 					    FCPIO_ITMF_ABT_TASK_TERM,
 					    fc_lun.scsi_lun, io_req)) {
 			/*
@@ -1212,12 +1534,17 @@ void fnic_terminate_rport_io(struct fc_rport *rport)
 			 * aborted later by scsi_eh, or cleaned up during
 			 * lun reset
 			 */
-			io_lock = fnic_io_lock_hash(fnic, sc);
-
 			spin_lock_irqsave(io_lock, flags);
 			if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
 				CMD_STATE(sc) = old_ioreq_state;
 			spin_unlock_irqrestore(io_lock, flags);
+		} else {
+			spin_lock_irqsave(io_lock, flags);
+			if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
+				CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
+			else
+				CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
+			spin_unlock_irqrestore(io_lock, flags);
 		}
 	}
 
@@ -1232,13 +1559,15 @@ int fnic_abort_cmd(struct scsi_cmnd *sc)
 {
 	struct fc_lport *lp;
 	struct fnic *fnic;
-	struct fnic_io_req *io_req;
+	struct fnic_io_req *io_req = NULL;
 	struct fc_rport *rport;
 	spinlock_t *io_lock;
 	unsigned long flags;
+	unsigned long start_time = 0;
 	int ret = SUCCESS;
-	u32 task_req;
+	u32 task_req = 0;
 	struct scsi_lun fc_lun;
+	int tag;
 	DECLARE_COMPLETION_ONSTACK(tm_done);
 
 	/* Wait for rport to unblock */
@@ -1249,9 +1578,13 @@ int fnic_abort_cmd(struct scsi_cmnd *sc)
 
 	fnic = lport_priv(lp);
 	rport = starget_to_rport(scsi_target(sc->device));
-	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
-			"Abort Cmd called FCID 0x%x, LUN 0x%x TAG %d\n",
-			rport->port_id, sc->device->lun, sc->request->tag);
+	tag = sc->request->tag;
+	FNIC_SCSI_DBG(KERN_DEBUG,
+		fnic->lport->host,
+		"Abort Cmd called FCID 0x%x, LUN 0x%x TAG %x flags %x\n",
+		rport->port_id, sc->device->lun, tag, CMD_FLAGS(sc));
+
+	CMD_FLAGS(sc) = FNIC_NO_FLAGS;
 
 	if (lp->state != LPORT_ST_READY || !(lp->link_up)) {
 		ret = FAILED;
@@ -1318,6 +1651,10 @@ int fnic_abort_cmd(struct scsi_cmnd *sc)
 		ret = FAILED;
 		goto fnic_abort_cmd_end;
 	}
+	if (task_req == FCPIO_ITMF_ABT_TASK)
+		CMD_FLAGS(sc) |= FNIC_IO_ABTS_ISSUED;
+	else
+		CMD_FLAGS(sc) |= FNIC_IO_TERM_ISSUED;
 
 	/*
 	 * We queued an abort IO, wait for its completion.
@@ -1336,6 +1673,7 @@ int fnic_abort_cmd(struct scsi_cmnd *sc)
 	io_req = (struct fnic_io_req *)CMD_SP(sc);
 	if (!io_req) {
 		spin_unlock_irqrestore(io_lock, flags);
+		CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL;
 		ret = FAILED;
 		goto fnic_abort_cmd_end;
 	}
@@ -1344,6 +1682,7 @@ int fnic_abort_cmd(struct scsi_cmnd *sc)
 	/* fw did not complete abort, timed out */
 	if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
 		spin_unlock_irqrestore(io_lock, flags);
+		CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_TIMED_OUT;
 		ret = FAILED;
 		goto fnic_abort_cmd_end;
 	}
@@ -1359,12 +1698,21 @@ int fnic_abort_cmd(struct scsi_cmnd *sc)
 
 	spin_unlock_irqrestore(io_lock, flags);
 
+	start_time = io_req->start_time;
 	fnic_release_ioreq_buf(fnic, io_req, sc);
 	mempool_free(io_req, fnic->io_req_pool);
 
 fnic_abort_cmd_end:
+	FNIC_TRACE(fnic_abort_cmd, sc->device->host->host_no,
+		  sc->request->tag, sc,
+		  jiffies_to_msecs(jiffies - start_time),
+		  0, ((u64)sc->cmnd[0] << 32 |
+		  (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 |
+		  (u64)sc->cmnd[4] << 8 | sc->cmnd[5]),
+		  (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
+
 	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
-		      "Returning from abort cmd %s\n",
+		      "Returning from abort cmd type %x %s\n", task_req,
 		      (ret == SUCCESS) ?
 		      "SUCCESS" : "FAILED");
 	return ret;
@@ -1375,16 +1723,28 @@ static inline int fnic_queue_dr_io_req(struct fnic *fnic,
 				       struct fnic_io_req *io_req)
 {
 	struct vnic_wq_copy *wq = &fnic->wq_copy[0];
+	struct Scsi_Host *host = fnic->lport->host;
 	struct scsi_lun fc_lun;
 	int ret = 0;
 	unsigned long intr_flags;
 
+	spin_lock_irqsave(host->host_lock, intr_flags);
+	if (unlikely(fnic_chk_state_flags_locked(fnic,
+						FNIC_FLAGS_IO_BLOCKED))) {
+		spin_unlock_irqrestore(host->host_lock, intr_flags);
+		return FAILED;
+	} else
+		atomic_inc(&fnic->in_flight);
+	spin_unlock_irqrestore(host->host_lock, intr_flags);
+
 	spin_lock_irqsave(&fnic->wq_copy_lock[0], intr_flags);
 
 	if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0])
 		free_wq_copy_descs(fnic, wq);
 
 	if (!vnic_wq_copy_desc_avail(wq)) {
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+			  "queue_dr_io_req failure - no descriptors\n");
 		ret = -EAGAIN;
 		goto lr_io_req_end;
 	}
@@ -1399,6 +1759,7 @@ static inline int fnic_queue_dr_io_req(struct fnic *fnic,
 
 lr_io_req_end:
 	spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags);
+	atomic_dec(&fnic->in_flight);
 
 	return ret;
 }
@@ -1412,7 +1773,7 @@ lr_io_req_end:
 static int fnic_clean_pending_aborts(struct fnic *fnic,
 				     struct scsi_cmnd *lr_sc)
 {
-	int tag;
+	int tag, abt_tag;
 	struct fnic_io_req *io_req;
 	spinlock_t *io_lock;
 	unsigned long flags;
@@ -1421,6 +1782,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
 	struct scsi_lun fc_lun;
 	struct scsi_device *lun_dev = lr_sc->device;
 	DECLARE_COMPLETION_ONSTACK(tm_done);
+	enum fnic_ioreq_state old_ioreq_state;
 
 	for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
 		sc = scsi_host_find_tag(fnic->lport->host, tag);
@@ -1449,7 +1811,41 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
 			      "Found IO in %s on lun\n",
 			      fnic_ioreq_state_to_str(CMD_STATE(sc)));
 
-		BUG_ON(CMD_STATE(sc) != FNIC_IOREQ_ABTS_PENDING);
+		if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
+			spin_unlock_irqrestore(io_lock, flags);
+			continue;
+		}
+		if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
+			(!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
+			FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+				"%s dev rst not pending sc 0x%p\n", __func__,
+				sc);
+			spin_unlock_irqrestore(io_lock, flags);
+			continue;
+		}
+		old_ioreq_state = CMD_STATE(sc);
+		/*
+		 * Any pending IO issued prior to reset is expected to be
+		 * in abts pending state, if not we need to set
+		 * FNIC_IOREQ_ABTS_PENDING to indicate the IO is abort pending.
+		 * When IO is completed, the IO will be handed over and
+		 * handled in this function.
+		 */
+		CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
+
+		if (io_req->abts_done)
+			shost_printk(KERN_ERR, fnic->lport->host,
+			  "%s: io_req->abts_done is set state is %s\n",
+			  __func__, fnic_ioreq_state_to_str(CMD_STATE(sc)));
+
+		BUG_ON(io_req->abts_done);
+
+		abt_tag = tag;
+		if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
+			abt_tag |= FNIC_TAG_DEV_RST;
+			FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+				  "%s: dev rst sc 0x%p\n", __func__, sc);
+		}
 
 		CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
 		io_req->abts_done = &tm_done;
@@ -1458,17 +1854,25 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
 		/* Now queue the abort command to firmware */
 		int_to_scsilun(sc->device->lun, &fc_lun);
 
-		if (fnic_queue_abort_io_req(fnic, tag,
+		if (fnic_queue_abort_io_req(fnic, abt_tag,
 					    FCPIO_ITMF_ABT_TASK_TERM,
 					    fc_lun.scsi_lun, io_req)) {
 			spin_lock_irqsave(io_lock, flags);
 			io_req = (struct fnic_io_req *)CMD_SP(sc);
 			if (io_req)
 				io_req->abts_done = NULL;
+			if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
+				CMD_STATE(sc) = old_ioreq_state;
 			spin_unlock_irqrestore(io_lock, flags);
 			ret = 1;
 			goto clean_pending_aborts_end;
+		} else {
+			spin_lock_irqsave(io_lock, flags);
+			if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
+				CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
+			spin_unlock_irqrestore(io_lock, flags);
 		}
+		CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
 
 		wait_for_completion_timeout(&tm_done,
 					    msecs_to_jiffies
@@ -1479,8 +1883,8 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
 		io_req = (struct fnic_io_req *)CMD_SP(sc);
 		if (!io_req) {
 			spin_unlock_irqrestore(io_lock, flags);
-			ret = 1;
-			goto clean_pending_aborts_end;
+			CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL;
+			continue;
 		}
 
 		io_req->abts_done = NULL;
@@ -1488,6 +1892,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
 		/* if abort is still pending with fw, fail */
 		if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
 			spin_unlock_irqrestore(io_lock, flags);
+			CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE;
 			ret = 1;
 			goto clean_pending_aborts_end;
 		}
@@ -1498,10 +1903,75 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
 		mempool_free(io_req, fnic->io_req_pool);
 	}
 
+	schedule_timeout(msecs_to_jiffies(2 * fnic->config.ed_tov));
+
+	/* walk again to check, if IOs are still pending in fw */
+	if (fnic_is_abts_pending(fnic, lr_sc))
+		ret = FAILED;
+
 clean_pending_aborts_end:
 	return ret;
 }
 
+/**
+ * fnic_scsi_host_start_tag
+ * Allocates tagid from host's tag list
+ **/
+static inline int
+fnic_scsi_host_start_tag(struct fnic *fnic, struct scsi_cmnd *sc)
+{
+	struct blk_queue_tag *bqt = fnic->lport->host->bqt;
+	int tag, ret = SCSI_NO_TAG;
+
+	BUG_ON(!bqt);
+	if (!bqt) {
+		pr_err("Tags are not supported\n");
+		goto end;
+	}
+
+	do {
+		tag = find_next_zero_bit(bqt->tag_map, bqt->max_depth, 1);
+		if (tag >= bqt->max_depth) {
+			pr_err("Tag allocation failure\n");
+			goto end;
+		}
+	} while (test_and_set_bit(tag, bqt->tag_map));
+
+	bqt->tag_index[tag] = sc->request;
+	sc->request->tag = tag;
+	sc->tag = tag;
+	if (!sc->request->special)
+		sc->request->special = sc;
+
+	ret = tag;
+
+end:
+	return ret;
+}
+
+/**
+ * fnic_scsi_host_end_tag
+ * frees tag allocated by fnic_scsi_host_start_tag.
+ **/
+static inline void
+fnic_scsi_host_end_tag(struct fnic *fnic, struct scsi_cmnd *sc)
+{
+	struct blk_queue_tag *bqt = fnic->lport->host->bqt;
+	int tag = sc->request->tag;
+
+	if (tag == SCSI_NO_TAG)
+		return;
+
+	BUG_ON(!bqt || !bqt->tag_index[tag]);
+	if (!bqt)
+		return;
+
+	bqt->tag_index[tag] = NULL;
+	clear_bit(tag, bqt->tag_map);
+
+	return;
+}
+
 /*
  * SCSI Eh thread issues a Lun Reset when one or more commands on a LUN
  * fail to get aborted. It calls driver's eh_device_reset with a SCSI command
@@ -1511,13 +1981,17 @@ int fnic_device_reset(struct scsi_cmnd *sc)
 {
 	struct fc_lport *lp;
 	struct fnic *fnic;
-	struct fnic_io_req *io_req;
+	struct fnic_io_req *io_req = NULL;
 	struct fc_rport *rport;
 	int status;
 	int ret = FAILED;
 	spinlock_t *io_lock;
 	unsigned long flags;
+	unsigned long start_time = 0;
+	struct scsi_lun fc_lun;
+	int tag = 0;
 	DECLARE_COMPLETION_ONSTACK(tm_done);
+	int tag_gen_flag = 0;   /*to track tags allocated by fnic driver*/
 
 	/* Wait for rport to unblock */
 	fc_block_scsi_eh(sc);
@@ -1529,8 +2003,8 @@ int fnic_device_reset(struct scsi_cmnd *sc)
 
 	rport = starget_to_rport(scsi_target(sc->device));
 	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
-			"Device reset called FCID 0x%x, LUN 0x%x\n",
-			rport->port_id, sc->device->lun);
+		      "Device reset called FCID 0x%x, LUN 0x%x sc 0x%p\n",
+		      rport->port_id, sc->device->lun, sc);
 
 	if (lp->state != LPORT_ST_READY || !(lp->link_up))
 		goto fnic_device_reset_end;
@@ -1539,6 +2013,16 @@ int fnic_device_reset(struct scsi_cmnd *sc)
 	if (fc_remote_port_chkready(rport))
 		goto fnic_device_reset_end;
 
+	CMD_FLAGS(sc) = FNIC_DEVICE_RESET;
+	/* Allocate tag if not present */
+
+	tag = sc->request->tag;
+	if (unlikely(tag < 0)) {
+		tag = fnic_scsi_host_start_tag(fnic, sc);
+		if (unlikely(tag == SCSI_NO_TAG))
+			goto fnic_device_reset_end;
+		tag_gen_flag = 1;
+	}
 	io_lock = fnic_io_lock_hash(fnic, sc);
 	spin_lock_irqsave(io_lock, flags);
 	io_req = (struct fnic_io_req *)CMD_SP(sc);
@@ -1562,8 +2046,7 @@ int fnic_device_reset(struct scsi_cmnd *sc)
 	CMD_LR_STATUS(sc) = FCPIO_INVALID_CODE;
 	spin_unlock_irqrestore(io_lock, flags);
 
-	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "TAG %d\n",
-		      sc->request->tag);
+	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "TAG %x\n", tag);
 
 	/*
 	 * issue the device reset, if enqueue failed, clean up the ioreq
@@ -1576,6 +2059,9 @@ int fnic_device_reset(struct scsi_cmnd *sc)
 			io_req->dr_done = NULL;
 		goto fnic_device_reset_clean;
 	}
+	spin_lock_irqsave(io_lock, flags);
+	CMD_FLAGS(sc) |= FNIC_DEV_RST_ISSUED;
+	spin_unlock_irqrestore(io_lock, flags);
 
 	/*
 	 * Wait on the local completion for LUN reset.  The io_req may be
@@ -1588,12 +2074,13 @@ int fnic_device_reset(struct scsi_cmnd *sc)
 	io_req = (struct fnic_io_req *)CMD_SP(sc);
 	if (!io_req) {
 		spin_unlock_irqrestore(io_lock, flags);
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+				"io_req is null tag 0x%x sc 0x%p\n", tag, sc);
 		goto fnic_device_reset_end;
 	}
 	io_req->dr_done = NULL;
 
 	status = CMD_LR_STATUS(sc);
-	spin_unlock_irqrestore(io_lock, flags);
 
 	/*
 	 * If lun reset not completed, bail out with failed. io_req
@@ -1602,7 +2089,53 @@ int fnic_device_reset(struct scsi_cmnd *sc)
 	if (status == FCPIO_INVALID_CODE) {
 		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
 			      "Device reset timed out\n");
-		goto fnic_device_reset_end;
+		CMD_FLAGS(sc) |= FNIC_DEV_RST_TIMED_OUT;
+		spin_unlock_irqrestore(io_lock, flags);
+		int_to_scsilun(sc->device->lun, &fc_lun);
+		/*
+		 * Issue abort and terminate on the device reset request.
+		 * If q'ing of the abort fails, retry issue it after a delay.
+		 */
+		while (1) {
+			spin_lock_irqsave(io_lock, flags);
+			if (CMD_FLAGS(sc) & FNIC_DEV_RST_TERM_ISSUED) {
+				spin_unlock_irqrestore(io_lock, flags);
+				break;
+			}
+			spin_unlock_irqrestore(io_lock, flags);
+			if (fnic_queue_abort_io_req(fnic,
+				tag | FNIC_TAG_DEV_RST,
+				FCPIO_ITMF_ABT_TASK_TERM,
+				fc_lun.scsi_lun, io_req)) {
+				wait_for_completion_timeout(&tm_done,
+				msecs_to_jiffies(FNIC_ABT_TERM_DELAY_TIMEOUT));
+			} else {
+				spin_lock_irqsave(io_lock, flags);
+				CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
+				CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
+				io_req->abts_done = &tm_done;
+				spin_unlock_irqrestore(io_lock, flags);
+				FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+				"Abort and terminate issued on Device reset "
+				"tag 0x%x sc 0x%p\n", tag, sc);
+				break;
+			}
+		}
+		while (1) {
+			spin_lock_irqsave(io_lock, flags);
+			if (!(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) {
+				spin_unlock_irqrestore(io_lock, flags);
+				wait_for_completion_timeout(&tm_done,
+				msecs_to_jiffies(FNIC_LUN_RESET_TIMEOUT));
+				break;
+			} else {
+				io_req = (struct fnic_io_req *)CMD_SP(sc);
+				io_req->abts_done = NULL;
+				goto fnic_device_reset_clean;
+			}
+		}
+	} else {
+		spin_unlock_irqrestore(io_lock, flags);
 	}
 
 	/* Completed, but not successful, clean up the io_req, return fail */
@@ -1645,11 +2178,24 @@ fnic_device_reset_clean:
 	spin_unlock_irqrestore(io_lock, flags);
 
 	if (io_req) {
+		start_time = io_req->start_time;
 		fnic_release_ioreq_buf(fnic, io_req, sc);
 		mempool_free(io_req, fnic->io_req_pool);
 	}
 
 fnic_device_reset_end:
+	FNIC_TRACE(fnic_device_reset, sc->device->host->host_no,
+		  sc->request->tag, sc,
+		  jiffies_to_msecs(jiffies - start_time),
+		  0, ((u64)sc->cmnd[0] << 32 |
+		  (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 |
+		  (u64)sc->cmnd[4] << 8 | sc->cmnd[5]),
+		  (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
+
+	/* free tag if it is allocated */
+	if (unlikely(tag_gen_flag))
+		fnic_scsi_host_end_tag(fnic, sc);
+
 	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
 		      "Returning from device reset %s\n",
 		      (ret == SUCCESS) ?
@@ -1735,7 +2281,15 @@ void fnic_scsi_abort_io(struct fc_lport *lp)
 	DECLARE_COMPLETION_ONSTACK(remove_wait);
 
 	/* Issue firmware reset for fnic, wait for reset to complete */
+retry_fw_reset:
 	spin_lock_irqsave(&fnic->fnic_lock, flags);
+	if (unlikely(fnic->state == FNIC_IN_FC_TRANS_ETH_MODE)) {
+		/* fw reset is in progress, poll for its completion */
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		schedule_timeout(msecs_to_jiffies(100));
+		goto retry_fw_reset;
+	}
+
 	fnic->remove_wait = &remove_wait;
 	old_state = fnic->state;
 	fnic->state = FNIC_IN_FC_TRANS_ETH_MODE;
@@ -1776,7 +2330,14 @@ void fnic_scsi_cleanup(struct fc_lport *lp)
 	struct fnic *fnic = lport_priv(lp);
 
 	/* issue fw reset */
+retry_fw_reset:
 	spin_lock_irqsave(&fnic->fnic_lock, flags);
+	if (unlikely(fnic->state == FNIC_IN_FC_TRANS_ETH_MODE)) {
+		/* fw reset is in progress, poll for its completion */
+		spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+		schedule_timeout(msecs_to_jiffies(100));
+		goto retry_fw_reset;
+	}
 	old_state = fnic->state;
 	fnic->state = FNIC_IN_FC_TRANS_ETH_MODE;
 	fnic_update_mac_locked(fnic, fnic->ctlr.ctl_src_addr);
@@ -1822,3 +2383,61 @@ call_fc_exch_mgr_reset:
 	fc_exch_mgr_reset(lp, sid, did);
 
 }
+
+/*
+ * fnic_is_abts_pending() is a helper function that
+ * walks through tag map to check if there is any IOs pending,if there is one,
+ * then it returns 1 (true), otherwise 0 (false)
+ * if @lr_sc is non NULL, then it checks IOs specific to particular LUN,
+ * otherwise, it checks for all IOs.
+ */
+int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc)
+{
+	int tag;
+	struct fnic_io_req *io_req;
+	spinlock_t *io_lock;
+	unsigned long flags;
+	int ret = 0;
+	struct scsi_cmnd *sc;
+	struct scsi_device *lun_dev = NULL;
+
+	if (lr_sc)
+		lun_dev = lr_sc->device;
+
+	/* walk again to check, if IOs are still pending in fw */
+	for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
+		sc = scsi_host_find_tag(fnic->lport->host, tag);
+		/*
+		 * ignore this lun reset cmd or cmds that do not belong to
+		 * this lun
+		 */
+		if (!sc || (lr_sc && (sc->device != lun_dev || sc == lr_sc)))
+			continue;
+
+		io_lock = fnic_io_lock_hash(fnic, sc);
+		spin_lock_irqsave(io_lock, flags);
+
+		io_req = (struct fnic_io_req *)CMD_SP(sc);
+
+		if (!io_req || sc->device != lun_dev) {
+			spin_unlock_irqrestore(io_lock, flags);
+			continue;
+		}
+
+		/*
+		 * Found IO that is still pending with firmware and
+		 * belongs to the LUN that we are resetting
+		 */
+		FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+			      "Found IO in %s on lun\n",
+			      fnic_ioreq_state_to_str(CMD_STATE(sc)));
+
+		if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
+			spin_unlock_irqrestore(io_lock, flags);
+			ret = 1;
+			continue;
+		}
+	}
+
+	return ret;
+}
diff --git a/drivers/scsi/fnic/fnic_trace.c b/drivers/scsi/fnic/fnic_trace.c
new file mode 100644
index 000000000000..23a60e3d8527
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_trace.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright 2012 Cisco Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/mempool.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/kallsyms.h>
+#include "fnic_io.h"
+#include "fnic.h"
+
+unsigned int trace_max_pages;
+static int fnic_max_trace_entries;
+
+static unsigned long fnic_trace_buf_p;
+static DEFINE_SPINLOCK(fnic_trace_lock);
+
+static fnic_trace_dbg_t fnic_trace_entries;
+int fnic_tracing_enabled = 1;
+
+/*
+ * fnic_trace_get_buf - Give buffer pointer to user to fill up trace information
+ *
+ * Description:
+ * This routine gets next available trace buffer entry location @wr_idx
+ * from allocated trace buffer pages and give that memory location
+ * to user to store the trace information.
+ *
+ * Return Value:
+ * This routine returns pointer to next available trace entry
+ * @fnic_buf_head for user to fill trace information.
+ */
+fnic_trace_data_t *fnic_trace_get_buf(void)
+{
+	unsigned long fnic_buf_head;
+	unsigned long flags;
+
+	spin_lock_irqsave(&fnic_trace_lock, flags);
+
+	/*
+	 * Get next available memory location for writing trace information
+	 * at @wr_idx and increment @wr_idx
+	 */
+	fnic_buf_head =
+		fnic_trace_entries.page_offset[fnic_trace_entries.wr_idx];
+	fnic_trace_entries.wr_idx++;
+
+	/*
+	 * Verify if trace buffer is full then change wd_idx to
+	 * start from zero
+	 */
+	if (fnic_trace_entries.wr_idx >= fnic_max_trace_entries)
+		fnic_trace_entries.wr_idx = 0;
+
+	/*
+	 * Verify if write index @wr_idx and read index @rd_idx are same then
+	 * increment @rd_idx to move to next entry in trace buffer
+	 */
+	if (fnic_trace_entries.wr_idx == fnic_trace_entries.rd_idx) {
+		fnic_trace_entries.rd_idx++;
+		if (fnic_trace_entries.rd_idx >= fnic_max_trace_entries)
+			fnic_trace_entries.rd_idx = 0;
+	}
+	spin_unlock_irqrestore(&fnic_trace_lock, flags);
+	return (fnic_trace_data_t *)fnic_buf_head;
+}
+
+/*
+ * fnic_get_trace_data - Copy trace buffer to a memory file
+ * @fnic_dbgfs_t: pointer to debugfs trace buffer
+ *
+ * Description:
+ * This routine gathers the fnic trace debugfs data from the fnic_trace_data_t
+ * buffer and dumps it to fnic_dbgfs_t. It will start at the rd_idx entry in
+ * the log and process the log until the end of the buffer. Then it will gather
+ * from the beginning of the log and process until the current entry @wr_idx.
+ *
+ * Return Value:
+ * This routine returns the amount of bytes that were dumped into fnic_dbgfs_t
+ */
+int fnic_get_trace_data(fnic_dbgfs_t *fnic_dbgfs_prt)
+{
+	int rd_idx;
+	int wr_idx;
+	int len = 0;
+	unsigned long flags;
+	char str[KSYM_SYMBOL_LEN];
+	struct timespec val;
+	fnic_trace_data_t *tbp;
+
+	spin_lock_irqsave(&fnic_trace_lock, flags);
+	rd_idx = fnic_trace_entries.rd_idx;
+	wr_idx = fnic_trace_entries.wr_idx;
+	if (wr_idx < rd_idx) {
+		while (1) {
+			/* Start from read index @rd_idx */
+			tbp = (fnic_trace_data_t *)
+				  fnic_trace_entries.page_offset[rd_idx];
+			if (!tbp) {
+				spin_unlock_irqrestore(&fnic_trace_lock, flags);
+				return 0;
+			}
+			/* Convert function pointer to function name */
+			if (sizeof(unsigned long) < 8) {
+				sprint_symbol(str, tbp->fnaddr.low);
+				jiffies_to_timespec(tbp->timestamp.low, &val);
+			} else {
+				sprint_symbol(str, tbp->fnaddr.val);
+				jiffies_to_timespec(tbp->timestamp.val, &val);
+			}
+			/*
+			 * Dump trace buffer entry to memory file
+			 * and increment read index @rd_idx
+			 */
+			len += snprintf(fnic_dbgfs_prt->buffer + len,
+				  (trace_max_pages * PAGE_SIZE * 3) - len,
+				  "%16lu.%16lu %-50s %8x %8x %16llx %16llx "
+				  "%16llx %16llx %16llx\n", val.tv_sec,
+				  val.tv_nsec, str, tbp->host_no, tbp->tag,
+				  tbp->data[0], tbp->data[1], tbp->data[2],
+				  tbp->data[3], tbp->data[4]);
+			rd_idx++;
+			/*
+			 * If rd_idx is reached to maximum trace entries
+			 * then move rd_idx to zero
+			 */
+			if (rd_idx > (fnic_max_trace_entries-1))
+				rd_idx = 0;
+			/*
+			 * Continure dumpping trace buffer entries into
+			 * memory file till rd_idx reaches write index
+			 */
+			if (rd_idx == wr_idx)
+				break;
+		}
+	} else if (wr_idx > rd_idx) {
+		while (1) {
+			/* Start from read index @rd_idx */
+			tbp = (fnic_trace_data_t *)
+				  fnic_trace_entries.page_offset[rd_idx];
+			if (!tbp) {
+				spin_unlock_irqrestore(&fnic_trace_lock, flags);
+				return 0;
+			}
+			/* Convert function pointer to function name */
+			if (sizeof(unsigned long) < 8) {
+				sprint_symbol(str, tbp->fnaddr.low);
+				jiffies_to_timespec(tbp->timestamp.low, &val);
+			} else {
+				sprint_symbol(str, tbp->fnaddr.val);
+				jiffies_to_timespec(tbp->timestamp.val, &val);
+			}
+			/*
+			 * Dump trace buffer entry to memory file
+			 * and increment read index @rd_idx
+			 */
+			len += snprintf(fnic_dbgfs_prt->buffer + len,
+				  (trace_max_pages * PAGE_SIZE * 3) - len,
+				  "%16lu.%16lu %-50s %8x %8x %16llx %16llx "
+				  "%16llx %16llx %16llx\n", val.tv_sec,
+				  val.tv_nsec, str, tbp->host_no, tbp->tag,
+				  tbp->data[0], tbp->data[1], tbp->data[2],
+				  tbp->data[3], tbp->data[4]);
+			rd_idx++;
+			/*
+			 * Continue dumpping trace buffer entries into
+			 * memory file till rd_idx reaches write index
+			 */
+			if (rd_idx == wr_idx)
+				break;
+		}
+	}
+	spin_unlock_irqrestore(&fnic_trace_lock, flags);
+	return len;
+}
+
+/*
+ * fnic_trace_buf_init - Initialize fnic trace buffer logging facility
+ *
+ * Description:
+ * Initialize trace buffer data structure by allocating required memory and
+ * setting page_offset information for every trace entry by adding trace entry
+ * length to previous page_offset value.
+ */
+int fnic_trace_buf_init(void)
+{
+	unsigned long fnic_buf_head;
+	int i;
+	int err = 0;
+
+	trace_max_pages = fnic_trace_max_pages;
+	fnic_max_trace_entries = (trace_max_pages * PAGE_SIZE)/
+					  FNIC_ENTRY_SIZE_BYTES;
+
+	fnic_trace_buf_p = (unsigned long)vmalloc((trace_max_pages * PAGE_SIZE));
+	if (!fnic_trace_buf_p) {
+		printk(KERN_ERR PFX "Failed to allocate memory "
+				  "for fnic_trace_buf_p\n");
+		err = -ENOMEM;
+		goto err_fnic_trace_buf_init;
+	}
+	memset((void *)fnic_trace_buf_p, 0, (trace_max_pages * PAGE_SIZE));
+
+	fnic_trace_entries.page_offset = vmalloc(fnic_max_trace_entries *
+						  sizeof(unsigned long));
+	if (!fnic_trace_entries.page_offset) {
+		printk(KERN_ERR PFX "Failed to allocate memory for"
+				  " page_offset\n");
+		if (fnic_trace_buf_p) {
+			vfree((void *)fnic_trace_buf_p);
+			fnic_trace_buf_p = 0;
+		}
+		err = -ENOMEM;
+		goto err_fnic_trace_buf_init;
+	}
+	memset((void *)fnic_trace_entries.page_offset, 0,
+		  (fnic_max_trace_entries * sizeof(unsigned long)));
+	fnic_trace_entries.wr_idx = fnic_trace_entries.rd_idx = 0;
+	fnic_buf_head = fnic_trace_buf_p;
+
+	/*
+	 * Set page_offset field of fnic_trace_entries struct by
+	 * calculating memory location for every trace entry using
+	 * length of each trace entry
+	 */
+	for (i = 0; i < fnic_max_trace_entries; i++) {
+		fnic_trace_entries.page_offset[i] = fnic_buf_head;
+		fnic_buf_head += FNIC_ENTRY_SIZE_BYTES;
+	}
+	err = fnic_trace_debugfs_init();
+	if (err < 0) {
+		printk(KERN_ERR PFX "Failed to initialize debugfs for tracing\n");
+		goto err_fnic_trace_debugfs_init;
+	}
+	printk(KERN_INFO PFX "Successfully Initialized Trace Buffer\n");
+	return err;
+err_fnic_trace_debugfs_init:
+	fnic_trace_free();
+err_fnic_trace_buf_init:
+	return err;
+}
+
+/*
+ * fnic_trace_free - Free memory of fnic trace data structures.
+ */
+void fnic_trace_free(void)
+{
+	fnic_tracing_enabled = 0;
+	fnic_trace_debugfs_terminate();
+	if (fnic_trace_entries.page_offset) {
+		vfree((void *)fnic_trace_entries.page_offset);
+		fnic_trace_entries.page_offset = NULL;
+	}
+	if (fnic_trace_buf_p) {
+		vfree((void *)fnic_trace_buf_p);
+		fnic_trace_buf_p = 0;
+	}
+	printk(KERN_INFO PFX "Successfully Freed Trace Buffer\n");
+}
diff --git a/drivers/scsi/fnic/fnic_trace.h b/drivers/scsi/fnic/fnic_trace.h
new file mode 100644
index 000000000000..cef42b4c4d6c
--- /dev/null
+++ b/drivers/scsi/fnic/fnic_trace.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2012 Cisco Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __FNIC_TRACE_H__
+#define __FNIC_TRACE_H__
+
+#define FNIC_ENTRY_SIZE_BYTES 64
+
+extern ssize_t simple_read_from_buffer(void __user *to,
+					  size_t count,
+					  loff_t *ppos,
+					  const void *from,
+					  size_t available);
+
+extern unsigned int fnic_trace_max_pages;
+extern int fnic_tracing_enabled;
+extern unsigned int trace_max_pages;
+
+typedef struct fnic_trace_dbg {
+	int wr_idx;
+	int rd_idx;
+	unsigned long *page_offset;
+} fnic_trace_dbg_t;
+
+typedef struct fnic_dbgfs {
+	int buffer_len;
+	char *buffer;
+} fnic_dbgfs_t;
+
+struct fnic_trace_data {
+	union {
+		struct {
+			u32 low;
+			u32 high;
+		};
+		u64 val;
+	} timestamp, fnaddr;
+	u32 host_no;
+	u32 tag;
+	u64 data[5];
+} __attribute__((__packed__));
+
+typedef struct fnic_trace_data fnic_trace_data_t;
+
+#define FNIC_TRACE_ENTRY_SIZE \
+		  (FNIC_ENTRY_SIZE_BYTES - sizeof(fnic_trace_data_t))
+
+#define FNIC_TRACE(_fn, _hn, _t, _a, _b, _c, _d, _e)           \
+	if (unlikely(fnic_tracing_enabled)) {                   \
+		fnic_trace_data_t *trace_buf = fnic_trace_get_buf(); \
+		if (trace_buf) { \
+			if (sizeof(unsigned long) < 8) { \
+				trace_buf->timestamp.low = jiffies; \
+				trace_buf->fnaddr.low = (u32)(unsigned long)_fn; \
+			} else { \
+				trace_buf->timestamp.val = jiffies; \
+				trace_buf->fnaddr.val = (u64)(unsigned long)_fn; \
+			} \
+			trace_buf->host_no = _hn; \
+			trace_buf->tag = _t; \
+			trace_buf->data[0] = (u64)(unsigned long)_a; \
+			trace_buf->data[1] = (u64)(unsigned long)_b; \
+			trace_buf->data[2] = (u64)(unsigned long)_c; \
+			trace_buf->data[3] = (u64)(unsigned long)_d; \
+			trace_buf->data[4] = (u64)(unsigned long)_e; \
+		} \
+	}
+
+fnic_trace_data_t *fnic_trace_get_buf(void);
+int fnic_get_trace_data(fnic_dbgfs_t *);
+int fnic_trace_buf_init(void);
+void fnic_trace_free(void);
+int fnic_trace_debugfs_init(void);
+void fnic_trace_debugfs_terminate(void);
+
+#endif
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index 599790e41a98..59bceac51a4c 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -1107,14 +1107,8 @@ static int gdth_init_pci(struct pci_dev *pdev, gdth_pci_str *pcistr,
 	pci_read_config_word(pdev, PCI_COMMAND, &command);
         command |= 6;
 	pci_write_config_word(pdev, PCI_COMMAND, command);
-	if (pci_resource_start(pdev, 8) == 1UL)
-	    pci_resource_start(pdev, 8) = 0UL;
-        i = 0xFEFF0001UL;
-	pci_write_config_dword(pdev, PCI_ROM_ADDRESS, i);
-        gdth_delay(1);
-	pci_write_config_dword(pdev, PCI_ROM_ADDRESS,
-			       pci_resource_start(pdev, 8));
-        
+	gdth_delay(1);
+
         dp6m_ptr = ha->brd;
 
         /* Ensure that it is safe to access the non HW portions of DPMEM.
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 4f338061b5c3..7f4f790a3d71 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -165,7 +165,7 @@ static void cmd_free(struct ctlr_info *h, struct CommandList *c);
 static void cmd_special_free(struct ctlr_info *h, struct CommandList *c);
 static struct CommandList *cmd_alloc(struct ctlr_info *h);
 static struct CommandList *cmd_special_alloc(struct ctlr_info *h);
-static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
+static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 	void *buff, size_t size, u8 page_code, unsigned char *scsi3addr,
 	int cmd_type);
 
@@ -1131,7 +1131,7 @@ clean:
 	return -ENOMEM;
 }
 
-static void hpsa_map_sg_chain_block(struct ctlr_info *h,
+static int hpsa_map_sg_chain_block(struct ctlr_info *h,
 	struct CommandList *c)
 {
 	struct SGDescriptor *chain_sg, *chain_block;
@@ -1144,8 +1144,15 @@ static void hpsa_map_sg_chain_block(struct ctlr_info *h,
 		(c->Header.SGTotal - h->max_cmd_sg_entries);
 	temp64 = pci_map_single(h->pdev, chain_block, chain_sg->Len,
 				PCI_DMA_TODEVICE);
+	if (dma_mapping_error(&h->pdev->dev, temp64)) {
+		/* prevent subsequent unmapping */
+		chain_sg->Addr.lower = 0;
+		chain_sg->Addr.upper = 0;
+		return -1;
+	}
 	chain_sg->Addr.lower = (u32) (temp64 & 0x0FFFFFFFFULL);
 	chain_sg->Addr.upper = (u32) ((temp64 >> 32) & 0x0FFFFFFFFULL);
+	return 0;
 }
 
 static void hpsa_unmap_sg_chain_block(struct ctlr_info *h,
@@ -1390,7 +1397,7 @@ static void hpsa_pci_unmap(struct pci_dev *pdev,
 	}
 }
 
-static void hpsa_map_one(struct pci_dev *pdev,
+static int hpsa_map_one(struct pci_dev *pdev,
 		struct CommandList *cp,
 		unsigned char *buf,
 		size_t buflen,
@@ -1401,10 +1408,16 @@ static void hpsa_map_one(struct pci_dev *pdev,
 	if (buflen == 0 || data_direction == PCI_DMA_NONE) {
 		cp->Header.SGList = 0;
 		cp->Header.SGTotal = 0;
-		return;
+		return 0;
 	}
 
 	addr64 = (u64) pci_map_single(pdev, buf, buflen, data_direction);
+	if (dma_mapping_error(&pdev->dev, addr64)) {
+		/* Prevent subsequent unmap of something never mapped */
+		cp->Header.SGList = 0;
+		cp->Header.SGTotal = 0;
+		return -1;
+	}
 	cp->SG[0].Addr.lower =
 	  (u32) (addr64 & (u64) 0x00000000FFFFFFFF);
 	cp->SG[0].Addr.upper =
@@ -1412,6 +1425,7 @@ static void hpsa_map_one(struct pci_dev *pdev,
 	cp->SG[0].Len = buflen;
 	cp->Header.SGList = (u8) 1;   /* no. SGs contig in this cmd */
 	cp->Header.SGTotal = (u16) 1; /* total sgs in this cmd list */
+	return 0;
 }
 
 static inline void hpsa_scsi_do_simple_cmd_core(struct ctlr_info *h,
@@ -1540,13 +1554,18 @@ static int hpsa_scsi_do_inquiry(struct ctlr_info *h, unsigned char *scsi3addr,
 		return -ENOMEM;
 	}
 
-	fill_cmd(c, HPSA_INQUIRY, h, buf, bufsize, page, scsi3addr, TYPE_CMD);
+	if (fill_cmd(c, HPSA_INQUIRY, h, buf, bufsize,
+			page, scsi3addr, TYPE_CMD)) {
+		rc = -1;
+		goto out;
+	}
 	hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
 	ei = c->err_info;
 	if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) {
 		hpsa_scsi_interpret_error(c);
 		rc = -1;
 	}
+out:
 	cmd_special_free(h, c);
 	return rc;
 }
@@ -1564,7 +1583,9 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr)
 		return -ENOMEM;
 	}
 
-	fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, NULL, 0, 0, scsi3addr, TYPE_MSG);
+	/* fill_cmd can't fail here, no data buffer to map. */
+	(void) fill_cmd(c, HPSA_DEVICE_RESET_MSG, h,
+			NULL, 0, 0, scsi3addr, TYPE_MSG);
 	hpsa_scsi_do_simple_cmd_core(h, c);
 	/* no unmap needed here because no data xfer. */
 
@@ -1631,8 +1652,11 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info *h, int logical,
 	}
 	/* address the controller */
 	memset(scsi3addr, 0, sizeof(scsi3addr));
-	fill_cmd(c, logical ? HPSA_REPORT_LOG : HPSA_REPORT_PHYS, h,
-		buf, bufsize, 0, scsi3addr, TYPE_CMD);
+	if (fill_cmd(c, logical ? HPSA_REPORT_LOG : HPSA_REPORT_PHYS, h,
+		buf, bufsize, 0, scsi3addr, TYPE_CMD)) {
+		rc = -1;
+		goto out;
+	}
 	if (extended_response)
 		c->Request.CDB[1] = extended_response;
 	hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
@@ -1642,6 +1666,7 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info *h, int logical,
 		hpsa_scsi_interpret_error(c);
 		rc = -1;
 	}
+out:
 	cmd_special_free(h, c);
 	return rc;
 }
@@ -2105,7 +2130,10 @@ static int hpsa_scatter_gather(struct ctlr_info *h,
 	if (chained) {
 		cp->Header.SGList = h->max_cmd_sg_entries;
 		cp->Header.SGTotal = (u16) (use_sg + 1);
-		hpsa_map_sg_chain_block(h, cp);
+		if (hpsa_map_sg_chain_block(h, cp)) {
+			scsi_dma_unmap(cmd);
+			return -1;
+		}
 		return 0;
 	}
 
@@ -2353,8 +2381,9 @@ static int wait_for_device_to_become_ready(struct ctlr_info *h,
 		if (waittime < HPSA_MAX_WAIT_INTERVAL_SECS)
 			waittime = waittime * 2;
 
-		/* Send the Test Unit Ready */
-		fill_cmd(c, TEST_UNIT_READY, h, NULL, 0, 0, lunaddr, TYPE_CMD);
+		/* Send the Test Unit Ready, fill_cmd can't fail, no mapping */
+		(void) fill_cmd(c, TEST_UNIT_READY, h,
+				NULL, 0, 0, lunaddr, TYPE_CMD);
 		hpsa_scsi_do_simple_cmd_core(h, c);
 		/* no unmap needed here because no data xfer. */
 
@@ -2439,7 +2468,9 @@ static int hpsa_send_abort(struct ctlr_info *h, unsigned char *scsi3addr,
 		return -ENOMEM;
 	}
 
-	fill_cmd(c, HPSA_ABORT_MSG, h, abort, 0, 0, scsi3addr, TYPE_MSG);
+	/* fill_cmd can't fail here, no buffer to map */
+	(void) fill_cmd(c, HPSA_ABORT_MSG, h, abort,
+		0, 0, scsi3addr, TYPE_MSG);
 	if (swizzle)
 		swizzle_abort_tag(&c->Request.CDB[4]);
 	hpsa_scsi_do_simple_cmd_core(h, c);
@@ -2928,6 +2959,7 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 	struct CommandList *c;
 	char *buff = NULL;
 	union u64bit temp64;
+	int rc = 0;
 
 	if (!argp)
 		return -EINVAL;
@@ -2947,8 +2979,8 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 			/* Copy the data into the buffer we created */
 			if (copy_from_user(buff, iocommand.buf,
 				iocommand.buf_size)) {
-				kfree(buff);
-				return -EFAULT;
+				rc = -EFAULT;
+				goto out_kfree;
 			}
 		} else {
 			memset(buff, 0, iocommand.buf_size);
@@ -2956,8 +2988,8 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 	}
 	c = cmd_special_alloc(h);
 	if (c == NULL) {
-		kfree(buff);
-		return -ENOMEM;
+		rc = -ENOMEM;
+		goto out_kfree;
 	}
 	/* Fill in the command type */
 	c->cmd_type = CMD_IOCTL_PEND;
@@ -2982,6 +3014,13 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 	if (iocommand.buf_size > 0) {
 		temp64.val = pci_map_single(h->pdev, buff,
 			iocommand.buf_size, PCI_DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(&h->pdev->dev, temp64.val)) {
+			c->SG[0].Addr.lower = 0;
+			c->SG[0].Addr.upper = 0;
+			c->SG[0].Len = 0;
+			rc = -ENOMEM;
+			goto out;
+		}
 		c->SG[0].Addr.lower = temp64.val32.lower;
 		c->SG[0].Addr.upper = temp64.val32.upper;
 		c->SG[0].Len = iocommand.buf_size;
@@ -2996,22 +3035,22 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 	memcpy(&iocommand.error_info, c->err_info,
 		sizeof(iocommand.error_info));
 	if (copy_to_user(argp, &iocommand, sizeof(iocommand))) {
-		kfree(buff);
-		cmd_special_free(h, c);
-		return -EFAULT;
+		rc = -EFAULT;
+		goto out;
 	}
 	if (iocommand.Request.Type.Direction == XFER_READ &&
 		iocommand.buf_size > 0) {
 		/* Copy the data out of the buffer we created */
 		if (copy_to_user(iocommand.buf, buff, iocommand.buf_size)) {
-			kfree(buff);
-			cmd_special_free(h, c);
-			return -EFAULT;
+			rc = -EFAULT;
+			goto out;
 		}
 	}
-	kfree(buff);
+out:
 	cmd_special_free(h, c);
-	return 0;
+out_kfree:
+	kfree(buff);
+	return rc;
 }
 
 static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp)
@@ -3103,6 +3142,15 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 		for (i = 0; i < sg_used; i++) {
 			temp64.val = pci_map_single(h->pdev, buff[i],
 				    buff_size[i], PCI_DMA_BIDIRECTIONAL);
+			if (dma_mapping_error(&h->pdev->dev, temp64.val)) {
+				c->SG[i].Addr.lower = 0;
+				c->SG[i].Addr.upper = 0;
+				c->SG[i].Len = 0;
+				hpsa_pci_unmap(h->pdev, c, i,
+					PCI_DMA_BIDIRECTIONAL);
+				status = -ENOMEM;
+				goto cleanup1;
+			}
 			c->SG[i].Addr.lower = temp64.val32.lower;
 			c->SG[i].Addr.upper = temp64.val32.upper;
 			c->SG[i].Len = buff_size[i];
@@ -3190,7 +3238,8 @@ static int hpsa_send_host_reset(struct ctlr_info *h, unsigned char *scsi3addr,
 	c = cmd_alloc(h);
 	if (!c)
 		return -ENOMEM;
-	fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, NULL, 0, 0,
+	/* fill_cmd can't fail here, no data buffer to map */
+	(void) fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, NULL, 0, 0,
 		RAID_CTLR_LUNID, TYPE_MSG);
 	c->Request.CDB[1] = reset_type; /* fill_cmd defaults to target reset */
 	c->waiting = NULL;
@@ -3202,7 +3251,7 @@ static int hpsa_send_host_reset(struct ctlr_info *h, unsigned char *scsi3addr,
 	return 0;
 }
 
-static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
+static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 	void *buff, size_t size, u8 page_code, unsigned char *scsi3addr,
 	int cmd_type)
 {
@@ -3271,7 +3320,7 @@ static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 		default:
 			dev_warn(&h->pdev->dev, "unknown command 0x%c\n", cmd);
 			BUG();
-			return;
+			return -1;
 		}
 	} else if (cmd_type == TYPE_MSG) {
 		switch (cmd) {
@@ -3343,10 +3392,9 @@ static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 	default:
 		pci_dir = PCI_DMA_BIDIRECTIONAL;
 	}
-
-	hpsa_map_one(h->pdev, c, buff, size, pci_dir);
-
-	return;
+	if (hpsa_map_one(h->pdev, c, buff, size, pci_dir))
+		return -1;
+	return 0;
 }
 
 /*
@@ -4882,10 +4930,13 @@ static void hpsa_flush_cache(struct ctlr_info *h)
 		dev_warn(&h->pdev->dev, "cmd_special_alloc returned NULL!\n");
 		goto out_of_memory;
 	}
-	fill_cmd(c, HPSA_CACHE_FLUSH, h, flush_buf, 4, 0,
-		RAID_CTLR_LUNID, TYPE_CMD);
+	if (fill_cmd(c, HPSA_CACHE_FLUSH, h, flush_buf, 4, 0,
+		RAID_CTLR_LUNID, TYPE_CMD)) {
+		goto out;
+	}
 	hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_TODEVICE);
 	if (c->err_info->CommandStatus != 0)
+out:
 		dev_warn(&h->pdev->dev,
 			"error flushing cache on controller\n");
 	cmd_special_free(h, c);
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 1d7da3f41ebb..f328089a1060 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -98,6 +98,7 @@ static unsigned int ipr_transop_timeout = 0;
 static unsigned int ipr_debug = 0;
 static unsigned int ipr_max_devs = IPR_DEFAULT_SIS64_DEVS;
 static unsigned int ipr_dual_ioa_raid = 1;
+static unsigned int ipr_number_of_msix = 2;
 static DEFINE_SPINLOCK(ipr_driver_lock);
 
 /* This table describes the differences between DMA controller chips */
@@ -107,6 +108,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = {
 		.max_cmds = 100,
 		.cache_line_size = 0x20,
 		.clear_isr = 1,
+		.iopoll_weight = 0,
 		{
 			.set_interrupt_mask_reg = 0x0022C,
 			.clr_interrupt_mask_reg = 0x00230,
@@ -131,6 +133,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = {
 		.max_cmds = 100,
 		.cache_line_size = 0x20,
 		.clear_isr = 1,
+		.iopoll_weight = 0,
 		{
 			.set_interrupt_mask_reg = 0x00288,
 			.clr_interrupt_mask_reg = 0x0028C,
@@ -155,6 +158,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = {
 		.max_cmds = 1000,
 		.cache_line_size = 0x20,
 		.clear_isr = 0,
+		.iopoll_weight = 64,
 		{
 			.set_interrupt_mask_reg = 0x00010,
 			.clr_interrupt_mask_reg = 0x00018,
@@ -215,6 +219,8 @@ MODULE_PARM_DESC(dual_ioa_raid, "Enable dual adapter RAID support. Set to 1 to e
 module_param_named(max_devs, ipr_max_devs, int, 0);
 MODULE_PARM_DESC(max_devs, "Specify the maximum number of physical devices. "
 		 "[Default=" __stringify(IPR_DEFAULT_SIS64_DEVS) "]");
+module_param_named(number_of_msix, ipr_number_of_msix, int, 0);
+MODULE_PARM_DESC(number_of_msix, "Specify the number of MSIX interrupts to use on capable adapters (1 - 5).  (default:2)");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(IPR_DRIVER_VERSION);
 
@@ -549,7 +555,8 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd,
 	struct ipr_trace_entry *trace_entry;
 	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
 
-	trace_entry = &ioa_cfg->trace[ioa_cfg->trace_index++];
+	trace_entry = &ioa_cfg->trace[atomic_add_return
+			(1, &ioa_cfg->trace_index)%IPR_NUM_TRACE_ENTRIES];
 	trace_entry->time = jiffies;
 	trace_entry->op_code = ipr_cmd->ioarcb.cmd_pkt.cdb[0];
 	trace_entry->type = type;
@@ -560,6 +567,7 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd,
 	trace_entry->cmd_index = ipr_cmd->cmd_index & 0xff;
 	trace_entry->res_handle = ipr_cmd->ioarcb.res_handle;
 	trace_entry->u.add_data = add_data;
+	wmb();
 }
 #else
 #define ipr_trc_hook(ipr_cmd, type, add_data) do { } while (0)
@@ -595,8 +603,11 @@ static void ipr_reinit_ipr_cmnd(struct ipr_cmnd *ipr_cmd)
 	struct ipr_ioasa *ioasa = &ipr_cmd->s.ioasa;
 	struct ipr_ioasa64 *ioasa64 = &ipr_cmd->s.ioasa64;
 	dma_addr_t dma_addr = ipr_cmd->dma_addr;
+	int hrrq_id;
 
+	hrrq_id = ioarcb->cmd_pkt.hrrq_id;
 	memset(&ioarcb->cmd_pkt, 0, sizeof(struct ipr_cmd_pkt));
+	ioarcb->cmd_pkt.hrrq_id = hrrq_id;
 	ioarcb->data_transfer_length = 0;
 	ioarcb->read_data_transfer_length = 0;
 	ioarcb->ioadl_len = 0;
@@ -646,12 +657,16 @@ static void ipr_init_ipr_cmnd(struct ipr_cmnd *ipr_cmd,
  * 	pointer to ipr command struct
  **/
 static
-struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg)
+struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_hrr_queue *hrrq)
 {
-	struct ipr_cmnd *ipr_cmd;
+	struct ipr_cmnd *ipr_cmd = NULL;
+
+	if (likely(!list_empty(&hrrq->hrrq_free_q))) {
+		ipr_cmd = list_entry(hrrq->hrrq_free_q.next,
+			struct ipr_cmnd, queue);
+		list_del(&ipr_cmd->queue);
+	}
 
-	ipr_cmd = list_entry(ioa_cfg->free_q.next, struct ipr_cmnd, queue);
-	list_del(&ipr_cmd->queue);
 
 	return ipr_cmd;
 }
@@ -666,7 +681,8 @@ struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg)
 static
 struct ipr_cmnd *ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg)
 {
-	struct ipr_cmnd *ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg);
+	struct ipr_cmnd *ipr_cmd =
+		__ipr_get_free_ipr_cmnd(&ioa_cfg->hrrq[IPR_INIT_HRRQ]);
 	ipr_init_ipr_cmnd(ipr_cmd, ipr_lock_and_done);
 	return ipr_cmd;
 }
@@ -686,9 +702,15 @@ static void ipr_mask_and_clear_interrupts(struct ipr_ioa_cfg *ioa_cfg,
 					  u32 clr_ints)
 {
 	volatile u32 int_reg;
+	int i;
 
 	/* Stop new interrupts */
-	ioa_cfg->allow_interrupts = 0;
+	for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+		spin_lock(&ioa_cfg->hrrq[i]._lock);
+		ioa_cfg->hrrq[i].allow_interrupts = 0;
+		spin_unlock(&ioa_cfg->hrrq[i]._lock);
+	}
+	wmb();
 
 	/* Set interrupt mask to stop all new interrupts */
 	if (ioa_cfg->sis64)
@@ -761,13 +783,12 @@ static int ipr_set_pcix_cmd_reg(struct ipr_ioa_cfg *ioa_cfg)
  **/
 static void ipr_sata_eh_done(struct ipr_cmnd *ipr_cmd)
 {
-	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
 	struct ata_queued_cmd *qc = ipr_cmd->qc;
 	struct ipr_sata_port *sata_port = qc->ap->private_data;
 
 	qc->err_mask |= AC_ERR_OTHER;
 	sata_port->ioasa.status |= ATA_BUSY;
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 	ata_qc_complete(qc);
 }
 
@@ -783,14 +804,13 @@ static void ipr_sata_eh_done(struct ipr_cmnd *ipr_cmd)
  **/
 static void ipr_scsi_eh_done(struct ipr_cmnd *ipr_cmd)
 {
-	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
 	struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd;
 
 	scsi_cmd->result |= (DID_ERROR << 16);
 
 	scsi_dma_unmap(ipr_cmd->scsi_cmd);
 	scsi_cmd->scsi_done(scsi_cmd);
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 }
 
 /**
@@ -805,24 +825,32 @@ static void ipr_scsi_eh_done(struct ipr_cmnd *ipr_cmd)
 static void ipr_fail_all_ops(struct ipr_ioa_cfg *ioa_cfg)
 {
 	struct ipr_cmnd *ipr_cmd, *temp;
+	struct ipr_hrr_queue *hrrq;
 
 	ENTER;
-	list_for_each_entry_safe(ipr_cmd, temp, &ioa_cfg->pending_q, queue) {
-		list_del(&ipr_cmd->queue);
+	for_each_hrrq(hrrq, ioa_cfg) {
+		spin_lock(&hrrq->_lock);
+		list_for_each_entry_safe(ipr_cmd,
+					temp, &hrrq->hrrq_pending_q, queue) {
+			list_del(&ipr_cmd->queue);
 
-		ipr_cmd->s.ioasa.hdr.ioasc = cpu_to_be32(IPR_IOASC_IOA_WAS_RESET);
-		ipr_cmd->s.ioasa.hdr.ilid = cpu_to_be32(IPR_DRIVER_ILID);
+			ipr_cmd->s.ioasa.hdr.ioasc =
+				cpu_to_be32(IPR_IOASC_IOA_WAS_RESET);
+			ipr_cmd->s.ioasa.hdr.ilid =
+				cpu_to_be32(IPR_DRIVER_ILID);
 
-		if (ipr_cmd->scsi_cmd)
-			ipr_cmd->done = ipr_scsi_eh_done;
-		else if (ipr_cmd->qc)
-			ipr_cmd->done = ipr_sata_eh_done;
+			if (ipr_cmd->scsi_cmd)
+				ipr_cmd->done = ipr_scsi_eh_done;
+			else if (ipr_cmd->qc)
+				ipr_cmd->done = ipr_sata_eh_done;
 
-		ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, IPR_IOASC_IOA_WAS_RESET);
-		del_timer(&ipr_cmd->timer);
-		ipr_cmd->done(ipr_cmd);
+			ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH,
+				     IPR_IOASC_IOA_WAS_RESET);
+			del_timer(&ipr_cmd->timer);
+			ipr_cmd->done(ipr_cmd);
+		}
+		spin_unlock(&hrrq->_lock);
 	}
-
 	LEAVE;
 }
 
@@ -872,9 +900,7 @@ static void ipr_do_req(struct ipr_cmnd *ipr_cmd,
 		       void (*done) (struct ipr_cmnd *),
 		       void (*timeout_func) (struct ipr_cmnd *), u32 timeout)
 {
-	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
-
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q);
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q);
 
 	ipr_cmd->done = done;
 
@@ -975,6 +1001,14 @@ static void ipr_send_blocking_cmd(struct ipr_cmnd *ipr_cmd,
 	spin_lock_irq(ioa_cfg->host->host_lock);
 }
 
+static int ipr_get_hrrq_index(struct ipr_ioa_cfg *ioa_cfg)
+{
+	if (ioa_cfg->hrrq_num == 1)
+		return 0;
+	else
+		return (atomic_add_return(1, &ioa_cfg->hrrq_index) % (ioa_cfg->hrrq_num - 1)) + 1;
+}
+
 /**
  * ipr_send_hcam - Send an HCAM to the adapter.
  * @ioa_cfg:	ioa config struct
@@ -994,9 +1028,9 @@ static void ipr_send_hcam(struct ipr_ioa_cfg *ioa_cfg, u8 type,
 	struct ipr_cmnd *ipr_cmd;
 	struct ipr_ioarcb *ioarcb;
 
-	if (ioa_cfg->allow_cmds) {
+	if (ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) {
 		ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg);
-		list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q);
+		list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q);
 		list_add_tail(&hostrcb->queue, &ioa_cfg->hostrcb_pending_q);
 
 		ipr_cmd->u.hostrcb = hostrcb;
@@ -1166,14 +1200,15 @@ static int ipr_is_same_device(struct ipr_resource_entry *res,
 }
 
 /**
- * ipr_format_res_path - Format the resource path for printing.
+ * __ipr_format_res_path - Format the resource path for printing.
  * @res_path:	resource path
  * @buf:	buffer
+ * @len:	length of buffer provided
  *
  * Return value:
  * 	pointer to buffer
  **/
-static char *ipr_format_res_path(u8 *res_path, char *buffer, int len)
+static char *__ipr_format_res_path(u8 *res_path, char *buffer, int len)
 {
 	int i;
 	char *p = buffer;
@@ -1187,6 +1222,27 @@ static char *ipr_format_res_path(u8 *res_path, char *buffer, int len)
 }
 
 /**
+ * ipr_format_res_path - Format the resource path for printing.
+ * @ioa_cfg:	ioa config struct
+ * @res_path:	resource path
+ * @buf:	buffer
+ * @len:	length of buffer provided
+ *
+ * Return value:
+ *	pointer to buffer
+ **/
+static char *ipr_format_res_path(struct ipr_ioa_cfg *ioa_cfg,
+				 u8 *res_path, char *buffer, int len)
+{
+	char *p = buffer;
+
+	*p = '\0';
+	p += snprintf(p, buffer + len - p, "%d/", ioa_cfg->host->host_no);
+	__ipr_format_res_path(res_path, p, len - (buffer - p));
+	return buffer;
+}
+
+/**
  * ipr_update_res_entry - Update the resource entry.
  * @res:	resource entry struct
  * @cfgtew:	config table entry wrapper struct
@@ -1226,8 +1282,8 @@ static void ipr_update_res_entry(struct ipr_resource_entry *res,
 
 		if (res->sdev && new_path)
 			sdev_printk(KERN_INFO, res->sdev, "Resource path: %s\n",
-				    ipr_format_res_path(res->res_path, buffer,
-							sizeof(buffer)));
+				    ipr_format_res_path(res->ioa_cfg,
+					res->res_path, buffer, sizeof(buffer)));
 	} else {
 		res->flags = cfgtew->u.cfgte->flags;
 		if (res->flags & IPR_IS_IOA_RESOURCE)
@@ -1363,7 +1419,7 @@ static void ipr_process_ccn(struct ipr_cmnd *ipr_cmd)
 	u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
 	list_del(&hostrcb->queue);
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 
 	if (ioasc) {
 		if (ioasc != IPR_IOASC_IOA_WAS_RESET)
@@ -1613,8 +1669,8 @@ static void ipr_log_sis64_config_error(struct ipr_ioa_cfg *ioa_cfg,
 		ipr_err_separator;
 
 		ipr_err("Device %d : %s", i + 1,
-			 ipr_format_res_path(dev_entry->res_path, buffer,
-					     sizeof(buffer)));
+			__ipr_format_res_path(dev_entry->res_path,
+					      buffer, sizeof(buffer)));
 		ipr_log_ext_vpd(&dev_entry->vpd);
 
 		ipr_err("-----New Device Information-----\n");
@@ -1960,14 +2016,16 @@ static void ipr_log64_fabric_path(struct ipr_hostrcb *hostrcb,
 
 			ipr_hcam_err(hostrcb, "%s %s: Resource Path=%s\n",
 				     path_active_desc[i].desc, path_state_desc[j].desc,
-				     ipr_format_res_path(fabric->res_path, buffer,
-							 sizeof(buffer)));
+				     ipr_format_res_path(hostrcb->ioa_cfg,
+						fabric->res_path,
+						buffer, sizeof(buffer)));
 			return;
 		}
 	}
 
 	ipr_err("Path state=%02X Resource Path=%s\n", path_state,
-		ipr_format_res_path(fabric->res_path, buffer, sizeof(buffer)));
+		ipr_format_res_path(hostrcb->ioa_cfg, fabric->res_path,
+				    buffer, sizeof(buffer)));
 }
 
 static const struct {
@@ -2108,18 +2166,20 @@ static void ipr_log64_path_elem(struct ipr_hostrcb *hostrcb,
 
 			ipr_hcam_err(hostrcb, "%s %s: Resource Path=%s, Link rate=%s, WWN=%08X%08X\n",
 				     path_status_desc[j].desc, path_type_desc[i].desc,
-				     ipr_format_res_path(cfg->res_path, buffer,
-							 sizeof(buffer)),
-				     link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK],
-				     be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1]));
+				     ipr_format_res_path(hostrcb->ioa_cfg,
+					cfg->res_path, buffer, sizeof(buffer)),
+					link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK],
+					be32_to_cpu(cfg->wwid[0]),
+					be32_to_cpu(cfg->wwid[1]));
 			return;
 		}
 	}
 	ipr_hcam_err(hostrcb, "Path element=%02X: Resource Path=%s, Link rate=%s "
 		     "WWN=%08X%08X\n", cfg->type_status,
-		     ipr_format_res_path(cfg->res_path, buffer, sizeof(buffer)),
-		     link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK],
-		     be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1]));
+		     ipr_format_res_path(hostrcb->ioa_cfg,
+			cfg->res_path, buffer, sizeof(buffer)),
+			link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK],
+			be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1]));
 }
 
 /**
@@ -2182,7 +2242,8 @@ static void ipr_log_sis64_array_error(struct ipr_ioa_cfg *ioa_cfg,
 
 	ipr_err("RAID %s Array Configuration: %s\n",
 		error->protection_level,
-		ipr_format_res_path(error->last_res_path, buffer, sizeof(buffer)));
+		ipr_format_res_path(ioa_cfg, error->last_res_path,
+			buffer, sizeof(buffer)));
 
 	ipr_err_separator;
 
@@ -2203,11 +2264,12 @@ static void ipr_log_sis64_array_error(struct ipr_ioa_cfg *ioa_cfg,
 		ipr_err("Array Member %d:\n", i);
 		ipr_log_ext_vpd(&array_entry->vpd);
 		ipr_err("Current Location: %s\n",
-			 ipr_format_res_path(array_entry->res_path, buffer,
-					     sizeof(buffer)));
+			 ipr_format_res_path(ioa_cfg, array_entry->res_path,
+				buffer, sizeof(buffer)));
 		ipr_err("Expected Location: %s\n",
-			 ipr_format_res_path(array_entry->expected_res_path,
-					     buffer, sizeof(buffer)));
+			 ipr_format_res_path(ioa_cfg,
+				array_entry->expected_res_path,
+				buffer, sizeof(buffer)));
 
 		ipr_err_separator;
 	}
@@ -2409,7 +2471,7 @@ static void ipr_process_error(struct ipr_cmnd *ipr_cmd)
 		fd_ioasc = be32_to_cpu(hostrcb->hcam.u.error.fd_ioasc);
 
 	list_del(&hostrcb->queue);
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 
 	if (!ioasc) {
 		ipr_handle_log_data(ioa_cfg, hostrcb);
@@ -2491,36 +2553,6 @@ static void ipr_oper_timeout(struct ipr_cmnd *ipr_cmd)
 }
 
 /**
- * ipr_reset_reload - Reset/Reload the IOA
- * @ioa_cfg:		ioa config struct
- * @shutdown_type:	shutdown type
- *
- * This function resets the adapter and re-initializes it.
- * This function assumes that all new host commands have been stopped.
- * Return value:
- * 	SUCCESS / FAILED
- **/
-static int ipr_reset_reload(struct ipr_ioa_cfg *ioa_cfg,
-			    enum ipr_shutdown_type shutdown_type)
-{
-	if (!ioa_cfg->in_reset_reload)
-		ipr_initiate_ioa_reset(ioa_cfg, shutdown_type);
-
-	spin_unlock_irq(ioa_cfg->host->host_lock);
-	wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload);
-	spin_lock_irq(ioa_cfg->host->host_lock);
-
-	/* If we got hit with a host reset while we were already resetting
-	 the adapter for some reason, and the reset failed. */
-	if (ioa_cfg->ioa_is_dead) {
-		ipr_trace;
-		return FAILED;
-	}
-
-	return SUCCESS;
-}
-
-/**
  * ipr_find_ses_entry - Find matching SES in SES table
  * @res:	resource entry struct of SES
  *
@@ -3153,7 +3185,8 @@ static void ipr_worker_thread(struct work_struct *work)
 restart:
 	do {
 		did_work = 0;
-		if (!ioa_cfg->allow_cmds || !ioa_cfg->allow_ml_add_del) {
+		if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds ||
+		    !ioa_cfg->allow_ml_add_del) {
 			spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
 			return;
 		}
@@ -3401,7 +3434,7 @@ static ssize_t ipr_show_adapter_state(struct device *dev,
 	int len;
 
 	spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
-	if (ioa_cfg->ioa_is_dead)
+	if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead)
 		len = snprintf(buf, PAGE_SIZE, "offline\n");
 	else
 		len = snprintf(buf, PAGE_SIZE, "online\n");
@@ -3427,14 +3460,20 @@ static ssize_t ipr_store_adapter_state(struct device *dev,
 	struct Scsi_Host *shost = class_to_shost(dev);
 	struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata;
 	unsigned long lock_flags;
-	int result = count;
+	int result = count, i;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
 	spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
-	if (ioa_cfg->ioa_is_dead && !strncmp(buf, "online", 6)) {
-		ioa_cfg->ioa_is_dead = 0;
+	if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead &&
+	    !strncmp(buf, "online", 6)) {
+		for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+			spin_lock(&ioa_cfg->hrrq[i]._lock);
+			ioa_cfg->hrrq[i].ioa_is_dead = 0;
+			spin_unlock(&ioa_cfg->hrrq[i]._lock);
+		}
+		wmb();
 		ioa_cfg->reset_retries = 0;
 		ioa_cfg->in_ioa_bringdown = 0;
 		ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE);
@@ -3494,6 +3533,95 @@ static struct device_attribute ipr_ioa_reset_attr = {
 	.store = ipr_store_reset_adapter
 };
 
+static int ipr_iopoll(struct blk_iopoll *iop, int budget);
+ /**
+ * ipr_show_iopoll_weight - Show ipr polling mode
+ * @dev:	class device struct
+ * @buf:	buffer
+ *
+ * Return value:
+ *	number of bytes printed to buffer
+ **/
+static ssize_t ipr_show_iopoll_weight(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata;
+	unsigned long lock_flags = 0;
+	int len;
+
+	spin_lock_irqsave(shost->host_lock, lock_flags);
+	len = snprintf(buf, PAGE_SIZE, "%d\n", ioa_cfg->iopoll_weight);
+	spin_unlock_irqrestore(shost->host_lock, lock_flags);
+
+	return len;
+}
+
+/**
+ * ipr_store_iopoll_weight - Change the adapter's polling mode
+ * @dev:	class device struct
+ * @buf:	buffer
+ *
+ * Return value:
+ *	number of bytes printed to buffer
+ **/
+static ssize_t ipr_store_iopoll_weight(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata;
+	unsigned long user_iopoll_weight;
+	unsigned long lock_flags = 0;
+	int i;
+
+	if (!ioa_cfg->sis64) {
+		dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n");
+		return -EINVAL;
+	}
+	if (kstrtoul(buf, 10, &user_iopoll_weight))
+		return -EINVAL;
+
+	if (user_iopoll_weight > 256) {
+		dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n");
+		return -EINVAL;
+	}
+
+	if (user_iopoll_weight == ioa_cfg->iopoll_weight) {
+		dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n");
+		return strlen(buf);
+	}
+
+	if (blk_iopoll_enabled && ioa_cfg->iopoll_weight &&
+			ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
+		for (i = 1; i < ioa_cfg->hrrq_num; i++)
+			blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
+	}
+
+	spin_lock_irqsave(shost->host_lock, lock_flags);
+	ioa_cfg->iopoll_weight = user_iopoll_weight;
+	if (blk_iopoll_enabled && ioa_cfg->iopoll_weight &&
+			ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
+		for (i = 1; i < ioa_cfg->hrrq_num; i++) {
+			blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
+					ioa_cfg->iopoll_weight, ipr_iopoll);
+			blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
+		}
+	}
+	spin_unlock_irqrestore(shost->host_lock, lock_flags);
+
+	return strlen(buf);
+}
+
+static struct device_attribute ipr_iopoll_weight_attr = {
+	.attr = {
+		.name =		"iopoll_weight",
+		.mode =		S_IRUGO | S_IWUSR,
+	},
+	.show = ipr_show_iopoll_weight,
+	.store = ipr_store_iopoll_weight
+};
+
 /**
  * ipr_alloc_ucode_buffer - Allocates a microcode download buffer
  * @buf_len:		buffer length
@@ -3862,6 +3990,7 @@ static struct device_attribute *ipr_ioa_attrs[] = {
 	&ipr_ioa_reset_attr,
 	&ipr_update_fw_attr,
 	&ipr_ioa_fw_type_attr,
+	&ipr_iopoll_weight_attr,
 	NULL,
 };
 
@@ -4014,7 +4143,7 @@ static int ipr_alloc_dump(struct ipr_ioa_cfg *ioa_cfg)
 
 	ioa_cfg->dump = dump;
 	ioa_cfg->sdt_state = WAIT_FOR_DUMP;
-	if (ioa_cfg->ioa_is_dead && !ioa_cfg->dump_taken) {
+	if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead && !ioa_cfg->dump_taken) {
 		ioa_cfg->dump_taken = 1;
 		schedule_work(&ioa_cfg->work_q);
 	}
@@ -4227,8 +4356,8 @@ static ssize_t ipr_show_resource_path(struct device *dev, struct device_attribut
 	res = (struct ipr_resource_entry *)sdev->hostdata;
 	if (res && ioa_cfg->sis64)
 		len = snprintf(buf, PAGE_SIZE, "%s\n",
-			       ipr_format_res_path(res->res_path, buffer,
-						   sizeof(buffer)));
+			       __ipr_format_res_path(res->res_path, buffer,
+						     sizeof(buffer)));
 	else if (res)
 		len = snprintf(buf, PAGE_SIZE, "%d:%d:%d:%d\n", ioa_cfg->host->host_no,
 			       res->bus, res->target, res->lun);
@@ -4556,8 +4685,8 @@ static int ipr_slave_configure(struct scsi_device *sdev)
 			scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun);
 		if (ioa_cfg->sis64)
 			sdev_printk(KERN_INFO, sdev, "Resource path: %s\n",
-				    ipr_format_res_path(res->res_path, buffer,
-							sizeof(buffer)));
+				    ipr_format_res_path(ioa_cfg,
+				res->res_path, buffer, sizeof(buffer)));
 		return 0;
 	}
 	spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
@@ -4638,22 +4767,18 @@ static int ipr_slave_alloc(struct scsi_device *sdev)
 	return rc;
 }
 
-/**
- * ipr_eh_host_reset - Reset the host adapter
- * @scsi_cmd:	scsi command struct
- *
- * Return value:
- * 	SUCCESS / FAILED
- **/
-static int __ipr_eh_host_reset(struct scsi_cmnd *scsi_cmd)
+static int ipr_eh_host_reset(struct scsi_cmnd *cmd)
 {
 	struct ipr_ioa_cfg *ioa_cfg;
-	int rc;
+	unsigned long lock_flags = 0;
+	int rc = SUCCESS;
 
 	ENTER;
-	ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata;
+	ioa_cfg = (struct ipr_ioa_cfg *) cmd->device->host->hostdata;
+	spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
 
 	if (!ioa_cfg->in_reset_reload) {
+		ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_ABBREV);
 		dev_err(&ioa_cfg->pdev->dev,
 			"Adapter being reset as a result of error recovery.\n");
 
@@ -4661,20 +4786,19 @@ static int __ipr_eh_host_reset(struct scsi_cmnd *scsi_cmd)
 			ioa_cfg->sdt_state = GET_DUMP;
 	}
 
-	rc = ipr_reset_reload(ioa_cfg, IPR_SHUTDOWN_ABBREV);
-
-	LEAVE;
-	return rc;
-}
-
-static int ipr_eh_host_reset(struct scsi_cmnd *cmd)
-{
-	int rc;
+	spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
+	wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload);
+	spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
 
-	spin_lock_irq(cmd->device->host->host_lock);
-	rc = __ipr_eh_host_reset(cmd);
-	spin_unlock_irq(cmd->device->host->host_lock);
+	/* If we got hit with a host reset while we were already resetting
+	 the adapter for some reason, and the reset failed. */
+	if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) {
+		ipr_trace;
+		rc = FAILED;
+	}
 
+	spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
+	LEAVE;
 	return rc;
 }
 
@@ -4723,7 +4847,7 @@ static int ipr_device_reset(struct ipr_ioa_cfg *ioa_cfg,
 
 	ipr_send_blocking_cmd(ipr_cmd, ipr_timeout, IPR_DEVICE_RESET_TIMEOUT);
 	ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 	if (ipr_is_gata(res) && res->sata_port && ioasc != IPR_IOASC_IOA_WAS_RESET) {
 		if (ipr_cmd->ioa_cfg->sis64)
 			memcpy(&res->sata_port->ioasa, &ipr_cmd->s.ioasa64.u.gata,
@@ -4793,6 +4917,7 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd)
 	struct ipr_resource_entry *res;
 	struct ata_port *ap;
 	int rc = 0;
+	struct ipr_hrr_queue *hrrq;
 
 	ENTER;
 	ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata;
@@ -4808,22 +4933,26 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd)
 	 */
 	if (ioa_cfg->in_reset_reload)
 		return FAILED;
-	if (ioa_cfg->ioa_is_dead)
+	if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead)
 		return FAILED;
 
-	list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) {
-		if (ipr_cmd->ioarcb.res_handle == res->res_handle) {
-			if (ipr_cmd->scsi_cmd)
-				ipr_cmd->done = ipr_scsi_eh_done;
-			if (ipr_cmd->qc)
-				ipr_cmd->done = ipr_sata_eh_done;
-			if (ipr_cmd->qc && !(ipr_cmd->qc->flags & ATA_QCFLAG_FAILED)) {
-				ipr_cmd->qc->err_mask |= AC_ERR_TIMEOUT;
-				ipr_cmd->qc->flags |= ATA_QCFLAG_FAILED;
+	for_each_hrrq(hrrq, ioa_cfg) {
+		spin_lock(&hrrq->_lock);
+		list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) {
+			if (ipr_cmd->ioarcb.res_handle == res->res_handle) {
+				if (ipr_cmd->scsi_cmd)
+					ipr_cmd->done = ipr_scsi_eh_done;
+				if (ipr_cmd->qc)
+					ipr_cmd->done = ipr_sata_eh_done;
+				if (ipr_cmd->qc &&
+				    !(ipr_cmd->qc->flags & ATA_QCFLAG_FAILED)) {
+					ipr_cmd->qc->err_mask |= AC_ERR_TIMEOUT;
+					ipr_cmd->qc->flags |= ATA_QCFLAG_FAILED;
+				}
 			}
 		}
+		spin_unlock(&hrrq->_lock);
 	}
-
 	res->resetting_device = 1;
 	scmd_printk(KERN_ERR, scsi_cmd, "Resetting device\n");
 
@@ -4833,11 +4962,17 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd)
 		ata_std_error_handler(ap);
 		spin_lock_irq(scsi_cmd->device->host->host_lock);
 
-		list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) {
-			if (ipr_cmd->ioarcb.res_handle == res->res_handle) {
-				rc = -EIO;
-				break;
+		for_each_hrrq(hrrq, ioa_cfg) {
+			spin_lock(&hrrq->_lock);
+			list_for_each_entry(ipr_cmd,
+					    &hrrq->hrrq_pending_q, queue) {
+				if (ipr_cmd->ioarcb.res_handle ==
+				    res->res_handle) {
+					rc = -EIO;
+					break;
+				}
 			}
+			spin_unlock(&hrrq->_lock);
 		}
 	} else
 		rc = ipr_device_reset(ioa_cfg, res);
@@ -4890,7 +5025,7 @@ static void ipr_bus_reset_done(struct ipr_cmnd *ipr_cmd)
 	else
 		ipr_cmd->sibling->done(ipr_cmd->sibling);
 
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 	LEAVE;
 }
 
@@ -4951,6 +5086,7 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd)
 	struct ipr_cmd_pkt *cmd_pkt;
 	u32 ioasc, int_reg;
 	int op_found = 0;
+	struct ipr_hrr_queue *hrrq;
 
 	ENTER;
 	ioa_cfg = (struct ipr_ioa_cfg *)scsi_cmd->device->host->hostdata;
@@ -4960,7 +5096,8 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd)
 	 * This will force the mid-layer to call ipr_eh_host_reset,
 	 * which will then go to sleep and wait for the reset to complete
 	 */
-	if (ioa_cfg->in_reset_reload || ioa_cfg->ioa_is_dead)
+	if (ioa_cfg->in_reset_reload ||
+	    ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead)
 		return FAILED;
 	if (!res)
 		return FAILED;
@@ -4975,12 +5112,16 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd)
 	if (!ipr_is_gscsi(res))
 		return FAILED;
 
-	list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) {
-		if (ipr_cmd->scsi_cmd == scsi_cmd) {
-			ipr_cmd->done = ipr_scsi_eh_done;
-			op_found = 1;
-			break;
+	for_each_hrrq(hrrq, ioa_cfg) {
+		spin_lock(&hrrq->_lock);
+		list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) {
+			if (ipr_cmd->scsi_cmd == scsi_cmd) {
+				ipr_cmd->done = ipr_scsi_eh_done;
+				op_found = 1;
+				break;
+			}
 		}
+		spin_unlock(&hrrq->_lock);
 	}
 
 	if (!op_found)
@@ -5007,7 +5148,7 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd)
 		ipr_trace;
 	}
 
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+	list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q);
 	if (!ipr_is_naca_model(res))
 		res->needs_sync_complete = 1;
 
@@ -5099,6 +5240,9 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg,
 	} else {
 		if (int_reg & IPR_PCII_IOA_UNIT_CHECKED)
 			ioa_cfg->ioa_unit_checked = 1;
+		else if (int_reg & IPR_PCII_NO_HOST_RRQ)
+			dev_err(&ioa_cfg->pdev->dev,
+				"No Host RRQ. 0x%08X\n", int_reg);
 		else
 			dev_err(&ioa_cfg->pdev->dev,
 				"Permanent IOA failure. 0x%08X\n", int_reg);
@@ -5121,10 +5265,10 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg,
  * Return value:
  * 	none
  **/
-static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg)
+static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg, u16 number)
 {
 	ioa_cfg->errors_logged++;
-	dev_err(&ioa_cfg->pdev->dev, "%s\n", msg);
+	dev_err(&ioa_cfg->pdev->dev, "%s %d\n", msg, number);
 
 	if (WAIT_FOR_DUMP == ioa_cfg->sdt_state)
 		ioa_cfg->sdt_state = GET_DUMP;
@@ -5132,6 +5276,83 @@ static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg)
 	ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE);
 }
 
+static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget,
+						struct list_head *doneq)
+{
+	u32 ioasc;
+	u16 cmd_index;
+	struct ipr_cmnd *ipr_cmd;
+	struct ipr_ioa_cfg *ioa_cfg = hrr_queue->ioa_cfg;
+	int num_hrrq = 0;
+
+	/* If interrupts are disabled, ignore the interrupt */
+	if (!hrr_queue->allow_interrupts)
+		return 0;
+
+	while ((be32_to_cpu(*hrr_queue->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
+	       hrr_queue->toggle_bit) {
+
+		cmd_index = (be32_to_cpu(*hrr_queue->hrrq_curr) &
+			     IPR_HRRQ_REQ_RESP_HANDLE_MASK) >>
+			     IPR_HRRQ_REQ_RESP_HANDLE_SHIFT;
+
+		if (unlikely(cmd_index > hrr_queue->max_cmd_id ||
+			     cmd_index < hrr_queue->min_cmd_id)) {
+			ipr_isr_eh(ioa_cfg,
+				"Invalid response handle from IOA: ",
+				cmd_index);
+			break;
+		}
+
+		ipr_cmd = ioa_cfg->ipr_cmnd_list[cmd_index];
+		ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
+
+		ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, ioasc);
+
+		list_move_tail(&ipr_cmd->queue, doneq);
+
+		if (hrr_queue->hrrq_curr < hrr_queue->hrrq_end) {
+			hrr_queue->hrrq_curr++;
+		} else {
+			hrr_queue->hrrq_curr = hrr_queue->hrrq_start;
+			hrr_queue->toggle_bit ^= 1u;
+		}
+		num_hrrq++;
+		if (budget > 0 && num_hrrq >= budget)
+			break;
+	}
+
+	return num_hrrq;
+}
+
+static int ipr_iopoll(struct blk_iopoll *iop, int budget)
+{
+	struct ipr_ioa_cfg *ioa_cfg;
+	struct ipr_hrr_queue *hrrq;
+	struct ipr_cmnd *ipr_cmd, *temp;
+	unsigned long hrrq_flags;
+	int completed_ops;
+	LIST_HEAD(doneq);
+
+	hrrq = container_of(iop, struct ipr_hrr_queue, iopoll);
+	ioa_cfg = hrrq->ioa_cfg;
+
+	spin_lock_irqsave(hrrq->lock, hrrq_flags);
+	completed_ops = ipr_process_hrrq(hrrq, budget, &doneq);
+
+	if (completed_ops < budget)
+		blk_iopoll_complete(iop);
+	spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
+
+	list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) {
+		list_del(&ipr_cmd->queue);
+		del_timer(&ipr_cmd->timer);
+		ipr_cmd->fast_done(ipr_cmd);
+	}
+
+	return completed_ops;
+}
+
 /**
  * ipr_isr - Interrupt service routine
  * @irq:	irq number
@@ -5142,78 +5363,48 @@ static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg)
  **/
 static irqreturn_t ipr_isr(int irq, void *devp)
 {
-	struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)devp;
-	unsigned long lock_flags = 0;
+	struct ipr_hrr_queue *hrrq = (struct ipr_hrr_queue *)devp;
+	struct ipr_ioa_cfg *ioa_cfg = hrrq->ioa_cfg;
+	unsigned long hrrq_flags = 0;
 	u32 int_reg = 0;
-	u32 ioasc;
-	u16 cmd_index;
 	int num_hrrq = 0;
 	int irq_none = 0;
 	struct ipr_cmnd *ipr_cmd, *temp;
 	irqreturn_t rc = IRQ_NONE;
 	LIST_HEAD(doneq);
 
-	spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
-
+	spin_lock_irqsave(hrrq->lock, hrrq_flags);
 	/* If interrupts are disabled, ignore the interrupt */
-	if (!ioa_cfg->allow_interrupts) {
-		spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
+	if (!hrrq->allow_interrupts) {
+		spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
 		return IRQ_NONE;
 	}
 
 	while (1) {
-		ipr_cmd = NULL;
-
-		while ((be32_to_cpu(*ioa_cfg->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
-		       ioa_cfg->toggle_bit) {
-
-			cmd_index = (be32_to_cpu(*ioa_cfg->hrrq_curr) &
-				     IPR_HRRQ_REQ_RESP_HANDLE_MASK) >> IPR_HRRQ_REQ_RESP_HANDLE_SHIFT;
-
-			if (unlikely(cmd_index >= IPR_NUM_CMD_BLKS)) {
-				ipr_isr_eh(ioa_cfg, "Invalid response handle from IOA");
-				rc = IRQ_HANDLED;
-				goto unlock_out;
-			}
-
-			ipr_cmd = ioa_cfg->ipr_cmnd_list[cmd_index];
-
-			ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
-
-			ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, ioasc);
-
-			list_move_tail(&ipr_cmd->queue, &doneq);
-
-			rc = IRQ_HANDLED;
-
-			if (ioa_cfg->hrrq_curr < ioa_cfg->hrrq_end) {
-				ioa_cfg->hrrq_curr++;
-			} else {
-				ioa_cfg->hrrq_curr = ioa_cfg->hrrq_start;
-				ioa_cfg->toggle_bit ^= 1u;
-			}
-		}
+		if (ipr_process_hrrq(hrrq, -1, &doneq)) {
+			rc =  IRQ_HANDLED;
 
-		if (ipr_cmd && !ioa_cfg->clear_isr)
-			break;
+			if (!ioa_cfg->clear_isr)
+				break;
 
-		if (ipr_cmd != NULL) {
 			/* Clear the PCI interrupt */
 			num_hrrq = 0;
 			do {
-				writel(IPR_PCII_HRRQ_UPDATED, ioa_cfg->regs.clr_interrupt_reg32);
+				writel(IPR_PCII_HRRQ_UPDATED,
+				     ioa_cfg->regs.clr_interrupt_reg32);
 				int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32);
 			} while (int_reg & IPR_PCII_HRRQ_UPDATED &&
-					num_hrrq++ < IPR_MAX_HRRQ_RETRIES);
+				num_hrrq++ < IPR_MAX_HRRQ_RETRIES);
 
 		} else if (rc == IRQ_NONE && irq_none == 0) {
 			int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32);
 			irq_none++;
 		} else if (num_hrrq == IPR_MAX_HRRQ_RETRIES &&
 			   int_reg & IPR_PCII_HRRQ_UPDATED) {
-			ipr_isr_eh(ioa_cfg, "Error clearing HRRQ");
+			ipr_isr_eh(ioa_cfg,
+				"Error clearing HRRQ: ", num_hrrq);
 			rc = IRQ_HANDLED;
-			goto unlock_out;
+			break;
 		} else
 			break;
 	}
@@ -5221,14 +5412,64 @@ static irqreturn_t ipr_isr(int irq, void *devp)
 	if (unlikely(rc == IRQ_NONE))
 		rc = ipr_handle_other_interrupt(ioa_cfg, int_reg);
 
-unlock_out:
-	spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
+	spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
 	list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) {
 		list_del(&ipr_cmd->queue);
 		del_timer(&ipr_cmd->timer);
 		ipr_cmd->fast_done(ipr_cmd);
 	}
+	return rc;
+}
+
+/**
+ * ipr_isr_mhrrq - Interrupt service routine
+ * @irq:	irq number
+ * @devp:	pointer to ioa config struct
+ *
+ * Return value:
+ *	IRQ_NONE / IRQ_HANDLED
+ **/
+static irqreturn_t ipr_isr_mhrrq(int irq, void *devp)
+{
+	struct ipr_hrr_queue *hrrq = (struct ipr_hrr_queue *)devp;
+	struct ipr_ioa_cfg *ioa_cfg = hrrq->ioa_cfg;
+	unsigned long hrrq_flags = 0;
+	struct ipr_cmnd *ipr_cmd, *temp;
+	irqreturn_t rc = IRQ_NONE;
+	LIST_HEAD(doneq);
+
+	spin_lock_irqsave(hrrq->lock, hrrq_flags);
+
+	/* If interrupts are disabled, ignore the interrupt */
+	if (!hrrq->allow_interrupts) {
+		spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
+		return IRQ_NONE;
+	}
+
+	if (blk_iopoll_enabled && ioa_cfg->iopoll_weight &&
+			ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
+		if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
+		       hrrq->toggle_bit) {
+			if (!blk_iopoll_sched_prep(&hrrq->iopoll))
+				blk_iopoll_sched(&hrrq->iopoll);
+			spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
+			return IRQ_HANDLED;
+		}
+	} else {
+		if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
+			hrrq->toggle_bit)
+
+			if (ipr_process_hrrq(hrrq, -1, &doneq))
+				rc =  IRQ_HANDLED;
+	}
+
+	spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
 
+	list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) {
+		list_del(&ipr_cmd->queue);
+		del_timer(&ipr_cmd->timer);
+		ipr_cmd->fast_done(ipr_cmd);
+	}
 	return rc;
 }
 
@@ -5388,7 +5629,6 @@ static void ipr_erp_done(struct ipr_cmnd *ipr_cmd)
 {
 	struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd;
 	struct ipr_resource_entry *res = scsi_cmd->device->hostdata;
-	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
 	u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
 	if (IPR_IOASC_SENSE_KEY(ioasc) > 0) {
@@ -5406,7 +5646,7 @@ static void ipr_erp_done(struct ipr_cmnd *ipr_cmd)
 		res->in_erp = 0;
 	}
 	scsi_dma_unmap(ipr_cmd->scsi_cmd);
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 	scsi_cmd->scsi_done(scsi_cmd);
 }
 
@@ -5790,7 +6030,7 @@ static void ipr_erp_start(struct ipr_ioa_cfg *ioa_cfg,
 	}
 
 	scsi_dma_unmap(ipr_cmd->scsi_cmd);
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 	scsi_cmd->scsi_done(scsi_cmd);
 }
 
@@ -5809,21 +6049,21 @@ static void ipr_scsi_done(struct ipr_cmnd *ipr_cmd)
 	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
 	struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd;
 	u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
-	unsigned long lock_flags;
+	unsigned long hrrq_flags;
 
 	scsi_set_resid(scsi_cmd, be32_to_cpu(ipr_cmd->s.ioasa.hdr.residual_data_len));
 
 	if (likely(IPR_IOASC_SENSE_KEY(ioasc) == 0)) {
 		scsi_dma_unmap(scsi_cmd);
 
-		spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
-		list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+		spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags);
+		list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 		scsi_cmd->scsi_done(scsi_cmd);
-		spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
+		spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags);
 	} else {
-		spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
+		spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags);
 		ipr_erp_start(ioa_cfg, ipr_cmd);
-		spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
+		spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags);
 	}
 }
 
@@ -5846,22 +6086,34 @@ static int ipr_queuecommand(struct Scsi_Host *shost,
 	struct ipr_resource_entry *res;
 	struct ipr_ioarcb *ioarcb;
 	struct ipr_cmnd *ipr_cmd;
-	unsigned long lock_flags;
+	unsigned long hrrq_flags, lock_flags;
 	int rc;
+	struct ipr_hrr_queue *hrrq;
+	int hrrq_id;
 
 	ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata;
 
-	spin_lock_irqsave(shost->host_lock, lock_flags);
 	scsi_cmd->result = (DID_OK << 16);
 	res = scsi_cmd->device->hostdata;
 
+	if (ipr_is_gata(res) && res->sata_port) {
+		spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
+		rc = ata_sas_queuecmd(scsi_cmd, res->sata_port->ap);
+		spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
+		return rc;
+	}
+
+	hrrq_id = ipr_get_hrrq_index(ioa_cfg);
+	hrrq = &ioa_cfg->hrrq[hrrq_id];
+
+	spin_lock_irqsave(hrrq->lock, hrrq_flags);
 	/*
 	 * We are currently blocking all devices due to a host reset
 	 * We have told the host to stop giving us new requests, but
 	 * ERP ops don't count. FIXME
 	 */
-	if (unlikely(!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead)) {
-		spin_unlock_irqrestore(shost->host_lock, lock_flags);
+	if (unlikely(!hrrq->allow_cmds && !hrrq->ioa_is_dead && !hrrq->removing_ioa)) {
+		spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
 		return SCSI_MLQUEUE_HOST_BUSY;
 	}
 
@@ -5869,19 +6121,17 @@ static int ipr_queuecommand(struct Scsi_Host *shost,
 	 * FIXME - Create scsi_set_host_offline interface
 	 *  and the ioa_is_dead check can be removed
 	 */
-	if (unlikely(ioa_cfg->ioa_is_dead || !res)) {
-		spin_unlock_irqrestore(shost->host_lock, lock_flags);
+	if (unlikely(hrrq->ioa_is_dead || hrrq->removing_ioa || !res)) {
+		spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
 		goto err_nodev;
 	}
 
-	if (ipr_is_gata(res) && res->sata_port) {
-		rc = ata_sas_queuecmd(scsi_cmd, res->sata_port->ap);
-		spin_unlock_irqrestore(shost->host_lock, lock_flags);
-		return rc;
+	ipr_cmd = __ipr_get_free_ipr_cmnd(hrrq);
+	if (ipr_cmd == NULL) {
+		spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
+		return SCSI_MLQUEUE_HOST_BUSY;
 	}
-
-	ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg);
-	spin_unlock_irqrestore(shost->host_lock, lock_flags);
+	spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
 
 	ipr_init_ipr_cmnd(ipr_cmd, ipr_scsi_done);
 	ioarcb = &ipr_cmd->ioarcb;
@@ -5902,26 +6152,27 @@ static int ipr_queuecommand(struct Scsi_Host *shost,
 	}
 
 	if (scsi_cmd->cmnd[0] >= 0xC0 &&
-	    (!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE))
+	    (!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE)) {
 		ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD;
+	}
 
 	if (ioa_cfg->sis64)
 		rc = ipr_build_ioadl64(ioa_cfg, ipr_cmd);
 	else
 		rc = ipr_build_ioadl(ioa_cfg, ipr_cmd);
 
-	spin_lock_irqsave(shost->host_lock, lock_flags);
-	if (unlikely(rc || (!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead))) {
-		list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
-		spin_unlock_irqrestore(shost->host_lock, lock_flags);
+	spin_lock_irqsave(hrrq->lock, hrrq_flags);
+	if (unlikely(rc || (!hrrq->allow_cmds && !hrrq->ioa_is_dead))) {
+		list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q);
+		spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
 		if (!rc)
 			scsi_dma_unmap(scsi_cmd);
 		return SCSI_MLQUEUE_HOST_BUSY;
 	}
 
-	if (unlikely(ioa_cfg->ioa_is_dead)) {
-		list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
-		spin_unlock_irqrestore(shost->host_lock, lock_flags);
+	if (unlikely(hrrq->ioa_is_dead)) {
+		list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q);
+		spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
 		scsi_dma_unmap(scsi_cmd);
 		goto err_nodev;
 	}
@@ -5931,18 +6182,18 @@ static int ipr_queuecommand(struct Scsi_Host *shost,
 		ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_SYNC_COMPLETE;
 		res->needs_sync_complete = 0;
 	}
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q);
+	list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_pending_q);
 	ipr_trc_hook(ipr_cmd, IPR_TRACE_START, IPR_GET_RES_PHYS_LOC(res));
 	ipr_send_command(ipr_cmd);
-	spin_unlock_irqrestore(shost->host_lock, lock_flags);
+	spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
 	return 0;
 
 err_nodev:
-	spin_lock_irqsave(shost->host_lock, lock_flags);
+	spin_lock_irqsave(hrrq->lock, hrrq_flags);
 	memset(scsi_cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
 	scsi_cmd->result = (DID_NO_CONNECT << 16);
 	scsi_cmd->scsi_done(scsi_cmd);
-	spin_unlock_irqrestore(shost->host_lock, lock_flags);
+	spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
 	return 0;
 }
 
@@ -6040,7 +6291,7 @@ static void ipr_ata_phy_reset(struct ata_port *ap)
 		spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
 	}
 
-	if (!ioa_cfg->allow_cmds)
+	if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds)
 		goto out_unlock;
 
 	rc = ipr_device_reset(ioa_cfg, res);
@@ -6071,6 +6322,7 @@ static void ipr_ata_post_internal(struct ata_queued_cmd *qc)
 	struct ipr_sata_port *sata_port = qc->ap->private_data;
 	struct ipr_ioa_cfg *ioa_cfg = sata_port->ioa_cfg;
 	struct ipr_cmnd *ipr_cmd;
+	struct ipr_hrr_queue *hrrq;
 	unsigned long flags;
 
 	spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
@@ -6080,11 +6332,15 @@ static void ipr_ata_post_internal(struct ata_queued_cmd *qc)
 		spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
 	}
 
-	list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) {
-		if (ipr_cmd->qc == qc) {
-			ipr_device_reset(ioa_cfg, sata_port->res);
-			break;
+	for_each_hrrq(hrrq, ioa_cfg) {
+		spin_lock(&hrrq->_lock);
+		list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) {
+			if (ipr_cmd->qc == qc) {
+				ipr_device_reset(ioa_cfg, sata_port->res);
+				break;
+			}
 		}
+		spin_unlock(&hrrq->_lock);
 	}
 	spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
 }
@@ -6133,6 +6389,7 @@ static void ipr_sata_done(struct ipr_cmnd *ipr_cmd)
 	struct ipr_resource_entry *res = sata_port->res;
 	u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
+	spin_lock(&ipr_cmd->hrrq->_lock);
 	if (ipr_cmd->ioa_cfg->sis64)
 		memcpy(&sata_port->ioasa, &ipr_cmd->s.ioasa64.u.gata,
 		       sizeof(struct ipr_ioasa_gata));
@@ -6148,7 +6405,8 @@ static void ipr_sata_done(struct ipr_cmnd *ipr_cmd)
 		qc->err_mask |= __ac_err_mask(sata_port->ioasa.status);
 	else
 		qc->err_mask |= ac_err_mask(sata_port->ioasa.status);
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
+	spin_unlock(&ipr_cmd->hrrq->_lock);
 	ata_qc_complete(qc);
 }
 
@@ -6244,6 +6502,48 @@ static void ipr_build_ata_ioadl(struct ipr_cmnd *ipr_cmd,
 }
 
 /**
+ * ipr_qc_defer - Get a free ipr_cmd
+ * @qc:	queued command
+ *
+ * Return value:
+ *	0 if success
+ **/
+static int ipr_qc_defer(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	struct ipr_sata_port *sata_port = ap->private_data;
+	struct ipr_ioa_cfg *ioa_cfg = sata_port->ioa_cfg;
+	struct ipr_cmnd *ipr_cmd;
+	struct ipr_hrr_queue *hrrq;
+	int hrrq_id;
+
+	hrrq_id = ipr_get_hrrq_index(ioa_cfg);
+	hrrq = &ioa_cfg->hrrq[hrrq_id];
+
+	qc->lldd_task = NULL;
+	spin_lock(&hrrq->_lock);
+	if (unlikely(hrrq->ioa_is_dead)) {
+		spin_unlock(&hrrq->_lock);
+		return 0;
+	}
+
+	if (unlikely(!hrrq->allow_cmds)) {
+		spin_unlock(&hrrq->_lock);
+		return ATA_DEFER_LINK;
+	}
+
+	ipr_cmd = __ipr_get_free_ipr_cmnd(hrrq);
+	if (ipr_cmd == NULL) {
+		spin_unlock(&hrrq->_lock);
+		return ATA_DEFER_LINK;
+	}
+
+	qc->lldd_task = ipr_cmd;
+	spin_unlock(&hrrq->_lock);
+	return 0;
+}
+
+/**
  * ipr_qc_issue - Issue a SATA qc to a device
  * @qc:	queued command
  *
@@ -6260,10 +6560,23 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc)
 	struct ipr_ioarcb *ioarcb;
 	struct ipr_ioarcb_ata_regs *regs;
 
-	if (unlikely(!ioa_cfg->allow_cmds || ioa_cfg->ioa_is_dead))
+	if (qc->lldd_task == NULL)
+		ipr_qc_defer(qc);
+
+	ipr_cmd = qc->lldd_task;
+	if (ipr_cmd == NULL)
 		return AC_ERR_SYSTEM;
 
-	ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg);
+	qc->lldd_task = NULL;
+	spin_lock(&ipr_cmd->hrrq->_lock);
+	if (unlikely(!ipr_cmd->hrrq->allow_cmds ||
+			ipr_cmd->hrrq->ioa_is_dead)) {
+		list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
+		spin_unlock(&ipr_cmd->hrrq->_lock);
+		return AC_ERR_SYSTEM;
+	}
+
+	ipr_init_ipr_cmnd(ipr_cmd, ipr_lock_and_done);
 	ioarcb = &ipr_cmd->ioarcb;
 
 	if (ioa_cfg->sis64) {
@@ -6275,7 +6588,7 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc)
 	memset(regs, 0, sizeof(*regs));
 	ioarcb->add_cmd_parms_len = cpu_to_be16(sizeof(*regs));
 
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q);
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q);
 	ipr_cmd->qc = qc;
 	ipr_cmd->done = ipr_sata_done;
 	ipr_cmd->ioarcb.res_handle = res->res_handle;
@@ -6315,10 +6628,12 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc)
 
 	default:
 		WARN_ON(1);
+		spin_unlock(&ipr_cmd->hrrq->_lock);
 		return AC_ERR_INVALID;
 	}
 
 	ipr_send_command(ipr_cmd);
+	spin_unlock(&ipr_cmd->hrrq->_lock);
 
 	return 0;
 }
@@ -6357,6 +6672,7 @@ static struct ata_port_operations ipr_sata_ops = {
 	.hardreset = ipr_sata_reset,
 	.post_internal_cmd = ipr_ata_post_internal,
 	.qc_prep = ata_noop_qc_prep,
+	.qc_defer = ipr_qc_defer,
 	.qc_issue = ipr_qc_issue,
 	.qc_fill_rtf = ipr_qc_fill_rtf,
 	.port_start = ata_sas_port_start,
@@ -6425,14 +6741,17 @@ static int ipr_ioa_bringdown_done(struct ipr_cmnd *ipr_cmd)
 	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
 
 	ENTER;
+	if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa) {
+		ipr_trace;
+		spin_unlock_irq(ioa_cfg->host->host_lock);
+		scsi_unblock_requests(ioa_cfg->host);
+		spin_lock_irq(ioa_cfg->host->host_lock);
+	}
+
 	ioa_cfg->in_reset_reload = 0;
 	ioa_cfg->reset_retries = 0;
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 	wake_up_all(&ioa_cfg->reset_wait_q);
-
-	spin_unlock_irq(ioa_cfg->host->host_lock);
-	scsi_unblock_requests(ioa_cfg->host);
-	spin_lock_irq(ioa_cfg->host->host_lock);
 	LEAVE;
 
 	return IPR_RC_JOB_RETURN;
@@ -6454,11 +6773,16 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd)
 	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
 	struct ipr_resource_entry *res;
 	struct ipr_hostrcb *hostrcb, *temp;
-	int i = 0;
+	int i = 0, j;
 
 	ENTER;
 	ioa_cfg->in_reset_reload = 0;
-	ioa_cfg->allow_cmds = 1;
+	for (j = 0; j < ioa_cfg->hrrq_num; j++) {
+		spin_lock(&ioa_cfg->hrrq[j]._lock);
+		ioa_cfg->hrrq[j].allow_cmds = 1;
+		spin_unlock(&ioa_cfg->hrrq[j]._lock);
+	}
+	wmb();
 	ioa_cfg->reset_cmd = NULL;
 	ioa_cfg->doorbell |= IPR_RUNTIME_RESET;
 
@@ -6482,14 +6806,14 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd)
 	dev_info(&ioa_cfg->pdev->dev, "IOA initialized.\n");
 
 	ioa_cfg->reset_retries = 0;
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 	wake_up_all(&ioa_cfg->reset_wait_q);
 
 	spin_unlock(ioa_cfg->host->host_lock);
 	scsi_unblock_requests(ioa_cfg->host);
 	spin_lock(ioa_cfg->host->host_lock);
 
-	if (!ioa_cfg->allow_cmds)
+	if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds)
 		scsi_block_requests(ioa_cfg->host);
 
 	LEAVE;
@@ -6560,9 +6884,11 @@ static int ipr_set_supported_devs(struct ipr_cmnd *ipr_cmd)
 
 		if (!ioa_cfg->sis64)
 			ipr_cmd->job_step = ipr_set_supported_devs;
+		LEAVE;
 		return IPR_RC_JOB_RETURN;
 	}
 
+	LEAVE;
 	return IPR_RC_JOB_CONTINUE;
 }
 
@@ -6820,7 +7146,7 @@ static int ipr_reset_cmd_failed(struct ipr_cmnd *ipr_cmd)
 		ipr_cmd->ioarcb.cmd_pkt.cdb[0], ioasc);
 
 	ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE);
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 	return IPR_RC_JOB_RETURN;
 }
 
@@ -7278,46 +7604,71 @@ static int ipr_ioafp_identify_hrrq(struct ipr_cmnd *ipr_cmd)
 {
 	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
 	struct ipr_ioarcb *ioarcb = &ipr_cmd->ioarcb;
+	struct ipr_hrr_queue *hrrq;
 
 	ENTER;
+	ipr_cmd->job_step = ipr_ioafp_std_inquiry;
 	dev_info(&ioa_cfg->pdev->dev, "Starting IOA initialization sequence.\n");
 
-	ioarcb->cmd_pkt.cdb[0] = IPR_ID_HOST_RR_Q;
-	ioarcb->res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE);
+	if (ioa_cfg->identify_hrrq_index < ioa_cfg->hrrq_num) {
+		hrrq = &ioa_cfg->hrrq[ioa_cfg->identify_hrrq_index];
 
-	ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD;
-	if (ioa_cfg->sis64)
-		ioarcb->cmd_pkt.cdb[1] = 0x1;
-	ioarcb->cmd_pkt.cdb[2] =
-		((u64) ioa_cfg->host_rrq_dma >> 24) & 0xff;
-	ioarcb->cmd_pkt.cdb[3] =
-		((u64) ioa_cfg->host_rrq_dma >> 16) & 0xff;
-	ioarcb->cmd_pkt.cdb[4] =
-		((u64) ioa_cfg->host_rrq_dma >> 8) & 0xff;
-	ioarcb->cmd_pkt.cdb[5] =
-		((u64) ioa_cfg->host_rrq_dma) & 0xff;
-	ioarcb->cmd_pkt.cdb[7] =
-		((sizeof(u32) * IPR_NUM_CMD_BLKS) >> 8) & 0xff;
-	ioarcb->cmd_pkt.cdb[8] =
-		(sizeof(u32) * IPR_NUM_CMD_BLKS) & 0xff;
+		ioarcb->cmd_pkt.cdb[0] = IPR_ID_HOST_RR_Q;
+		ioarcb->res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE);
 
-	if (ioa_cfg->sis64) {
-		ioarcb->cmd_pkt.cdb[10] =
-			((u64) ioa_cfg->host_rrq_dma >> 56) & 0xff;
-		ioarcb->cmd_pkt.cdb[11] =
-			((u64) ioa_cfg->host_rrq_dma >> 48) & 0xff;
-		ioarcb->cmd_pkt.cdb[12] =
-			((u64) ioa_cfg->host_rrq_dma >> 40) & 0xff;
-		ioarcb->cmd_pkt.cdb[13] =
-			((u64) ioa_cfg->host_rrq_dma >> 32) & 0xff;
-	}
+		ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD;
+		if (ioa_cfg->sis64)
+			ioarcb->cmd_pkt.cdb[1] = 0x1;
 
-	ipr_cmd->job_step = ipr_ioafp_std_inquiry;
+		if (ioa_cfg->nvectors == 1)
+			ioarcb->cmd_pkt.cdb[1] &= ~IPR_ID_HRRQ_SELE_ENABLE;
+		else
+			ioarcb->cmd_pkt.cdb[1] |= IPR_ID_HRRQ_SELE_ENABLE;
+
+		ioarcb->cmd_pkt.cdb[2] =
+			((u64) hrrq->host_rrq_dma >> 24) & 0xff;
+		ioarcb->cmd_pkt.cdb[3] =
+			((u64) hrrq->host_rrq_dma >> 16) & 0xff;
+		ioarcb->cmd_pkt.cdb[4] =
+			((u64) hrrq->host_rrq_dma >> 8) & 0xff;
+		ioarcb->cmd_pkt.cdb[5] =
+			((u64) hrrq->host_rrq_dma) & 0xff;
+		ioarcb->cmd_pkt.cdb[7] =
+			((sizeof(u32) * hrrq->size) >> 8) & 0xff;
+		ioarcb->cmd_pkt.cdb[8] =
+			(sizeof(u32) * hrrq->size) & 0xff;
+
+		if (ioarcb->cmd_pkt.cdb[1] & IPR_ID_HRRQ_SELE_ENABLE)
+			ioarcb->cmd_pkt.cdb[9] =
+					ioa_cfg->identify_hrrq_index;
 
-	ipr_do_req(ipr_cmd, ipr_reset_ioa_job, ipr_timeout, IPR_INTERNAL_TIMEOUT);
+		if (ioa_cfg->sis64) {
+			ioarcb->cmd_pkt.cdb[10] =
+				((u64) hrrq->host_rrq_dma >> 56) & 0xff;
+			ioarcb->cmd_pkt.cdb[11] =
+				((u64) hrrq->host_rrq_dma >> 48) & 0xff;
+			ioarcb->cmd_pkt.cdb[12] =
+				((u64) hrrq->host_rrq_dma >> 40) & 0xff;
+			ioarcb->cmd_pkt.cdb[13] =
+				((u64) hrrq->host_rrq_dma >> 32) & 0xff;
+		}
+
+		if (ioarcb->cmd_pkt.cdb[1] & IPR_ID_HRRQ_SELE_ENABLE)
+			ioarcb->cmd_pkt.cdb[14] =
+					ioa_cfg->identify_hrrq_index;
+
+		ipr_do_req(ipr_cmd, ipr_reset_ioa_job, ipr_timeout,
+			   IPR_INTERNAL_TIMEOUT);
+
+		if (++ioa_cfg->identify_hrrq_index < ioa_cfg->hrrq_num)
+			ipr_cmd->job_step = ipr_ioafp_identify_hrrq;
+
+		LEAVE;
+		return IPR_RC_JOB_RETURN;
+	}
 
 	LEAVE;
-	return IPR_RC_JOB_RETURN;
+	return IPR_RC_JOB_CONTINUE;
 }
 
 /**
@@ -7365,7 +7716,9 @@ static void ipr_reset_timer_done(struct ipr_cmnd *ipr_cmd)
 static void ipr_reset_start_timer(struct ipr_cmnd *ipr_cmd,
 				  unsigned long timeout)
 {
-	list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q);
+
+	ENTER;
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q);
 	ipr_cmd->done = ipr_reset_ioa_job;
 
 	ipr_cmd->timer.data = (unsigned long) ipr_cmd;
@@ -7383,13 +7736,26 @@ static void ipr_reset_start_timer(struct ipr_cmnd *ipr_cmd,
  **/
 static void ipr_init_ioa_mem(struct ipr_ioa_cfg *ioa_cfg)
 {
-	memset(ioa_cfg->host_rrq, 0, sizeof(u32) * IPR_NUM_CMD_BLKS);
+	struct ipr_hrr_queue *hrrq;
 
-	/* Initialize Host RRQ pointers */
-	ioa_cfg->hrrq_start = ioa_cfg->host_rrq;
-	ioa_cfg->hrrq_end = &ioa_cfg->host_rrq[IPR_NUM_CMD_BLKS - 1];
-	ioa_cfg->hrrq_curr = ioa_cfg->hrrq_start;
-	ioa_cfg->toggle_bit = 1;
+	for_each_hrrq(hrrq, ioa_cfg) {
+		spin_lock(&hrrq->_lock);
+		memset(hrrq->host_rrq, 0, sizeof(u32) * hrrq->size);
+
+		/* Initialize Host RRQ pointers */
+		hrrq->hrrq_start = hrrq->host_rrq;
+		hrrq->hrrq_end = &hrrq->host_rrq[hrrq->size - 1];
+		hrrq->hrrq_curr = hrrq->hrrq_start;
+		hrrq->toggle_bit = 1;
+		spin_unlock(&hrrq->_lock);
+	}
+	wmb();
+
+	ioa_cfg->identify_hrrq_index = 0;
+	if (ioa_cfg->hrrq_num == 1)
+		atomic_set(&ioa_cfg->hrrq_index, 0);
+	else
+		atomic_set(&ioa_cfg->hrrq_index, 1);
 
 	/* Zero out config table */
 	memset(ioa_cfg->u.cfg_table, 0, ioa_cfg->cfg_table_size);
@@ -7446,7 +7812,8 @@ static int ipr_reset_next_stage(struct ipr_cmnd *ipr_cmd)
 	ipr_cmd->timer.function = (void (*)(unsigned long))ipr_oper_timeout;
 	ipr_cmd->done = ipr_reset_ioa_job;
 	add_timer(&ipr_cmd->timer);
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q);
+
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q);
 
 	return IPR_RC_JOB_RETURN;
 }
@@ -7466,12 +7833,18 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd)
 	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
 	volatile u32 int_reg;
 	volatile u64 maskval;
+	int i;
 
 	ENTER;
 	ipr_cmd->job_step = ipr_ioafp_identify_hrrq;
 	ipr_init_ioa_mem(ioa_cfg);
 
-	ioa_cfg->allow_interrupts = 1;
+	for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+		spin_lock(&ioa_cfg->hrrq[i]._lock);
+		ioa_cfg->hrrq[i].allow_interrupts = 1;
+		spin_unlock(&ioa_cfg->hrrq[i]._lock);
+	}
+	wmb();
 	if (ioa_cfg->sis64) {
 		/* Set the adapter to the correct endian mode. */
 		writel(IPR_ENDIAN_SWAP_KEY, ioa_cfg->regs.endian_swap_reg);
@@ -7511,7 +7884,7 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd)
 	ipr_cmd->timer.function = (void (*)(unsigned long))ipr_oper_timeout;
 	ipr_cmd->done = ipr_reset_ioa_job;
 	add_timer(&ipr_cmd->timer);
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q);
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q);
 
 	LEAVE;
 	return IPR_RC_JOB_RETURN;
@@ -8030,7 +8403,8 @@ static int ipr_reset_shutdown_ioa(struct ipr_cmnd *ipr_cmd)
 	int rc = IPR_RC_JOB_CONTINUE;
 
 	ENTER;
-	if (shutdown_type != IPR_SHUTDOWN_NONE && !ioa_cfg->ioa_is_dead) {
+	if (shutdown_type != IPR_SHUTDOWN_NONE &&
+			!ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) {
 		ipr_cmd->ioarcb.res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE);
 		ipr_cmd->ioarcb.cmd_pkt.request_type = IPR_RQTYPE_IOACMD;
 		ipr_cmd->ioarcb.cmd_pkt.cdb[0] = IPR_IOA_SHUTDOWN;
@@ -8078,7 +8452,8 @@ static void ipr_reset_ioa_job(struct ipr_cmnd *ipr_cmd)
 			 * We are doing nested adapter resets and this is
 			 * not the current reset job.
 			 */
-			list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+			list_add_tail(&ipr_cmd->queue,
+					&ipr_cmd->hrrq->hrrq_free_q);
 			return;
 		}
 
@@ -8113,10 +8488,17 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg,
 				    enum ipr_shutdown_type shutdown_type)
 {
 	struct ipr_cmnd *ipr_cmd;
+	int i;
 
 	ioa_cfg->in_reset_reload = 1;
-	ioa_cfg->allow_cmds = 0;
-	scsi_block_requests(ioa_cfg->host);
+	for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+		spin_lock(&ioa_cfg->hrrq[i]._lock);
+		ioa_cfg->hrrq[i].allow_cmds = 0;
+		spin_unlock(&ioa_cfg->hrrq[i]._lock);
+	}
+	wmb();
+	if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa)
+		scsi_block_requests(ioa_cfg->host);
 
 	ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg);
 	ioa_cfg->reset_cmd = ipr_cmd;
@@ -8141,7 +8523,9 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg,
 static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg,
 				   enum ipr_shutdown_type shutdown_type)
 {
-	if (ioa_cfg->ioa_is_dead)
+	int i;
+
+	if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead)
 		return;
 
 	if (ioa_cfg->in_reset_reload) {
@@ -8156,7 +8540,12 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg,
 			"IOA taken offline - error recovery failed\n");
 
 		ioa_cfg->reset_retries = 0;
-		ioa_cfg->ioa_is_dead = 1;
+		for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+			spin_lock(&ioa_cfg->hrrq[i]._lock);
+			ioa_cfg->hrrq[i].ioa_is_dead = 1;
+			spin_unlock(&ioa_cfg->hrrq[i]._lock);
+		}
+		wmb();
 
 		if (ioa_cfg->in_ioa_bringdown) {
 			ioa_cfg->reset_cmd = NULL;
@@ -8164,9 +8553,11 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg,
 			ipr_fail_all_ops(ioa_cfg);
 			wake_up_all(&ioa_cfg->reset_wait_q);
 
-			spin_unlock_irq(ioa_cfg->host->host_lock);
-			scsi_unblock_requests(ioa_cfg->host);
-			spin_lock_irq(ioa_cfg->host->host_lock);
+			if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa) {
+				spin_unlock_irq(ioa_cfg->host->host_lock);
+				scsi_unblock_requests(ioa_cfg->host);
+				spin_lock_irq(ioa_cfg->host->host_lock);
+			}
 			return;
 		} else {
 			ioa_cfg->in_ioa_bringdown = 1;
@@ -8188,9 +8579,17 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg,
  */
 static int ipr_reset_freeze(struct ipr_cmnd *ipr_cmd)
 {
+	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
+	int i;
+
 	/* Disallow new interrupts, avoid loop */
-	ipr_cmd->ioa_cfg->allow_interrupts = 0;
-	list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q);
+	for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+		spin_lock(&ioa_cfg->hrrq[i]._lock);
+		ioa_cfg->hrrq[i].allow_interrupts = 0;
+		spin_unlock(&ioa_cfg->hrrq[i]._lock);
+	}
+	wmb();
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q);
 	ipr_cmd->done = ipr_reset_ioa_job;
 	return IPR_RC_JOB_RETURN;
 }
@@ -8247,13 +8646,19 @@ static void ipr_pci_perm_failure(struct pci_dev *pdev)
 {
 	unsigned long flags = 0;
 	struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev);
+	int i;
 
 	spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
 	if (ioa_cfg->sdt_state == WAIT_FOR_DUMP)
 		ioa_cfg->sdt_state = ABORT_DUMP;
 	ioa_cfg->reset_retries = IPR_NUM_RESET_RELOAD_RETRIES;
 	ioa_cfg->in_ioa_bringdown = 1;
-	ioa_cfg->allow_cmds = 0;
+	for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+		spin_lock(&ioa_cfg->hrrq[i]._lock);
+		ioa_cfg->hrrq[i].allow_cmds = 0;
+		spin_unlock(&ioa_cfg->hrrq[i]._lock);
+	}
+	wmb();
 	ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE);
 	spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
 }
@@ -8310,12 +8715,11 @@ static int ipr_probe_ioa_part2(struct ipr_ioa_cfg *ioa_cfg)
 	} else
 		_ipr_initiate_ioa_reset(ioa_cfg, ipr_reset_enable_ioa,
 					IPR_SHUTDOWN_NONE);
-
 	spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags);
 	wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload);
 	spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags);
 
-	if (ioa_cfg->ioa_is_dead) {
+	if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) {
 		rc = -EIO;
 	} else if (ipr_invalid_adapter(ioa_cfg)) {
 		if (!ipr_testmode)
@@ -8376,8 +8780,13 @@ static void ipr_free_mem(struct ipr_ioa_cfg *ioa_cfg)
 	pci_free_consistent(ioa_cfg->pdev, sizeof(struct ipr_misc_cbs),
 			    ioa_cfg->vpd_cbs, ioa_cfg->vpd_cbs_dma);
 	ipr_free_cmd_blks(ioa_cfg);
-	pci_free_consistent(ioa_cfg->pdev, sizeof(u32) * IPR_NUM_CMD_BLKS,
-			    ioa_cfg->host_rrq, ioa_cfg->host_rrq_dma);
+
+	for (i = 0; i < ioa_cfg->hrrq_num; i++)
+		pci_free_consistent(ioa_cfg->pdev,
+					sizeof(u32) * ioa_cfg->hrrq[i].size,
+					ioa_cfg->hrrq[i].host_rrq,
+					ioa_cfg->hrrq[i].host_rrq_dma);
+
 	pci_free_consistent(ioa_cfg->pdev, ioa_cfg->cfg_table_size,
 			    ioa_cfg->u.cfg_table,
 			    ioa_cfg->cfg_table_dma);
@@ -8408,8 +8817,23 @@ static void ipr_free_all_resources(struct ipr_ioa_cfg *ioa_cfg)
 	struct pci_dev *pdev = ioa_cfg->pdev;
 
 	ENTER;
-	free_irq(pdev->irq, ioa_cfg);
-	pci_disable_msi(pdev);
+	if (ioa_cfg->intr_flag == IPR_USE_MSI ||
+	    ioa_cfg->intr_flag == IPR_USE_MSIX) {
+		int i;
+		for (i = 0; i < ioa_cfg->nvectors; i++)
+			free_irq(ioa_cfg->vectors_info[i].vec,
+				&ioa_cfg->hrrq[i]);
+	} else
+		free_irq(pdev->irq, &ioa_cfg->hrrq[0]);
+
+	if (ioa_cfg->intr_flag == IPR_USE_MSI) {
+		pci_disable_msi(pdev);
+		ioa_cfg->intr_flag &= ~IPR_USE_MSI;
+	} else if (ioa_cfg->intr_flag == IPR_USE_MSIX) {
+		pci_disable_msix(pdev);
+		ioa_cfg->intr_flag &= ~IPR_USE_MSIX;
+	}
+
 	iounmap(ioa_cfg->hdw_dma_regs);
 	pci_release_regions(pdev);
 	ipr_free_mem(ioa_cfg);
@@ -8430,7 +8854,7 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg)
 	struct ipr_cmnd *ipr_cmd;
 	struct ipr_ioarcb *ioarcb;
 	dma_addr_t dma_addr;
-	int i;
+	int i, entries_each_hrrq, hrrq_id = 0;
 
 	ioa_cfg->ipr_cmd_pool = pci_pool_create(IPR_NAME, ioa_cfg->pdev,
 						sizeof(struct ipr_cmnd), 512, 0);
@@ -8446,6 +8870,41 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg)
 		return -ENOMEM;
 	}
 
+	for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+		if (ioa_cfg->hrrq_num > 1) {
+			if (i == 0) {
+				entries_each_hrrq = IPR_NUM_INTERNAL_CMD_BLKS;
+				ioa_cfg->hrrq[i].min_cmd_id = 0;
+					ioa_cfg->hrrq[i].max_cmd_id =
+						(entries_each_hrrq - 1);
+			} else {
+				entries_each_hrrq =
+					IPR_NUM_BASE_CMD_BLKS/
+					(ioa_cfg->hrrq_num - 1);
+				ioa_cfg->hrrq[i].min_cmd_id =
+					IPR_NUM_INTERNAL_CMD_BLKS +
+					(i - 1) * entries_each_hrrq;
+				ioa_cfg->hrrq[i].max_cmd_id =
+					(IPR_NUM_INTERNAL_CMD_BLKS +
+					i * entries_each_hrrq - 1);
+			}
+		} else {
+			entries_each_hrrq = IPR_NUM_CMD_BLKS;
+			ioa_cfg->hrrq[i].min_cmd_id = 0;
+			ioa_cfg->hrrq[i].max_cmd_id = (entries_each_hrrq - 1);
+		}
+		ioa_cfg->hrrq[i].size = entries_each_hrrq;
+	}
+
+	BUG_ON(ioa_cfg->hrrq_num == 0);
+
+	i = IPR_NUM_CMD_BLKS -
+		ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].max_cmd_id - 1;
+	if (i > 0) {
+		ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].size += i;
+		ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].max_cmd_id += i;
+	}
+
 	for (i = 0; i < IPR_NUM_CMD_BLKS; i++) {
 		ipr_cmd = pci_pool_alloc(ioa_cfg->ipr_cmd_pool, GFP_KERNEL, &dma_addr);
 
@@ -8484,7 +8943,11 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg)
 		ipr_cmd->sense_buffer_dma = dma_addr +
 			offsetof(struct ipr_cmnd, sense_buffer);
 
-		list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+		ipr_cmd->ioarcb.cmd_pkt.hrrq_id = hrrq_id;
+		ipr_cmd->hrrq = &ioa_cfg->hrrq[hrrq_id];
+		list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
+		if (i >= ioa_cfg->hrrq[hrrq_id].max_cmd_id)
+			hrrq_id++;
 	}
 
 	return 0;
@@ -8516,6 +8979,10 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg)
 					     BITS_TO_LONGS(ioa_cfg->max_devs_supported), GFP_KERNEL);
 		ioa_cfg->vset_ids = kzalloc(sizeof(unsigned long) *
 					    BITS_TO_LONGS(ioa_cfg->max_devs_supported), GFP_KERNEL);
+
+		if (!ioa_cfg->target_ids || !ioa_cfg->array_ids
+			|| !ioa_cfg->vset_ids)
+			goto out_free_res_entries;
 	}
 
 	for (i = 0; i < ioa_cfg->max_devs_supported; i++) {
@@ -8530,15 +8997,34 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg)
 	if (!ioa_cfg->vpd_cbs)
 		goto out_free_res_entries;
 
+	for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+		INIT_LIST_HEAD(&ioa_cfg->hrrq[i].hrrq_free_q);
+		INIT_LIST_HEAD(&ioa_cfg->hrrq[i].hrrq_pending_q);
+		spin_lock_init(&ioa_cfg->hrrq[i]._lock);
+		if (i == 0)
+			ioa_cfg->hrrq[i].lock = ioa_cfg->host->host_lock;
+		else
+			ioa_cfg->hrrq[i].lock = &ioa_cfg->hrrq[i]._lock;
+	}
+
 	if (ipr_alloc_cmd_blks(ioa_cfg))
 		goto out_free_vpd_cbs;
 
-	ioa_cfg->host_rrq = pci_alloc_consistent(ioa_cfg->pdev,
-						 sizeof(u32) * IPR_NUM_CMD_BLKS,
-						 &ioa_cfg->host_rrq_dma);
-
-	if (!ioa_cfg->host_rrq)
-		goto out_ipr_free_cmd_blocks;
+	for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+		ioa_cfg->hrrq[i].host_rrq = pci_alloc_consistent(ioa_cfg->pdev,
+					sizeof(u32) * ioa_cfg->hrrq[i].size,
+					&ioa_cfg->hrrq[i].host_rrq_dma);
+
+		if (!ioa_cfg->hrrq[i].host_rrq)  {
+			while (--i > 0)
+				pci_free_consistent(pdev,
+					sizeof(u32) * ioa_cfg->hrrq[i].size,
+					ioa_cfg->hrrq[i].host_rrq,
+					ioa_cfg->hrrq[i].host_rrq_dma);
+			goto out_ipr_free_cmd_blocks;
+		}
+		ioa_cfg->hrrq[i].ioa_cfg = ioa_cfg;
+	}
 
 	ioa_cfg->u.cfg_table = pci_alloc_consistent(ioa_cfg->pdev,
 						    ioa_cfg->cfg_table_size,
@@ -8582,8 +9068,12 @@ out_free_hostrcb_dma:
 			    ioa_cfg->u.cfg_table,
 			    ioa_cfg->cfg_table_dma);
 out_free_host_rrq:
-	pci_free_consistent(pdev, sizeof(u32) * IPR_NUM_CMD_BLKS,
-			    ioa_cfg->host_rrq, ioa_cfg->host_rrq_dma);
+	for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+		pci_free_consistent(pdev,
+				sizeof(u32) * ioa_cfg->hrrq[i].size,
+				ioa_cfg->hrrq[i].host_rrq,
+				ioa_cfg->hrrq[i].host_rrq_dma);
+	}
 out_ipr_free_cmd_blocks:
 	ipr_free_cmd_blks(ioa_cfg);
 out_free_vpd_cbs:
@@ -8591,6 +9081,9 @@ out_free_vpd_cbs:
 			    ioa_cfg->vpd_cbs, ioa_cfg->vpd_cbs_dma);
 out_free_res_entries:
 	kfree(ioa_cfg->res_entries);
+	kfree(ioa_cfg->target_ids);
+	kfree(ioa_cfg->array_ids);
+	kfree(ioa_cfg->vset_ids);
 	goto out;
 }
 
@@ -8638,15 +9131,11 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg,
 	ioa_cfg->doorbell = IPR_DOORBELL;
 	sprintf(ioa_cfg->eye_catcher, IPR_EYECATCHER);
 	sprintf(ioa_cfg->trace_start, IPR_TRACE_START_LABEL);
-	sprintf(ioa_cfg->ipr_free_label, IPR_FREEQ_LABEL);
-	sprintf(ioa_cfg->ipr_pending_label, IPR_PENDQ_LABEL);
 	sprintf(ioa_cfg->cfg_table_start, IPR_CFG_TBL_START);
 	sprintf(ioa_cfg->resource_table_label, IPR_RES_TABLE_LABEL);
 	sprintf(ioa_cfg->ipr_hcam_label, IPR_HCAM_LABEL);
 	sprintf(ioa_cfg->ipr_cmd_label, IPR_CMD_LABEL);
 
-	INIT_LIST_HEAD(&ioa_cfg->free_q);
-	INIT_LIST_HEAD(&ioa_cfg->pending_q);
 	INIT_LIST_HEAD(&ioa_cfg->hostrcb_free_q);
 	INIT_LIST_HEAD(&ioa_cfg->hostrcb_pending_q);
 	INIT_LIST_HEAD(&ioa_cfg->free_res_q);
@@ -8724,6 +9213,88 @@ ipr_get_chip_info(const struct pci_device_id *dev_id)
 	return NULL;
 }
 
+static int ipr_enable_msix(struct ipr_ioa_cfg *ioa_cfg)
+{
+	struct msix_entry entries[IPR_MAX_MSIX_VECTORS];
+	int i, err, vectors;
+
+	for (i = 0; i < ARRAY_SIZE(entries); ++i)
+		entries[i].entry = i;
+
+	vectors = ipr_number_of_msix;
+
+	while ((err = pci_enable_msix(ioa_cfg->pdev, entries, vectors)) > 0)
+			vectors = err;
+
+	if (err < 0) {
+		pci_disable_msix(ioa_cfg->pdev);
+		return err;
+	}
+
+	if (!err) {
+		for (i = 0; i < vectors; i++)
+			ioa_cfg->vectors_info[i].vec = entries[i].vector;
+		ioa_cfg->nvectors = vectors;
+	}
+
+	return err;
+}
+
+static int ipr_enable_msi(struct ipr_ioa_cfg *ioa_cfg)
+{
+	int i, err, vectors;
+
+	vectors = ipr_number_of_msix;
+
+	while ((err = pci_enable_msi_block(ioa_cfg->pdev, vectors)) > 0)
+			vectors = err;
+
+	if (err < 0) {
+		pci_disable_msi(ioa_cfg->pdev);
+		return err;
+	}
+
+	if (!err) {
+		for (i = 0; i < vectors; i++)
+			ioa_cfg->vectors_info[i].vec = ioa_cfg->pdev->irq + i;
+		ioa_cfg->nvectors = vectors;
+	}
+
+	return err;
+}
+
+static void name_msi_vectors(struct ipr_ioa_cfg *ioa_cfg)
+{
+	int vec_idx, n = sizeof(ioa_cfg->vectors_info[0].desc) - 1;
+
+	for (vec_idx = 0; vec_idx < ioa_cfg->nvectors; vec_idx++) {
+		snprintf(ioa_cfg->vectors_info[vec_idx].desc, n,
+			 "host%d-%d", ioa_cfg->host->host_no, vec_idx);
+		ioa_cfg->vectors_info[vec_idx].
+			desc[strlen(ioa_cfg->vectors_info[vec_idx].desc)] = 0;
+	}
+}
+
+static int ipr_request_other_msi_irqs(struct ipr_ioa_cfg *ioa_cfg)
+{
+	int i, rc;
+
+	for (i = 1; i < ioa_cfg->nvectors; i++) {
+		rc = request_irq(ioa_cfg->vectors_info[i].vec,
+			ipr_isr_mhrrq,
+			0,
+			ioa_cfg->vectors_info[i].desc,
+			&ioa_cfg->hrrq[i]);
+		if (rc) {
+			while (--i >= 0)
+				free_irq(ioa_cfg->vectors_info[i].vec,
+					&ioa_cfg->hrrq[i]);
+			return rc;
+		}
+	}
+	return 0;
+}
+
 /**
  * ipr_test_intr - Handle the interrupt generated in ipr_test_msi().
  * @pdev:		PCI device struct
@@ -8740,6 +9311,7 @@ static irqreturn_t ipr_test_intr(int irq, void *devp)
 	unsigned long lock_flags = 0;
 	irqreturn_t rc = IRQ_HANDLED;
 
+	dev_info(&ioa_cfg->pdev->dev, "Received IRQ : %d\n", irq);
 	spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
 
 	ioa_cfg->msi_received = 1;
@@ -8787,9 +9359,9 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev)
 	writel(IPR_PCII_IO_DEBUG_ACKNOWLEDGE, ioa_cfg->regs.sense_interrupt_reg32);
 	int_reg = readl(ioa_cfg->regs.sense_interrupt_reg);
 	wait_event_timeout(ioa_cfg->msi_wait_q, ioa_cfg->msi_received, HZ);
+	spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
 	ipr_mask_and_clear_interrupts(ioa_cfg, ~IPR_PCII_IOA_TRANS_TO_OPER);
 
-	spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
 	if (!ioa_cfg->msi_received) {
 		/* MSI test failed */
 		dev_info(&pdev->dev, "MSI test failed.  Falling back to LSI.\n");
@@ -8806,8 +9378,7 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev)
 	return rc;
 }
 
-/**
- * ipr_probe_ioa - Allocates memory and does first stage of initialization
+ /* ipr_probe_ioa - Allocates memory and does first stage of initialization
  * @pdev:		PCI device struct
  * @dev_id:		PCI device id struct
  *
@@ -8823,6 +9394,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
 	void __iomem *ipr_regs;
 	int rc = PCIBIOS_SUCCESSFUL;
 	volatile u32 mask, uproc, interrupts;
+	unsigned long lock_flags;
 
 	ENTER;
 
@@ -8918,17 +9490,56 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
 		goto cleanup_nomem;
 	}
 
-	/* Enable MSI style interrupts if they are supported. */
-	if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && !pci_enable_msi(pdev)) {
+	if (ipr_number_of_msix > IPR_MAX_MSIX_VECTORS) {
+		dev_err(&pdev->dev, "The max number of MSIX is %d\n",
+			IPR_MAX_MSIX_VECTORS);
+		ipr_number_of_msix = IPR_MAX_MSIX_VECTORS;
+	}
+
+	if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI &&
+			ipr_enable_msix(ioa_cfg) == 0)
+		ioa_cfg->intr_flag = IPR_USE_MSIX;
+	else if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI &&
+			ipr_enable_msi(ioa_cfg) == 0)
+		ioa_cfg->intr_flag = IPR_USE_MSI;
+	else {
+		ioa_cfg->intr_flag = IPR_USE_LSI;
+		ioa_cfg->nvectors = 1;
+		dev_info(&pdev->dev, "Cannot enable MSI.\n");
+	}
+
+	if (ioa_cfg->intr_flag == IPR_USE_MSI ||
+	    ioa_cfg->intr_flag == IPR_USE_MSIX) {
 		rc = ipr_test_msi(ioa_cfg, pdev);
-		if (rc == -EOPNOTSUPP)
-			pci_disable_msi(pdev);
+		if (rc == -EOPNOTSUPP) {
+			if (ioa_cfg->intr_flag == IPR_USE_MSI) {
+				ioa_cfg->intr_flag &= ~IPR_USE_MSI;
+				pci_disable_msi(pdev);
+			 } else if (ioa_cfg->intr_flag == IPR_USE_MSIX) {
+				ioa_cfg->intr_flag &= ~IPR_USE_MSIX;
+				pci_disable_msix(pdev);
+			}
+
+			ioa_cfg->intr_flag = IPR_USE_LSI;
+			ioa_cfg->nvectors = 1;
+		}
 		else if (rc)
 			goto out_msi_disable;
-		else
-			dev_info(&pdev->dev, "MSI enabled with IRQ: %d\n", pdev->irq);
-	} else if (ipr_debug)
-		dev_info(&pdev->dev, "Cannot enable MSI.\n");
+		else {
+			if (ioa_cfg->intr_flag == IPR_USE_MSI)
+				dev_info(&pdev->dev,
+					"Request for %d MSIs succeeded with starting IRQ: %d\n",
+					ioa_cfg->nvectors, pdev->irq);
+			else if (ioa_cfg->intr_flag == IPR_USE_MSIX)
+				dev_info(&pdev->dev,
+					"Request for %d MSIXs succeeded.",
+					ioa_cfg->nvectors);
+		}
+	}
+
+	ioa_cfg->hrrq_num = min3(ioa_cfg->nvectors,
+				(unsigned int)num_online_cpus(),
+				(unsigned int)IPR_MAX_HRRQ_NUM);
 
 	/* Save away PCI config space for use following IOA reset */
 	rc = pci_save_state(pdev);
@@ -8975,11 +9586,24 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
 	if (interrupts & IPR_PCII_IOA_UNIT_CHECKED)
 		ioa_cfg->ioa_unit_checked = 1;
 
+	spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
 	ipr_mask_and_clear_interrupts(ioa_cfg, ~IPR_PCII_IOA_TRANS_TO_OPER);
-	rc = request_irq(pdev->irq, ipr_isr,
-			 ioa_cfg->msi_received ? 0 : IRQF_SHARED,
-			 IPR_NAME, ioa_cfg);
+	spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
 
+	if (ioa_cfg->intr_flag == IPR_USE_MSI
+			|| ioa_cfg->intr_flag == IPR_USE_MSIX) {
+		name_msi_vectors(ioa_cfg);
+		rc = request_irq(ioa_cfg->vectors_info[0].vec, ipr_isr,
+			0,
+			ioa_cfg->vectors_info[0].desc,
+			&ioa_cfg->hrrq[0]);
+		if (!rc)
+			rc = ipr_request_other_msi_irqs(ioa_cfg);
+	} else {
+		rc = request_irq(pdev->irq, ipr_isr,
+			 IRQF_SHARED,
+			 IPR_NAME, &ioa_cfg->hrrq[0]);
+	}
 	if (rc) {
 		dev_err(&pdev->dev, "Couldn't register IRQ %d! rc=%d\n",
 			pdev->irq, rc);
@@ -9004,7 +9628,10 @@ out:
 cleanup_nolog:
 	ipr_free_mem(ioa_cfg);
 out_msi_disable:
-	pci_disable_msi(pdev);
+	if (ioa_cfg->intr_flag == IPR_USE_MSI)
+		pci_disable_msi(pdev);
+	else if (ioa_cfg->intr_flag == IPR_USE_MSIX)
+		pci_disable_msix(pdev);
 cleanup_nomem:
 	iounmap(ipr_regs);
 out_release_regions:
@@ -9074,6 +9701,7 @@ static void __ipr_remove(struct pci_dev *pdev)
 {
 	unsigned long host_lock_flags = 0;
 	struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev);
+	int i;
 	ENTER;
 
 	spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags);
@@ -9083,6 +9711,12 @@ static void __ipr_remove(struct pci_dev *pdev)
 		spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags);
 	}
 
+	for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+		spin_lock(&ioa_cfg->hrrq[i]._lock);
+		ioa_cfg->hrrq[i].removing_ioa = 1;
+		spin_unlock(&ioa_cfg->hrrq[i]._lock);
+	}
+	wmb();
 	ipr_initiate_ioa_bringdown(ioa_cfg, IPR_SHUTDOWN_NORMAL);
 
 	spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags);
@@ -9138,7 +9772,7 @@ static void ipr_remove(struct pci_dev *pdev)
 static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
 {
 	struct ipr_ioa_cfg *ioa_cfg;
-	int rc;
+	int rc, i;
 
 	rc = ipr_probe_ioa(pdev, dev_id);
 
@@ -9185,6 +9819,17 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
 	scsi_add_device(ioa_cfg->host, IPR_IOA_BUS, IPR_IOA_TARGET, IPR_IOA_LUN);
 	ioa_cfg->allow_ml_add_del = 1;
 	ioa_cfg->host->max_channel = IPR_VSET_BUS;
+	ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight;
+
+	if (blk_iopoll_enabled && ioa_cfg->iopoll_weight &&
+			ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
+		for (i = 1; i < ioa_cfg->hrrq_num; i++) {
+			blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
+					ioa_cfg->iopoll_weight, ipr_iopoll);
+			blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
+		}
+	}
+
 	schedule_work(&ioa_cfg->work_q);
 	return 0;
 }
@@ -9203,8 +9848,16 @@ static void ipr_shutdown(struct pci_dev *pdev)
 {
 	struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev);
 	unsigned long lock_flags = 0;
+	int i;
 
 	spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
+	if (blk_iopoll_enabled && ioa_cfg->iopoll_weight &&
+			ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
+		ioa_cfg->iopoll_weight = 0;
+		for (i = 1; i < ioa_cfg->hrrq_num; i++)
+			blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
+	}
+
 	while (ioa_cfg->in_reset_reload) {
 		spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
 		wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload);
@@ -9277,6 +9930,8 @@ static struct pci_device_id ipr_pci_table[] = {
 	{ PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2,
 		PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57B2, 0, 0, 0 },
 	{ PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2,
+		PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C0, 0, 0, 0 },
+	{ PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2,
 		PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C3, 0, 0, 0 },
 	{ PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2,
 		PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C4, 0, 0, 0 },
@@ -9290,6 +9945,14 @@ static struct pci_device_id ipr_pci_table[] = {
 		PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C8, 0, 0, 0 },
 	{ PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE,
 		PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57CE, 0, 0, 0 },
+	{ PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE,
+		PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D5, 0, 0, 0 },
+	{ PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE,
+		PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D6, 0, 0, 0 },
+	{ PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE,
+		PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D7, 0, 0, 0 },
+	{ PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE,
+		PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D8, 0, 0, 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(pci, ipr_pci_table);
@@ -9316,9 +9979,7 @@ static struct pci_driver ipr_driver = {
  **/
 static void ipr_halt_done(struct ipr_cmnd *ipr_cmd)
 {
-	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
-
-	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+	list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 }
 
 /**
@@ -9340,7 +10001,7 @@ static int ipr_halt(struct notifier_block *nb, ulong event, void *buf)
 
 	list_for_each_entry(ioa_cfg, &ipr_ioa_head, queue) {
 		spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
-		if (!ioa_cfg->allow_cmds) {
+		if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) {
 			spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
 			continue;
 		}
diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
index c8a137f83bb1..21a6ff1ed5c6 100644
--- a/drivers/scsi/ipr.h
+++ b/drivers/scsi/ipr.h
@@ -32,14 +32,15 @@
 #include <linux/libata.h>
 #include <linux/list.h>
 #include <linux/kref.h>
+#include <linux/blk-iopoll.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
 
 /*
  * Literals
  */
-#define IPR_DRIVER_VERSION "2.5.4"
-#define IPR_DRIVER_DATE "(July 11, 2012)"
+#define IPR_DRIVER_VERSION "2.6.0"
+#define IPR_DRIVER_DATE "(November 16, 2012)"
 
 /*
  * IPR_MAX_CMD_PER_LUN: This defines the maximum number of outstanding
@@ -82,6 +83,7 @@
 
 #define IPR_SUBS_DEV_ID_57B4    0x033B
 #define IPR_SUBS_DEV_ID_57B2    0x035F
+#define IPR_SUBS_DEV_ID_57C0    0x0352
 #define IPR_SUBS_DEV_ID_57C3    0x0353
 #define IPR_SUBS_DEV_ID_57C4    0x0354
 #define IPR_SUBS_DEV_ID_57C6    0x0357
@@ -94,6 +96,10 @@
 #define IPR_SUBS_DEV_ID_574D    0x0356
 #define IPR_SUBS_DEV_ID_57C8    0x035D
 
+#define IPR_SUBS_DEV_ID_57D5    0x03FB
+#define IPR_SUBS_DEV_ID_57D6    0x03FC
+#define IPR_SUBS_DEV_ID_57D7    0x03FF
+#define IPR_SUBS_DEV_ID_57D8    0x03FE
 #define IPR_NAME				"ipr"
 
 /*
@@ -298,6 +304,9 @@ IPR_PCII_NO_HOST_RRQ | IPR_PCII_IOARRIN_LOST | IPR_PCII_MMIO_ERROR)
  * Misc literals
  */
 #define IPR_NUM_IOADL_ENTRIES			IPR_MAX_SGLIST
+#define IPR_MAX_MSIX_VECTORS		0x5
+#define IPR_MAX_HRRQ_NUM		0x10
+#define IPR_INIT_HRRQ			0x0
 
 /*
  * Adapter interface types
@@ -404,7 +413,7 @@ struct ipr_config_table_entry64 {
 	__be64 dev_id;
 	__be64 lun;
 	__be64 lun_wwn[2];
-#define IPR_MAX_RES_PATH_LENGTH		24
+#define IPR_MAX_RES_PATH_LENGTH		48
 	__be64 res_path;
 	struct ipr_std_inq_data std_inq_data;
 	u8 reserved2[4];
@@ -459,9 +468,40 @@ struct ipr_supported_device {
 	u8 reserved2[16];
 }__attribute__((packed, aligned (4)));
 
+struct ipr_hrr_queue {
+	struct ipr_ioa_cfg *ioa_cfg;
+	__be32 *host_rrq;
+	dma_addr_t host_rrq_dma;
+#define IPR_HRRQ_REQ_RESP_HANDLE_MASK	0xfffffffc
+#define IPR_HRRQ_RESP_BIT_SET		0x00000002
+#define IPR_HRRQ_TOGGLE_BIT		0x00000001
+#define IPR_HRRQ_REQ_RESP_HANDLE_SHIFT	2
+#define IPR_ID_HRRQ_SELE_ENABLE		0x02
+	volatile __be32 *hrrq_start;
+	volatile __be32 *hrrq_end;
+	volatile __be32 *hrrq_curr;
+
+	struct list_head hrrq_free_q;
+	struct list_head hrrq_pending_q;
+	spinlock_t _lock;
+	spinlock_t *lock;
+
+	volatile u32 toggle_bit;
+	u32 size;
+	u32 min_cmd_id;
+	u32 max_cmd_id;
+	u8 allow_interrupts:1;
+	u8 ioa_is_dead:1;
+	u8 allow_cmds:1;
+	u8 removing_ioa:1;
+
+	struct blk_iopoll iopoll;
+};
+
 /* Command packet structure */
 struct ipr_cmd_pkt {
-	__be16 reserved;		/* Reserved by IOA */
+	u8 reserved;		/* Reserved by IOA */
+	u8 hrrq_id;
 	u8 request_type;
 #define IPR_RQTYPE_SCSICDB		0x00
 #define IPR_RQTYPE_IOACMD		0x01
@@ -1022,6 +1062,10 @@ struct ipr_hostrcb64_fabric_desc {
 	struct ipr_hostrcb64_config_element elem[1];
 }__attribute__((packed, aligned (8)));
 
+#define for_each_hrrq(hrrq, ioa_cfg) \
+		for (hrrq = (ioa_cfg)->hrrq; \
+			hrrq < ((ioa_cfg)->hrrq + (ioa_cfg)->hrrq_num); hrrq++)
+
 #define for_each_fabric_cfg(fabric, cfg) \
 		for (cfg = (fabric)->elem; \
 			cfg < ((fabric)->elem + be16_to_cpu((fabric)->num_entries)); \
@@ -1308,6 +1352,7 @@ struct ipr_chip_cfg_t {
 	u16 max_cmds;
 	u8 cache_line_size;
 	u8 clear_isr;
+	u32 iopoll_weight;
 	struct ipr_interrupt_offsets regs;
 };
 
@@ -1317,6 +1362,7 @@ struct ipr_chip_t {
 	u16 intr_type;
 #define IPR_USE_LSI			0x00
 #define IPR_USE_MSI			0x01
+#define IPR_USE_MSIX			0x02
 	u16 sis_type;
 #define IPR_SIS32			0x00
 #define IPR_SIS64			0x01
@@ -1375,13 +1421,10 @@ struct ipr_ioa_cfg {
 
 	struct list_head queue;
 
-	u8 allow_interrupts:1;
 	u8 in_reset_reload:1;
 	u8 in_ioa_bringdown:1;
 	u8 ioa_unit_checked:1;
-	u8 ioa_is_dead:1;
 	u8 dump_taken:1;
-	u8 allow_cmds:1;
 	u8 allow_ml_add_del:1;
 	u8 needs_hard_reset:1;
 	u8 dual_raid:1;
@@ -1413,21 +1456,7 @@ struct ipr_ioa_cfg {
 	char trace_start[8];
 #define IPR_TRACE_START_LABEL			"trace"
 	struct ipr_trace_entry *trace;
-	u32 trace_index:IPR_NUM_TRACE_INDEX_BITS;
-
-	/*
-	 * Queue for free command blocks
-	 */
-	char ipr_free_label[8];
-#define IPR_FREEQ_LABEL			"free-q"
-	struct list_head free_q;
-
-	/*
-	 * Queue for command blocks outstanding to the adapter
-	 */
-	char ipr_pending_label[8];
-#define IPR_PENDQ_LABEL			"pend-q"
-	struct list_head pending_q;
+	atomic_t trace_index;
 
 	char cfg_table_start[8];
 #define IPR_CFG_TBL_START		"cfg"
@@ -1452,16 +1481,10 @@ struct ipr_ioa_cfg {
 	struct list_head hostrcb_free_q;
 	struct list_head hostrcb_pending_q;
 
-	__be32 *host_rrq;
-	dma_addr_t host_rrq_dma;
-#define IPR_HRRQ_REQ_RESP_HANDLE_MASK	0xfffffffc
-#define IPR_HRRQ_RESP_BIT_SET			0x00000002
-#define IPR_HRRQ_TOGGLE_BIT				0x00000001
-#define IPR_HRRQ_REQ_RESP_HANDLE_SHIFT	2
-	volatile __be32 *hrrq_start;
-	volatile __be32 *hrrq_end;
-	volatile __be32 *hrrq_curr;
-	volatile u32 toggle_bit;
+	struct ipr_hrr_queue hrrq[IPR_MAX_HRRQ_NUM];
+	u32 hrrq_num;
+	atomic_t  hrrq_index;
+	u16 identify_hrrq_index;
 
 	struct ipr_bus_attributes bus_attr[IPR_MAX_NUM_BUSES];
 
@@ -1507,6 +1530,17 @@ struct ipr_ioa_cfg {
 	u32 max_cmds;
 	struct ipr_cmnd **ipr_cmnd_list;
 	dma_addr_t *ipr_cmnd_list_dma;
+
+	u16 intr_flag;
+	unsigned int nvectors;
+
+	struct {
+		unsigned short vec;
+		char desc[22];
+	} vectors_info[IPR_MAX_MSIX_VECTORS];
+
+	u32 iopoll_weight;
+
 }; /* struct ipr_ioa_cfg */
 
 struct ipr_cmnd {
@@ -1544,6 +1578,7 @@ struct ipr_cmnd {
 		struct scsi_device *sdev;
 	} u;
 
+	struct ipr_hrr_queue *hrrq;
 	struct ipr_ioa_cfg *ioa_cfg;
 };
 
@@ -1717,7 +1752,8 @@ struct ipr_ucode_image_header {
 	if (ipr_is_device(hostrcb)) {					\
 		if ((hostrcb)->ioa_cfg->sis64) {			\
 			printk(KERN_ERR IPR_NAME ": %s: " fmt, 		\
-				ipr_format_res_path(hostrcb->hcam.u.error64.fd_res_path, \
+				ipr_format_res_path(hostrcb->ioa_cfg,	\
+					hostrcb->hcam.u.error64.fd_res_path, \
 					hostrcb->rp_buffer,		\
 					sizeof(hostrcb->rp_buffer)),	\
 				__VA_ARGS__);				\
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index fcb9d0b20ee4..09c81b2f2169 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -1381,10 +1381,10 @@ static void fc_fcp_timeout(unsigned long data)
 
 	fsp->state |= FC_SRB_FCP_PROCESSING_TMO;
 
-	if (fsp->state & FC_SRB_RCV_STATUS)
-		fc_fcp_complete_locked(fsp);
-	else if (rpriv->flags & FC_RP_FLAGS_REC_SUPPORTED)
+	if (rpriv->flags & FC_RP_FLAGS_REC_SUPPORTED)
 		fc_fcp_rec(fsp);
+	else if (fsp->state & FC_SRB_RCV_STATUS)
+		fc_fcp_complete_locked(fsp);
 	else
 		fc_fcp_recovery(fsp, FC_TIMED_OUT);
 	fsp->state &= ~FC_SRB_FCP_PROCESSING_TMO;
diff --git a/drivers/scsi/libfc/fc_libfc.h b/drivers/scsi/libfc/fc_libfc.h
index c2830cc66d6a..b74189d89322 100644
--- a/drivers/scsi/libfc/fc_libfc.h
+++ b/drivers/scsi/libfc/fc_libfc.h
@@ -41,25 +41,25 @@ extern unsigned int fc_debug_logging;
 
 #define FC_LIBFC_DBG(fmt, args...)					\
 	FC_CHECK_LOGGING(FC_LIBFC_LOGGING,				\
-			 printk(KERN_INFO "libfc: " fmt, ##args))
+			 pr_info("libfc: " fmt, ##args))
 
 #define FC_LPORT_DBG(lport, fmt, args...)				\
 	FC_CHECK_LOGGING(FC_LPORT_LOGGING,				\
-			 printk(KERN_INFO "host%u: lport %6.6x: " fmt,	\
-				(lport)->host->host_no,			\
-				(lport)->port_id, ##args))
+			 pr_info("host%u: lport %6.6x: " fmt,		\
+				 (lport)->host->host_no,		\
+				 (lport)->port_id, ##args))
 
-#define FC_DISC_DBG(disc, fmt, args...)				\
-	FC_CHECK_LOGGING(FC_DISC_LOGGING,			\
-			 printk(KERN_INFO "host%u: disc: " fmt,	\
-				fc_disc_lport(disc)->host->host_no,	\
-				##args))
+#define FC_DISC_DBG(disc, fmt, args...)					\
+	FC_CHECK_LOGGING(FC_DISC_LOGGING,				\
+			 pr_info("host%u: disc: " fmt,			\
+				 fc_disc_lport(disc)->host->host_no,	\
+				 ##args))
 
 #define FC_RPORT_ID_DBG(lport, port_id, fmt, args...)			\
 	FC_CHECK_LOGGING(FC_RPORT_LOGGING,				\
-			 printk(KERN_INFO "host%u: rport %6.6x: " fmt,	\
-				(lport)->host->host_no,			\
-				(port_id), ##args))
+			 pr_info("host%u: rport %6.6x: " fmt,		\
+				 (lport)->host->host_no,		\
+				 (port_id), ##args))
 
 #define FC_RPORT_DBG(rdata, fmt, args...)				\
 	FC_RPORT_ID_DBG((rdata)->local_port, (rdata)->ids.port_id, fmt, ##args)
@@ -70,13 +70,13 @@ extern unsigned int fc_debug_logging;
 		if ((pkt)->seq_ptr) {					\
 			struct fc_exch *_ep = NULL;			\
 			_ep = fc_seq_exch((pkt)->seq_ptr);		\
-			printk(KERN_INFO "host%u: fcp: %6.6x: "		\
+			pr_info("host%u: fcp: %6.6x: "			\
 				"xid %04x-%04x: " fmt,			\
 				(pkt)->lp->host->host_no,		\
 				(pkt)->rport->port_id,			\
 				(_ep)->oxid, (_ep)->rxid, ##args);	\
 		} else {						\
-			printk(KERN_INFO "host%u: fcp: %6.6x: " fmt,	\
+			pr_info("host%u: fcp: %6.6x: " fmt,		\
 				(pkt)->lp->host->host_no,		\
 				(pkt)->rport->port_id, ##args);		\
 		}							\
@@ -84,14 +84,14 @@ extern unsigned int fc_debug_logging;
 
 #define FC_EXCH_DBG(exch, fmt, args...)					\
 	FC_CHECK_LOGGING(FC_EXCH_LOGGING,				\
-			 printk(KERN_INFO "host%u: xid %4x: " fmt,	\
-				(exch)->lp->host->host_no,		\
-				exch->xid, ##args))
+			 pr_info("host%u: xid %4x: " fmt,		\
+				 (exch)->lp->host->host_no,		\
+				 exch->xid, ##args))
 
 #define FC_SCSI_DBG(lport, fmt, args...)				\
 	FC_CHECK_LOGGING(FC_SCSI_LOGGING,				\
-			 printk(KERN_INFO "host%u: scsi: " fmt,		\
-				(lport)->host->host_no,	##args))
+			 pr_info("host%u: scsi: " fmt,			\
+				 (lport)->host->host_no, ##args))
 
 /*
  * FC-4 Providers.
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 83aa1efec875..d518d17e940f 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -582,7 +582,7 @@ static void fc_rport_error(struct fc_rport_priv *rdata, struct fc_frame *fp)
 static void fc_rport_error_retry(struct fc_rport_priv *rdata,
 				 struct fc_frame *fp)
 {
-	unsigned long delay = FC_DEF_E_D_TOV;
+	unsigned long delay = msecs_to_jiffies(FC_DEF_E_D_TOV);
 
 	/* make sure this isn't an FC_EX_CLOSED error, never retry those */
 	if (PTR_ERR(fp) == -FC_EX_CLOSED)
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index df4c13a5534c..7706c99ec8bb 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -466,11 +466,13 @@ enum intr_type_t {
 	MSIX,
 };
 
+#define LPFC_CT_CTX_MAX		64
 struct unsol_rcv_ct_ctx {
 	uint32_t ctxt_id;
 	uint32_t SID;
-	uint32_t flags;
-#define UNSOL_VALID	0x00000001
+	uint32_t valid;
+#define UNSOL_INVALID		0
+#define UNSOL_VALID		1
 	uint16_t oxid;
 	uint16_t rxid;
 };
@@ -750,6 +752,15 @@ struct lpfc_hba {
 	void __iomem *ctrl_regs_memmap_p;/* Kernel memory mapped address for
 					    PCI BAR2 */
 
+	void __iomem *pci_bar0_memmap_p; /* Kernel memory mapped address for
+					    PCI BAR0 with dual-ULP support */
+	void __iomem *pci_bar2_memmap_p; /* Kernel memory mapped address for
+					    PCI BAR2 with dual-ULP support */
+	void __iomem *pci_bar4_memmap_p; /* Kernel memory mapped address for
+					    PCI BAR4 with dual-ULP support */
+#define PCI_64BIT_BAR0	0
+#define PCI_64BIT_BAR2	2
+#define PCI_64BIT_BAR4	4
 	void __iomem *MBslimaddr;	/* virtual address for mbox cmds */
 	void __iomem *HAregaddr;	/* virtual address for host attn reg */
 	void __iomem *CAregaddr;	/* virtual address for chip attn reg */
@@ -938,7 +949,7 @@ struct lpfc_hba {
 
 	spinlock_t ct_ev_lock; /* synchronize access to ct_ev_waiters */
 	struct list_head ct_ev_waiters;
-	struct unsol_rcv_ct_ctx ct_ctx[64];
+	struct unsol_rcv_ct_ctx ct_ctx[LPFC_CT_CTX_MAX];
 	uint32_t ctx_idx;
 
 	uint8_t menlo_flag;	/* menlo generic flags */
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index f7368eb80415..32d5683e6181 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -955,9 +955,9 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 		spin_lock_irqsave(&phba->ct_ev_lock, flags);
 		if (phba->sli_rev == LPFC_SLI_REV4) {
 			evt_dat->immed_dat = phba->ctx_idx;
-			phba->ctx_idx = (phba->ctx_idx + 1) % 64;
+			phba->ctx_idx = (phba->ctx_idx + 1) % LPFC_CT_CTX_MAX;
 			/* Provide warning for over-run of the ct_ctx array */
-			if (phba->ct_ctx[evt_dat->immed_dat].flags &
+			if (phba->ct_ctx[evt_dat->immed_dat].valid ==
 			    UNSOL_VALID)
 				lpfc_printf_log(phba, KERN_WARNING, LOG_ELS,
 						"2717 CT context array entry "
@@ -973,7 +973,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 				piocbq->iocb.unsli3.rcvsli3.ox_id;
 			phba->ct_ctx[evt_dat->immed_dat].SID =
 				piocbq->iocb.un.rcvels.remoteID;
-			phba->ct_ctx[evt_dat->immed_dat].flags = UNSOL_VALID;
+			phba->ct_ctx[evt_dat->immed_dat].valid = UNSOL_VALID;
 		} else
 			evt_dat->immed_dat = piocbq->iocb.ulpContext;
 
@@ -1013,6 +1013,47 @@ error_ct_unsol_exit:
 }
 
 /**
+ * lpfc_bsg_ct_unsol_abort - handler ct abort to management plane
+ * @phba: Pointer to HBA context object.
+ * @dmabuf: pointer to a dmabuf that describes the FC sequence
+ *
+ * This function handles abort to the CT command toward management plane
+ * for SLI4 port.
+ *
+ * If the pending context of a CT command to management plane present, clears
+ * such context and returns 1 for handled; otherwise, it returns 0 indicating
+ * no context exists.
+ **/
+int
+lpfc_bsg_ct_unsol_abort(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf)
+{
+	struct fc_frame_header fc_hdr;
+	struct fc_frame_header *fc_hdr_ptr = &fc_hdr;
+	int ctx_idx, handled = 0;
+	uint16_t oxid, rxid;
+	uint32_t sid;
+
+	memcpy(fc_hdr_ptr, dmabuf->hbuf.virt, sizeof(struct fc_frame_header));
+	sid = sli4_sid_from_fc_hdr(fc_hdr_ptr);
+	oxid = be16_to_cpu(fc_hdr_ptr->fh_ox_id);
+	rxid = be16_to_cpu(fc_hdr_ptr->fh_rx_id);
+
+	for (ctx_idx = 0; ctx_idx < LPFC_CT_CTX_MAX; ctx_idx++) {
+		if (phba->ct_ctx[ctx_idx].valid != UNSOL_VALID)
+			continue;
+		if (phba->ct_ctx[ctx_idx].rxid != rxid)
+			continue;
+		if (phba->ct_ctx[ctx_idx].oxid != oxid)
+			continue;
+		if (phba->ct_ctx[ctx_idx].SID != sid)
+			continue;
+		phba->ct_ctx[ctx_idx].valid = UNSOL_INVALID;
+		handled = 1;
+	}
+	return handled;
+}
+
+/**
  * lpfc_bsg_hba_set_event - process a SET_EVENT bsg vendor command
  * @job: SET_EVENT fc_bsg_job
  **/
@@ -1318,7 +1359,7 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag,
 	icmd->ulpClass = CLASS3;
 	if (phba->sli_rev == LPFC_SLI_REV4) {
 		/* Do not issue unsol response if oxid not marked as valid */
-		if (!(phba->ct_ctx[tag].flags & UNSOL_VALID)) {
+		if (phba->ct_ctx[tag].valid != UNSOL_VALID) {
 			rc = IOCB_ERROR;
 			goto issue_ct_rsp_exit;
 		}
@@ -1352,7 +1393,7 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag,
 				phba->sli4_hba.rpi_ids[ndlp->nlp_rpi];
 
 		/* The exchange is done, mark the entry as invalid */
-		phba->ct_ctx[tag].flags &= ~UNSOL_VALID;
+		phba->ct_ctx[tag].valid = UNSOL_INVALID;
 	} else
 		icmd->ulpContext = (ushort) tag;
 
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 69d66e3662cb..76ca65dae781 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -164,8 +164,7 @@ void lpfc_hb_timeout_handler(struct lpfc_hba *);
 
 void lpfc_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *,
 			 struct lpfc_iocbq *);
-void lpfc_sli4_ct_abort_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *,
-				    struct lpfc_iocbq *);
+int lpfc_ct_handle_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *);
 int lpfc_ns_cmd(struct lpfc_vport *, int, uint8_t, uint32_t);
 int lpfc_fdmi_cmd(struct lpfc_vport *, struct lpfc_nodelist *, int);
 void lpfc_fdmi_tmo(unsigned long);
@@ -427,6 +426,7 @@ int lpfc_bsg_request(struct fc_bsg_job *);
 int lpfc_bsg_timeout(struct fc_bsg_job *);
 int lpfc_bsg_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *,
 			     struct lpfc_iocbq *);
+int lpfc_bsg_ct_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *);
 void __lpfc_sli_ringtx_put(struct lpfc_hba *, struct lpfc_sli_ring *,
 	struct lpfc_iocbq *);
 struct lpfc_iocbq *lpfc_sli_ringtx_get(struct lpfc_hba *,
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 65f9fb6862e6..7bff3a19af56 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -164,37 +164,24 @@ lpfc_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 }
 
 /**
- * lpfc_sli4_ct_abort_unsol_event - Default handle for sli4 unsol abort
+ * lpfc_ct_handle_unsol_abort - ct upper level protocol abort handler
  * @phba: Pointer to HBA context object.
- * @pring: Pointer to the driver internal I/O ring.
- * @piocbq: Pointer to the IOCBQ.
+ * @dmabuf: pointer to a dmabuf that describes the FC sequence
  *
- * This function serves as the default handler for the sli4 unsolicited
- * abort event. It shall be invoked when there is no application interface
- * registered unsolicited abort handler. This handler does nothing but
- * just simply releases the dma buffer used by the unsol abort event.
+ * This function serves as the upper level protocol abort handler for CT
+ * protocol.
+ *
+ * Return 1 if abort has been handled, 0 otherwise.
  **/
-void
-lpfc_sli4_ct_abort_unsol_event(struct lpfc_hba *phba,
-			       struct lpfc_sli_ring *pring,
-			       struct lpfc_iocbq *piocbq)
+int
+lpfc_ct_handle_unsol_abort(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf)
 {
-	IOCB_t *icmd = &piocbq->iocb;
-	struct lpfc_dmabuf *bdeBuf;
-	uint32_t size;
+	int handled;
 
-	/* Forward abort event to any process registered to receive ct event */
-	if (lpfc_bsg_ct_unsol_event(phba, pring, piocbq) == 0)
-		return;
+	/* CT upper level goes through BSG */
+	handled = lpfc_bsg_ct_unsol_abort(phba, dmabuf);
 
-	/* If there is no BDE associated with IOCB, there is nothing to do */
-	if (icmd->ulpBdeCount == 0)
-		return;
-	bdeBuf = piocbq->context2;
-	piocbq->context2 = NULL;
-	size  = icmd->un.cont64[0].tus.f.bdeSize;
-	lpfc_ct_unsol_buffer(phba, piocbq, bdeBuf, size);
-	lpfc_in_buf_free(phba, bdeBuf);
+	return handled;
 }
 
 static void
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index b9440deaad45..08d156a9094f 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -3122,6 +3122,13 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 
 		case IOERR_SEQUENCE_TIMEOUT:
 		case IOERR_INVALID_RPI:
+			if (cmd == ELS_CMD_PLOGI &&
+			    did == NameServer_DID) {
+				/* Continue forever if plogi to */
+				/* the nameserver fails */
+				maxretry = 0;
+				delay = 100;
+			}
 			retry = 1;
 			break;
 		}
@@ -6517,7 +6524,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 	struct lpfc_nodelist *ndlp;
 	struct ls_rjt stat;
 	uint32_t *payload;
-	uint32_t cmd, did, newnode, rjt_err = 0;
+	uint32_t cmd, did, newnode;
+	uint8_t rjt_exp, rjt_err = 0;
 	IOCB_t *icmd = &elsiocb->iocb;
 
 	if (!vport || !(elsiocb->context2))
@@ -6606,12 +6614,14 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 		/* If Nport discovery is delayed, reject PLOGIs */
 		if (vport->fc_flag & FC_DISC_DELAYED) {
 			rjt_err = LSRJT_UNABLE_TPC;
+			rjt_exp = LSEXP_NOTHING_MORE;
 			break;
 		}
 		if (vport->port_state < LPFC_DISC_AUTH) {
 			if (!(phba->pport->fc_flag & FC_PT2PT) ||
 				(phba->pport->fc_flag & FC_PT2PT_PLOGI)) {
 				rjt_err = LSRJT_UNABLE_TPC;
+				rjt_exp = LSEXP_NOTHING_MORE;
 				break;
 			}
 			/* We get here, and drop thru, if we are PT2PT with
@@ -6648,6 +6658,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 		lpfc_send_els_event(vport, ndlp, payload);
 		if (vport->port_state < LPFC_DISC_AUTH) {
 			rjt_err = LSRJT_UNABLE_TPC;
+			rjt_exp = LSEXP_NOTHING_MORE;
 			break;
 		}
 		lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_LOGO);
@@ -6661,6 +6672,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 		lpfc_send_els_event(vport, ndlp, payload);
 		if (vport->port_state < LPFC_DISC_AUTH) {
 			rjt_err = LSRJT_UNABLE_TPC;
+			rjt_exp = LSEXP_NOTHING_MORE;
 			break;
 		}
 		lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLO);
@@ -6680,6 +6692,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 		phba->fc_stat.elsRcvADISC++;
 		if (vport->port_state < LPFC_DISC_AUTH) {
 			rjt_err = LSRJT_UNABLE_TPC;
+			rjt_exp = LSEXP_NOTHING_MORE;
 			break;
 		}
 		lpfc_disc_state_machine(vport, ndlp, elsiocb,
@@ -6693,6 +6706,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 		phba->fc_stat.elsRcvPDISC++;
 		if (vport->port_state < LPFC_DISC_AUTH) {
 			rjt_err = LSRJT_UNABLE_TPC;
+			rjt_exp = LSEXP_NOTHING_MORE;
 			break;
 		}
 		lpfc_disc_state_machine(vport, ndlp, elsiocb,
@@ -6730,6 +6744,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 		phba->fc_stat.elsRcvPRLI++;
 		if (vport->port_state < LPFC_DISC_AUTH) {
 			rjt_err = LSRJT_UNABLE_TPC;
+			rjt_exp = LSEXP_NOTHING_MORE;
 			break;
 		}
 		lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLI);
@@ -6813,6 +6828,11 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 		if (newnode)
 			lpfc_nlp_put(ndlp);
 		break;
+	case ELS_CMD_REC:
+			/* receive this due to exchange closed */
+			rjt_err = LSRJT_UNABLE_TPC;
+			rjt_exp = LSEXP_INVALID_OX_RX;
+		break;
 	default:
 		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
 			"RCV ELS cmd:     cmd:x%x did:x%x/ste:x%x",
@@ -6820,6 +6840,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 
 		/* Unsupported ELS command, reject */
 		rjt_err = LSRJT_CMD_UNSUPPORTED;
+		rjt_exp = LSEXP_NOTHING_MORE;
 
 		/* Unknown ELS command <elsCmd> received from NPORT <did> */
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
@@ -6834,7 +6855,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 	if (rjt_err) {
 		memset(&stat, 0, sizeof(stat));
 		stat.un.b.lsRjtRsnCode = rjt_err;
-		stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE;
+		stat.un.b.lsRjtRsnCodeExp = rjt_exp;
 		lpfc_els_rsp_reject(vport, stat.un.lsRjtError, elsiocb, ndlp,
 			NULL);
 	}
diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h
index 7398ca862e97..e8c476031703 100644
--- a/drivers/scsi/lpfc/lpfc_hw.h
+++ b/drivers/scsi/lpfc/lpfc_hw.h
@@ -538,6 +538,7 @@ struct fc_vft_header {
 #define ELS_CMD_ECHO      0x10000000
 #define ELS_CMD_TEST      0x11000000
 #define ELS_CMD_RRQ       0x12000000
+#define ELS_CMD_REC       0x13000000
 #define ELS_CMD_PRLI      0x20100014
 #define ELS_CMD_PRLO      0x21100014
 #define ELS_CMD_PRLO_ACC  0x02100014
@@ -574,6 +575,7 @@ struct fc_vft_header {
 #define ELS_CMD_ECHO      0x10
 #define ELS_CMD_TEST      0x11
 #define ELS_CMD_RRQ       0x12
+#define ELS_CMD_REC       0x13
 #define ELS_CMD_PRLI      0x14001020
 #define ELS_CMD_PRLO      0x14001021
 #define ELS_CMD_PRLO_ACC  0x14001002
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index a47cfbdd05f2..6e93b886cd4d 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -106,6 +106,7 @@ struct lpfc_sli_intf {
 
 #define LPFC_SLI4_MB_WORD_COUNT		64
 #define LPFC_MAX_MQ_PAGE		8
+#define LPFC_MAX_WQ_PAGE_V0		4
 #define LPFC_MAX_WQ_PAGE		8
 #define LPFC_MAX_CQ_PAGE		4
 #define LPFC_MAX_EQ_PAGE		8
@@ -703,24 +704,41 @@ struct lpfc_register {
  * BAR0.  The offsets are the same so the driver must account for
  * any base address difference.
  */
-#define LPFC_RQ_DOORBELL		0x00A0
-#define lpfc_rq_doorbell_num_posted_SHIFT	16
-#define lpfc_rq_doorbell_num_posted_MASK	0x3FFF
-#define lpfc_rq_doorbell_num_posted_WORD	word0
-#define lpfc_rq_doorbell_id_SHIFT		0
-#define lpfc_rq_doorbell_id_MASK		0xFFFF
-#define lpfc_rq_doorbell_id_WORD		word0
-
-#define LPFC_WQ_DOORBELL		0x0040
-#define lpfc_wq_doorbell_num_posted_SHIFT	24
-#define lpfc_wq_doorbell_num_posted_MASK	0x00FF
-#define lpfc_wq_doorbell_num_posted_WORD	word0
-#define lpfc_wq_doorbell_index_SHIFT		16
-#define lpfc_wq_doorbell_index_MASK		0x00FF
-#define lpfc_wq_doorbell_index_WORD		word0
-#define lpfc_wq_doorbell_id_SHIFT		0
-#define lpfc_wq_doorbell_id_MASK		0xFFFF
-#define lpfc_wq_doorbell_id_WORD		word0
+#define LPFC_ULP0_RQ_DOORBELL		0x00A0
+#define LPFC_ULP1_RQ_DOORBELL		0x00C0
+#define lpfc_rq_db_list_fm_num_posted_SHIFT	24
+#define lpfc_rq_db_list_fm_num_posted_MASK	0x00FF
+#define lpfc_rq_db_list_fm_num_posted_WORD	word0
+#define lpfc_rq_db_list_fm_index_SHIFT		16
+#define lpfc_rq_db_list_fm_index_MASK		0x00FF
+#define lpfc_rq_db_list_fm_index_WORD		word0
+#define lpfc_rq_db_list_fm_id_SHIFT		0
+#define lpfc_rq_db_list_fm_id_MASK		0xFFFF
+#define lpfc_rq_db_list_fm_id_WORD		word0
+#define lpfc_rq_db_ring_fm_num_posted_SHIFT	16
+#define lpfc_rq_db_ring_fm_num_posted_MASK	0x3FFF
+#define lpfc_rq_db_ring_fm_num_posted_WORD	word0
+#define lpfc_rq_db_ring_fm_id_SHIFT		0
+#define lpfc_rq_db_ring_fm_id_MASK		0xFFFF
+#define lpfc_rq_db_ring_fm_id_WORD		word0
+
+#define LPFC_ULP0_WQ_DOORBELL		0x0040
+#define LPFC_ULP1_WQ_DOORBELL		0x0060
+#define lpfc_wq_db_list_fm_num_posted_SHIFT	24
+#define lpfc_wq_db_list_fm_num_posted_MASK	0x00FF
+#define lpfc_wq_db_list_fm_num_posted_WORD	word0
+#define lpfc_wq_db_list_fm_index_SHIFT		16
+#define lpfc_wq_db_list_fm_index_MASK		0x00FF
+#define lpfc_wq_db_list_fm_index_WORD		word0
+#define lpfc_wq_db_list_fm_id_SHIFT		0
+#define lpfc_wq_db_list_fm_id_MASK		0xFFFF
+#define lpfc_wq_db_list_fm_id_WORD		word0
+#define lpfc_wq_db_ring_fm_num_posted_SHIFT     16
+#define lpfc_wq_db_ring_fm_num_posted_MASK      0x3FFF
+#define lpfc_wq_db_ring_fm_num_posted_WORD      word0
+#define lpfc_wq_db_ring_fm_id_SHIFT             0
+#define lpfc_wq_db_ring_fm_id_MASK              0xFFFF
+#define lpfc_wq_db_ring_fm_id_WORD              word0
 
 #define LPFC_EQCQ_DOORBELL		0x0120
 #define lpfc_eqcq_doorbell_se_SHIFT		31
@@ -1131,12 +1149,22 @@ struct lpfc_mbx_wq_create {
 		struct {	/* Version 0 Request */
 			uint32_t word0;
 #define lpfc_mbx_wq_create_num_pages_SHIFT	0
-#define lpfc_mbx_wq_create_num_pages_MASK	0x0000FFFF
+#define lpfc_mbx_wq_create_num_pages_MASK	0x000000FF
 #define lpfc_mbx_wq_create_num_pages_WORD	word0
+#define lpfc_mbx_wq_create_dua_SHIFT		8
+#define lpfc_mbx_wq_create_dua_MASK		0x00000001
+#define lpfc_mbx_wq_create_dua_WORD		word0
 #define lpfc_mbx_wq_create_cq_id_SHIFT		16
 #define lpfc_mbx_wq_create_cq_id_MASK		0x0000FFFF
 #define lpfc_mbx_wq_create_cq_id_WORD		word0
-			struct dma_address page[LPFC_MAX_WQ_PAGE];
+			struct dma_address page[LPFC_MAX_WQ_PAGE_V0];
+			uint32_t word9;
+#define lpfc_mbx_wq_create_bua_SHIFT		0
+#define lpfc_mbx_wq_create_bua_MASK		0x00000001
+#define lpfc_mbx_wq_create_bua_WORD		word9
+#define lpfc_mbx_wq_create_ulp_num_SHIFT	8
+#define lpfc_mbx_wq_create_ulp_num_MASK		0x000000FF
+#define lpfc_mbx_wq_create_ulp_num_WORD		word9
 		} request;
 		struct {	/* Version 1 Request */
 			uint32_t word0;	/* Word 0 is the same as in v0 */
@@ -1160,6 +1188,17 @@ struct lpfc_mbx_wq_create {
 #define lpfc_mbx_wq_create_q_id_SHIFT	0
 #define lpfc_mbx_wq_create_q_id_MASK	0x0000FFFF
 #define lpfc_mbx_wq_create_q_id_WORD	word0
+			uint32_t doorbell_offset;
+			uint32_t word2;
+#define lpfc_mbx_wq_create_bar_set_SHIFT	0
+#define lpfc_mbx_wq_create_bar_set_MASK		0x0000FFFF
+#define lpfc_mbx_wq_create_bar_set_WORD		word2
+#define WQ_PCI_BAR_0_AND_1	0x00
+#define WQ_PCI_BAR_2_AND_3	0x01
+#define WQ_PCI_BAR_4_AND_5	0x02
+#define lpfc_mbx_wq_create_db_format_SHIFT	16
+#define lpfc_mbx_wq_create_db_format_MASK	0x0000FFFF
+#define lpfc_mbx_wq_create_db_format_WORD	word2
 		} response;
 	} u;
 };
@@ -1223,14 +1262,31 @@ struct lpfc_mbx_rq_create {
 #define lpfc_mbx_rq_create_num_pages_SHIFT	0
 #define lpfc_mbx_rq_create_num_pages_MASK	0x0000FFFF
 #define lpfc_mbx_rq_create_num_pages_WORD	word0
+#define lpfc_mbx_rq_create_dua_SHIFT		16
+#define lpfc_mbx_rq_create_dua_MASK		0x00000001
+#define lpfc_mbx_rq_create_dua_WORD		word0
+#define lpfc_mbx_rq_create_bqu_SHIFT		17
+#define lpfc_mbx_rq_create_bqu_MASK		0x00000001
+#define lpfc_mbx_rq_create_bqu_WORD		word0
+#define lpfc_mbx_rq_create_ulp_num_SHIFT	24
+#define lpfc_mbx_rq_create_ulp_num_MASK		0x000000FF
+#define lpfc_mbx_rq_create_ulp_num_WORD		word0
 			struct rq_context context;
 			struct dma_address page[LPFC_MAX_WQ_PAGE];
 		} request;
 		struct {
 			uint32_t word0;
-#define lpfc_mbx_rq_create_q_id_SHIFT	0
-#define lpfc_mbx_rq_create_q_id_MASK	0x0000FFFF
-#define lpfc_mbx_rq_create_q_id_WORD	word0
+#define lpfc_mbx_rq_create_q_id_SHIFT		0
+#define lpfc_mbx_rq_create_q_id_MASK		0x0000FFFF
+#define lpfc_mbx_rq_create_q_id_WORD		word0
+			uint32_t doorbell_offset;
+			uint32_t word2;
+#define lpfc_mbx_rq_create_bar_set_SHIFT	0
+#define lpfc_mbx_rq_create_bar_set_MASK		0x0000FFFF
+#define lpfc_mbx_rq_create_bar_set_WORD		word2
+#define lpfc_mbx_rq_create_db_format_SHIFT	16
+#define lpfc_mbx_rq_create_db_format_MASK	0x0000FFFF
+#define lpfc_mbx_rq_create_db_format_WORD	word2
 		} response;
 	} u;
 };
@@ -1388,6 +1444,33 @@ struct lpfc_mbx_get_rsrc_extent_info {
 	} u;
 };
 
+struct lpfc_mbx_query_fw_config {
+	struct mbox_header header;
+	struct {
+		uint32_t config_number;
+#define	LPFC_FC_FCOE		0x00000007
+		uint32_t asic_revision;
+		uint32_t physical_port;
+		uint32_t function_mode;
+#define LPFC_FCOE_INI_MODE	0x00000040
+#define LPFC_FCOE_TGT_MODE	0x00000080
+#define LPFC_DUA_MODE		0x00000800
+		uint32_t ulp0_mode;
+#define LPFC_ULP_FCOE_INIT_MODE	0x00000040
+#define LPFC_ULP_FCOE_TGT_MODE	0x00000080
+		uint32_t ulp0_nap_words[12];
+		uint32_t ulp1_mode;
+		uint32_t ulp1_nap_words[12];
+		uint32_t function_capabilities;
+		uint32_t cqid_base;
+		uint32_t cqid_tot;
+		uint32_t eqid_base;
+		uint32_t eqid_tot;
+		uint32_t ulp0_nap2_words[2];
+		uint32_t ulp1_nap2_words[2];
+	} rsp;
+};
+
 struct lpfc_id_range {
 	uint32_t word5;
 #define lpfc_mbx_rsrc_id_word4_0_SHIFT	0
@@ -1803,51 +1886,6 @@ struct lpfc_mbx_redisc_fcf_tbl {
 #define lpfc_mbx_redisc_fcf_index_WORD		word12
 };
 
-struct lpfc_mbx_query_fw_cfg {
-	struct mbox_header header;
-	uint32_t config_number;
-	uint32_t asic_rev;
-	uint32_t phys_port;
-	uint32_t function_mode;
-/* firmware Function Mode */
-#define lpfc_function_mode_toe_SHIFT		0
-#define lpfc_function_mode_toe_MASK		0x00000001
-#define lpfc_function_mode_toe_WORD		function_mode
-#define lpfc_function_mode_nic_SHIFT		1
-#define lpfc_function_mode_nic_MASK		0x00000001
-#define lpfc_function_mode_nic_WORD		function_mode
-#define lpfc_function_mode_rdma_SHIFT		2
-#define lpfc_function_mode_rdma_MASK		0x00000001
-#define lpfc_function_mode_rdma_WORD		function_mode
-#define lpfc_function_mode_vm_SHIFT		3
-#define lpfc_function_mode_vm_MASK		0x00000001
-#define lpfc_function_mode_vm_WORD		function_mode
-#define lpfc_function_mode_iscsi_i_SHIFT	4
-#define lpfc_function_mode_iscsi_i_MASK		0x00000001
-#define lpfc_function_mode_iscsi_i_WORD		function_mode
-#define lpfc_function_mode_iscsi_t_SHIFT	5
-#define lpfc_function_mode_iscsi_t_MASK		0x00000001
-#define lpfc_function_mode_iscsi_t_WORD		function_mode
-#define lpfc_function_mode_fcoe_i_SHIFT		6
-#define lpfc_function_mode_fcoe_i_MASK		0x00000001
-#define lpfc_function_mode_fcoe_i_WORD		function_mode
-#define lpfc_function_mode_fcoe_t_SHIFT		7
-#define lpfc_function_mode_fcoe_t_MASK		0x00000001
-#define lpfc_function_mode_fcoe_t_WORD		function_mode
-#define lpfc_function_mode_dal_SHIFT		8
-#define lpfc_function_mode_dal_MASK		0x00000001
-#define lpfc_function_mode_dal_WORD		function_mode
-#define lpfc_function_mode_lro_SHIFT		9
-#define lpfc_function_mode_lro_MASK		0x00000001
-#define lpfc_function_mode_lro_WORD		function_mode
-#define lpfc_function_mode_flex10_SHIFT		10
-#define lpfc_function_mode_flex10_MASK		0x00000001
-#define lpfc_function_mode_flex10_WORD		function_mode
-#define lpfc_function_mode_ncsi_SHIFT		11
-#define lpfc_function_mode_ncsi_MASK		0x00000001
-#define lpfc_function_mode_ncsi_WORD		function_mode
-};
-
 /* Status field for embedded SLI_CONFIG mailbox command */
 #define STATUS_SUCCESS					0x0
 #define STATUS_FAILED 					0x1
@@ -2965,7 +3003,7 @@ struct lpfc_mqe {
 		struct lpfc_mbx_read_config rd_config;
 		struct lpfc_mbx_request_features req_ftrs;
 		struct lpfc_mbx_post_hdr_tmpl hdr_tmpl;
-		struct lpfc_mbx_query_fw_cfg query_fw_cfg;
+		struct lpfc_mbx_query_fw_config query_fw_cfg;
 		struct lpfc_mbx_supp_pages supp_pages;
 		struct lpfc_mbx_pc_sli4_params sli4_params;
 		struct lpfc_mbx_get_sli4_parameters get_sli4_parameters;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 89ad55807012..314b4f61b9e3 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -3165,14 +3165,10 @@ destroy_port(struct lpfc_vport *vport)
 int
 lpfc_get_instance(void)
 {
-	int instance = 0;
+	int ret;
 
-	/* Assign an unused number */
-	if (!idr_pre_get(&lpfc_hba_index, GFP_KERNEL))
-		return -1;
-	if (idr_get_new(&lpfc_hba_index, NULL, &instance))
-		return -1;
-	return instance;
+	ret = idr_alloc(&lpfc_hba_index, NULL, 0, 0, GFP_KERNEL);
+	return ret < 0 ? -1 : ret;
 }
 
 /**
@@ -6233,9 +6229,11 @@ lpfc_sli4_bar0_register_memmap(struct lpfc_hba *phba, uint32_t if_type)
 			phba->sli4_hba.conf_regs_memmap_p +
 						LPFC_CTL_PORT_SEM_OFFSET;
 		phba->sli4_hba.RQDBregaddr =
-			phba->sli4_hba.conf_regs_memmap_p + LPFC_RQ_DOORBELL;
+			phba->sli4_hba.conf_regs_memmap_p +
+						LPFC_ULP0_RQ_DOORBELL;
 		phba->sli4_hba.WQDBregaddr =
-			phba->sli4_hba.conf_regs_memmap_p + LPFC_WQ_DOORBELL;
+			phba->sli4_hba.conf_regs_memmap_p +
+						LPFC_ULP0_WQ_DOORBELL;
 		phba->sli4_hba.EQCQDBregaddr =
 			phba->sli4_hba.conf_regs_memmap_p + LPFC_EQCQ_DOORBELL;
 		phba->sli4_hba.MQDBregaddr =
@@ -6289,9 +6287,11 @@ lpfc_sli4_bar2_register_memmap(struct lpfc_hba *phba, uint32_t vf)
 		return -ENODEV;
 
 	phba->sli4_hba.RQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
-				vf * LPFC_VFR_PAGE_SIZE + LPFC_RQ_DOORBELL);
+				vf * LPFC_VFR_PAGE_SIZE +
+					LPFC_ULP0_RQ_DOORBELL);
 	phba->sli4_hba.WQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
-				vf * LPFC_VFR_PAGE_SIZE + LPFC_WQ_DOORBELL);
+				vf * LPFC_VFR_PAGE_SIZE +
+					LPFC_ULP0_WQ_DOORBELL);
 	phba->sli4_hba.EQCQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
 				vf * LPFC_VFR_PAGE_SIZE + LPFC_EQCQ_DOORBELL);
 	phba->sli4_hba.MQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
@@ -6987,6 +6987,19 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
 		phba->sli4_hba.fcp_wq = NULL;
 	}
 
+	if (phba->pci_bar0_memmap_p) {
+		iounmap(phba->pci_bar0_memmap_p);
+		phba->pci_bar0_memmap_p = NULL;
+	}
+	if (phba->pci_bar2_memmap_p) {
+		iounmap(phba->pci_bar2_memmap_p);
+		phba->pci_bar2_memmap_p = NULL;
+	}
+	if (phba->pci_bar4_memmap_p) {
+		iounmap(phba->pci_bar4_memmap_p);
+		phba->pci_bar4_memmap_p = NULL;
+	}
+
 	/* Release FCP CQ mapping array */
 	if (phba->sli4_hba.fcp_cq_map != NULL) {
 		kfree(phba->sli4_hba.fcp_cq_map);
@@ -7050,6 +7063,53 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 	int rc = -ENOMEM;
 	int fcp_eqidx, fcp_cqidx, fcp_wqidx;
 	int fcp_cq_index = 0;
+	uint32_t shdr_status, shdr_add_status;
+	union lpfc_sli4_cfg_shdr *shdr;
+	LPFC_MBOXQ_t *mboxq;
+	uint32_t length;
+
+	/* Check for dual-ULP support */
+	mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mboxq) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"3249 Unable to allocate memory for "
+				"QUERY_FW_CFG mailbox command\n");
+		return -ENOMEM;
+	}
+	length = (sizeof(struct lpfc_mbx_query_fw_config) -
+		  sizeof(struct lpfc_sli4_cfg_mhdr));
+	lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON,
+			 LPFC_MBOX_OPCODE_QUERY_FW_CFG,
+			 length, LPFC_SLI4_MBX_EMBED);
+
+	rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+
+	shdr = (union lpfc_sli4_cfg_shdr *)
+			&mboxq->u.mqe.un.sli4_config.header.cfg_shdr;
+	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+	if (shdr_status || shdr_add_status || rc) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"3250 QUERY_FW_CFG mailbox failed with status "
+				"x%x add_status x%x, mbx status x%x\n",
+				shdr_status, shdr_add_status, rc);
+		if (rc != MBX_TIMEOUT)
+			mempool_free(mboxq, phba->mbox_mem_pool);
+		rc = -ENXIO;
+		goto out_error;
+	}
+
+	phba->sli4_hba.fw_func_mode =
+			mboxq->u.mqe.un.query_fw_cfg.rsp.function_mode;
+	phba->sli4_hba.ulp0_mode = mboxq->u.mqe.un.query_fw_cfg.rsp.ulp0_mode;
+	phba->sli4_hba.ulp1_mode = mboxq->u.mqe.un.query_fw_cfg.rsp.ulp1_mode;
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+			"3251 QUERY_FW_CFG: func_mode:x%x, ulp0_mode:x%x, "
+			"ulp1_mode:x%x\n", phba->sli4_hba.fw_func_mode,
+			phba->sli4_hba.ulp0_mode, phba->sli4_hba.ulp1_mode);
+
+	if (rc != MBX_TIMEOUT)
+		mempool_free(mboxq, phba->mbox_mem_pool);
 
 	/*
 	 * Set up HBA Event Queues (EQs)
@@ -7664,78 +7724,6 @@ out:
 }
 
 /**
- * lpfc_sli4_send_nop_mbox_cmds - Send sli-4 nop mailbox commands
- * @phba: pointer to lpfc hba data structure.
- * @cnt: number of nop mailbox commands to send.
- *
- * This routine is invoked to send a number @cnt of NOP mailbox command and
- * wait for each command to complete.
- *
- * Return: the number of NOP mailbox command completed.
- **/
-static int
-lpfc_sli4_send_nop_mbox_cmds(struct lpfc_hba *phba, uint32_t cnt)
-{
-	LPFC_MBOXQ_t *mboxq;
-	int length, cmdsent;
-	uint32_t mbox_tmo;
-	uint32_t rc = 0;
-	uint32_t shdr_status, shdr_add_status;
-	union lpfc_sli4_cfg_shdr *shdr;
-
-	if (cnt == 0) {
-		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
-				"2518 Requested to send 0 NOP mailbox cmd\n");
-		return cnt;
-	}
-
-	mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
-	if (!mboxq) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"2519 Unable to allocate memory for issuing "
-				"NOP mailbox command\n");
-		return 0;
-	}
-
-	/* Set up NOP SLI4_CONFIG mailbox-ioctl command */
-	length = (sizeof(struct lpfc_mbx_nop) -
-		  sizeof(struct lpfc_sli4_cfg_mhdr));
-
-	for (cmdsent = 0; cmdsent < cnt; cmdsent++) {
-		lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON,
-				 LPFC_MBOX_OPCODE_NOP, length,
-				 LPFC_SLI4_MBX_EMBED);
-		if (!phba->sli4_hba.intr_enable)
-			rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
-		else {
-			mbox_tmo = lpfc_mbox_tmo_val(phba, mboxq);
-			rc = lpfc_sli_issue_mbox_wait(phba, mboxq, mbox_tmo);
-		}
-		if (rc == MBX_TIMEOUT)
-			break;
-		/* Check return status */
-		shdr = (union lpfc_sli4_cfg_shdr *)
-			&mboxq->u.mqe.un.sli4_config.header.cfg_shdr;
-		shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
-		shdr_add_status = bf_get(lpfc_mbox_hdr_add_status,
-					 &shdr->response);
-		if (shdr_status || shdr_add_status || rc) {
-			lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
-					"2520 NOP mailbox command failed "
-					"status x%x add_status x%x mbx "
-					"status x%x\n", shdr_status,
-					shdr_add_status, rc);
-			break;
-		}
-	}
-
-	if (rc != MBX_TIMEOUT)
-		mempool_free(mboxq, phba->mbox_mem_pool);
-
-	return cmdsent;
-}
-
-/**
  * lpfc_sli4_pci_mem_setup - Setup SLI4 HBA PCI memory space.
  * @phba: pointer to lpfc hba data structure.
  *
@@ -8503,37 +8491,6 @@ lpfc_unset_hba(struct lpfc_hba *phba)
 }
 
 /**
- * lpfc_sli4_unset_hba - Unset SLI4 hba device initialization.
- * @phba: pointer to lpfc hba data structure.
- *
- * This routine is invoked to unset the HBA device initialization steps to
- * a device with SLI-4 interface spec.
- **/
-static void
-lpfc_sli4_unset_hba(struct lpfc_hba *phba)
-{
-	struct lpfc_vport *vport = phba->pport;
-	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
-
-	spin_lock_irq(shost->host_lock);
-	vport->load_flag |= FC_UNLOADING;
-	spin_unlock_irq(shost->host_lock);
-
-	phba->pport->work_port_events = 0;
-
-	/* Stop the SLI4 device port */
-	lpfc_stop_port(phba);
-
-	lpfc_sli4_disable_intr(phba);
-
-	/* Reset SLI4 HBA FCoE function */
-	lpfc_pci_function_reset(phba);
-	lpfc_sli4_queue_destroy(phba);
-
-	return;
-}
-
-/**
  * lpfc_sli4_xri_exchange_busy_wait - Wait for device XRI exchange busy
  * @phba: Pointer to HBA context object.
  *
@@ -9595,7 +9552,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 	struct Scsi_Host  *shost = NULL;
 	int error, ret;
 	uint32_t cfg_mode, intr_mode;
-	int mcnt;
 	int adjusted_fcp_io_channel;
 
 	/* Allocate memory for HBA structure */
@@ -9684,58 +9640,35 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 	shost = lpfc_shost_from_vport(vport); /* save shost for error cleanup */
 	/* Now, trying to enable interrupt and bring up the device */
 	cfg_mode = phba->cfg_use_msi;
-	while (true) {
-		/* Put device to a known state before enabling interrupt */
-		lpfc_stop_port(phba);
-		/* Configure and enable interrupt */
-		intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode);
-		if (intr_mode == LPFC_INTR_ERROR) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"0426 Failed to enable interrupt.\n");
-			error = -ENODEV;
-			goto out_free_sysfs_attr;
-		}
-		/* Default to single EQ for non-MSI-X */
-		if (phba->intr_type != MSIX)
-			adjusted_fcp_io_channel = 1;
-		else
-			adjusted_fcp_io_channel = phba->cfg_fcp_io_channel;
-		phba->cfg_fcp_io_channel = adjusted_fcp_io_channel;
-		/* Set up SLI-4 HBA */
-		if (lpfc_sli4_hba_setup(phba)) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-					"1421 Failed to set up hba\n");
-			error = -ENODEV;
-			goto out_disable_intr;
-		}
 
-		/* Send NOP mbx cmds for non-INTx mode active interrupt test */
-		if (intr_mode != 0)
-			mcnt = lpfc_sli4_send_nop_mbox_cmds(phba,
-							    LPFC_ACT_INTR_CNT);
-
-		/* Check active interrupts received only for MSI/MSI-X */
-		if (intr_mode == 0 ||
-		    phba->sli.slistat.sli_intr >= LPFC_ACT_INTR_CNT) {
-			/* Log the current active interrupt mode */
-			phba->intr_mode = intr_mode;
-			lpfc_log_intr_mode(phba, intr_mode);
-			break;
-		}
-		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-				"0451 Configure interrupt mode (%d) "
-				"failed active interrupt test.\n",
-				intr_mode);
-		/* Unset the previous SLI-4 HBA setup. */
-		/*
-		 * TODO:  Is this operation compatible with IF TYPE 2
-		 * devices?  All port state is deleted and cleared.
-		 */
-		lpfc_sli4_unset_hba(phba);
-		/* Try next level of interrupt mode */
-		cfg_mode = --intr_mode;
+	/* Put device to a known state before enabling interrupt */
+	lpfc_stop_port(phba);
+	/* Configure and enable interrupt */
+	intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode);
+	if (intr_mode == LPFC_INTR_ERROR) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"0426 Failed to enable interrupt.\n");
+		error = -ENODEV;
+		goto out_free_sysfs_attr;
+	}
+	/* Default to single EQ for non-MSI-X */
+	if (phba->intr_type != MSIX)
+		adjusted_fcp_io_channel = 1;
+	else
+		adjusted_fcp_io_channel = phba->cfg_fcp_io_channel;
+	phba->cfg_fcp_io_channel = adjusted_fcp_io_channel;
+	/* Set up SLI-4 HBA */
+	if (lpfc_sli4_hba_setup(phba)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"1421 Failed to set up hba\n");
+		error = -ENODEV;
+		goto out_disable_intr;
 	}
 
+	/* Log the current active interrupt mode */
+	phba->intr_mode = intr_mode;
+	lpfc_log_intr_mode(phba, intr_mode);
+
 	/* Perform post initialization setup */
 	lpfc_post_init_setup(phba);
 
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index d8fadcb2db73..46128c679202 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -1115,6 +1115,13 @@ out:
 				 "0261 Cannot Register NameServer login\n");
 	}
 
+	/*
+	** In case the node reference counter does not go to zero, ensure that
+	** the stale state for the node is not processed.
+	*/
+
+	ndlp->nlp_prev_state = ndlp->nlp_state;
+	lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
 	spin_lock_irq(shost->host_lock);
 	ndlp->nlp_flag |= NLP_DEFER_RM;
 	spin_unlock_irq(shost->host_lock);
@@ -2159,13 +2166,16 @@ lpfc_cmpl_plogi_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 {
 	struct lpfc_iocbq *cmdiocb, *rspiocb;
 	IOCB_t *irsp;
+	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 
 	cmdiocb = (struct lpfc_iocbq *) arg;
 	rspiocb = cmdiocb->context_un.rsp_iocb;
 
 	irsp = &rspiocb->iocb;
 	if (irsp->ulpStatus) {
+		spin_lock_irq(shost->host_lock);
 		ndlp->nlp_flag |= NLP_DEFER_RM;
+		spin_unlock_irq(shost->host_lock);
 		return NLP_STE_FREED_NODE;
 	}
 	return ndlp->nlp_state;
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 60e5a177644c..98af07c6e300 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -288,6 +288,26 @@ lpfc_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason)
 }
 
 /**
+ * lpfc_change_queue_type() - Change a device's scsi tag queuing type
+ * @sdev: Pointer the scsi device whose queue depth is to change
+ * @tag_type: Identifier for queue tag type
+ */
+static int
+lpfc_change_queue_type(struct scsi_device *sdev, int tag_type)
+{
+	if (sdev->tagged_supported) {
+		scsi_set_tag_type(sdev, tag_type);
+		if (tag_type)
+			scsi_activate_tcq(sdev, sdev->queue_depth);
+		else
+			scsi_deactivate_tcq(sdev, sdev->queue_depth);
+	} else
+		tag_type = 0;
+
+	return tag_type;
+}
+
+/**
  * lpfc_rampdown_queue_depth - Post RAMP_DOWN_QUEUE event to worker thread
  * @phba: The Hba for which this call is being executed.
  *
@@ -3972,7 +3992,7 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 			break;
 		}
 	} else
-		fcp_cmnd->fcpCntl1 = 0;
+		fcp_cmnd->fcpCntl1 = SIMPLE_Q;
 
 	sli4 = (phba->sli_rev == LPFC_SLI_REV4);
 
@@ -5150,6 +5170,7 @@ struct scsi_host_template lpfc_template = {
 	.max_sectors		= 0xFFFF,
 	.vendor_id		= LPFC_NL_VENDOR_ID,
 	.change_queue_depth	= lpfc_change_queue_depth,
+	.change_queue_type	= lpfc_change_queue_type,
 };
 
 struct scsi_host_template lpfc_vport_template = {
@@ -5172,4 +5193,5 @@ struct scsi_host_template lpfc_vport_template = {
 	.shost_attrs		= lpfc_vport_attrs,
 	.max_sectors		= 0xFFFF,
 	.change_queue_depth	= lpfc_change_queue_depth,
+	.change_queue_type	= lpfc_change_queue_type,
 };
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 624eab370396..74b67d98e952 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -124,10 +124,17 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe *wqe)
 
 	/* Ring Doorbell */
 	doorbell.word0 = 0;
-	bf_set(lpfc_wq_doorbell_num_posted, &doorbell, 1);
-	bf_set(lpfc_wq_doorbell_index, &doorbell, host_index);
-	bf_set(lpfc_wq_doorbell_id, &doorbell, q->queue_id);
-	writel(doorbell.word0, q->phba->sli4_hba.WQDBregaddr);
+	if (q->db_format == LPFC_DB_LIST_FORMAT) {
+		bf_set(lpfc_wq_db_list_fm_num_posted, &doorbell, 1);
+		bf_set(lpfc_wq_db_list_fm_index, &doorbell, host_index);
+		bf_set(lpfc_wq_db_list_fm_id, &doorbell, q->queue_id);
+	} else if (q->db_format == LPFC_DB_RING_FORMAT) {
+		bf_set(lpfc_wq_db_ring_fm_num_posted, &doorbell, 1);
+		bf_set(lpfc_wq_db_ring_fm_id, &doorbell, q->queue_id);
+	} else {
+		return -EINVAL;
+	}
+	writel(doorbell.word0, q->db_regaddr);
 
 	return 0;
 }
@@ -456,10 +463,20 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
 	/* Ring The Header Receive Queue Doorbell */
 	if (!(hq->host_index % hq->entry_repost)) {
 		doorbell.word0 = 0;
-		bf_set(lpfc_rq_doorbell_num_posted, &doorbell,
-		       hq->entry_repost);
-		bf_set(lpfc_rq_doorbell_id, &doorbell, hq->queue_id);
-		writel(doorbell.word0, hq->phba->sli4_hba.RQDBregaddr);
+		if (hq->db_format == LPFC_DB_RING_FORMAT) {
+			bf_set(lpfc_rq_db_ring_fm_num_posted, &doorbell,
+			       hq->entry_repost);
+			bf_set(lpfc_rq_db_ring_fm_id, &doorbell, hq->queue_id);
+		} else if (hq->db_format == LPFC_DB_LIST_FORMAT) {
+			bf_set(lpfc_rq_db_list_fm_num_posted, &doorbell,
+			       hq->entry_repost);
+			bf_set(lpfc_rq_db_list_fm_index, &doorbell,
+			       hq->host_index);
+			bf_set(lpfc_rq_db_list_fm_id, &doorbell, hq->queue_id);
+		} else {
+			return -EINVAL;
+		}
+		writel(doorbell.word0, hq->db_regaddr);
 	}
 	return put_index;
 }
@@ -4939,7 +4956,7 @@ out_free_mboxq:
 static void
 lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba)
 {
-	uint8_t fcp_eqidx;
+	int fcp_eqidx;
 
 	lpfc_sli4_cq_release(phba->sli4_hba.mbx_cq, LPFC_QUEUE_REARM);
 	lpfc_sli4_cq_release(phba->sli4_hba.els_cq, LPFC_QUEUE_REARM);
@@ -5622,6 +5639,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba)
 		}
 		/* RPIs. */
 		count = phba->sli4_hba.max_cfg_param.max_rpi;
+		if (count <= 0) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+					"3279 Invalid provisioning of "
+					"rpi:%d\n", count);
+			rc = -EINVAL;
+			goto err_exit;
+		}
 		base = phba->sli4_hba.max_cfg_param.rpi_base;
 		longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG;
 		phba->sli4_hba.rpi_bmask = kzalloc(longs *
@@ -5644,6 +5668,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba)
 
 		/* VPIs. */
 		count = phba->sli4_hba.max_cfg_param.max_vpi;
+		if (count <= 0) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+					"3280 Invalid provisioning of "
+					"vpi:%d\n", count);
+			rc = -EINVAL;
+			goto free_rpi_ids;
+		}
 		base = phba->sli4_hba.max_cfg_param.vpi_base;
 		longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG;
 		phba->vpi_bmask = kzalloc(longs *
@@ -5666,6 +5697,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba)
 
 		/* XRIs. */
 		count = phba->sli4_hba.max_cfg_param.max_xri;
+		if (count <= 0) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+					"3281 Invalid provisioning of "
+					"xri:%d\n", count);
+			rc = -EINVAL;
+			goto free_vpi_ids;
+		}
 		base = phba->sli4_hba.max_cfg_param.xri_base;
 		longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG;
 		phba->sli4_hba.xri_bmask = kzalloc(longs *
@@ -5689,6 +5727,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba)
 
 		/* VFIs. */
 		count = phba->sli4_hba.max_cfg_param.max_vfi;
+		if (count <= 0) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+					"3282 Invalid provisioning of "
+					"vfi:%d\n", count);
+			rc = -EINVAL;
+			goto free_xri_ids;
+		}
 		base = phba->sli4_hba.max_cfg_param.vfi_base;
 		longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG;
 		phba->sli4_hba.vfi_bmask = kzalloc(longs *
@@ -6599,7 +6644,7 @@ static int
 lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
 		       uint32_t flag)
 {
-	MAILBOX_t *mb;
+	MAILBOX_t *mbx;
 	struct lpfc_sli *psli = &phba->sli;
 	uint32_t status, evtctr;
 	uint32_t ha_copy, hc_copy;
@@ -6653,7 +6698,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
 
 	psli = &phba->sli;
 
-	mb = &pmbox->u.mb;
+	mbx = &pmbox->u.mb;
 	status = MBX_SUCCESS;
 
 	if (phba->link_state == LPFC_HBA_ERROR) {
@@ -6668,7 +6713,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
 		goto out_not_finished;
 	}
 
-	if (mb->mbxCommand != MBX_KILL_BOARD && flag & MBX_NOWAIT) {
+	if (mbx->mbxCommand != MBX_KILL_BOARD && flag & MBX_NOWAIT) {
 		if (lpfc_readl(phba->HCregaddr, &hc_copy) ||
 			!(hc_copy & HC_MBINT_ENA)) {
 			spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
@@ -6722,7 +6767,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
 				"(%d):0308 Mbox cmd issue - BUSY Data: "
 				"x%x x%x x%x x%x\n",
 				pmbox->vport ? pmbox->vport->vpi : 0xffffff,
-				mb->mbxCommand, phba->pport->port_state,
+				mbx->mbxCommand, phba->pport->port_state,
 				psli->sli_flag, flag);
 
 		psli->slistat.mbox_busy++;
@@ -6732,15 +6777,15 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
 			lpfc_debugfs_disc_trc(pmbox->vport,
 				LPFC_DISC_TRC_MBOX_VPORT,
 				"MBOX Bsy vport:  cmd:x%x mb:x%x x%x",
-				(uint32_t)mb->mbxCommand,
-				mb->un.varWords[0], mb->un.varWords[1]);
+				(uint32_t)mbx->mbxCommand,
+				mbx->un.varWords[0], mbx->un.varWords[1]);
 		}
 		else {
 			lpfc_debugfs_disc_trc(phba->pport,
 				LPFC_DISC_TRC_MBOX,
 				"MBOX Bsy:        cmd:x%x mb:x%x x%x",
-				(uint32_t)mb->mbxCommand,
-				mb->un.varWords[0], mb->un.varWords[1]);
+				(uint32_t)mbx->mbxCommand,
+				mbx->un.varWords[0], mbx->un.varWords[1]);
 		}
 
 		return MBX_BUSY;
@@ -6751,7 +6796,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
 	/* If we are not polling, we MUST be in SLI2 mode */
 	if (flag != MBX_POLL) {
 		if (!(psli->sli_flag & LPFC_SLI_ACTIVE) &&
-		    (mb->mbxCommand != MBX_KILL_BOARD)) {
+		    (mbx->mbxCommand != MBX_KILL_BOARD)) {
 			psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
 			spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
 			/* Mbox command <mbxCommand> cannot issue */
@@ -6773,23 +6818,23 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
 			"(%d):0309 Mailbox cmd x%x issue Data: x%x x%x "
 			"x%x\n",
 			pmbox->vport ? pmbox->vport->vpi : 0,
-			mb->mbxCommand, phba->pport->port_state,
+			mbx->mbxCommand, phba->pport->port_state,
 			psli->sli_flag, flag);
 
-	if (mb->mbxCommand != MBX_HEARTBEAT) {
+	if (mbx->mbxCommand != MBX_HEARTBEAT) {
 		if (pmbox->vport) {
 			lpfc_debugfs_disc_trc(pmbox->vport,
 				LPFC_DISC_TRC_MBOX_VPORT,
 				"MBOX Send vport: cmd:x%x mb:x%x x%x",
-				(uint32_t)mb->mbxCommand,
-				mb->un.varWords[0], mb->un.varWords[1]);
+				(uint32_t)mbx->mbxCommand,
+				mbx->un.varWords[0], mbx->un.varWords[1]);
 		}
 		else {
 			lpfc_debugfs_disc_trc(phba->pport,
 				LPFC_DISC_TRC_MBOX,
 				"MBOX Send:       cmd:x%x mb:x%x x%x",
-				(uint32_t)mb->mbxCommand,
-				mb->un.varWords[0], mb->un.varWords[1]);
+				(uint32_t)mbx->mbxCommand,
+				mbx->un.varWords[0], mbx->un.varWords[1]);
 		}
 	}
 
@@ -6797,12 +6842,12 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
 	evtctr = psli->slistat.mbox_event;
 
 	/* next set own bit for the adapter and copy over command word */
-	mb->mbxOwner = OWN_CHIP;
+	mbx->mbxOwner = OWN_CHIP;
 
 	if (psli->sli_flag & LPFC_SLI_ACTIVE) {
 		/* Populate mbox extension offset word. */
 		if (pmbox->in_ext_byte_len || pmbox->out_ext_byte_len) {
-			*(((uint32_t *)mb) + pmbox->mbox_offset_word)
+			*(((uint32_t *)mbx) + pmbox->mbox_offset_word)
 				= (uint8_t *)phba->mbox_ext
 				  - (uint8_t *)phba->mbox;
 		}
@@ -6814,11 +6859,11 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
 				pmbox->in_ext_byte_len);
 		}
 		/* Copy command data to host SLIM area */
-		lpfc_sli_pcimem_bcopy(mb, phba->mbox, MAILBOX_CMD_SIZE);
+		lpfc_sli_pcimem_bcopy(mbx, phba->mbox, MAILBOX_CMD_SIZE);
 	} else {
 		/* Populate mbox extension offset word. */
 		if (pmbox->in_ext_byte_len || pmbox->out_ext_byte_len)
-			*(((uint32_t *)mb) + pmbox->mbox_offset_word)
+			*(((uint32_t *)mbx) + pmbox->mbox_offset_word)
 				= MAILBOX_HBA_EXT_OFFSET;
 
 		/* Copy the mailbox extension data */
@@ -6828,24 +6873,24 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
 				pmbox->context2, pmbox->in_ext_byte_len);
 
 		}
-		if (mb->mbxCommand == MBX_CONFIG_PORT) {
+		if (mbx->mbxCommand == MBX_CONFIG_PORT) {
 			/* copy command data into host mbox for cmpl */
-			lpfc_sli_pcimem_bcopy(mb, phba->mbox, MAILBOX_CMD_SIZE);
+			lpfc_sli_pcimem_bcopy(mbx, phba->mbox, MAILBOX_CMD_SIZE);
 		}
 
 		/* First copy mbox command data to HBA SLIM, skip past first
 		   word */
 		to_slim = phba->MBslimaddr + sizeof (uint32_t);
-		lpfc_memcpy_to_slim(to_slim, &mb->un.varWords[0],
+		lpfc_memcpy_to_slim(to_slim, &mbx->un.varWords[0],
 			    MAILBOX_CMD_SIZE - sizeof (uint32_t));
 
 		/* Next copy over first word, with mbxOwner set */
-		ldata = *((uint32_t *)mb);
+		ldata = *((uint32_t *)mbx);
 		to_slim = phba->MBslimaddr;
 		writel(ldata, to_slim);
 		readl(to_slim); /* flush */
 
-		if (mb->mbxCommand == MBX_CONFIG_PORT) {
+		if (mbx->mbxCommand == MBX_CONFIG_PORT) {
 			/* switch over to host mailbox */
 			psli->sli_flag |= LPFC_SLI_ACTIVE;
 		}
@@ -6920,7 +6965,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
 				/* First copy command data */
 				word0 = *((uint32_t *)phba->mbox);
 				word0 = le32_to_cpu(word0);
-				if (mb->mbxCommand == MBX_CONFIG_PORT) {
+				if (mbx->mbxCommand == MBX_CONFIG_PORT) {
 					MAILBOX_t *slimmb;
 					uint32_t slimword0;
 					/* Check real SLIM for any errors */
@@ -6947,7 +6992,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
 
 		if (psli->sli_flag & LPFC_SLI_ACTIVE) {
 			/* copy results back to user */
-			lpfc_sli_pcimem_bcopy(phba->mbox, mb, MAILBOX_CMD_SIZE);
+			lpfc_sli_pcimem_bcopy(phba->mbox, mbx, MAILBOX_CMD_SIZE);
 			/* Copy the mailbox extension data */
 			if (pmbox->out_ext_byte_len && pmbox->context2) {
 				lpfc_sli_pcimem_bcopy(phba->mbox_ext,
@@ -6956,7 +7001,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
 			}
 		} else {
 			/* First copy command data */
-			lpfc_memcpy_from_slim(mb, phba->MBslimaddr,
+			lpfc_memcpy_from_slim(mbx, phba->MBslimaddr,
 							MAILBOX_CMD_SIZE);
 			/* Copy the mailbox extension data */
 			if (pmbox->out_ext_byte_len && pmbox->context2) {
@@ -6971,7 +7016,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
 		readl(phba->HAregaddr); /* flush */
 
 		psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
-		status = mb->mbxStatus;
+		status = mbx->mbxStatus;
 	}
 
 	spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
@@ -8370,7 +8415,7 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number,
 		 * This is a continuation of a commandi,(CX) so this
 		 * sglq is on the active list
 		 */
-		sglq = __lpfc_get_active_sglq(phba, piocb->sli4_xritag);
+		sglq = __lpfc_get_active_sglq(phba, piocb->sli4_lxritag);
 		if (!sglq)
 			return IOCB_ERROR;
 	}
@@ -8855,12 +8900,6 @@ lpfc_sli_setup(struct lpfc_hba *phba)
 			pring->prt[3].type = FC_TYPE_CT;
 			pring->prt[3].lpfc_sli_rcv_unsol_event =
 			    lpfc_ct_unsol_event;
-			/* abort unsolicited sequence */
-			pring->prt[4].profile = 0;	/* Mask 4 */
-			pring->prt[4].rctl = FC_RCTL_BA_ABTS;
-			pring->prt[4].type = FC_TYPE_BLS;
-			pring->prt[4].lpfc_sli_rcv_unsol_event =
-			    lpfc_sli4_ct_abort_unsol_event;
 			break;
 		}
 		totiocbsize += (pring->sli.sli3.numCiocb *
@@ -11873,7 +11912,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 	struct lpfc_eqe *eqe;
 	unsigned long iflag;
 	int ecount = 0;
-	uint32_t fcp_eqidx;
+	int fcp_eqidx;
 
 	/* Get the driver's phba structure from the dev_id */
 	fcp_eq_hdl = (struct lpfc_fcp_eq_hdl *)dev_id;
@@ -11975,7 +12014,7 @@ lpfc_sli4_intr_handler(int irq, void *dev_id)
 	struct lpfc_hba  *phba;
 	irqreturn_t hba_irq_rc;
 	bool hba_handled = false;
-	uint32_t fcp_eqidx;
+	int fcp_eqidx;
 
 	/* Get the driver's phba structure from the dev_id */
 	phba = (struct lpfc_hba *)dev_id;
@@ -12097,6 +12136,54 @@ out_fail:
 }
 
 /**
+ * lpfc_dual_chute_pci_bar_map - Map pci base address register to host memory
+ * @phba: HBA structure that indicates port to create a queue on.
+ * @pci_barset: PCI BAR set flag.
+ *
+ * This function shall perform iomap of the specified PCI BAR address to host
+ * memory address if not already done so and return it. The returned host
+ * memory address can be NULL.
+ */
+static void __iomem *
+lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset)
+{
+	struct pci_dev *pdev;
+	unsigned long bar_map, bar_map_len;
+
+	if (!phba->pcidev)
+		return NULL;
+	else
+		pdev = phba->pcidev;
+
+	switch (pci_barset) {
+	case WQ_PCI_BAR_0_AND_1:
+		if (!phba->pci_bar0_memmap_p) {
+			bar_map = pci_resource_start(pdev, PCI_64BIT_BAR0);
+			bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR0);
+			phba->pci_bar0_memmap_p = ioremap(bar_map, bar_map_len);
+		}
+		return phba->pci_bar0_memmap_p;
+	case WQ_PCI_BAR_2_AND_3:
+		if (!phba->pci_bar2_memmap_p) {
+			bar_map = pci_resource_start(pdev, PCI_64BIT_BAR2);
+			bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR2);
+			phba->pci_bar2_memmap_p = ioremap(bar_map, bar_map_len);
+		}
+		return phba->pci_bar2_memmap_p;
+	case WQ_PCI_BAR_4_AND_5:
+		if (!phba->pci_bar4_memmap_p) {
+			bar_map = pci_resource_start(pdev, PCI_64BIT_BAR4);
+			bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR4);
+			phba->pci_bar4_memmap_p = ioremap(bar_map, bar_map_len);
+		}
+		return phba->pci_bar4_memmap_p;
+	default:
+		break;
+	}
+	return NULL;
+}
+
+/**
  * lpfc_modify_fcp_eq_delay - Modify Delay Multiplier on FCP EQs
  * @phba: HBA structure that indicates port to create a queue on.
  * @startq: The starting FCP EQ to modify
@@ -12673,6 +12760,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
 	union lpfc_sli4_cfg_shdr *shdr;
 	uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz;
 	struct dma_address *page;
+	void __iomem *bar_memmap_p;
+	uint32_t db_offset;
+	uint16_t pci_barset;
 
 	/* sanity check on queue memory */
 	if (!wq || !cq)
@@ -12696,6 +12786,7 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
 		    cq->queue_id);
 	bf_set(lpfc_mbox_hdr_version, &shdr->request,
 	       phba->sli4_hba.pc_sli4_params.wqv);
+
 	if (phba->sli4_hba.pc_sli4_params.wqv == LPFC_Q_CREATE_VERSION_1) {
 		bf_set(lpfc_mbx_wq_create_wqe_count, &wq_create->u.request_1,
 		       wq->entry_count);
@@ -12723,6 +12814,10 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
 		page[dmabuf->buffer_tag].addr_lo = putPaddrLow(dmabuf->phys);
 		page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys);
 	}
+
+	if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE)
+		bf_set(lpfc_mbx_wq_create_dua, &wq_create->u.request, 1);
+
 	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
 	/* The IOCTL status is embedded in the mailbox subheader. */
 	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
@@ -12740,6 +12835,47 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
 		status = -ENXIO;
 		goto out;
 	}
+	if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) {
+		wq->db_format = bf_get(lpfc_mbx_wq_create_db_format,
+				       &wq_create->u.response);
+		if ((wq->db_format != LPFC_DB_LIST_FORMAT) &&
+		    (wq->db_format != LPFC_DB_RING_FORMAT)) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"3265 WQ[%d] doorbell format not "
+					"supported: x%x\n", wq->queue_id,
+					wq->db_format);
+			status = -EINVAL;
+			goto out;
+		}
+		pci_barset = bf_get(lpfc_mbx_wq_create_bar_set,
+				    &wq_create->u.response);
+		bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, pci_barset);
+		if (!bar_memmap_p) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"3263 WQ[%d] failed to memmap pci "
+					"barset:x%x\n", wq->queue_id,
+					pci_barset);
+			status = -ENOMEM;
+			goto out;
+		}
+		db_offset = wq_create->u.response.doorbell_offset;
+		if ((db_offset != LPFC_ULP0_WQ_DOORBELL) &&
+		    (db_offset != LPFC_ULP1_WQ_DOORBELL)) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"3252 WQ[%d] doorbell offset not "
+					"supported: x%x\n", wq->queue_id,
+					db_offset);
+			status = -EINVAL;
+			goto out;
+		}
+		wq->db_regaddr = bar_memmap_p + db_offset;
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"3264 WQ[%d]: barset:x%x, offset:x%x\n",
+				wq->queue_id, pci_barset, db_offset);
+	} else {
+		wq->db_format = LPFC_DB_LIST_FORMAT;
+		wq->db_regaddr = phba->sli4_hba.WQDBregaddr;
+	}
 	wq->type = LPFC_WQ;
 	wq->assoc_qid = cq->queue_id;
 	wq->subtype = subtype;
@@ -12816,6 +12952,9 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	uint32_t shdr_status, shdr_add_status;
 	union lpfc_sli4_cfg_shdr *shdr;
 	uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz;
+	void __iomem *bar_memmap_p;
+	uint32_t db_offset;
+	uint16_t pci_barset;
 
 	/* sanity check on queue memory */
 	if (!hrq || !drq || !cq)
@@ -12894,6 +13033,9 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 		rq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
 					putPaddrHigh(dmabuf->phys);
 	}
+	if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE)
+		bf_set(lpfc_mbx_rq_create_dua, &rq_create->u.request, 1);
+
 	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
 	/* The IOCTL status is embedded in the mailbox subheader. */
 	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
@@ -12911,6 +13053,50 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 		status = -ENXIO;
 		goto out;
 	}
+
+	if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) {
+		hrq->db_format = bf_get(lpfc_mbx_rq_create_db_format,
+					&rq_create->u.response);
+		if ((hrq->db_format != LPFC_DB_LIST_FORMAT) &&
+		    (hrq->db_format != LPFC_DB_RING_FORMAT)) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"3262 RQ [%d] doorbell format not "
+					"supported: x%x\n", hrq->queue_id,
+					hrq->db_format);
+			status = -EINVAL;
+			goto out;
+		}
+
+		pci_barset = bf_get(lpfc_mbx_rq_create_bar_set,
+				    &rq_create->u.response);
+		bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, pci_barset);
+		if (!bar_memmap_p) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"3269 RQ[%d] failed to memmap pci "
+					"barset:x%x\n", hrq->queue_id,
+					pci_barset);
+			status = -ENOMEM;
+			goto out;
+		}
+
+		db_offset = rq_create->u.response.doorbell_offset;
+		if ((db_offset != LPFC_ULP0_RQ_DOORBELL) &&
+		    (db_offset != LPFC_ULP1_RQ_DOORBELL)) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"3270 RQ[%d] doorbell offset not "
+					"supported: x%x\n", hrq->queue_id,
+					db_offset);
+			status = -EINVAL;
+			goto out;
+		}
+		hrq->db_regaddr = bar_memmap_p + db_offset;
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"3266 RQ[qid:%d]: barset:x%x, offset:x%x\n",
+				hrq->queue_id, pci_barset, db_offset);
+	} else {
+		hrq->db_format = LPFC_DB_RING_FORMAT;
+		hrq->db_regaddr = phba->sli4_hba.RQDBregaddr;
+	}
 	hrq->type = LPFC_HRQ;
 	hrq->assoc_qid = cq->queue_id;
 	hrq->subtype = subtype;
@@ -12976,6 +13162,8 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 		rq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
 					putPaddrHigh(dmabuf->phys);
 	}
+	if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE)
+		bf_set(lpfc_mbx_rq_create_dua, &rq_create->u.request, 1);
 	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
 	/* The IOCTL status is embedded in the mailbox subheader. */
 	shdr = (union lpfc_sli4_cfg_shdr *) &rq_create->header.cfg_shdr;
@@ -14063,6 +14251,40 @@ lpfc_sli4_abort_partial_seq(struct lpfc_vport *vport,
 }
 
 /**
+ * lpfc_sli4_abort_ulp_seq - Abort assembled unsol sequence from ulp
+ * @vport: pointer to a vitural port
+ * @dmabuf: pointer to a dmabuf that describes the FC sequence
+ *
+ * This function tries to abort from the assembed sequence from upper level
+ * protocol, described by the information from basic abbort @dmabuf. It
+ * checks to see whether such pending context exists at upper level protocol.
+ * If so, it shall clean up the pending context.
+ *
+ * Return
+ * true  -- if there is matching pending context of the sequence cleaned
+ *          at ulp;
+ * false -- if there is no matching pending context of the sequence present
+ *          at ulp.
+ **/
+static bool
+lpfc_sli4_abort_ulp_seq(struct lpfc_vport *vport, struct hbq_dmabuf *dmabuf)
+{
+	struct lpfc_hba *phba = vport->phba;
+	int handled;
+
+	/* Accepting abort at ulp with SLI4 only */
+	if (phba->sli_rev < LPFC_SLI_REV4)
+		return false;
+
+	/* Register all caring upper level protocols to attend abort */
+	handled = lpfc_ct_handle_unsol_abort(phba, dmabuf);
+	if (handled)
+		return true;
+
+	return false;
+}
+
+/**
  * lpfc_sli4_seq_abort_rsp_cmpl - BLS ABORT RSP seq abort iocb complete handler
  * @phba: Pointer to HBA context object.
  * @cmd_iocbq: pointer to the command iocbq structure.
@@ -14077,8 +14299,14 @@ lpfc_sli4_seq_abort_rsp_cmpl(struct lpfc_hba *phba,
 			     struct lpfc_iocbq *cmd_iocbq,
 			     struct lpfc_iocbq *rsp_iocbq)
 {
-	if (cmd_iocbq)
+	struct lpfc_nodelist *ndlp;
+
+	if (cmd_iocbq) {
+		ndlp = (struct lpfc_nodelist *)cmd_iocbq->context1;
+		lpfc_nlp_put(ndlp);
+		lpfc_nlp_not_used(ndlp);
 		lpfc_sli_release_iocbq(phba, cmd_iocbq);
+	}
 
 	/* Failure means BLS ABORT RSP did not get delivered to remote node*/
 	if (rsp_iocbq && rsp_iocbq->iocb.ulpStatus)
@@ -14118,9 +14346,10 @@ lpfc_sli4_xri_inrange(struct lpfc_hba *phba,
  * event after aborting the sequence handling.
  **/
 static void
-lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba,
-			struct fc_frame_header *fc_hdr)
+lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport,
+			struct fc_frame_header *fc_hdr, bool aborted)
 {
+	struct lpfc_hba *phba = vport->phba;
 	struct lpfc_iocbq *ctiocb = NULL;
 	struct lpfc_nodelist *ndlp;
 	uint16_t oxid, rxid, xri, lxri;
@@ -14135,12 +14364,27 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba,
 	oxid = be16_to_cpu(fc_hdr->fh_ox_id);
 	rxid = be16_to_cpu(fc_hdr->fh_rx_id);
 
-	ndlp = lpfc_findnode_did(phba->pport, sid);
+	ndlp = lpfc_findnode_did(vport, sid);
 	if (!ndlp) {
-		lpfc_printf_log(phba, KERN_WARNING, LOG_ELS,
-				"1268 Find ndlp returned NULL for oxid:x%x "
-				"SID:x%x\n", oxid, sid);
-		return;
+		ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+		if (!ndlp) {
+			lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS,
+					 "1268 Failed to allocate ndlp for "
+					 "oxid:x%x SID:x%x\n", oxid, sid);
+			return;
+		}
+		lpfc_nlp_init(vport, ndlp, sid);
+		/* Put ndlp onto pport node list */
+		lpfc_enqueue_node(vport, ndlp);
+	} else if (!NLP_CHK_NODE_ACT(ndlp)) {
+		/* re-setup ndlp without removing from node list */
+		ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE);
+		if (!ndlp) {
+			lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS,
+					 "3275 Failed to active ndlp found "
+					 "for oxid:x%x SID:x%x\n", oxid, sid);
+			return;
+		}
 	}
 
 	/* Allocate buffer for rsp iocb */
@@ -14164,7 +14408,7 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba,
 	icmd->ulpLe = 1;
 	icmd->ulpClass = CLASS3;
 	icmd->ulpContext = phba->sli4_hba.rpi_ids[ndlp->nlp_rpi];
-	ctiocb->context1 = ndlp;
+	ctiocb->context1 = lpfc_nlp_get(ndlp);
 
 	ctiocb->iocb_cmpl = NULL;
 	ctiocb->vport = phba->pport;
@@ -14183,14 +14427,24 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba,
 	if (lxri != NO_XRI)
 		lpfc_set_rrq_active(phba, ndlp, lxri,
 			(xri == oxid) ? rxid : oxid, 0);
-	/* If the oxid maps to the FCP XRI range or if it is out of range,
-	 * send a BLS_RJT.  The driver no longer has that exchange.
-	 * Override the IOCB for a BA_RJT.
+	/* For BA_ABTS from exchange responder, if the logical xri with
+	 * the oxid maps to the FCP XRI range, the port no longer has
+	 * that exchange context, send a BLS_RJT. Override the IOCB for
+	 * a BA_RJT.
 	 */
-	if (xri > (phba->sli4_hba.max_cfg_param.max_xri +
-		    phba->sli4_hba.max_cfg_param.xri_base) ||
-	    xri > (lpfc_sli4_get_els_iocb_cnt(phba) +
-		    phba->sli4_hba.max_cfg_param.xri_base)) {
+	if ((fctl & FC_FC_EX_CTX) &&
+	    (lxri > lpfc_sli4_get_els_iocb_cnt(phba))) {
+		icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT;
+		bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0);
+		bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID);
+		bf_set(lpfc_rsn_code, &icmd->un.bls_rsp, FC_BA_RJT_UNABLE);
+	}
+
+	/* If BA_ABTS failed to abort a partially assembled receive sequence,
+	 * the driver no longer has that exchange, send a BLS_RJT. Override
+	 * the IOCB for a BA_RJT.
+	 */
+	if (aborted == false) {
 		icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT;
 		bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0);
 		bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID);
@@ -14214,17 +14468,19 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba,
 	bf_set(lpfc_abts_oxid, &icmd->un.bls_rsp, oxid);
 
 	/* Xmit CT abts response on exchange <xid> */
-	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
-			"1200 Send BLS cmd x%x on oxid x%x Data: x%x\n",
-			icmd->un.xseq64.w5.hcsw.Rctl, oxid, phba->link_state);
+	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
+			 "1200 Send BLS cmd x%x on oxid x%x Data: x%x\n",
+			 icmd->un.xseq64.w5.hcsw.Rctl, oxid, phba->link_state);
 
 	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, ctiocb, 0);
 	if (rc == IOCB_ERROR) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
-				"2925 Failed to issue CT ABTS RSP x%x on "
-				"xri x%x, Data x%x\n",
-				icmd->un.xseq64.w5.hcsw.Rctl, oxid,
-				phba->link_state);
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
+				 "2925 Failed to issue CT ABTS RSP x%x on "
+				 "xri x%x, Data x%x\n",
+				 icmd->un.xseq64.w5.hcsw.Rctl, oxid,
+				 phba->link_state);
+		lpfc_nlp_put(ndlp);
+		ctiocb->context1 = NULL;
 		lpfc_sli_release_iocbq(phba, ctiocb);
 	}
 }
@@ -14249,32 +14505,25 @@ lpfc_sli4_handle_unsol_abort(struct lpfc_vport *vport,
 	struct lpfc_hba *phba = vport->phba;
 	struct fc_frame_header fc_hdr;
 	uint32_t fctl;
-	bool abts_par;
+	bool aborted;
 
 	/* Make a copy of fc_hdr before the dmabuf being released */
 	memcpy(&fc_hdr, dmabuf->hbuf.virt, sizeof(struct fc_frame_header));
 	fctl = sli4_fctl_from_fc_hdr(&fc_hdr);
 
 	if (fctl & FC_FC_EX_CTX) {
-		/*
-		 * ABTS sent by responder to exchange, just free the buffer
-		 */
-		lpfc_in_buf_free(phba, &dmabuf->dbuf);
+		/* ABTS by responder to exchange, no cleanup needed */
+		aborted = true;
 	} else {
-		/*
-		 * ABTS sent by initiator to exchange, need to do cleanup
-		 */
-		/* Try to abort partially assembled seq */
-		abts_par = lpfc_sli4_abort_partial_seq(vport, dmabuf);
-
-		/* Send abort to ULP if partially seq abort failed */
-		if (abts_par == false)
-			lpfc_sli4_send_seq_to_ulp(vport, dmabuf);
-		else
-			lpfc_in_buf_free(phba, &dmabuf->dbuf);
+		/* ABTS by initiator to exchange, need to do cleanup */
+		aborted = lpfc_sli4_abort_partial_seq(vport, dmabuf);
+		if (aborted == false)
+			aborted = lpfc_sli4_abort_ulp_seq(vport, dmabuf);
 	}
-	/* Send basic accept (BA_ACC) to the abort requester */
-	lpfc_sli4_seq_abort_rsp(phba, &fc_hdr);
+	lpfc_in_buf_free(phba, &dmabuf->dbuf);
+
+	/* Respond with BA_ACC or BA_RJT accordingly */
+	lpfc_sli4_seq_abort_rsp(vport, &fc_hdr, aborted);
 }
 
 /**
@@ -15307,10 +15556,13 @@ lpfc_sli4_fcf_rr_next_index_get(struct lpfc_hba *phba)
 {
 	uint16_t next_fcf_index;
 
+initial_priority:
 	/* Search start from next bit of currently registered FCF index */
+	next_fcf_index = phba->fcf.current_rec.fcf_indx;
+
 next_priority:
-	next_fcf_index = (phba->fcf.current_rec.fcf_indx + 1) %
-					LPFC_SLI4_FCF_TBL_INDX_MAX;
+	/* Determine the next fcf index to check */
+	next_fcf_index = (next_fcf_index + 1) % LPFC_SLI4_FCF_TBL_INDX_MAX;
 	next_fcf_index = find_next_bit(phba->fcf.fcf_rr_bmask,
 				       LPFC_SLI4_FCF_TBL_INDX_MAX,
 				       next_fcf_index);
@@ -15337,7 +15589,7 @@ next_priority:
 		 * at that level and continue the selection process.
 		 */
 		if (lpfc_check_next_fcf_pri_level(phba))
-			goto next_priority;
+			goto initial_priority;
 		lpfc_printf_log(phba, KERN_WARNING, LOG_FIP,
 				"2844 No roundrobin failover FCF available\n");
 		if (next_fcf_index >= LPFC_SLI4_FCF_TBL_INDX_MAX)
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 44c427a45d66..be02b59ea279 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -139,6 +139,10 @@ struct lpfc_queue {
 
 	struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */
 
+	uint16_t db_format;
+#define LPFC_DB_RING_FORMAT	0x01
+#define LPFC_DB_LIST_FORMAT	0x02
+	void __iomem *db_regaddr;
 	/* For q stats */
 	uint32_t q_cnt_1;
 	uint32_t q_cnt_2;
@@ -508,6 +512,10 @@ struct lpfc_sli4_hba {
 	struct lpfc_queue *hdr_rq; /* Slow-path Header Receive queue */
 	struct lpfc_queue *dat_rq; /* Slow-path Data Receive queue */
 
+	uint8_t fw_func_mode;	/* FW function protocol mode */
+	uint32_t ulp0_mode;	/* ULP0 protocol mode */
+	uint32_t ulp1_mode;	/* ULP1 protocol mode */
+
 	/* Setup information for various queue parameters */
 	int eq_esize;
 	int eq_ecount;
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index ba596e854bbc..f3b7795a296b 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -18,7 +18,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "8.3.36"
+#define LPFC_DRIVER_VERSION "8.3.37"
 #define LPFC_DRIVER_NAME		"lpfc"
 
 /* Used for SLI 2/3 */
diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
index 3b2365c8eab2..408d2548a748 100644
--- a/drivers/scsi/megaraid/megaraid_sas.h
+++ b/drivers/scsi/megaraid/megaraid_sas.h
@@ -33,9 +33,9 @@
 /*
  * MegaRAID SAS Driver meta data
  */
-#define MEGASAS_VERSION				"06.504.01.00-rc1"
-#define MEGASAS_RELDATE				"Oct. 1, 2012"
-#define MEGASAS_EXT_VERSION			"Mon. Oct. 1 17:00:00 PDT 2012"
+#define MEGASAS_VERSION				"06.506.00.00-rc1"
+#define MEGASAS_RELDATE				"Feb. 9, 2013"
+#define MEGASAS_EXT_VERSION			"Sat. Feb. 9 17:00:00 PDT 2013"
 
 /*
  * Device IDs
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 66a0fec0437b..9d53540207ec 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -18,7 +18,7 @@
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  *
  *  FILE: megaraid_sas_base.c
- *  Version : v06.504.01.00-rc1
+ *  Version : v06.506.00.00-rc1
  *
  *  Authors: LSI Corporation
  *           Sreenivas Bagalkote
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 74030aff69ad..a7d56687bfca 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -1206,7 +1206,7 @@ megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len,
 				MPI2_SCSIIO_EEDPFLAGS_INSERT_OP;
 		}
 		io_request->Control |= (0x4 << 26);
-		io_request->EEDPBlockSize = MEGASAS_EEDPBLOCKSIZE;
+		io_request->EEDPBlockSize = scp->device->sector_size;
 	} else {
 		/* Some drives don't support 16/12 byte CDB's, convert to 10 */
 		if (((cdb_len == 12) || (cdb_len == 16)) &&
@@ -1511,7 +1511,8 @@ megasas_build_dcdb_fusion(struct megasas_instance *instance,
 	if (scmd->device->channel < MEGASAS_MAX_PD_CHANNELS &&
 	    instance->pd_list[pd_index].driveState == MR_PD_STATE_SYSTEM) {
 		io_request->Function = 0;
-		io_request->DevHandle =
+		if (fusion->fast_path_io)
+			io_request->DevHandle =
 			local_map_ptr->raidMap.devHndlInfo[device_id].curDevHdl;
 		io_request->RaidContext.timeoutValue =
 			local_map_ptr->raidMap.fpPdIoTimeoutSec;
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.h b/drivers/scsi/megaraid/megaraid_sas_fusion.h
index a7c64f051996..f68a3cd11d5d 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.h
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h
@@ -61,7 +61,6 @@
 #define MEGASAS_SCSI_ADDL_CDB_LEN                   0x18
 #define MEGASAS_RD_WR_PROTECT_CHECK_ALL		    0x20
 #define MEGASAS_RD_WR_PROTECT_CHECK_NONE	    0x60
-#define MEGASAS_EEDPBLOCKSIZE			    512
 
 /*
  * Raid context flags
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c
index ffd85c511c8e..bcb23d28b3e8 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.c
@@ -155,7 +155,7 @@ _base_fault_reset_work(struct work_struct *work)
 	struct task_struct *p;
 
 	spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags);
-	if (ioc->shost_recovery)
+	if (ioc->shost_recovery || ioc->pci_error_recovery)
 		goto rearm_timer;
 	spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags);
 
@@ -164,6 +164,20 @@ _base_fault_reset_work(struct work_struct *work)
 		printk(MPT2SAS_INFO_FMT "%s : SAS host is non-operational !!!!\n",
 			ioc->name, __func__);
 
+		/* It may be possible that EEH recovery can resolve some of
+		 * pci bus failure issues rather removing the dead ioc function
+		 * by considering controller is in a non-operational state. So
+		 * here priority is given to the EEH recovery. If it doesn't
+		 * not resolve this issue, mpt2sas driver will consider this
+		 * controller to non-operational state and remove the dead ioc
+		 * function.
+		 */
+		if (ioc->non_operational_loop++ < 5) {
+			spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock,
+							 flags);
+			goto rearm_timer;
+		}
+
 		/*
 		 * Call _scsih_flush_pending_cmds callback so that we flush all
 		 * pending commands back to OS. This call is required to aovid
@@ -193,6 +207,8 @@ _base_fault_reset_work(struct work_struct *work)
 		return; /* don't rearm timer */
 	}
 
+	ioc->non_operational_loop = 0;
+
 	if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) {
 		rc = mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP,
 		    FORCE_BIG_HAMMER);
@@ -2007,6 +2023,14 @@ _base_display_intel_branding(struct MPT2SAS_ADAPTER *ioc)
 			printk(MPT2SAS_INFO_FMT "%s\n", ioc->name,
 			    MPT2SAS_INTEL_RMS25KB040_BRANDING);
 			break;
+		case MPT2SAS_INTEL_RMS25LB040_SSDID:
+			printk(MPT2SAS_INFO_FMT "%s\n", ioc->name,
+			    MPT2SAS_INTEL_RMS25LB040_BRANDING);
+			break;
+		case MPT2SAS_INTEL_RMS25LB080_SSDID:
+			printk(MPT2SAS_INFO_FMT "%s\n", ioc->name,
+			    MPT2SAS_INTEL_RMS25LB080_BRANDING);
+			break;
 		default:
 			break;
 		}
@@ -4386,6 +4410,7 @@ mpt2sas_base_attach(struct MPT2SAS_ADAPTER *ioc)
 	if (missing_delay[0] != -1 && missing_delay[1] != -1)
 		_base_update_missing_delay(ioc, missing_delay[0],
 		    missing_delay[1]);
+	ioc->non_operational_loop = 0;
 
 	return 0;
 
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h
index 543d8d637479..4caaac13682f 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.h
@@ -165,6 +165,10 @@
 				"Intel(R) Integrated RAID Module RMS25KB080"
 #define MPT2SAS_INTEL_RMS25KB040_BRANDING    \
 				"Intel(R) Integrated RAID Module RMS25KB040"
+#define MPT2SAS_INTEL_RMS25LB040_BRANDING	\
+				"Intel(R) Integrated RAID Module RMS25LB040"
+#define MPT2SAS_INTEL_RMS25LB080_BRANDING	\
+				"Intel(R) Integrated RAID Module RMS25LB080"
 #define MPT2SAS_INTEL_RMS2LL080_BRANDING	\
 				"Intel Integrated RAID Module RMS2LL080"
 #define MPT2SAS_INTEL_RMS2LL040_BRANDING	\
@@ -180,6 +184,8 @@
 #define MPT2SAS_INTEL_RMS25JB040_SSDID         0x3517
 #define MPT2SAS_INTEL_RMS25KB080_SSDID         0x3518
 #define MPT2SAS_INTEL_RMS25KB040_SSDID         0x3519
+#define MPT2SAS_INTEL_RMS25LB040_SSDID         0x351A
+#define MPT2SAS_INTEL_RMS25LB080_SSDID         0x351B
 #define MPT2SAS_INTEL_RMS2LL080_SSDID          0x350E
 #define MPT2SAS_INTEL_RMS2LL040_SSDID          0x350F
 #define MPT2SAS_INTEL_RS25GB008_SSDID          0x3000
@@ -835,6 +841,7 @@ struct MPT2SAS_ADAPTER {
 	u16		cpu_msix_table_sz;
 	u32		ioc_reset_count;
 	MPT2SAS_FLUSH_RUNNING_CMDS schedule_dead_ioc_flush_running_cmds;
+	u32             non_operational_loop;
 
 	/* internal commands, callback index */
 	u8		scsi_io_cb_idx;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 04f8010f0770..18360032a520 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -42,7 +42,6 @@
  * USA.
  */
 
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/errno.h>
@@ -1310,7 +1309,6 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc,
 	void *sg_local, *chain;
 	u32 chain_offset;
 	u32 chain_length;
-	u32 chain_flags;
 	int sges_left;
 	u32 sges_in_segment;
 	u8 simple_sgl_flags;
@@ -1356,8 +1354,7 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc,
 		sges_in_segment--;
 	}
 
-	/* initializing the chain flags and pointers */
-	chain_flags = MPI2_SGE_FLAGS_CHAIN_ELEMENT << MPI2_SGE_FLAGS_SHIFT;
+	/* initializing the pointers */
 	chain_req = _base_get_chain_buffer_tracker(ioc, smid);
 	if (!chain_req)
 		return -1;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c
index ce7e59b2fc08..1df9ed4f371d 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_config.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_config.c
@@ -41,7 +41,6 @@
  * USA.
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
index 8af944d7d13d..054d5231c974 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
@@ -42,7 +42,6 @@
  * USA.
  */
 
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/errno.h>
@@ -3136,7 +3135,7 @@ _ctl_diag_trigger_mpi_store(struct device *cdev,
 	spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
 	sz = min(sizeof(struct SL_WH_MPI_TRIGGERS_T), count);
 	memset(&ioc->diag_trigger_mpi, 0,
-	    sizeof(struct SL_WH_EVENT_TRIGGERS_T));
+	    sizeof(ioc->diag_trigger_mpi));
 	memcpy(&ioc->diag_trigger_mpi, buf, sz);
 	if (ioc->diag_trigger_mpi.ValidEntries > NUM_VALID_ENTRIES)
 		ioc->diag_trigger_mpi.ValidEntries = NUM_VALID_ENTRIES;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 6421a06c4ce2..dcbf7c880cb2 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -41,7 +41,6 @@
  * USA.
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -2755,13 +2754,11 @@ _scsih_block_io_to_children_attached_directly(struct MPT3SAS_ADAPTER *ioc,
 	int i;
 	u16 handle;
 	u16 reason_code;
-	u8 phy_number;
 
 	for (i = 0; i < event_data->NumEntries; i++) {
 		handle = le16_to_cpu(event_data->PHY[i].AttachedDevHandle);
 		if (!handle)
 			continue;
-		phy_number = event_data->StartPhyNum + i;
 		reason_code = event_data->PHY[i].PhyStatus &
 		    MPI2_EVENT_SAS_TOPO_RC_MASK;
 		if (reason_code == MPI2_EVENT_SAS_TOPO_RC_DELAY_NOT_RESPONDING)
diff --git a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c
index da6c5f25749c..6f8d6213040b 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c
@@ -42,7 +42,6 @@
  * USA.
  */
 
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/errno.h>
diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index 078c63913b55..532110f4562a 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -316,10 +316,13 @@ static int mvs_task_prep_smp(struct mvs_info *mvi,
 			     struct mvs_task_exec_info *tei)
 {
 	int elem, rc, i;
+	struct sas_ha_struct *sha = mvi->sas;
 	struct sas_task *task = tei->task;
 	struct mvs_cmd_hdr *hdr = tei->hdr;
 	struct domain_device *dev = task->dev;
 	struct asd_sas_port *sas_port = dev->port;
+	struct sas_phy *sphy = dev->phy;
+	struct asd_sas_phy *sas_phy = sha->sas_phy[sphy->number];
 	struct scatterlist *sg_req, *sg_resp;
 	u32 req_len, resp_len, tag = tei->tag;
 	void *buf_tmp;
@@ -392,7 +395,7 @@ static int mvs_task_prep_smp(struct mvs_info *mvi,
 	slot->tx = mvi->tx_prod;
 	mvi->tx[mvi->tx_prod] = cpu_to_le32((TXQ_CMD_SMP << TXQ_CMD_SHIFT) |
 					TXQ_MODE_I | tag |
-					(sas_port->phy_mask << TXQ_PHY_SHIFT));
+					(MVS_PHY_ID << TXQ_PHY_SHIFT));
 
 	hdr->flags |= flags;
 	hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | ((req_len - 4) / 4));
@@ -438,11 +441,14 @@ static u32 mvs_get_ncq_tag(struct sas_task *task, u32 *tag)
 static int mvs_task_prep_ata(struct mvs_info *mvi,
 			     struct mvs_task_exec_info *tei)
 {
+	struct sas_ha_struct *sha = mvi->sas;
 	struct sas_task *task = tei->task;
 	struct domain_device *dev = task->dev;
 	struct mvs_device *mvi_dev = dev->lldd_dev;
 	struct mvs_cmd_hdr *hdr = tei->hdr;
 	struct asd_sas_port *sas_port = dev->port;
+	struct sas_phy *sphy = dev->phy;
+	struct asd_sas_phy *sas_phy = sha->sas_phy[sphy->number];
 	struct mvs_slot_info *slot;
 	void *buf_prd;
 	u32 tag = tei->tag, hdr_tag;
@@ -462,7 +468,7 @@ static int mvs_task_prep_ata(struct mvs_info *mvi,
 	slot->tx = mvi->tx_prod;
 	del_q = TXQ_MODE_I | tag |
 		(TXQ_CMD_STP << TXQ_CMD_SHIFT) |
-		(sas_port->phy_mask << TXQ_PHY_SHIFT) |
+		(MVS_PHY_ID << TXQ_PHY_SHIFT) |
 		(mvi_dev->taskfileset << TXQ_SRS_SHIFT);
 	mvi->tx[mvi->tx_prod] = cpu_to_le32(del_q);
 
diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h
index 2ae77a0394b2..9f3cc13a5ce7 100644
--- a/drivers/scsi/mvsas/mv_sas.h
+++ b/drivers/scsi/mvsas/mv_sas.h
@@ -76,6 +76,7 @@ extern struct kmem_cache *mvs_task_list_cache;
 					(__mc) != 0 ;		\
 					(++__lseq), (__mc) >>= 1)
 
+#define MVS_PHY_ID (1U << sas_phy->id)
 #define MV_INIT_DELAYED_WORK(w, f, d)	INIT_DELAYED_WORK(w, f)
 #define UNASSOC_D2H_FIS(id)		\
 	((void *) mvi->rx_fis + 0x100 * id)
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index c06b8e5aa2cf..d8293f25ca33 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -144,6 +144,10 @@ static int _osd_get_print_system_info(struct osd_dev *od,
 	odi->osdname_len = get_attrs[a].len;
 	/* Avoid NULL for memcmp optimization 0-length is good enough */
 	odi->osdname = kzalloc(odi->osdname_len + 1, GFP_KERNEL);
+	if (!odi->osdname) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	if (odi->osdname_len)
 		memcpy(odi->osdname, get_attrs[a].val_ptr, odi->osdname_len);
 	OSD_INFO("OSD_NAME               [%s]\n", odi->osdname);
diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c
index 4c9fe733fe88..3d5e522e00fc 100644
--- a/drivers/scsi/pm8001/pm8001_init.c
+++ b/drivers/scsi/pm8001/pm8001_init.c
@@ -140,7 +140,8 @@ static void pm8001_free(struct pm8001_hba_info *pm8001_ha)
 	for (i = 0; i < USI_MAX_MEMCNT; i++) {
 		if (pm8001_ha->memoryMap.region[i].virt_ptr != NULL) {
 			pci_free_consistent(pm8001_ha->pdev,
-				pm8001_ha->memoryMap.region[i].element_size,
+				(pm8001_ha->memoryMap.region[i].total_len +
+				pm8001_ha->memoryMap.region[i].alignment),
 				pm8001_ha->memoryMap.region[i].virt_ptr,
 				pm8001_ha->memoryMap.region[i].phys_addr);
 			}
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 83d798428c10..1d82eef4e1eb 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -1272,22 +1272,29 @@ qla2x00_thermal_temp_show(struct device *dev,
 	struct device_attribute *attr, char *buf)
 {
 	scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
-	int rval = QLA_FUNCTION_FAILED;
-	uint16_t temp, frac;
+	uint16_t temp = 0;
 
-	if (!vha->hw->flags.thermal_supported)
-		return snprintf(buf, PAGE_SIZE, "\n");
+	if (!vha->hw->thermal_support) {
+		ql_log(ql_log_warn, vha, 0x70db,
+		    "Thermal not supported by this card.\n");
+		goto done;
+	}
 
-	temp = frac = 0;
-	if (qla2x00_reset_active(vha))
-		ql_log(ql_log_warn, vha, 0x707b,
-		    "ISP reset active.\n");
-	else if (!vha->hw->flags.eeh_busy)
-		rval = qla2x00_get_thermal_temp(vha, &temp, &frac);
-	if (rval != QLA_SUCCESS)
-		return snprintf(buf, PAGE_SIZE, "\n");
+	if (qla2x00_reset_active(vha)) {
+		ql_log(ql_log_warn, vha, 0x70dc, "ISP reset active.\n");
+		goto done;
+	}
+
+	if (vha->hw->flags.eeh_busy) {
+		ql_log(ql_log_warn, vha, 0x70dd, "PCI EEH busy.\n");
+		goto done;
+	}
+
+	if (qla2x00_get_thermal_temp(vha, &temp) == QLA_SUCCESS)
+		return snprintf(buf, PAGE_SIZE, "%d\n", temp);
 
-	return snprintf(buf, PAGE_SIZE, "%d.%02d\n", temp, frac);
+done:
+	return snprintf(buf, PAGE_SIZE, "\n");
 }
 
 static ssize_t
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index 9f34dedcdad7..ad54099cb805 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -27,7 +27,7 @@ void
 qla2x00_bsg_sp_free(void *data, void *ptr)
 {
 	srb_t *sp = (srb_t *)ptr;
-	struct scsi_qla_host *vha = (scsi_qla_host_t *)data;
+	struct scsi_qla_host *vha = sp->fcport->vha;
 	struct fc_bsg_job *bsg_job = sp->u.bsg_job;
 	struct qla_hw_data *ha = vha->hw;
 
@@ -40,7 +40,7 @@ qla2x00_bsg_sp_free(void *data, void *ptr)
 	if (sp->type == SRB_CT_CMD ||
 	    sp->type == SRB_ELS_CMD_HST)
 		kfree(sp->fcport);
-	mempool_free(sp, vha->hw->srb_mempool);
+	qla2x00_rel_sp(vha, sp);
 }
 
 int
@@ -368,7 +368,7 @@ qla2x00_process_els(struct fc_bsg_job *bsg_job)
 	if (rval != QLA_SUCCESS) {
 		ql_log(ql_log_warn, vha, 0x700e,
 		    "qla2x00_start_sp failed = %d\n", rval);
-		mempool_free(sp, ha->srb_mempool);
+		qla2x00_rel_sp(vha, sp);
 		rval = -EIO;
 		goto done_unmap_sg;
 	}
@@ -515,7 +515,7 @@ qla2x00_process_ct(struct fc_bsg_job *bsg_job)
 	if (rval != QLA_SUCCESS) {
 		ql_log(ql_log_warn, vha, 0x7017,
 		    "qla2x00_start_sp failed=%d.\n", rval);
-		mempool_free(sp, ha->srb_mempool);
+		qla2x00_rel_sp(vha, sp);
 		rval = -EIO;
 		goto done_free_fcport;
 	}
@@ -531,6 +531,75 @@ done_unmap_sg:
 done:
 	return rval;
 }
+
+/* Disable loopback mode */
+static inline int
+qla81xx_reset_loopback_mode(scsi_qla_host_t *vha, uint16_t *config,
+			    int wait, int wait2)
+{
+	int ret = 0;
+	int rval = 0;
+	uint16_t new_config[4];
+	struct qla_hw_data *ha = vha->hw;
+
+	if (!IS_QLA81XX(ha) && !IS_QLA8031(ha))
+		goto done_reset_internal;
+
+	memset(new_config, 0 , sizeof(new_config));
+	if ((config[0] & INTERNAL_LOOPBACK_MASK) >> 1 ==
+	    ENABLE_INTERNAL_LOOPBACK ||
+	    (config[0] & INTERNAL_LOOPBACK_MASK) >> 1 ==
+	    ENABLE_EXTERNAL_LOOPBACK) {
+		new_config[0] = config[0] & ~INTERNAL_LOOPBACK_MASK;
+		ql_dbg(ql_dbg_user, vha, 0x70bf, "new_config[0]=%02x\n",
+		    (new_config[0] & INTERNAL_LOOPBACK_MASK));
+		memcpy(&new_config[1], &config[1], sizeof(uint16_t) * 3) ;
+
+		ha->notify_dcbx_comp = wait;
+		ha->notify_lb_portup_comp = wait2;
+
+		ret = qla81xx_set_port_config(vha, new_config);
+		if (ret != QLA_SUCCESS) {
+			ql_log(ql_log_warn, vha, 0x7025,
+			    "Set port config failed.\n");
+			ha->notify_dcbx_comp = 0;
+			ha->notify_lb_portup_comp = 0;
+			rval = -EINVAL;
+			goto done_reset_internal;
+		}
+
+		/* Wait for DCBX complete event */
+		if (wait && !wait_for_completion_timeout(&ha->dcbx_comp,
+			(DCBX_COMP_TIMEOUT * HZ))) {
+			ql_dbg(ql_dbg_user, vha, 0x7026,
+			    "DCBX completion not received.\n");
+			ha->notify_dcbx_comp = 0;
+			ha->notify_lb_portup_comp = 0;
+			rval = -EINVAL;
+			goto done_reset_internal;
+		} else
+			ql_dbg(ql_dbg_user, vha, 0x7027,
+			    "DCBX completion received.\n");
+
+		if (wait2 &&
+		    !wait_for_completion_timeout(&ha->lb_portup_comp,
+		    (LB_PORTUP_COMP_TIMEOUT * HZ))) {
+			ql_dbg(ql_dbg_user, vha, 0x70c5,
+			    "Port up completion not received.\n");
+			ha->notify_lb_portup_comp = 0;
+			rval = -EINVAL;
+			goto done_reset_internal;
+		} else
+			ql_dbg(ql_dbg_user, vha, 0x70c6,
+			    "Port up completion received.\n");
+
+		ha->notify_dcbx_comp = 0;
+		ha->notify_lb_portup_comp = 0;
+	}
+done_reset_internal:
+	return rval;
+}
+
 /*
  * Set the port configuration to enable the internal or external loopback
  * depending on the loopback mode.
@@ -566,9 +635,19 @@ qla81xx_set_loopback_mode(scsi_qla_host_t *vha, uint16_t *config,
 	}
 
 	/* Wait for DCBX complete event */
-	if (!wait_for_completion_timeout(&ha->dcbx_comp, (20 * HZ))) {
+	if (!wait_for_completion_timeout(&ha->dcbx_comp,
+	    (DCBX_COMP_TIMEOUT * HZ))) {
 		ql_dbg(ql_dbg_user, vha, 0x7022,
-		    "State change notification not received.\n");
+		    "DCBX completion not received.\n");
+		ret = qla81xx_reset_loopback_mode(vha, new_config, 0, 0);
+		/*
+		 * If the reset of the loopback mode doesn't work take a FCoE
+		 * dump and reset the chip.
+		 */
+		if (ret) {
+			ha->isp_ops->fw_dump(vha, 0);
+			set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+		}
 		rval = -EINVAL;
 	} else {
 		if (ha->flags.idc_compl_status) {
@@ -578,7 +657,7 @@ qla81xx_set_loopback_mode(scsi_qla_host_t *vha, uint16_t *config,
 			ha->flags.idc_compl_status = 0;
 		} else
 			ql_dbg(ql_dbg_user, vha, 0x7023,
-			    "State change received.\n");
+			    "DCBX completion received.\n");
 	}
 
 	ha->notify_dcbx_comp = 0;
@@ -587,57 +666,6 @@ done_set_internal:
 	return rval;
 }
 
-/* Disable loopback mode */
-static inline int
-qla81xx_reset_loopback_mode(scsi_qla_host_t *vha, uint16_t *config,
-    int wait)
-{
-	int ret = 0;
-	int rval = 0;
-	uint16_t new_config[4];
-	struct qla_hw_data *ha = vha->hw;
-
-	if (!IS_QLA81XX(ha) && !IS_QLA8031(ha))
-		goto done_reset_internal;
-
-	memset(new_config, 0 , sizeof(new_config));
-	if ((config[0] & INTERNAL_LOOPBACK_MASK) >> 1 ==
-	    ENABLE_INTERNAL_LOOPBACK ||
-	    (config[0] & INTERNAL_LOOPBACK_MASK) >> 1 ==
-	    ENABLE_EXTERNAL_LOOPBACK) {
-		new_config[0] = config[0] & ~INTERNAL_LOOPBACK_MASK;
-		ql_dbg(ql_dbg_user, vha, 0x70bf, "new_config[0]=%02x\n",
-		    (new_config[0] & INTERNAL_LOOPBACK_MASK));
-		memcpy(&new_config[1], &config[1], sizeof(uint16_t) * 3) ;
-
-		ha->notify_dcbx_comp = wait;
-		ret = qla81xx_set_port_config(vha, new_config);
-		if (ret != QLA_SUCCESS) {
-			ql_log(ql_log_warn, vha, 0x7025,
-			    "Set port config failed.\n");
-			ha->notify_dcbx_comp = 0;
-			rval = -EINVAL;
-			goto done_reset_internal;
-		}
-
-		/* Wait for DCBX complete event */
-		if (wait && !wait_for_completion_timeout(&ha->dcbx_comp,
-			(20 * HZ))) {
-			ql_dbg(ql_dbg_user, vha, 0x7026,
-			    "State change notification not received.\n");
-			ha->notify_dcbx_comp = 0;
-			rval = -EINVAL;
-			goto done_reset_internal;
-		} else
-			ql_dbg(ql_dbg_user, vha, 0x7027,
-			    "State change received.\n");
-
-		ha->notify_dcbx_comp = 0;
-	}
-done_reset_internal:
-	return rval;
-}
-
 static int
 qla2x00_process_loopback(struct fc_bsg_job *bsg_job)
 {
@@ -739,6 +767,7 @@ qla2x00_process_loopback(struct fc_bsg_job *bsg_job)
 		if (IS_QLA81XX(ha) || IS_QLA8031(ha)) {
 			memset(config, 0, sizeof(config));
 			memset(new_config, 0, sizeof(new_config));
+
 			if (qla81xx_get_port_config(vha, config)) {
 				ql_log(ql_log_warn, vha, 0x701f,
 				    "Get port config failed.\n");
@@ -746,6 +775,14 @@ qla2x00_process_loopback(struct fc_bsg_job *bsg_job)
 				goto done_free_dma_rsp;
 			}
 
+			if ((config[0] & INTERNAL_LOOPBACK_MASK) != 0) {
+				ql_dbg(ql_dbg_user, vha, 0x70c4,
+				    "Loopback operation already in "
+				    "progress.\n");
+				rval = -EAGAIN;
+				goto done_free_dma_rsp;
+			}
+
 			ql_dbg(ql_dbg_user, vha, 0x70c0,
 			    "elreq.options=%04x\n", elreq.options);
 
@@ -755,7 +792,7 @@ qla2x00_process_loopback(struct fc_bsg_job *bsg_job)
 					    config, new_config, elreq.options);
 				else
 					rval = qla81xx_reset_loopback_mode(vha,
-					    config, 1);
+					    config, 1, 0);
 			else
 				rval = qla81xx_set_loopback_mode(vha, config,
 				    new_config, elreq.options);
@@ -772,14 +809,6 @@ qla2x00_process_loopback(struct fc_bsg_job *bsg_job)
 			command_sent = INT_DEF_LB_LOOPBACK_CMD;
 			rval = qla2x00_loopback_test(vha, &elreq, response);
 
-			if (new_config[0]) {
-				/* Revert back to original port config
-				 * Also clear internal loopback
-				 */
-				qla81xx_reset_loopback_mode(vha,
-				    new_config, 0);
-			}
-
 			if (response[0] == MBS_COMMAND_ERROR &&
 					response[1] == MBS_LB_RESET) {
 				ql_log(ql_log_warn, vha, 0x7029,
@@ -788,15 +817,39 @@ qla2x00_process_loopback(struct fc_bsg_job *bsg_job)
 				qla2xxx_wake_dpc(vha);
 				qla2x00_wait_for_chip_reset(vha);
 				/* Also reset the MPI */
-				if (qla81xx_restart_mpi_firmware(vha) !=
-				    QLA_SUCCESS) {
-					ql_log(ql_log_warn, vha, 0x702a,
-					    "MPI reset failed.\n");
+				if (IS_QLA81XX(ha)) {
+					if (qla81xx_restart_mpi_firmware(vha) !=
+					    QLA_SUCCESS) {
+						ql_log(ql_log_warn, vha, 0x702a,
+						    "MPI reset failed.\n");
+					}
 				}
 
 				rval = -EIO;
 				goto done_free_dma_rsp;
 			}
+
+			if (new_config[0]) {
+				int ret;
+
+				/* Revert back to original port config
+				 * Also clear internal loopback
+				 */
+				ret = qla81xx_reset_loopback_mode(vha,
+				    new_config, 0, 1);
+				if (ret) {
+					/*
+					 * If the reset of the loopback mode
+					 * doesn't work take FCoE dump and then
+					 * reset the chip.
+					 */
+					ha->isp_ops->fw_dump(vha, 0);
+					set_bit(ISP_ABORT_NEEDED,
+					    &vha->dpc_flags);
+				}
+
+			}
+
 		} else {
 			type = "FC_BSG_HST_VENDOR_LOOPBACK";
 			ql_dbg(ql_dbg_user, vha, 0x702b,
@@ -1950,7 +2003,7 @@ qla24xx_bsg_timeout(struct fc_bsg_job *bsg_job)
 		if (!req)
 			continue;
 
-		for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) {
+		for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
 			sp = req->outstanding_cmds[cnt];
 			if (sp) {
 				if (((sp->type == SRB_CT_CMD) ||
@@ -1985,6 +2038,6 @@ done:
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 	if (bsg_job->request->msgcode == FC_BSG_HST_CT)
 		kfree(sp->fcport);
-	mempool_free(sp, ha->srb_mempool);
+	qla2x00_rel_sp(vha, sp);
 	return 0;
 }
diff --git a/drivers/scsi/qla2xxx/qla_bsg.h b/drivers/scsi/qla2xxx/qla_bsg.h
index 37b8b7ba7421..e9f6b9bbf29a 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.h
+++ b/drivers/scsi/qla2xxx/qla_bsg.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index 53f9e492f9dc..1626de52e32a 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -11,20 +11,21 @@
  * ----------------------------------------------------------------------
  * |             Level            |   Last Value Used  |     Holes	|
  * ----------------------------------------------------------------------
- * | Module Init and Probe        |       0x0125       | 0x4b,0xba,0xfa |
- * | Mailbox commands             |       0x114f       | 0x111a-0x111b  |
+ * | Module Init and Probe        |       0x0126       | 0x4b,0xba,0xfa |
+ * | Mailbox commands             |       0x115b       | 0x111a-0x111b  |
  * |                              |                    | 0x112c-0x112e  |
  * |                              |                    | 0x113a         |
  * | Device Discovery             |       0x2087       | 0x2020-0x2022, |
  * |                              |                    | 0x2016         |
- * | Queue Command and IO tracing |       0x3030       | 0x3006-0x300b  |
+ * | Queue Command and IO tracing |       0x3031       | 0x3006-0x300b  |
  * |                              |                    | 0x3027-0x3028  |
  * |                              |                    | 0x302d-0x302e  |
  * | DPC Thread                   |       0x401d       | 0x4002,0x4013  |
  * | Async Events                 |       0x5071       | 0x502b-0x502f  |
  * |                              |                    | 0x5047,0x5052  |
  * | Timer Routines               |       0x6011       |                |
- * | User Space Interactions      |       0x70c3       | 0x7018,0x702e, |
+ * | User Space Interactions      |       0x70c4       | 0x7018,0x702e, |
+ * |                              |                    | 0x7020,0x7024, |
  * |                              |                    | 0x7039,0x7045, |
  * |                              |                    | 0x7073-0x7075, |
  * |                              |                    | 0x708c,        |
@@ -35,11 +36,11 @@
  * |                              |                    | 0x800b,0x8039  |
  * | AER/EEH                      |       0x9011       |		|
  * | Virtual Port                 |       0xa007       |		|
- * | ISP82XX Specific             |       0xb084       | 0xb002,0xb024  |
+ * | ISP82XX Specific             |       0xb086       | 0xb002,0xb024  |
  * | MultiQ                       |       0xc00c       |		|
  * | Misc                         |       0xd010       |		|
- * | Target Mode		  |	  0xe06f       |		|
- * | Target Mode Management	  |	  0xf071       |		|
+ * | Target Mode		  |	  0xe070       |		|
+ * | Target Mode Management	  |	  0xf072       |		|
  * | Target Mode Task Management  |	  0x1000b      |		|
  * ----------------------------------------------------------------------
  */
diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h
index 8f911c0b1e74..35e20b4f8b6c 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.h
+++ b/drivers/scsi/qla2xxx/qla_dbg.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 6e7727f46d43..c6509911772b 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -37,6 +37,7 @@
 #include "qla_nx.h"
 #define QLA2XXX_DRIVER_NAME	"qla2xxx"
 #define QLA2XXX_APIDEV		"ql2xapidev"
+#define QLA2XXX_MANUFACTURER	"QLogic Corporation"
 
 /*
  * We have MAILBOX_REGISTER_COUNT sized arrays in a few places,
@@ -253,8 +254,8 @@
 #define LOOP_DOWN_TIME			255	/* 240 */
 #define	LOOP_DOWN_RESET			(LOOP_DOWN_TIME - 30)
 
-/* Maximum outstanding commands in ISP queues (1-65535) */
-#define MAX_OUTSTANDING_COMMANDS	1024
+#define DEFAULT_OUTSTANDING_COMMANDS	1024
+#define MIN_OUTSTANDING_COMMANDS	128
 
 /* ISP request and response entry counts (37-65535) */
 #define REQUEST_ENTRY_CNT_2100		128	/* Number of request entries. */
@@ -537,6 +538,8 @@ struct device_reg_25xxmq {
 	uint32_t req_q_out;
 	uint32_t rsp_q_in;
 	uint32_t rsp_q_out;
+	uint32_t atio_q_in;
+	uint32_t atio_q_out;
 };
 
 typedef union {
@@ -563,6 +566,9 @@ typedef union {
 	 &(reg)->u.isp2100.mailbox5 : \
 	 &(reg)->u.isp2300.rsp_q_out)
 
+#define ISP_ATIO_Q_IN(vha) (vha->hw->tgt.atio_q_in)
+#define ISP_ATIO_Q_OUT(vha) (vha->hw->tgt.atio_q_out)
+
 #define MAILBOX_REG(ha, reg, num) \
 	(IS_QLA2100(ha) || IS_QLA2200(ha) ? \
 	 (num < 8 ? \
@@ -762,8 +768,8 @@ typedef struct {
 #define MBC_PORT_LOGOUT			0x56	/* Port Logout request */
 #define MBC_SEND_RNID_ELS		0x57	/* Send RNID ELS request */
 #define MBC_SET_RNID_PARAMS		0x59	/* Set RNID parameters */
-#define MBC_GET_RNID_PARAMS		0x5a	/* Data Rate */
-#define MBC_DATA_RATE			0x5d	/* Get RNID parameters */
+#define MBC_GET_RNID_PARAMS		0x5a	/* Get RNID parameters */
+#define MBC_DATA_RATE			0x5d	/* Data Rate */
 #define MBC_INITIALIZE_FIRMWARE		0x60	/* Initialize firmware */
 #define MBC_INITIATE_LIP		0x62	/* Initiate Loop */
 						/* Initialization Procedure */
@@ -809,6 +815,7 @@ typedef struct {
 #define MBC_HOST_MEMORY_COPY		0x53	/* Host Memory Copy. */
 #define MBC_SEND_RNFT_ELS		0x5e	/* Send RNFT ELS request */
 #define MBC_GET_LINK_PRIV_STATS		0x6d	/* Get link & private data. */
+#define MBC_LINK_INITIALIZATION		0x72	/* Do link initialization. */
 #define MBC_SET_VENDOR_ID		0x76	/* Set Vendor ID. */
 #define MBC_PORT_RESET			0x120	/* Port Reset */
 #define MBC_SET_PORT_CONFIG		0x122	/* Set port configuration */
@@ -856,6 +863,9 @@ typedef struct {
 #define	MBX_1		BIT_1
 #define	MBX_0		BIT_0
 
+#define RNID_TYPE_SET_VERSION	0x9
+#define RNID_TYPE_ASIC_TEMP	0xC
+
 /*
  * Firmware state codes from get firmware state mailbox command
  */
@@ -1841,9 +1851,6 @@ typedef struct fc_port {
 	uint8_t scan_state;
 } fc_port_t;
 
-#define QLA_FCPORT_SCAN_NONE	0
-#define QLA_FCPORT_SCAN_FOUND	1
-
 /*
  * Fibre channel port/lun states.
  */
@@ -2533,8 +2540,10 @@ struct req_que {
 	uint16_t  qos;
 	uint16_t  vp_idx;
 	struct rsp_que *rsp;
-	srb_t *outstanding_cmds[MAX_OUTSTANDING_COMMANDS];
+	srb_t **outstanding_cmds;
 	uint32_t current_outstanding_cmd;
+	uint16_t num_outstanding_cmds;
+#define	MAX_Q_DEPTH		32
 	int max_q_depth;
 };
 
@@ -2557,11 +2566,13 @@ struct qlt_hw_data {
 	struct atio *atio_ring_ptr;	/* Current address. */
 	uint16_t atio_ring_index; /* Current index. */
 	uint16_t atio_q_length;
+	uint32_t __iomem *atio_q_in;
+	uint32_t __iomem *atio_q_out;
 
 	void *target_lport_ptr;
 	struct qla_tgt_func_tmpl *tgt_ops;
 	struct qla_tgt *qla_tgt;
-	struct qla_tgt_cmd *cmds[MAX_OUTSTANDING_COMMANDS];
+	struct qla_tgt_cmd *cmds[DEFAULT_OUTSTANDING_COMMANDS];
 	uint16_t current_handle;
 
 	struct qla_tgt_vp_map *tgt_vp_map;
@@ -2618,7 +2629,6 @@ struct qla_hw_data {
 		uint32_t	nic_core_hung:1;
 
 		uint32_t	quiesce_owner:1;
-		uint32_t	thermal_supported:1;
 		uint32_t	nic_core_reset_hdlr_active:1;
 		uint32_t	nic_core_reset_owner:1;
 		uint32_t	isp82xx_no_md_cap:1;
@@ -2788,6 +2798,8 @@ struct qla_hw_data {
 #define IS_PI_SPLIT_DET_CAPABLE_HBA(ha)	(IS_QLA83XX(ha))
 #define IS_PI_SPLIT_DET_CAPABLE(ha)	(IS_PI_SPLIT_DET_CAPABLE_HBA(ha) && \
     (((ha)->fw_attributes_h << 16 | (ha)->fw_attributes) & BIT_22))
+#define IS_ATIO_MSIX_CAPABLE(ha) (IS_QLA83XX(ha))
+#define IS_TGT_MODE_CAPABLE(ha)	(ha->tgt.atio_q_length)
 
 	/* HBA serial number */
 	uint8_t		serial0;
@@ -2870,7 +2882,13 @@ struct qla_hw_data {
 	struct completion mbx_cmd_comp; /* Serialize mbx access */
 	struct completion mbx_intr_comp;  /* Used for completion notification */
 	struct completion dcbx_comp;	/* For set port config notification */
+	struct completion lb_portup_comp; /* Used to wait for link up during
+					   * loopback */
+#define DCBX_COMP_TIMEOUT	20
+#define LB_PORTUP_COMP_TIMEOUT	10
+
 	int notify_dcbx_comp;
+	int notify_lb_portup_comp;
 	struct mutex selflogin_lock;
 
 	/* Basic firmware related information. */
@@ -2887,6 +2905,7 @@ struct qla_hw_data {
 #define RISC_START_ADDRESS_2300 0x800
 #define RISC_START_ADDRESS_2400 0x100000
 	uint16_t	fw_xcb_count;
+	uint16_t	fw_iocb_count;
 
 	uint16_t	fw_options[16];         /* slots: 1,2,3,10,11 */
 	uint8_t		fw_seriallink_options[4];
@@ -3056,7 +3075,16 @@ struct qla_hw_data {
 	struct work_struct idc_state_handler;
 	struct work_struct nic_core_unrecoverable;
 
+#define HOST_QUEUE_RAMPDOWN_INTERVAL           (60 * HZ)
+#define HOST_QUEUE_RAMPUP_INTERVAL             (30 * HZ)
+	unsigned long   host_last_rampdown_time;
+	unsigned long   host_last_rampup_time;
+	int             cfg_lun_q_depth;
+
 	struct qlt_hw_data tgt;
+	uint16_t	thermal_support;
+#define THERMAL_SUPPORT_I2C BIT_0
+#define THERMAL_SUPPORT_ISP BIT_1
 };
 
 /*
@@ -3115,6 +3143,8 @@ typedef struct scsi_qla_host {
 #define MPI_RESET_NEEDED	19	/* Initiate MPI FW reset */
 #define ISP_QUIESCE_NEEDED	20	/* Driver need some quiescence */
 #define SCR_PENDING		21	/* SCR in target mode */
+#define HOST_RAMP_DOWN_QUEUE_DEPTH     22
+#define HOST_RAMP_UP_QUEUE_DEPTH       23
 
 	uint32_t	device_flags;
 #define SWITCH_FOUND		BIT_0
@@ -3248,8 +3278,6 @@ struct qla_tgt_vp_map {
 
 #define NVRAM_DELAY()		udelay(10)
 
-#define INVALID_HANDLE	(MAX_OUTSTANDING_COMMANDS+1)
-
 /*
  * Flash support definitions
  */
diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c
index 706c4f7bc7c9..792a29294b62 100644
--- a/drivers/scsi/qla2xxx/qla_dfs.c
+++ b/drivers/scsi/qla2xxx/qla_dfs.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
index be6d61a89edc..1ac2b0e3a0e1 100644
--- a/drivers/scsi/qla2xxx/qla_fw.h
+++ b/drivers/scsi/qla2xxx/qla_fw.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -300,7 +300,8 @@ struct init_cb_24xx {
 	uint32_t prio_request_q_address[2];
 
 	uint16_t msix;
-	uint8_t reserved_2[6];
+	uint16_t msix_atio;
+	uint8_t reserved_2[4];
 
 	uint16_t atio_q_inpointer;
 	uint16_t atio_q_length;
@@ -1387,9 +1388,7 @@ struct qla_flt_header {
 #define FLT_REG_FCP_PRIO_0	0x87
 #define FLT_REG_FCP_PRIO_1	0x88
 #define FLT_REG_FCOE_FW		0xA4
-#define FLT_REG_FCOE_VPD_0	0xA9
 #define FLT_REG_FCOE_NVRAM_0	0xAA
-#define FLT_REG_FCOE_VPD_1	0xAB
 #define FLT_REG_FCOE_NVRAM_1	0xAC
 
 struct qla_flt_region {
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 2411d1a12b26..eb3ca21a7f17 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -55,7 +55,7 @@ extern void qla2x00_update_fcport(scsi_qla_host_t *, fc_port_t *);
 extern void qla2x00_alloc_fw_dump(scsi_qla_host_t *);
 extern void qla2x00_try_to_stop_firmware(scsi_qla_host_t *);
 
-extern int qla2x00_get_thermal_temp(scsi_qla_host_t *, uint16_t *, uint16_t *);
+extern int qla2x00_get_thermal_temp(scsi_qla_host_t *, uint16_t *);
 
 extern void qla84xx_put_chip(struct scsi_qla_host *);
 
@@ -84,6 +84,9 @@ extern int qla83xx_nic_core_reset(scsi_qla_host_t *);
 extern void qla83xx_reset_ownership(scsi_qla_host_t *);
 extern int qla2xxx_mctp_dump(scsi_qla_host_t *);
 
+extern int
+qla2x00_alloc_outstanding_cmds(struct qla_hw_data *, struct req_que *);
+
 /*
  * Global Data in qla_os.c source file.
  */
@@ -94,6 +97,7 @@ extern int qlport_down_retry;
 extern int ql2xplogiabsentdevice;
 extern int ql2xloginretrycount;
 extern int ql2xfdmienable;
+extern int ql2xmaxqdepth;
 extern int ql2xallocfwdump;
 extern int ql2xextended_error_logging;
 extern int ql2xiidmaenable;
@@ -278,6 +282,9 @@ extern int
 qla2x00_get_port_name(scsi_qla_host_t *, uint16_t, uint8_t *, uint8_t);
 
 extern int
+qla24xx_link_initialize(scsi_qla_host_t *);
+
+extern int
 qla2x00_lip_reset(scsi_qla_host_t *);
 
 extern int
@@ -351,6 +358,9 @@ extern int
 qla2x00_disable_fce_trace(scsi_qla_host_t *, uint64_t *, uint64_t *);
 
 extern int
+qla2x00_set_driver_version(scsi_qla_host_t *, char *);
+
+extern int
 qla2x00_read_sfp(scsi_qla_host_t *, dma_addr_t, uint8_t *,
 	uint16_t, uint16_t, uint16_t, uint16_t);
 
@@ -436,6 +446,7 @@ extern uint8_t *qla25xx_read_nvram_data(scsi_qla_host_t *, uint8_t *, uint32_t,
     uint32_t);
 extern int qla25xx_write_nvram_data(scsi_qla_host_t *, uint8_t *, uint32_t,
     uint32_t);
+extern int qla2x00_is_a_vp_did(scsi_qla_host_t *, uint32_t);
 
 extern int qla2x00_beacon_on(struct scsi_qla_host *);
 extern int qla2x00_beacon_off(struct scsi_qla_host *);
diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
index 01efc0e9cc36..9b455250c101 100644
--- a/drivers/scsi/qla2xxx/qla_gs.c
+++ b/drivers/scsi/qla2xxx/qla_gs.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -1328,8 +1328,8 @@ qla2x00_fdmi_rhba(scsi_qla_host_t *vha)
 	/* Manufacturer. */
 	eiter = (struct ct_fdmi_hba_attr *) (entries + size);
 	eiter->type = __constant_cpu_to_be16(FDMI_HBA_MANUFACTURER);
-	strcpy(eiter->a.manufacturer, "QLogic Corporation");
-	alen = strlen(eiter->a.manufacturer);
+	alen = strlen(QLA2XXX_MANUFACTURER);
+	strncpy(eiter->a.manufacturer, QLA2XXX_MANUFACTURER, alen + 1);
 	alen += (alen & 3) ? (4 - (alen & 3)) : 4;
 	eiter->len = cpu_to_be16(4 + alen);
 	size += 4 + alen;
@@ -1649,8 +1649,8 @@ qla2x00_fdmi_rpa(scsi_qla_host_t *vha)
 	/* OS device name. */
 	eiter = (struct ct_fdmi_port_attr *) (entries + size);
 	eiter->type = __constant_cpu_to_be16(FDMI_PORT_OS_DEVICE_NAME);
-	strcpy(eiter->a.os_dev_name, QLA2XXX_DRIVER_NAME);
-	alen = strlen(eiter->a.os_dev_name);
+	alen = strlen(QLA2XXX_DRIVER_NAME);
+	strncpy(eiter->a.os_dev_name, QLA2XXX_DRIVER_NAME, alen + 1);
 	alen += (alen & 3) ? (4 - (alen & 3)) : 4;
 	eiter->len = cpu_to_be16(4 + alen);
 	size += 4 + alen;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 563eee3fa924..edf4d14a1335 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -70,9 +70,7 @@ qla2x00_sp_free(void *data, void *ptr)
 	struct scsi_qla_host *vha = (scsi_qla_host_t *)data;
 
 	del_timer(&iocb->timer);
-	mempool_free(sp, vha->hw->srb_mempool);
-
-	QLA_VHA_MARK_NOT_BUSY(vha);
+	qla2x00_rel_sp(vha, sp);
 }
 
 /* Asynchronous Login/Logout Routines -------------------------------------- */
@@ -525,7 +523,7 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha)
 	vha->flags.reset_active = 0;
 	ha->flags.pci_channel_io_perm_failure = 0;
 	ha->flags.eeh_busy = 0;
-	ha->flags.thermal_supported = 1;
+	ha->thermal_support = THERMAL_SUPPORT_I2C|THERMAL_SUPPORT_ISP;
 	atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME);
 	atomic_set(&vha->loop_state, LOOP_DOWN);
 	vha->device_flags = DFLG_NO_CABLE;
@@ -621,6 +619,8 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha)
 	if (IS_QLA24XX_TYPE(ha) || IS_QLA25XX(ha))
 		qla24xx_read_fcp_prio_cfg(vha);
 
+	qla2x00_set_driver_version(vha, QLA2XXX_VERSION);
+
 	return (rval);
 }
 
@@ -1559,6 +1559,47 @@ done:
 	return rval;
 }
 
+int
+qla2x00_alloc_outstanding_cmds(struct qla_hw_data *ha, struct req_que *req)
+{
+	/* Don't try to reallocate the array */
+	if (req->outstanding_cmds)
+		return QLA_SUCCESS;
+
+	if (!IS_FWI2_CAPABLE(ha) || (ha->mqiobase &&
+	    (ql2xmultique_tag || ql2xmaxqueues > 1)))
+		req->num_outstanding_cmds = DEFAULT_OUTSTANDING_COMMANDS;
+	else {
+		if (ha->fw_xcb_count <= ha->fw_iocb_count)
+			req->num_outstanding_cmds = ha->fw_xcb_count;
+		else
+			req->num_outstanding_cmds = ha->fw_iocb_count;
+	}
+
+	req->outstanding_cmds = kzalloc(sizeof(srb_t *) *
+	    req->num_outstanding_cmds, GFP_KERNEL);
+
+	if (!req->outstanding_cmds) {
+		/*
+		 * Try to allocate a minimal size just so we can get through
+		 * initialization.
+		 */
+		req->num_outstanding_cmds = MIN_OUTSTANDING_COMMANDS;
+		req->outstanding_cmds = kzalloc(sizeof(srb_t *) *
+		    req->num_outstanding_cmds, GFP_KERNEL);
+
+		if (!req->outstanding_cmds) {
+			ql_log(ql_log_fatal, NULL, 0x0126,
+			    "Failed to allocate memory for "
+			    "outstanding_cmds for req_que %p.\n", req);
+			req->num_outstanding_cmds = 0;
+			return QLA_FUNCTION_FAILED;
+		}
+	}
+
+	return QLA_SUCCESS;
+}
+
 /**
  * qla2x00_setup_chip() - Load and start RISC firmware.
  * @ha: HA context
@@ -1628,9 +1669,18 @@ enable_82xx_npiv:
 						    MIN_MULTI_ID_FABRIC - 1;
 				}
 				qla2x00_get_resource_cnts(vha, NULL,
-				    &ha->fw_xcb_count, NULL, NULL,
+				    &ha->fw_xcb_count, NULL, &ha->fw_iocb_count,
 				    &ha->max_npiv_vports, NULL);
 
+				/*
+				 * Allocate the array of outstanding commands
+				 * now that we know the firmware resources.
+				 */
+				rval = qla2x00_alloc_outstanding_cmds(ha,
+				    vha->req);
+				if (rval != QLA_SUCCESS)
+					goto failed;
+
 				if (!fw_major_version && ql2xallocfwdump
 				    && !IS_QLA82XX(ha))
 					qla2x00_alloc_fw_dump(vha);
@@ -1914,7 +1964,7 @@ qla24xx_config_rings(struct scsi_qla_host *vha)
 		WRT_REG_DWORD(&reg->isp24.rsp_q_in, 0);
 		WRT_REG_DWORD(&reg->isp24.rsp_q_out, 0);
 	}
-	qlt_24xx_config_rings(vha, reg);
+	qlt_24xx_config_rings(vha);
 
 	/* PCI posting */
 	RD_REG_DWORD(&ioreg->hccr);
@@ -1948,7 +1998,7 @@ qla2x00_init_rings(scsi_qla_host_t *vha)
 		req = ha->req_q_map[que];
 		if (!req)
 			continue;
-		for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++)
+		for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++)
 			req->outstanding_cmds[cnt] = NULL;
 
 		req->current_outstanding_cmd = 1;
@@ -2157,6 +2207,7 @@ qla2x00_configure_hba(scsi_qla_host_t *vha)
 	char		connect_type[22];
 	struct qla_hw_data *ha = vha->hw;
 	unsigned long flags;
+	scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev);
 
 	/* Get host addresses. */
 	rval = qla2x00_get_adapter_id(vha,
@@ -2170,6 +2221,13 @@ qla2x00_configure_hba(scsi_qla_host_t *vha)
 		} else {
 			ql_log(ql_log_warn, vha, 0x2009,
 			    "Unable to get host loop ID.\n");
+			if (IS_FWI2_CAPABLE(ha) && (vha == base_vha) &&
+			    (rval == QLA_COMMAND_ERROR && loop_id == 0x1b)) {
+				ql_log(ql_log_warn, vha, 0x1151,
+				    "Doing link init.\n");
+				if (qla24xx_link_initialize(vha) == QLA_SUCCESS)
+					return rval;
+			}
 			set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
 		}
 		return (rval);
@@ -2690,7 +2748,6 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags)
 	fcport->loop_id = FC_NO_LOOP_ID;
 	qla2x00_set_fcport_state(fcport, FCS_UNCONFIGURED);
 	fcport->supported_classes = FC_COS_UNSPECIFIED;
-	fcport->scan_state = QLA_FCPORT_SCAN_NONE;
 
 	return fcport;
 }
@@ -3103,7 +3160,7 @@ static int
 qla2x00_configure_fabric(scsi_qla_host_t *vha)
 {
 	int	rval;
-	fc_port_t	*fcport;
+	fc_port_t	*fcport, *fcptemp;
 	uint16_t	next_loopid;
 	uint16_t	mb[MAILBOX_REGISTER_COUNT];
 	uint16_t	loop_id;
@@ -3141,7 +3198,7 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha)
 		    0xfc, mb, BIT_1|BIT_0);
 		if (rval != QLA_SUCCESS) {
 			set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
-			break;
+			return rval;
 		}
 		if (mb[0] != MBS_COMMAND_COMPLETE) {
 			ql_dbg(ql_dbg_disc, vha, 0x2042,
@@ -3173,16 +3230,21 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha)
 			}
 		}
 
+#define QLA_FCPORT_SCAN		1
+#define QLA_FCPORT_FOUND	2
+
+		list_for_each_entry(fcport, &vha->vp_fcports, list) {
+			fcport->scan_state = QLA_FCPORT_SCAN;
+		}
+
 		rval = qla2x00_find_all_fabric_devs(vha, &new_fcports);
 		if (rval != QLA_SUCCESS)
 			break;
 
-		/* Add new ports to existing port list */
-		list_splice_tail_init(&new_fcports, &vha->vp_fcports);
-
-		/* Starting free loop ID. */
-		next_loopid = ha->min_external_loopid;
-
+		/*
+		 * Logout all previous fabric devices marked lost, except
+		 * FCP2 devices.
+		 */
 		list_for_each_entry(fcport, &vha->vp_fcports, list) {
 			if (test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags))
 				break;
@@ -3190,8 +3252,7 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha)
 			if ((fcport->flags & FCF_FABRIC_DEVICE) == 0)
 				continue;
 
-			/* Logout lost/gone fabric devices (non-FCP2) */
-			if (fcport->scan_state != QLA_FCPORT_SCAN_FOUND &&
+			if (fcport->scan_state == QLA_FCPORT_SCAN &&
 			    atomic_read(&fcport->state) == FCS_ONLINE) {
 				qla2x00_mark_device_lost(vha, fcport,
 				    ql2xplogiabsentdevice, 0);
@@ -3204,30 +3265,74 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha)
 					    fcport->d_id.b.domain,
 					    fcport->d_id.b.area,
 					    fcport->d_id.b.al_pa);
+					fcport->loop_id = FC_NO_LOOP_ID;
 				}
-				continue;
 			}
-			fcport->scan_state = QLA_FCPORT_SCAN_NONE;
-
-			/* Login fabric devices that need a login */
-			if ((fcport->flags & FCF_LOGIN_NEEDED) != 0 &&
-			    atomic_read(&vha->loop_down_timer) == 0) {
-				if (fcport->loop_id == FC_NO_LOOP_ID) {
-					fcport->loop_id = next_loopid;
-					rval = qla2x00_find_new_loop_id(
-					    base_vha, fcport);
-					if (rval != QLA_SUCCESS) {
-						/* Ran out of IDs to use */
-						continue;
-					}
+		}
+
+		/* Starting free loop ID. */
+		next_loopid = ha->min_external_loopid;
+
+		/*
+		 * Scan through our port list and login entries that need to be
+		 * logged in.
+		 */
+		list_for_each_entry(fcport, &vha->vp_fcports, list) {
+			if (atomic_read(&vha->loop_down_timer) ||
+			    test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags))
+				break;
+
+			if ((fcport->flags & FCF_FABRIC_DEVICE) == 0 ||
+			    (fcport->flags & FCF_LOGIN_NEEDED) == 0)
+				continue;
+
+			if (fcport->loop_id == FC_NO_LOOP_ID) {
+				fcport->loop_id = next_loopid;
+				rval = qla2x00_find_new_loop_id(
+				    base_vha, fcport);
+				if (rval != QLA_SUCCESS) {
+					/* Ran out of IDs to use */
+					break;
 				}
 			}
+			/* Login and update database */
+			qla2x00_fabric_dev_login(vha, fcport, &next_loopid);
+		}
+
+		/* Exit if out of loop IDs. */
+		if (rval != QLA_SUCCESS) {
+			break;
+		}
+
+		/*
+		 * Login and add the new devices to our port list.
+		 */
+		list_for_each_entry_safe(fcport, fcptemp, &new_fcports, list) {
+			if (atomic_read(&vha->loop_down_timer) ||
+			    test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags))
+				break;
+
+			/* Find a new loop ID to use. */
+			fcport->loop_id = next_loopid;
+			rval = qla2x00_find_new_loop_id(base_vha, fcport);
+			if (rval != QLA_SUCCESS) {
+				/* Ran out of IDs to use */
+				break;
+			}
 
 			/* Login and update database */
 			qla2x00_fabric_dev_login(vha, fcport, &next_loopid);
+
+			list_move_tail(&fcport->list, &vha->vp_fcports);
 		}
 	} while (0);
 
+	/* Free all new device structures not processed. */
+	list_for_each_entry_safe(fcport, fcptemp, &new_fcports, list) {
+		list_del(&fcport->list);
+		kfree(fcport);
+	}
+
 	if (rval) {
 		ql_dbg(ql_dbg_disc, vha, 0x2068,
 		    "Configure fabric error exit rval=%d.\n", rval);
@@ -3263,8 +3368,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha,
 	int		first_dev, last_dev;
 	port_id_t	wrap = {}, nxt_d_id;
 	struct qla_hw_data *ha = vha->hw;
-	struct scsi_qla_host *vp, *base_vha = pci_get_drvdata(ha->pdev);
-	struct scsi_qla_host *tvp;
+	struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
 
 	rval = QLA_SUCCESS;
 
@@ -3377,22 +3481,8 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha,
 			continue;
 
 		/* Bypass virtual ports of the same host. */
-		found = 0;
-		if (ha->num_vhosts) {
-			unsigned long flags;
-
-			spin_lock_irqsave(&ha->vport_slock, flags);
-			list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) {
-				if (new_fcport->d_id.b24 == vp->d_id.b24) {
-					found = 1;
-					break;
-				}
-			}
-			spin_unlock_irqrestore(&ha->vport_slock, flags);
-
-			if (found)
-				continue;
-		}
+		if (qla2x00_is_a_vp_did(vha, new_fcport->d_id.b24))
+			continue;
 
 		/* Bypass if same domain and area of adapter. */
 		if (((new_fcport->d_id.b24 & 0xffff00) ==
@@ -3417,7 +3507,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha,
 			    WWN_SIZE))
 				continue;
 
-			fcport->scan_state = QLA_FCPORT_SCAN_FOUND;
+			fcport->scan_state = QLA_FCPORT_FOUND;
 
 			found++;
 
@@ -5004,7 +5094,7 @@ qla24xx_load_risc_flash(scsi_qla_host_t *vha, uint32_t *srisc_addr,
 	return rval;
 }
 
-#define QLA_FW_URL "ftp://ftp.qlogic.com/outgoing/linux/firmware/"
+#define QLA_FW_URL "http://ldriver.qlogic.com/firmware/"
 
 int
 qla2x00_load_risc(scsi_qla_host_t *vha, uint32_t *srisc_addr)
@@ -5529,6 +5619,8 @@ qla81xx_nvram_config(scsi_qla_host_t *vha)
 	if (IS_T10_PI_CAPABLE(ha))
 		nv->frame_payload_size &= ~7;
 
+	qlt_81xx_config_nvram_stage1(vha, nv);
+
 	/* Reset Initialization control block */
 	memset(icb, 0, ha->init_cb_size);
 
@@ -5569,6 +5661,8 @@ qla81xx_nvram_config(scsi_qla_host_t *vha)
 	qla2x00_set_model_info(vha, nv->model_name, sizeof(nv->model_name),
 	    "QLE8XXX");
 
+	qlt_81xx_config_nvram_stage2(vha, icb);
+
 	/* Use alternate WWN? */
 	if (nv->host_p & __constant_cpu_to_le32(BIT_15)) {
 		memcpy(icb->node_name, nv->alternate_node_name, WWN_SIZE);
diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
index c0462c04c885..68e2c4afc134 100644
--- a/drivers/scsi/qla2xxx/qla_inline.h
+++ b/drivers/scsi/qla2xxx/qla_inline.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -198,6 +198,13 @@ done:
 }
 
 static inline void
+qla2x00_rel_sp(scsi_qla_host_t *vha, srb_t *sp)
+{
+	mempool_free(sp, vha->hw->srb_mempool);
+	QLA_VHA_MARK_NOT_BUSY(vha);
+}
+
+static inline void
 qla2x00_init_timer(srb_t *sp, unsigned long tmo)
 {
 	init_timer(&sp->u.iocb_cmd.timer);
@@ -213,3 +220,22 @@ qla2x00_gid_list_size(struct qla_hw_data *ha)
 {
 	return sizeof(struct gid_list_info) * ha->max_fibre_devices;
 }
+
+static inline void
+qla2x00_do_host_ramp_up(scsi_qla_host_t *vha)
+{
+	if (vha->hw->cfg_lun_q_depth >= ql2xmaxqdepth)
+		return;
+
+	/* Wait at least HOST_QUEUE_RAMPDOWN_INTERVAL before ramping up */
+	if (time_before(jiffies, (vha->hw->host_last_rampdown_time +
+	    HOST_QUEUE_RAMPDOWN_INTERVAL)))
+		return;
+
+	/* Wait at least HOST_QUEUE_RAMPUP_INTERVAL between each ramp up */
+	if (time_before(jiffies, (vha->hw->host_last_rampup_time +
+	    HOST_QUEUE_RAMPUP_INTERVAL)))
+		return;
+
+	set_bit(HOST_RAMP_UP_QUEUE_DEPTH, &vha->dpc_flags);
+}
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index a481684479c1..d2630317cce8 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -349,14 +349,14 @@ qla2x00_start_scsi(srb_t *sp)
 
 	/* Check for room in outstanding command list. */
 	handle = req->current_outstanding_cmd;
-	for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) {
+	for (index = 1; index < req->num_outstanding_cmds; index++) {
 		handle++;
-		if (handle == MAX_OUTSTANDING_COMMANDS)
+		if (handle == req->num_outstanding_cmds)
 			handle = 1;
 		if (!req->outstanding_cmds[handle])
 			break;
 	}
-	if (index == MAX_OUTSTANDING_COMMANDS)
+	if (index == req->num_outstanding_cmds)
 		goto queuing_error;
 
 	/* Map the sg table so we have an accurate count of sg entries needed */
@@ -1467,16 +1467,15 @@ qla24xx_start_scsi(srb_t *sp)
 
 	/* Check for room in outstanding command list. */
 	handle = req->current_outstanding_cmd;
-	for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) {
+	for (index = 1; index < req->num_outstanding_cmds; index++) {
 		handle++;
-		if (handle == MAX_OUTSTANDING_COMMANDS)
+		if (handle == req->num_outstanding_cmds)
 			handle = 1;
 		if (!req->outstanding_cmds[handle])
 			break;
 	}
-	if (index == MAX_OUTSTANDING_COMMANDS) {
+	if (index == req->num_outstanding_cmds)
 		goto queuing_error;
-	}
 
 	/* Map the sg table so we have an accurate count of sg entries needed */
 	if (scsi_sg_count(cmd)) {
@@ -1641,15 +1640,15 @@ qla24xx_dif_start_scsi(srb_t *sp)
 
 	/* Check for room in outstanding command list. */
 	handle = req->current_outstanding_cmd;
-	for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) {
+	for (index = 1; index < req->num_outstanding_cmds; index++) {
 		handle++;
-		if (handle == MAX_OUTSTANDING_COMMANDS)
+		if (handle == req->num_outstanding_cmds)
 			handle = 1;
 		if (!req->outstanding_cmds[handle])
 			break;
 	}
 
-	if (index == MAX_OUTSTANDING_COMMANDS)
+	if (index == req->num_outstanding_cmds)
 		goto queuing_error;
 
 	/* Compute number of required data segments */
@@ -1822,14 +1821,14 @@ qla2x00_alloc_iocbs(scsi_qla_host_t *vha, srb_t *sp)
 
 	/* Check for room in outstanding command list. */
 	handle = req->current_outstanding_cmd;
-	for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) {
+	for (index = 1; req->num_outstanding_cmds; index++) {
 		handle++;
-		if (handle == MAX_OUTSTANDING_COMMANDS)
+		if (handle == req->num_outstanding_cmds)
 			handle = 1;
 		if (!req->outstanding_cmds[handle])
 			break;
 	}
-	if (index == MAX_OUTSTANDING_COMMANDS) {
+	if (index == req->num_outstanding_cmds) {
 		ql_log(ql_log_warn, vha, 0x700b,
 		    "No room on outstanding cmd array.\n");
 		goto queuing_error;
@@ -2263,14 +2262,14 @@ qla82xx_start_scsi(srb_t *sp)
 
 	/* Check for room in outstanding command list. */
 	handle = req->current_outstanding_cmd;
-	for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) {
+	for (index = 1; index < req->num_outstanding_cmds; index++) {
 		handle++;
-		if (handle == MAX_OUTSTANDING_COMMANDS)
+		if (handle == req->num_outstanding_cmds)
 			handle = 1;
 		if (!req->outstanding_cmds[handle])
 			break;
 	}
-	if (index == MAX_OUTSTANDING_COMMANDS)
+	if (index == req->num_outstanding_cmds)
 		goto queuing_error;
 
 	/* Map the sg table so we have an accurate count of sg entries needed */
@@ -2767,15 +2766,15 @@ qla2x00_start_bidir(srb_t *sp, struct scsi_qla_host *vha, uint32_t tot_dsds)
 
 	/* Check for room in outstanding command list. */
 	handle = req->current_outstanding_cmd;
-	for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) {
+	for (index = 1; index < req->num_outstanding_cmds; index++) {
 		handle++;
-	if (handle == MAX_OUTSTANDING_COMMANDS)
+	if (handle == req->num_outstanding_cmds)
 		handle = 1;
 	if (!req->outstanding_cmds[handle])
 		break;
 	}
 
-	if (index == MAX_OUTSTANDING_COMMANDS) {
+	if (index == req->num_outstanding_cmds) {
 		rval = EXT_STATUS_BUSY;
 		goto queuing_error;
 	}
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 873c82014b16..e9dbd74c20d3 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -13,6 +13,8 @@
 #include <scsi/scsi_bsg_fc.h>
 #include <scsi/scsi_eh.h>
 
+#include "qla_target.h"
+
 static void qla2x00_mbx_completion(scsi_qla_host_t *, uint16_t);
 static void qla2x00_process_completed_request(struct scsi_qla_host *,
 	struct req_que *, uint32_t);
@@ -489,10 +491,37 @@ qla83xx_handle_8200_aen(scsi_qla_host_t *vha, uint16_t *mb)
 	if (mb[1] & IDC_DEVICE_STATE_CHANGE) {
 		ql_log(ql_log_info, vha, 0x506a,
 		    "IDC Device-State changed = 0x%x.\n", mb[4]);
+		if (ha->flags.nic_core_reset_owner)
+			return;
 		qla83xx_schedule_work(vha, MBA_IDC_AEN);
 	}
 }
 
+int
+qla2x00_is_a_vp_did(scsi_qla_host_t *vha, uint32_t rscn_entry)
+{
+	struct qla_hw_data *ha = vha->hw;
+	scsi_qla_host_t *vp;
+	uint32_t vp_did;
+	unsigned long flags;
+	int ret = 0;
+
+	if (!ha->num_vhosts)
+		return ret;
+
+	spin_lock_irqsave(&ha->vport_slock, flags);
+	list_for_each_entry(vp, &ha->vp_list, list) {
+		vp_did = vp->d_id.b24;
+		if (vp_did == rscn_entry) {
+			ret = 1;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&ha->vport_slock, flags);
+
+	return ret;
+}
+
 /**
  * qla2x00_async_event() - Process aynchronous events.
  * @ha: SCSI driver HA context
@@ -899,6 +928,10 @@ skip_rio:
 		/* Ignore reserved bits from RSCN-payload. */
 		rscn_entry = ((mb[1] & 0x3ff) << 16) | mb[2];
 
+		/* Skip RSCNs for virtual ports on the same physical port */
+		if (qla2x00_is_a_vp_did(vha, rscn_entry))
+			break;
+
 		atomic_set(&vha->loop_down_timer, 0);
 		vha->flags.management_server_logged_in = 0;
 
@@ -983,14 +1016,25 @@ skip_rio:
 		    mb[1], mb[2], mb[3]);
 		break;
 	case MBA_IDC_NOTIFY:
-		/* See if we need to quiesce any I/O */
-		if (IS_QLA8031(vha->hw))
-			if ((mb[2] & 0x7fff) == MBC_PORT_RESET ||
-			    (mb[2] & 0x7fff) == MBC_SET_PORT_CONFIG) {
+		if (IS_QLA8031(vha->hw)) {
+			mb[4] = RD_REG_WORD(&reg24->mailbox4);
+			if (((mb[2] & 0x7fff) == MBC_PORT_RESET ||
+			    (mb[2] & 0x7fff) == MBC_SET_PORT_CONFIG) &&
+			    (mb[4] & INTERNAL_LOOPBACK_MASK) != 0) {
 				set_bit(ISP_QUIESCE_NEEDED, &vha->dpc_flags);
+				/*
+				 * Extend loop down timer since port is active.
+				 */
+				if (atomic_read(&vha->loop_state) == LOOP_DOWN)
+					atomic_set(&vha->loop_down_timer,
+					    LOOP_DOWN_TIME);
 				qla2xxx_wake_dpc(vha);
 			}
+		}
 	case MBA_IDC_COMPLETE:
+		if (ha->notify_lb_portup_comp)
+			complete(&ha->lb_portup_comp);
+		/* Fallthru */
 	case MBA_IDC_TIME_EXT:
 		if (IS_QLA81XX(vha->hw) || IS_QLA8031(vha->hw))
 			qla81xx_idc_event(vha, mb[0], mb[1]);
@@ -1029,7 +1073,7 @@ qla2x00_process_completed_request(struct scsi_qla_host *vha,
 	struct qla_hw_data *ha = vha->hw;
 
 	/* Validate handle. */
-	if (index >= MAX_OUTSTANDING_COMMANDS) {
+	if (index >= req->num_outstanding_cmds) {
 		ql_log(ql_log_warn, vha, 0x3014,
 		    "Invalid SCSI command index (%x).\n", index);
 
@@ -1067,7 +1111,7 @@ qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func,
 	uint16_t index;
 
 	index = LSW(pkt->handle);
-	if (index >= MAX_OUTSTANDING_COMMANDS) {
+	if (index >= req->num_outstanding_cmds) {
 		ql_log(ql_log_warn, vha, 0x5031,
 		    "Invalid command index (%x).\n", index);
 		if (IS_QLA82XX(ha))
@@ -1740,7 +1784,7 @@ qla25xx_process_bidir_status_iocb(scsi_qla_host_t *vha, void *pkt,
 	sts24 = (struct sts_entry_24xx *) pkt;
 
 	/* Validate handle. */
-	if (index >= MAX_OUTSTANDING_COMMANDS) {
+	if (index >= req->num_outstanding_cmds) {
 		ql_log(ql_log_warn, vha, 0x70af,
 		    "Invalid SCSI completion handle 0x%x.\n", index);
 		set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
@@ -1910,9 +1954,9 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
 	req = ha->req_q_map[que];
 
 	/* Validate handle. */
-	if (handle < MAX_OUTSTANDING_COMMANDS) {
+	if (handle < req->num_outstanding_cmds)
 		sp = req->outstanding_cmds[handle];
-	} else
+	else
 		sp = NULL;
 
 	if (sp == NULL) {
@@ -1934,6 +1978,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
 
 	/* Fast path completion. */
 	if (comp_status == CS_COMPLETE && scsi_status == 0) {
+		qla2x00_do_host_ramp_up(vha);
 		qla2x00_process_completed_request(vha, req, handle);
 
 		return;
@@ -2193,6 +2238,9 @@ out:
 		    cp->cmnd[8], cp->cmnd[9], scsi_bufflen(cp), rsp_info_len,
 		    resid_len, fw_resid_len);
 
+	if (!res)
+		qla2x00_do_host_ramp_up(vha);
+
 	if (rsp->status_srb == NULL)
 		sp->done(ha, sp, res);
 }
@@ -2747,6 +2795,12 @@ static struct qla_init_msix_entry qla82xx_msix_entries[2] = {
 	{ "qla2xxx (rsp_q)", qla82xx_msix_rsp_q },
 };
 
+static struct qla_init_msix_entry qla83xx_msix_entries[3] = {
+	{ "qla2xxx (default)", qla24xx_msix_default },
+	{ "qla2xxx (rsp_q)", qla24xx_msix_rsp_q },
+	{ "qla2xxx (atio_q)", qla83xx_msix_atio_q },
+};
+
 static void
 qla24xx_disable_msix(struct qla_hw_data *ha)
 {
@@ -2827,9 +2881,13 @@ msix_failed:
 	}
 
 	/* Enable MSI-X vectors for the base queue */
-	for (i = 0; i < 2; i++) {
+	for (i = 0; i < ha->msix_count; i++) {
 		qentry = &ha->msix_entries[i];
-		if (IS_QLA82XX(ha)) {
+		if (QLA_TGT_MODE_ENABLED() && IS_ATIO_MSIX_CAPABLE(ha)) {
+			ret = request_irq(qentry->vector,
+				qla83xx_msix_entries[i].handler,
+				0, qla83xx_msix_entries[i].name, rsp);
+		} else if (IS_QLA82XX(ha)) {
 			ret = request_irq(qentry->vector,
 				qla82xx_msix_entries[i].handler,
 				0, qla82xx_msix_entries[i].name, rsp);
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 68c55eaa318c..186dd59ce4fa 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -900,13 +900,13 @@ qla2x00_abort_command(srb_t *sp)
 	    "Entered %s.\n", __func__);
 
 	spin_lock_irqsave(&ha->hardware_lock, flags);
-	for (handle = 1; handle < MAX_OUTSTANDING_COMMANDS; handle++) {
+	for (handle = 1; handle < req->num_outstanding_cmds; handle++) {
 		if (req->outstanding_cmds[handle] == sp)
 			break;
 	}
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
-	if (handle == MAX_OUTSTANDING_COMMANDS) {
+	if (handle == req->num_outstanding_cmds) {
 		/* command not found */
 		return QLA_FUNCTION_FAILED;
 	}
@@ -1633,6 +1633,54 @@ qla2x00_get_port_name(scsi_qla_host_t *vha, uint16_t loop_id, uint8_t *name,
 }
 
 /*
+ * qla24xx_link_initialization
+ *	Issue link initialization mailbox command.
+ *
+ * Input:
+ *	ha = adapter block pointer.
+ *	TARGET_QUEUE_LOCK must be released.
+ *	ADAPTER_STATE_LOCK must be released.
+ *
+ * Returns:
+ *	qla2x00 local function return status code.
+ *
+ * Context:
+ *	Kernel context.
+ */
+int
+qla24xx_link_initialize(scsi_qla_host_t *vha)
+{
+	int rval;
+	mbx_cmd_t mc;
+	mbx_cmd_t *mcp = &mc;
+
+	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1152,
+	    "Entered %s.\n", __func__);
+
+	if (!IS_FWI2_CAPABLE(vha->hw) || IS_CNA_CAPABLE(vha->hw))
+		return QLA_FUNCTION_FAILED;
+
+	mcp->mb[0] = MBC_LINK_INITIALIZATION;
+	mcp->mb[1] = BIT_6|BIT_4;
+	mcp->mb[2] = 0;
+	mcp->mb[3] = 0;
+	mcp->out_mb = MBX_3|MBX_2|MBX_1|MBX_0;
+	mcp->in_mb = MBX_0;
+	mcp->tov = MBX_TOV_SECONDS;
+	mcp->flags = 0;
+	rval = qla2x00_mailbox_command(vha, mcp);
+
+	if (rval != QLA_SUCCESS) {
+		ql_dbg(ql_dbg_mbx, vha, 0x1153, "Failed=%x.\n", rval);
+	} else {
+		ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1154,
+		    "Done %s.\n", __func__);
+	}
+
+	return rval;
+}
+
+/*
  * qla2x00_lip_reset
  *	Issue LIP reset mailbox command.
  *
@@ -2535,12 +2583,12 @@ qla24xx_abort_command(srb_t *sp)
 	    "Entered %s.\n", __func__);
 
 	spin_lock_irqsave(&ha->hardware_lock, flags);
-	for (handle = 1; handle < MAX_OUTSTANDING_COMMANDS; handle++) {
+	for (handle = 1; handle < req->num_outstanding_cmds; handle++) {
 		if (req->outstanding_cmds[handle] == sp)
 			break;
 	}
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
-	if (handle == MAX_OUTSTANDING_COMMANDS) {
+	if (handle == req->num_outstanding_cmds) {
 		/* Command not found. */
 		return QLA_FUNCTION_FAILED;
 	}
@@ -3093,6 +3141,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
 	struct qla_hw_data *ha = vha->hw;
 	scsi_qla_host_t *vp;
 	unsigned long   flags;
+	int found;
 
 	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b6,
 	    "Entered %s.\n", __func__);
@@ -3128,13 +3177,17 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
 			return;
 		}
 
+		found = 0;
 		spin_lock_irqsave(&ha->vport_slock, flags);
-		list_for_each_entry(vp, &ha->vp_list, list)
-			if (vp_idx == vp->vp_idx)
+		list_for_each_entry(vp, &ha->vp_list, list) {
+			if (vp_idx == vp->vp_idx) {
+				found = 1;
 				break;
+			}
+		}
 		spin_unlock_irqrestore(&ha->vport_slock, flags);
 
-		if (!vp)
+		if (!found)
 			return;
 
 		vp->d_id.b.domain = rptid_entry->port_id[2];
@@ -3814,6 +3867,97 @@ qla81xx_restart_mpi_firmware(scsi_qla_host_t *vha)
 }
 
 int
+qla2x00_set_driver_version(scsi_qla_host_t *vha, char *version)
+{
+	int rval;
+	mbx_cmd_t mc;
+	mbx_cmd_t *mcp = &mc;
+	int len;
+	uint16_t dwlen;
+	uint8_t *str;
+	dma_addr_t str_dma;
+	struct qla_hw_data *ha = vha->hw;
+
+	if (!IS_FWI2_CAPABLE(ha) || IS_QLA82XX(ha))
+		return QLA_FUNCTION_FAILED;
+
+	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1155,
+	    "Entered %s.\n", __func__);
+
+	str = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &str_dma);
+	if (!str) {
+		ql_log(ql_log_warn, vha, 0x1156,
+		    "Failed to allocate driver version param.\n");
+		return QLA_MEMORY_ALLOC_FAILED;
+	}
+
+	memcpy(str, "\x7\x3\x11\x0", 4);
+	dwlen = str[0];
+	len = dwlen * sizeof(uint32_t) - 4;
+	memset(str + 4, 0, len);
+	if (len > strlen(version))
+		len = strlen(version);
+	memcpy(str + 4, version, len);
+
+	mcp->mb[0] = MBC_SET_RNID_PARAMS;
+	mcp->mb[1] = RNID_TYPE_SET_VERSION << 8 | dwlen;
+	mcp->mb[2] = MSW(LSD(str_dma));
+	mcp->mb[3] = LSW(LSD(str_dma));
+	mcp->mb[6] = MSW(MSD(str_dma));
+	mcp->mb[7] = LSW(MSD(str_dma));
+	mcp->out_mb = MBX_7|MBX_6|MBX_3|MBX_2|MBX_1|MBX_0;
+	mcp->in_mb = MBX_0;
+	mcp->tov = MBX_TOV_SECONDS;
+	mcp->flags = 0;
+	rval = qla2x00_mailbox_command(vha, mcp);
+
+	if (rval != QLA_SUCCESS) {
+		ql_dbg(ql_dbg_mbx, vha, 0x1157,
+		    "Failed=%x mb[0]=%x.\n", rval, mcp->mb[0]);
+	} else {
+		ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1158,
+		    "Done %s.\n", __func__);
+	}
+
+	dma_pool_free(ha->s_dma_pool, str, str_dma);
+
+	return rval;
+}
+
+static int
+qla2x00_read_asic_temperature(scsi_qla_host_t *vha, uint16_t *temp)
+{
+	int rval;
+	mbx_cmd_t mc;
+	mbx_cmd_t *mcp = &mc;
+
+	if (!IS_FWI2_CAPABLE(vha->hw))
+		return QLA_FUNCTION_FAILED;
+
+	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1159,
+	    "Entered %s.\n", __func__);
+
+	mcp->mb[0] = MBC_GET_RNID_PARAMS;
+	mcp->mb[1] = RNID_TYPE_ASIC_TEMP << 8;
+	mcp->out_mb = MBX_1|MBX_0;
+	mcp->in_mb = MBX_1|MBX_0;
+	mcp->tov = MBX_TOV_SECONDS;
+	mcp->flags = 0;
+	rval = qla2x00_mailbox_command(vha, mcp);
+	*temp = mcp->mb[1];
+
+	if (rval != QLA_SUCCESS) {
+		ql_dbg(ql_dbg_mbx, vha, 0x115a,
+		    "Failed=%x mb[0]=%x,%x.\n", rval, mcp->mb[0], mcp->mb[1]);
+	} else {
+		ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x115b,
+		    "Done %s.\n", __func__);
+	}
+
+	return rval;
+}
+
+int
 qla2x00_read_sfp(scsi_qla_host_t *vha, dma_addr_t sfp_dma, uint8_t *sfp,
 	uint16_t dev, uint16_t off, uint16_t len, uint16_t opt)
 {
@@ -4415,38 +4559,45 @@ qla24xx_set_fcp_prio(scsi_qla_host_t *vha, uint16_t loop_id, uint16_t priority,
 }
 
 int
-qla2x00_get_thermal_temp(scsi_qla_host_t *vha, uint16_t *temp, uint16_t *frac)
+qla2x00_get_thermal_temp(scsi_qla_host_t *vha, uint16_t *temp)
 {
-	int rval;
-	uint8_t byte;
+	int rval = QLA_FUNCTION_FAILED;
 	struct qla_hw_data *ha = vha->hw;
+	uint8_t byte;
 
 	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10ca,
 	    "Entered %s.\n", __func__);
 
-	/* Integer part */
-	rval = qla2x00_read_sfp(vha, 0, &byte, 0x98, 0x01, 1,
-		BIT_13|BIT_12|BIT_0);
-	if (rval != QLA_SUCCESS) {
-		ql_dbg(ql_dbg_mbx, vha, 0x10c9, "Failed=%x.\n", rval);
-		ha->flags.thermal_supported = 0;
-		goto fail;
+	if (ha->thermal_support & THERMAL_SUPPORT_I2C) {
+		rval = qla2x00_read_sfp(vha, 0, &byte,
+		    0x98, 0x1, 1, BIT_13|BIT_12|BIT_0);
+		*temp = byte;
+		if (rval == QLA_SUCCESS)
+			goto done;
+
+		ql_log(ql_log_warn, vha, 0x10c9,
+		    "Thermal not supported by I2C.\n");
+		ha->thermal_support &= ~THERMAL_SUPPORT_I2C;
 	}
-	*temp = byte;
 
-	/* Fraction part */
-	rval = qla2x00_read_sfp(vha, 0, &byte, 0x98, 0x10, 1,
-		BIT_13|BIT_12|BIT_0);
-	if (rval != QLA_SUCCESS) {
-		ql_dbg(ql_dbg_mbx, vha, 0x1019, "Failed=%x.\n", rval);
-		ha->flags.thermal_supported = 0;
-		goto fail;
+	if (ha->thermal_support & THERMAL_SUPPORT_ISP) {
+		rval = qla2x00_read_asic_temperature(vha, temp);
+		if (rval == QLA_SUCCESS)
+			goto done;
+
+		ql_log(ql_log_warn, vha, 0x1019,
+		    "Thermal not supported by ISP.\n");
+		ha->thermal_support &= ~THERMAL_SUPPORT_ISP;
 	}
-	*frac = (byte >> 6) * 25;
 
+	ql_log(ql_log_warn, vha, 0x1150,
+	    "Thermal not supported by this card "
+	    "(ignoring further requests).\n");
+	return  rval;
+
+done:
 	ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1018,
 	    "Done %s.\n", __func__);
-fail:
 	return rval;
 }
 
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index 20fd974f903a..f868a9f98afe 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -523,6 +523,7 @@ qla25xx_free_req_que(struct scsi_qla_host *vha, struct req_que *req)
 		clear_bit(que_id, ha->req_qid_map);
 		mutex_unlock(&ha->vport_lock);
 	}
+	kfree(req->outstanding_cmds);
 	kfree(req);
 	req = NULL;
 }
@@ -649,6 +650,10 @@ qla25xx_create_req_que(struct qla_hw_data *ha, uint16_t options,
 		goto que_failed;
 	}
 
+	ret = qla2x00_alloc_outstanding_cmds(ha, req);
+	if (ret != QLA_SUCCESS)
+		goto que_failed;
+
 	mutex_lock(&ha->vport_lock);
 	que_id = find_first_zero_bit(ha->req_qid_map, ha->max_req_queues);
 	if (que_id >= ha->max_req_queues) {
@@ -685,7 +690,7 @@ qla25xx_create_req_que(struct qla_hw_data *ha, uint16_t options,
 	    "options=0x%x.\n", req->options);
 	ql_dbg(ql_dbg_init, base_vha, 0x00dd,
 	    "options=0x%x.\n", req->options);
-	for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++)
+	for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++)
 		req->outstanding_cmds[cnt] = NULL;
 	req->current_outstanding_cmd = 1;
 
diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c
index 3e3f593bada3..10754f518303 100644
--- a/drivers/scsi/qla2xxx/qla_nx.c
+++ b/drivers/scsi/qla2xxx/qla_nx.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -847,14 +847,21 @@ static int
 qla82xx_rom_lock(struct qla_hw_data *ha)
 {
 	int done = 0, timeout = 0;
+	uint32_t lock_owner = 0;
+	scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev);
 
 	while (!done) {
 		/* acquire semaphore2 from PCI HW block */
 		done = qla82xx_rd_32(ha, QLA82XX_PCIE_REG(PCIE_SEM2_LOCK));
 		if (done == 1)
 			break;
-		if (timeout >= qla82xx_rom_lock_timeout)
+		if (timeout >= qla82xx_rom_lock_timeout) {
+			lock_owner = qla82xx_rd_32(ha, QLA82XX_ROM_LOCK_ID);
+			ql_dbg(ql_dbg_p3p, vha, 0xb085,
+			    "Failed to acquire rom lock, acquired by %d.\n",
+			    lock_owner);
 			return -1;
+		}
 		timeout++;
 	}
 	qla82xx_wr_32(ha, QLA82XX_ROM_LOCK_ID, ROM_LOCK_DRIVER);
@@ -3629,7 +3636,7 @@ qla82xx_chip_reset_cleanup(scsi_qla_host_t *vha)
 			req = ha->req_q_map[que];
 			if (!req)
 				continue;
-			for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) {
+			for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
 				sp = req->outstanding_cmds[cnt];
 				if (sp) {
 					if (!sp->u.scmd.ctx ||
diff --git a/drivers/scsi/qla2xxx/qla_nx.h b/drivers/scsi/qla2xxx/qla_nx.h
index 6c953e8c08f0..d268e8406fdb 100644
--- a/drivers/scsi/qla2xxx/qla_nx.h
+++ b/drivers/scsi/qla2xxx/qla_nx.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -897,7 +897,7 @@ struct ct6_dsd {
 #define FLT_REG_BOOT_CODE_82XX	0x78
 #define FLT_REG_FW_82XX		0x74
 #define FLT_REG_GOLD_FW_82XX	0x75
-#define FLT_REG_VPD_82XX	0x81
+#define FLT_REG_VPD_8XXX	0x81
 
 #define	FA_VPD_SIZE_82XX	0x400
 
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 10d23f8b7036..2c6dd3dfe0f4 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -111,8 +111,7 @@ MODULE_PARM_DESC(ql2xfdmienable,
 		"Enables FDMI registrations. "
 		"0 - no FDMI. Default is 1 - perform FDMI.");
 
-#define MAX_Q_DEPTH    32
-static int ql2xmaxqdepth = MAX_Q_DEPTH;
+int ql2xmaxqdepth = MAX_Q_DEPTH;
 module_param(ql2xmaxqdepth, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(ql2xmaxqdepth,
 		"Maximum queue depth to set for each LUN. "
@@ -360,6 +359,9 @@ static void qla2x00_free_req_que(struct qla_hw_data *ha, struct req_que *req)
 		(req->length + 1) * sizeof(request_t),
 		req->ring, req->dma);
 
+	if (req)
+		kfree(req->outstanding_cmds);
+
 	kfree(req);
 	req = NULL;
 }
@@ -628,7 +630,7 @@ qla2x00_sp_free_dma(void *vha, void *ptr)
 	}
 
 	CMD_SP(cmd) = NULL;
-	mempool_free(sp, ha->srb_mempool);
+	qla2x00_rel_sp(sp->fcport->vha, sp);
 }
 
 static void
@@ -716,9 +718,11 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 		goto qc24_target_busy;
 	}
 
-	sp = qla2x00_get_sp(base_vha, fcport, GFP_ATOMIC);
-	if (!sp)
+	sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC);
+	if (!sp) {
+		set_bit(HOST_RAMP_DOWN_QUEUE_DEPTH, &vha->dpc_flags);
 		goto qc24_host_busy;
+	}
 
 	sp->u.scmd.cmd = cmd;
 	sp->type = SRB_SCSI_CMD;
@@ -731,6 +735,7 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 	if (rval != QLA_SUCCESS) {
 		ql_dbg(ql_dbg_io + ql_dbg_verbose, vha, 0x3013,
 		    "Start scsi failed rval=%d for cmd=%p.\n", rval, cmd);
+		set_bit(HOST_RAMP_DOWN_QUEUE_DEPTH, &vha->dpc_flags);
 		goto qc24_host_busy_free_sp;
 	}
 
@@ -1010,7 +1015,7 @@ qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t,
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	req = vha->req;
 	for (cnt = 1; status == QLA_SUCCESS &&
-		cnt < MAX_OUTSTANDING_COMMANDS; cnt++) {
+		cnt < req->num_outstanding_cmds; cnt++) {
 		sp = req->outstanding_cmds[cnt];
 		if (!sp)
 			continue;
@@ -1300,14 +1305,14 @@ qla2x00_loop_reset(scsi_qla_host_t *vha)
 	}
 
 	if (ha->flags.enable_lip_full_login && !IS_CNA_CAPABLE(ha)) {
+		atomic_set(&vha->loop_state, LOOP_DOWN);
+		atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME);
+		qla2x00_mark_all_devices_lost(vha, 0);
 		ret = qla2x00_full_login_lip(vha);
 		if (ret != QLA_SUCCESS) {
 			ql_dbg(ql_dbg_taskm, vha, 0x802d,
 			    "full_login_lip=%d.\n", ret);
 		}
-		atomic_set(&vha->loop_state, LOOP_DOWN);
-		atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME);
-		qla2x00_mark_all_devices_lost(vha, 0);
 	}
 
 	if (ha->flags.enable_lip_reset) {
@@ -1337,7 +1342,9 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res)
 		req = ha->req_q_map[que];
 		if (!req)
 			continue;
-		for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) {
+		if (!req->outstanding_cmds)
+			continue;
+		for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
 			sp = req->outstanding_cmds[cnt];
 			if (sp) {
 				req->outstanding_cmds[cnt] = NULL;
@@ -1453,6 +1460,81 @@ qla2x00_change_queue_type(struct scsi_device *sdev, int tag_type)
 	return tag_type;
 }
 
+static void
+qla2x00_host_ramp_down_queuedepth(scsi_qla_host_t *vha)
+{
+	scsi_qla_host_t *vp;
+	struct Scsi_Host *shost;
+	struct scsi_device *sdev;
+	struct qla_hw_data *ha = vha->hw;
+	unsigned long flags;
+
+	ha->host_last_rampdown_time = jiffies;
+
+	if (ha->cfg_lun_q_depth <= vha->host->cmd_per_lun)
+		return;
+
+	if ((ha->cfg_lun_q_depth / 2) < vha->host->cmd_per_lun)
+		ha->cfg_lun_q_depth = vha->host->cmd_per_lun;
+	else
+		ha->cfg_lun_q_depth = ha->cfg_lun_q_depth / 2;
+
+	/*
+	 * Geometrically ramp down the queue depth for all devices on this
+	 * adapter
+	 */
+	spin_lock_irqsave(&ha->vport_slock, flags);
+	list_for_each_entry(vp, &ha->vp_list, list) {
+		shost = vp->host;
+		shost_for_each_device(sdev, shost) {
+			if (sdev->queue_depth > shost->cmd_per_lun) {
+				if (sdev->queue_depth < ha->cfg_lun_q_depth)
+					continue;
+				ql_log(ql_log_warn, vp, 0x3031,
+				    "%ld:%d:%d: Ramping down queue depth to %d",
+				    vp->host_no, sdev->id, sdev->lun,
+				    ha->cfg_lun_q_depth);
+				qla2x00_change_queue_depth(sdev,
+				    ha->cfg_lun_q_depth, SCSI_QDEPTH_DEFAULT);
+			}
+		}
+	}
+	spin_unlock_irqrestore(&ha->vport_slock, flags);
+
+	return;
+}
+
+static void
+qla2x00_host_ramp_up_queuedepth(scsi_qla_host_t *vha)
+{
+	scsi_qla_host_t *vp;
+	struct Scsi_Host *shost;
+	struct scsi_device *sdev;
+	struct qla_hw_data *ha = vha->hw;
+	unsigned long flags;
+
+	ha->host_last_rampup_time = jiffies;
+	ha->cfg_lun_q_depth++;
+
+	/*
+	 * Linearly ramp up the queue depth for all devices on this
+	 * adapter
+	 */
+	spin_lock_irqsave(&ha->vport_slock, flags);
+	list_for_each_entry(vp, &ha->vp_list, list) {
+		shost = vp->host;
+		shost_for_each_device(sdev, shost) {
+			if (sdev->queue_depth > ha->cfg_lun_q_depth)
+				continue;
+			qla2x00_change_queue_depth(sdev, ha->cfg_lun_q_depth,
+			    SCSI_QDEPTH_RAMP_UP);
+		}
+	}
+	spin_unlock_irqrestore(&ha->vport_slock, flags);
+
+	return;
+}
+
 /**
  * qla2x00_config_dma_addressing() - Configure OS DMA addressing method.
  * @ha: HA context
@@ -1730,6 +1812,9 @@ qla83xx_iospace_config(struct qla_hw_data *ha)
 
 mqiobase_exit:
 	ha->msix_count = ha->max_rsp_queues + 1;
+
+	qlt_83xx_iospace_config(ha);
+
 	ql_dbg_pci(ql_dbg_init, ha->pdev, 0x011f,
 	    "MSIX Count:%d.\n", ha->msix_count);
 	return 0;
@@ -2230,6 +2315,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	ha->init_cb_size = sizeof(init_cb_t);
 	ha->link_data_rate = PORT_SPEED_UNKNOWN;
 	ha->optrom_size = OPTROM_SIZE_2300;
+	ha->cfg_lun_q_depth = ql2xmaxqdepth;
 
 	/* Assign ISP specific operations. */
 	if (IS_QLA2100(ha)) {
@@ -2307,6 +2393,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		ha->mbx_count = MAILBOX_REGISTER_COUNT;
 		req_length = REQUEST_ENTRY_CNT_24XX;
 		rsp_length = RESPONSE_ENTRY_CNT_2300;
+		ha->tgt.atio_q_length = ATIO_ENTRY_CNT_24XX;
 		ha->max_loop_id = SNS_LAST_LOOP_ID_2300;
 		ha->init_cb_size = sizeof(struct mid_init_cb_81xx);
 		ha->gid_list_info_size = 8;
@@ -2338,6 +2425,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		ha->mbx_count = MAILBOX_REGISTER_COUNT;
 		req_length = REQUEST_ENTRY_CNT_24XX;
 		rsp_length = RESPONSE_ENTRY_CNT_2300;
+		ha->tgt.atio_q_length = ATIO_ENTRY_CNT_24XX;
 		ha->max_loop_id = SNS_LAST_LOOP_ID_2300;
 		ha->init_cb_size = sizeof(struct mid_init_cb_81xx);
 		ha->gid_list_info_size = 8;
@@ -2377,6 +2465,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	complete(&ha->mbx_cmd_comp);
 	init_completion(&ha->mbx_intr_comp);
 	init_completion(&ha->dcbx_comp);
+	init_completion(&ha->lb_portup_comp);
 
 	set_bit(0, (unsigned long *) ha->vp_idx_map);
 
@@ -2720,6 +2809,9 @@ qla2x00_shutdown(struct pci_dev *pdev)
 	scsi_qla_host_t *vha;
 	struct qla_hw_data  *ha;
 
+	if (!atomic_read(&pdev->enable_cnt))
+		return;
+
 	vha = pci_get_drvdata(pdev);
 	ha = vha->hw;
 
@@ -3974,6 +4066,8 @@ qla83xx_force_lock_recovery(scsi_qla_host_t *base_vha)
 	uint32_t idc_lck_rcvry_stage_mask = 0x3;
 	uint32_t idc_lck_rcvry_owner_mask = 0x3c;
 	struct qla_hw_data *ha = base_vha->hw;
+	ql_dbg(ql_dbg_p3p, base_vha, 0xb086,
+	    "Trying force recovery of the IDC lock.\n");
 
 	rval = qla83xx_rd_reg(base_vha, QLA83XX_IDC_LOCK_RECOVERY, &data);
 	if (rval)
@@ -4065,6 +4159,7 @@ qla83xx_idc_lock(scsi_qla_host_t *base_vha, uint16_t requester_id)
 {
 	uint16_t options = (requester_id << 15) | BIT_6;
 	uint32_t data;
+	uint32_t lock_owner;
 	struct qla_hw_data *ha = base_vha->hw;
 
 	/* IDC-lock implementation using driver-lock/lock-id remote registers */
@@ -4076,8 +4171,11 @@ retry_lock:
 			qla83xx_wr_reg(base_vha, QLA83XX_DRIVER_LOCKID,
 			    ha->portnum);
 		} else {
+			qla83xx_rd_reg(base_vha, QLA83XX_DRIVER_LOCKID,
+			    &lock_owner);
 			ql_dbg(ql_dbg_p3p, base_vha, 0xb063,
-			    "Failed to acquire IDC lock. retrying...\n");
+			    "Failed to acquire IDC lock, acquired by %d, "
+			    "retrying...\n", lock_owner);
 
 			/* Retry/Perform IDC-Lock recovery */
 			if (qla83xx_idc_lock_recovery(base_vha)
@@ -4605,6 +4703,18 @@ qla2x00_do_dpc(void *data)
 			qla2xxx_flash_npiv_conf(base_vha);
 		}
 
+		if (test_and_clear_bit(HOST_RAMP_DOWN_QUEUE_DEPTH,
+		    &base_vha->dpc_flags)) {
+			/* Prevents simultaneous ramp up and down */
+			clear_bit(HOST_RAMP_UP_QUEUE_DEPTH,
+			    &base_vha->dpc_flags);
+			qla2x00_host_ramp_down_queuedepth(base_vha);
+		}
+
+		if (test_and_clear_bit(HOST_RAMP_UP_QUEUE_DEPTH,
+		    &base_vha->dpc_flags))
+			qla2x00_host_ramp_up_queuedepth(base_vha);
+
 		if (!ha->interrupts_on)
 			ha->isp_ops->enable_intrs(ha);
 
@@ -4733,7 +4843,7 @@ qla2x00_timer(scsi_qla_host_t *vha)
 				    cpu_flags);
 				req = ha->req_q_map[0];
 				for (index = 1;
-				    index < MAX_OUTSTANDING_COMMANDS;
+				    index < req->num_outstanding_cmds;
 				    index++) {
 					fc_port_t *sfcp;
 
@@ -4802,7 +4912,9 @@ qla2x00_timer(scsi_qla_host_t *vha)
 	    test_bit(ISP_UNRECOVERABLE, &vha->dpc_flags) ||
 	    test_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags) ||
 	    test_bit(VP_DPC_NEEDED, &vha->dpc_flags) ||
-	    test_bit(RELOGIN_NEEDED, &vha->dpc_flags))) {
+	    test_bit(RELOGIN_NEEDED, &vha->dpc_flags) ||
+	    test_bit(HOST_RAMP_DOWN_QUEUE_DEPTH, &vha->dpc_flags) ||
+	    test_bit(HOST_RAMP_UP_QUEUE_DEPTH, &vha->dpc_flags))) {
 		ql_dbg(ql_dbg_timer, vha, 0x600b,
 		    "isp_abort_needed=%d loop_resync_needed=%d "
 		    "fcport_update_needed=%d start_dpc=%d "
@@ -4815,12 +4927,15 @@ qla2x00_timer(scsi_qla_host_t *vha)
 		ql_dbg(ql_dbg_timer, vha, 0x600c,
 		    "beacon_blink_needed=%d isp_unrecoverable=%d "
 		    "fcoe_ctx_reset_needed=%d vp_dpc_needed=%d "
-		    "relogin_needed=%d.\n",
+		    "relogin_needed=%d, host_ramp_down_needed=%d "
+		    "host_ramp_up_needed=%d.\n",
 		    test_bit(BEACON_BLINK_NEEDED, &vha->dpc_flags),
 		    test_bit(ISP_UNRECOVERABLE, &vha->dpc_flags),
 		    test_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags),
 		    test_bit(VP_DPC_NEEDED, &vha->dpc_flags),
-		    test_bit(RELOGIN_NEEDED, &vha->dpc_flags));
+		    test_bit(RELOGIN_NEEDED, &vha->dpc_flags),
+		    test_bit(HOST_RAMP_UP_QUEUE_DEPTH, &vha->dpc_flags),
+		    test_bit(HOST_RAMP_DOWN_QUEUE_DEPTH, &vha->dpc_flags));
 		qla2xxx_wake_dpc(vha);
 	}
 
diff --git a/drivers/scsi/qla2xxx/qla_settings.h b/drivers/scsi/qla2xxx/qla_settings.h
index 892a81e457bc..46ef0ac48f44 100644
--- a/drivers/scsi/qla2xxx/qla_settings.h
+++ b/drivers/scsi/qla2xxx/qla_settings.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
index 32fdc2a66dd1..3bef6736d885 100644
--- a/drivers/scsi/qla2xxx/qla_sup.c
+++ b/drivers/scsi/qla2xxx/qla_sup.c
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
@@ -798,20 +798,8 @@ qla2xxx_get_flt_info(scsi_qla_host_t *vha, uint32_t flt_addr)
 		case FLT_REG_BOOTLOAD_82XX:
 			ha->flt_region_bootload = start;
 			break;
-		case FLT_REG_VPD_82XX:
-			ha->flt_region_vpd = start;
-			break;
-		case FLT_REG_FCOE_VPD_0:
-			if (!IS_QLA8031(ha))
-				break;
-			ha->flt_region_vpd_nvram = start;
-			if (ha->flags.port0)
-				ha->flt_region_vpd = start;
-			break;
-		case FLT_REG_FCOE_VPD_1:
-			if (!IS_QLA8031(ha))
-				break;
-			if (!ha->flags.port0)
+		case FLT_REG_VPD_8XXX:
+			if (IS_CNA_CAPABLE(ha))
 				ha->flt_region_vpd = start;
 			break;
 		case FLT_REG_FCOE_NVRAM_0:
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 80f4b849e2b0..61b5d8c2b5da 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -52,7 +52,7 @@ MODULE_PARM_DESC(qlini_mode,
 	"\"disabled\" - initiator mode will never be enabled; "
 	"\"enabled\" (default) - initiator mode will always stay enabled.");
 
-static int ql2x_ini_mode = QLA2XXX_INI_MODE_EXCLUSIVE;
+int ql2x_ini_mode = QLA2XXX_INI_MODE_EXCLUSIVE;
 
 /*
  * From scsi/fc/fc_fcp.h
@@ -1119,6 +1119,7 @@ static void qlt_send_notify_ack(struct scsi_qla_host *vha,
 	nack->u.isp24.srr_rx_id = ntfy->u.isp24.srr_rx_id;
 	nack->u.isp24.status = ntfy->u.isp24.status;
 	nack->u.isp24.status_subcode = ntfy->u.isp24.status_subcode;
+	nack->u.isp24.fw_handle = ntfy->u.isp24.fw_handle;
 	nack->u.isp24.exchange_address = ntfy->u.isp24.exchange_address;
 	nack->u.isp24.srr_rel_offs = ntfy->u.isp24.srr_rel_offs;
 	nack->u.isp24.srr_ui = ntfy->u.isp24.srr_ui;
@@ -1570,7 +1571,7 @@ static inline uint32_t qlt_make_handle(struct scsi_qla_host *vha)
 	/* always increment cmd handle */
 	do {
 		++h;
-		if (h > MAX_OUTSTANDING_COMMANDS)
+		if (h > DEFAULT_OUTSTANDING_COMMANDS)
 			h = 1; /* 0 is QLA_TGT_NULL_HANDLE */
 		if (h == ha->tgt.current_handle) {
 			ql_dbg(ql_dbg_tgt, vha, 0xe04e,
@@ -2441,7 +2442,7 @@ static struct qla_tgt_cmd *qlt_ctio_to_cmd(struct scsi_qla_host *vha,
 			return NULL;
 		}
 		/* handle-1 is actually used */
-		if (unlikely(handle > MAX_OUTSTANDING_COMMANDS)) {
+		if (unlikely(handle > DEFAULT_OUTSTANDING_COMMANDS)) {
 			ql_dbg(ql_dbg_tgt, vha, 0xe052,
 			    "qla_target(%d): Wrong handle %x received\n",
 			    vha->vp_idx, handle);
@@ -4305,6 +4306,12 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha)
 	if (!QLA_TGT_MODE_ENABLED())
 		return 0;
 
+	if (!IS_TGT_MODE_CAPABLE(ha)) {
+		ql_log(ql_log_warn, base_vha, 0xe070,
+		    "This adapter does not support target mode.\n");
+		return 0;
+	}
+
 	ql_dbg(ql_dbg_tgt, base_vha, 0xe03b,
 	    "Registering target for host %ld(%p)", base_vha->host_no, ha);
 
@@ -4666,7 +4673,6 @@ void
 qlt_24xx_process_atio_queue(struct scsi_qla_host *vha)
 {
 	struct qla_hw_data *ha = vha->hw;
-	struct device_reg_24xx __iomem *reg = &ha->iobase->isp24;
 	struct atio_from_isp *pkt;
 	int cnt, i;
 
@@ -4694,26 +4700,28 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha)
 	}
 
 	/* Adjust ring index */
-	WRT_REG_DWORD(&reg->atio_q_out, ha->tgt.atio_ring_index);
+	WRT_REG_DWORD(ISP_ATIO_Q_OUT(vha), ha->tgt.atio_ring_index);
 }
 
 void
-qlt_24xx_config_rings(struct scsi_qla_host *vha, device_reg_t __iomem *reg)
+qlt_24xx_config_rings(struct scsi_qla_host *vha)
 {
 	struct qla_hw_data *ha = vha->hw;
+	if (!QLA_TGT_MODE_ENABLED())
+		return;
 
-/* FIXME: atio_q in/out for ha->mqenable=1..? */
-	if (ha->mqenable) {
-#if 0
-		WRT_REG_DWORD(&reg->isp25mq.atio_q_in, 0);
-		WRT_REG_DWORD(&reg->isp25mq.atio_q_out, 0);
-		RD_REG_DWORD(&reg->isp25mq.atio_q_out);
-#endif
-	} else {
-		/* Setup APTIO registers for target mode */
-		WRT_REG_DWORD(&reg->isp24.atio_q_in, 0);
-		WRT_REG_DWORD(&reg->isp24.atio_q_out, 0);
-		RD_REG_DWORD(&reg->isp24.atio_q_out);
+	WRT_REG_DWORD(ISP_ATIO_Q_IN(vha), 0);
+	WRT_REG_DWORD(ISP_ATIO_Q_OUT(vha), 0);
+	RD_REG_DWORD(ISP_ATIO_Q_OUT(vha));
+
+	if (IS_ATIO_MSIX_CAPABLE(ha)) {
+		struct qla_msix_entry *msix = &ha->msix_entries[2];
+		struct init_cb_24xx *icb = (struct init_cb_24xx *)ha->init_cb;
+
+		icb->msix_atio = cpu_to_le16(msix->entry);
+		ql_dbg(ql_dbg_init, vha, 0xf072,
+		    "Registering ICB vector 0x%x for atio que.\n",
+		    msix->entry);
 	}
 }
 
@@ -4796,6 +4804,101 @@ qlt_24xx_config_nvram_stage2(struct scsi_qla_host *vha,
 	}
 }
 
+void
+qlt_81xx_config_nvram_stage1(struct scsi_qla_host *vha, struct nvram_81xx *nv)
+{
+	struct qla_hw_data *ha = vha->hw;
+
+	if (!QLA_TGT_MODE_ENABLED())
+		return;
+
+	if (qla_tgt_mode_enabled(vha)) {
+		if (!ha->tgt.saved_set) {
+			/* We save only once */
+			ha->tgt.saved_exchange_count = nv->exchange_count;
+			ha->tgt.saved_firmware_options_1 =
+			    nv->firmware_options_1;
+			ha->tgt.saved_firmware_options_2 =
+			    nv->firmware_options_2;
+			ha->tgt.saved_firmware_options_3 =
+			    nv->firmware_options_3;
+			ha->tgt.saved_set = 1;
+		}
+
+		nv->exchange_count = __constant_cpu_to_le16(0xFFFF);
+
+		/* Enable target mode */
+		nv->firmware_options_1 |= __constant_cpu_to_le32(BIT_4);
+
+		/* Disable ini mode, if requested */
+		if (!qla_ini_mode_enabled(vha))
+			nv->firmware_options_1 |=
+			    __constant_cpu_to_le32(BIT_5);
+
+		/* Disable Full Login after LIP */
+		nv->firmware_options_1 &= __constant_cpu_to_le32(~BIT_13);
+		/* Enable initial LIP */
+		nv->firmware_options_1 &= __constant_cpu_to_le32(~BIT_9);
+		/* Enable FC tapes support */
+		nv->firmware_options_2 |= __constant_cpu_to_le32(BIT_12);
+		/* Disable Full Login after LIP */
+		nv->host_p &= __constant_cpu_to_le32(~BIT_10);
+		/* Enable target PRLI control */
+		nv->firmware_options_2 |= __constant_cpu_to_le32(BIT_14);
+	} else {
+		if (ha->tgt.saved_set) {
+			nv->exchange_count = ha->tgt.saved_exchange_count;
+			nv->firmware_options_1 =
+			    ha->tgt.saved_firmware_options_1;
+			nv->firmware_options_2 =
+			    ha->tgt.saved_firmware_options_2;
+			nv->firmware_options_3 =
+			    ha->tgt.saved_firmware_options_3;
+		}
+		return;
+	}
+
+	/* out-of-order frames reassembly */
+	nv->firmware_options_3 |= BIT_6|BIT_9;
+
+	if (ha->tgt.enable_class_2) {
+		if (vha->flags.init_done)
+			fc_host_supported_classes(vha->host) =
+				FC_COS_CLASS2 | FC_COS_CLASS3;
+
+		nv->firmware_options_2 |= __constant_cpu_to_le32(BIT_8);
+	} else {
+		if (vha->flags.init_done)
+			fc_host_supported_classes(vha->host) = FC_COS_CLASS3;
+
+		nv->firmware_options_2 &= ~__constant_cpu_to_le32(BIT_8);
+	}
+}
+
+void
+qlt_81xx_config_nvram_stage2(struct scsi_qla_host *vha,
+	struct init_cb_81xx *icb)
+{
+	struct qla_hw_data *ha = vha->hw;
+
+	if (!QLA_TGT_MODE_ENABLED())
+		return;
+
+	if (ha->tgt.node_name_set) {
+		memcpy(icb->node_name, ha->tgt.tgt_node_name, WWN_SIZE);
+		icb->firmware_options_1 |= __constant_cpu_to_le32(BIT_14);
+	}
+}
+
+void
+qlt_83xx_iospace_config(struct qla_hw_data *ha)
+{
+	if (!QLA_TGT_MODE_ENABLED())
+		return;
+
+	ha->msix_count += 1; /* For ATIO Q */
+}
+
 int
 qlt_24xx_process_response_error(struct scsi_qla_host *vha,
 	struct sts_entry_24xx *pkt)
@@ -4828,11 +4931,41 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha)
 	if (!QLA_TGT_MODE_ENABLED())
 		return;
 
+	if  (ha->mqenable || IS_QLA83XX(ha)) {
+		ISP_ATIO_Q_IN(base_vha) = &ha->mqiobase->isp25mq.atio_q_in;
+		ISP_ATIO_Q_OUT(base_vha) = &ha->mqiobase->isp25mq.atio_q_out;
+	} else {
+		ISP_ATIO_Q_IN(base_vha) = &ha->iobase->isp24.atio_q_in;
+		ISP_ATIO_Q_OUT(base_vha) = &ha->iobase->isp24.atio_q_out;
+	}
+
 	mutex_init(&ha->tgt.tgt_mutex);
 	mutex_init(&ha->tgt.tgt_host_action_mutex);
 	qlt_clear_mode(base_vha);
 }
 
+irqreturn_t
+qla83xx_msix_atio_q(int irq, void *dev_id)
+{
+	struct rsp_que *rsp;
+	scsi_qla_host_t	*vha;
+	struct qla_hw_data *ha;
+	unsigned long flags;
+
+	rsp = (struct rsp_que *) dev_id;
+	ha = rsp->hw;
+	vha = pci_get_drvdata(ha->pdev);
+
+	spin_lock_irqsave(&ha->hardware_lock, flags);
+
+	qlt_24xx_process_atio_queue(vha);
+	qla24xx_process_response_queue(vha, rsp);
+
+	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
+	return IRQ_HANDLED;
+}
+
 int
 qlt_mem_alloc(struct qla_hw_data *ha)
 {
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index bad749561ec2..ff9ccb9fd036 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -60,8 +60,9 @@
  * multi-complete should come to the tgt driver or be handled there by qla2xxx
  */
 #define CTIO_COMPLETION_HANDLE_MARK	BIT_29
-#if (CTIO_COMPLETION_HANDLE_MARK <= MAX_OUTSTANDING_COMMANDS)
-#error "CTIO_COMPLETION_HANDLE_MARK not larger than MAX_OUTSTANDING_COMMANDS"
+#if (CTIO_COMPLETION_HANDLE_MARK <= DEFAULT_OUTSTANDING_COMMANDS)
+#error "CTIO_COMPLETION_HANDLE_MARK not larger than "
+	"DEFAULT_OUTSTANDING_COMMANDS"
 #endif
 #define HANDLE_IS_CTIO_COMP(h) (h & CTIO_COMPLETION_HANDLE_MARK)
 
@@ -161,7 +162,7 @@ struct imm_ntfy_from_isp {
 			uint16_t srr_rx_id;
 			uint16_t status;
 			uint8_t  status_subcode;
-			uint8_t  reserved_3;
+			uint8_t  fw_handle;
 			uint32_t exchange_address;
 			uint32_t srr_rel_offs;
 			uint16_t srr_ui;
@@ -217,7 +218,7 @@ struct nack_to_isp {
 			uint16_t srr_rx_id;
 			uint16_t status;
 			uint8_t  status_subcode;
-			uint8_t  reserved_3;
+			uint8_t  fw_handle;
 			uint32_t exchange_address;
 			uint32_t srr_rel_offs;
 			uint16_t srr_ui;
@@ -948,6 +949,7 @@ extern void qlt_update_vp_map(struct scsi_qla_host *, int);
  * is not set. Right now, ha value is ignored.
  */
 #define QLA_TGT_MODE_ENABLED() (ql2x_ini_mode != QLA2XXX_INI_MODE_ENABLED)
+extern int ql2x_ini_mode;
 
 static inline bool qla_tgt_mode_enabled(struct scsi_qla_host *ha)
 {
@@ -985,12 +987,15 @@ extern void qlt_vport_create(struct scsi_qla_host *, struct qla_hw_data *);
 extern void qlt_rff_id(struct scsi_qla_host *, struct ct_sns_req *);
 extern void qlt_init_atio_q_entries(struct scsi_qla_host *);
 extern void qlt_24xx_process_atio_queue(struct scsi_qla_host *);
-extern void qlt_24xx_config_rings(struct scsi_qla_host *,
-	device_reg_t __iomem *);
+extern void qlt_24xx_config_rings(struct scsi_qla_host *);
 extern void qlt_24xx_config_nvram_stage1(struct scsi_qla_host *,
 	struct nvram_24xx *);
 extern void qlt_24xx_config_nvram_stage2(struct scsi_qla_host *,
 	struct init_cb_24xx *);
+extern void qlt_81xx_config_nvram_stage2(struct scsi_qla_host *,
+	struct init_cb_81xx *);
+extern void qlt_81xx_config_nvram_stage1(struct scsi_qla_host *,
+	struct nvram_81xx *);
 extern int qlt_24xx_process_response_error(struct scsi_qla_host *,
 	struct sts_entry_24xx *);
 extern void qlt_modify_vp_config(struct scsi_qla_host *,
@@ -1000,5 +1005,7 @@ extern int qlt_mem_alloc(struct qla_hw_data *);
 extern void qlt_mem_free(struct qla_hw_data *);
 extern void qlt_stop_phase1(struct qla_tgt *);
 extern void qlt_stop_phase2(struct qla_tgt *);
+extern irqreturn_t qla83xx_msix_atio_q(int, void *);
+extern void qlt_83xx_iospace_config(struct qla_hw_data *);
 
 #endif /* __QLA_TARGET_H */
diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index 49697ca41e78..2b6e478d9e33 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -1,6 +1,6 @@
 /*
  * QLogic Fibre Channel HBA Driver
- * Copyright (c)  2003-2012 QLogic Corporation
+ * Copyright (c)  2003-2013 QLogic Corporation
  *
  * See LICENSE.qla2xxx for copyright and licensing details.
  */
diff --git a/drivers/scsi/qla4xxx/ql4_83xx.c b/drivers/scsi/qla4xxx/ql4_83xx.c
index 6e9af20be12f..5d8fe4f75650 100644
--- a/drivers/scsi/qla4xxx/ql4_83xx.c
+++ b/drivers/scsi/qla4xxx/ql4_83xx.c
@@ -538,7 +538,7 @@ struct device_info {
 	int port_num;
 };
 
-static int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha)
+int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha)
 {
 	uint32_t drv_active;
 	uint32_t dev_part, dev_part1, dev_part2;
@@ -1351,31 +1351,58 @@ exit_start_fw:
 
 /*----------------------Interrupt Related functions ---------------------*/
 
-void qla4_83xx_disable_intrs(struct scsi_qla_host *ha)
+static void qla4_83xx_disable_iocb_intrs(struct scsi_qla_host *ha)
+{
+	if (test_and_clear_bit(AF_83XX_IOCB_INTR_ON, &ha->flags))
+		qla4_8xxx_intr_disable(ha);
+}
+
+static void qla4_83xx_disable_mbox_intrs(struct scsi_qla_host *ha)
 {
 	uint32_t mb_int, ret;
 
-	if (test_and_clear_bit(AF_INTERRUPTS_ON, &ha->flags))
-		qla4_8xxx_mbx_intr_disable(ha);
+	if (test_and_clear_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) {
+		ret = readl(&ha->qla4_83xx_reg->mbox_int);
+		mb_int = ret & ~INT_ENABLE_FW_MB;
+		writel(mb_int, &ha->qla4_83xx_reg->mbox_int);
+		writel(1, &ha->qla4_83xx_reg->leg_int_mask);
+	}
+}
 
-	ret = readl(&ha->qla4_83xx_reg->mbox_int);
-	mb_int = ret & ~INT_ENABLE_FW_MB;
-	writel(mb_int, &ha->qla4_83xx_reg->mbox_int);
-	writel(1, &ha->qla4_83xx_reg->leg_int_mask);
+void qla4_83xx_disable_intrs(struct scsi_qla_host *ha)
+{
+	qla4_83xx_disable_mbox_intrs(ha);
+	qla4_83xx_disable_iocb_intrs(ha);
 }
 
-void qla4_83xx_enable_intrs(struct scsi_qla_host *ha)
+static void qla4_83xx_enable_iocb_intrs(struct scsi_qla_host *ha)
+{
+	if (!test_bit(AF_83XX_IOCB_INTR_ON, &ha->flags)) {
+		qla4_8xxx_intr_enable(ha);
+		set_bit(AF_83XX_IOCB_INTR_ON, &ha->flags);
+	}
+}
+
+void qla4_83xx_enable_mbox_intrs(struct scsi_qla_host *ha)
 {
 	uint32_t mb_int;
 
-	qla4_8xxx_mbx_intr_enable(ha);
-	mb_int = INT_ENABLE_FW_MB;
-	writel(mb_int, &ha->qla4_83xx_reg->mbox_int);
-	writel(0, &ha->qla4_83xx_reg->leg_int_mask);
+	if (!test_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) {
+		mb_int = INT_ENABLE_FW_MB;
+		writel(mb_int, &ha->qla4_83xx_reg->mbox_int);
+		writel(0, &ha->qla4_83xx_reg->leg_int_mask);
+		set_bit(AF_83XX_MBOX_INTR_ON, &ha->flags);
+	}
+}
 
-	set_bit(AF_INTERRUPTS_ON, &ha->flags);
+
+void qla4_83xx_enable_intrs(struct scsi_qla_host *ha)
+{
+	qla4_83xx_enable_mbox_intrs(ha);
+	qla4_83xx_enable_iocb_intrs(ha);
 }
 
+
 void qla4_83xx_queue_mbox_cmd(struct scsi_qla_host *ha, uint32_t *mbx_cmd,
 			      int incount)
 {
diff --git a/drivers/scsi/qla4xxx/ql4_attr.c b/drivers/scsi/qla4xxx/ql4_attr.c
index 76819b71ada7..19ee55a6226c 100644
--- a/drivers/scsi/qla4xxx/ql4_attr.c
+++ b/drivers/scsi/qla4xxx/ql4_attr.c
@@ -74,16 +74,22 @@ qla4_8xxx_sysfs_write_fw_dump(struct file *filep, struct kobject *kobj,
 		}
 		break;
 	case 2:
-		/* Reset HBA */
+		/* Reset HBA and collect FW dump */
 		ha->isp_ops->idc_lock(ha);
 		dev_state = qla4_8xxx_rd_direct(ha, QLA8XXX_CRB_DEV_STATE);
 		if (dev_state == QLA8XXX_DEV_READY) {
-			ql4_printk(KERN_INFO, ha,
-				   "%s: Setting Need reset, reset_owner is 0x%x.\n",
-				   __func__, ha->func_num);
+			ql4_printk(KERN_INFO, ha, "%s: Setting Need reset\n",
+				   __func__);
 			qla4_8xxx_wr_direct(ha, QLA8XXX_CRB_DEV_STATE,
 					    QLA8XXX_DEV_NEED_RESET);
-			set_bit(AF_8XXX_RST_OWNER, &ha->flags);
+			if (is_qla8022(ha) ||
+			    (is_qla8032(ha) &&
+			     qla4_83xx_can_perform_reset(ha))) {
+				set_bit(AF_8XXX_RST_OWNER, &ha->flags);
+				set_bit(AF_FW_RECOVERY, &ha->flags);
+				ql4_printk(KERN_INFO, ha, "%s: Reset owner is 0x%x\n",
+					   __func__, ha->func_num);
+			}
 		} else
 			ql4_printk(KERN_INFO, ha,
 				   "%s: Reset not performed as device state is 0x%x\n",
diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h
index 329d553eae94..129f5dd02822 100644
--- a/drivers/scsi/qla4xxx/ql4_def.h
+++ b/drivers/scsi/qla4xxx/ql4_def.h
@@ -136,6 +136,7 @@
 #define RESPONSE_QUEUE_DEPTH		64
 #define QUEUE_SIZE			64
 #define DMA_BUFFER_SIZE			512
+#define IOCB_HIWAT_CUSHION		4
 
 /*
  * Misc
@@ -180,6 +181,7 @@
 #define DISABLE_ACB_TOV			30
 #define IP_CONFIG_TOV			30
 #define LOGIN_TOV			12
+#define BOOT_LOGIN_RESP_TOV		60
 
 #define MAX_RESET_HA_RETRIES		2
 #define FW_ALIVE_WAIT_TOV		3
@@ -314,6 +316,7 @@ struct ql4_tuple_ddb {
  * DDB flags.
  */
 #define DF_RELOGIN		0	/* Relogin to device */
+#define DF_BOOT_TGT		1	/* Boot target entry */
 #define DF_ISNS_DISCOVERED	2	/* Device was discovered via iSNS */
 #define DF_FO_MASKED		3
 
@@ -501,6 +504,7 @@ struct scsi_qla_host {
 #define AF_INTERRUPTS_ON		6 /* 0x00000040 */
 #define AF_GET_CRASH_RECORD		7 /* 0x00000080 */
 #define AF_LINK_UP			8 /* 0x00000100 */
+#define AF_LOOPBACK			9 /* 0x00000200 */
 #define AF_IRQ_ATTACHED			10 /* 0x00000400 */
 #define AF_DISABLE_ACB_COMPLETE		11 /* 0x00000800 */
 #define AF_HA_REMOVAL			12 /* 0x00001000 */
@@ -516,6 +520,8 @@ struct scsi_qla_host {
 #define AF_8XXX_RST_OWNER		25 /* 0x02000000 */
 #define AF_82XX_DUMP_READING		26 /* 0x04000000 */
 #define AF_83XX_NO_FW_DUMP		27 /* 0x08000000 */
+#define AF_83XX_IOCB_INTR_ON		28 /* 0x10000000 */
+#define AF_83XX_MBOX_INTR_ON		29 /* 0x20000000 */
 
 	unsigned long dpc_flags;
 
@@ -537,6 +543,7 @@ struct scsi_qla_host {
 	uint32_t tot_ddbs;
 
 	uint16_t iocb_cnt;
+	uint16_t iocb_hiwat;
 
 	/* SRB cache. */
 #define SRB_MIN_REQ	128
@@ -838,7 +845,8 @@ static inline int is_aer_supported(struct scsi_qla_host *ha)
 static inline int adapter_up(struct scsi_qla_host *ha)
 {
 	return (test_bit(AF_ONLINE, &ha->flags) != 0) &&
-		(test_bit(AF_LINK_UP, &ha->flags) != 0);
+	       (test_bit(AF_LINK_UP, &ha->flags) != 0) &&
+	       (!test_bit(AF_LOOPBACK, &ha->flags));
 }
 
 static inline struct scsi_qla_host* to_qla_host(struct Scsi_Host *shost)
diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h
index 1c4795020357..ad9d2e2d370f 100644
--- a/drivers/scsi/qla4xxx/ql4_fw.h
+++ b/drivers/scsi/qla4xxx/ql4_fw.h
@@ -495,7 +495,7 @@ struct qla_flt_region {
 #define MBOX_ASTS_IPV6_LCL_PREFIX_IGNORED	0x802D
 #define MBOX_ASTS_ICMPV6_ERROR_MSG_RCVD		0x802E
 #define MBOX_ASTS_IDC_COMPLETE			0x8100
-#define MBOX_ASTS_IDC_NOTIFY			0x8101
+#define MBOX_ASTS_IDC_REQUEST_NOTIFICATION	0x8101
 #define MBOX_ASTS_TXSCVR_INSERTED		0x8130
 #define MBOX_ASTS_TXSCVR_REMOVED		0x8131
 
@@ -522,6 +522,10 @@ struct qla_flt_region {
 #define FLASH_OPT_COMMIT	2
 #define FLASH_OPT_RMW_COMMIT	3
 
+/* Loopback type */
+#define ENABLE_INTERNAL_LOOPBACK	0x04
+#define ENABLE_EXTERNAL_LOOPBACK	0x08
+
 /*************************************************************************/
 
 /* Host Adapter Initialization Control Block (from host) */
diff --git a/drivers/scsi/qla4xxx/ql4_glbl.h b/drivers/scsi/qla4xxx/ql4_glbl.h
index 57a5a3cf5770..982293edf02c 100644
--- a/drivers/scsi/qla4xxx/ql4_glbl.h
+++ b/drivers/scsi/qla4xxx/ql4_glbl.h
@@ -253,12 +253,14 @@ void qla4_8xxx_set_rst_ready(struct scsi_qla_host *ha);
 void qla4_8xxx_clear_rst_ready(struct scsi_qla_host *ha);
 int qla4_8xxx_device_bootstrap(struct scsi_qla_host *ha);
 void qla4_8xxx_get_minidump(struct scsi_qla_host *ha);
-int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha);
-int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha);
+int qla4_8xxx_intr_disable(struct scsi_qla_host *ha);
+int qla4_8xxx_intr_enable(struct scsi_qla_host *ha);
 int qla4_8xxx_set_param(struct scsi_qla_host *ha, int param);
 int qla4_8xxx_update_idc_reg(struct scsi_qla_host *ha);
 int qla4_83xx_post_idc_ack(struct scsi_qla_host *ha);
 void qla4_83xx_disable_pause(struct scsi_qla_host *ha);
+void qla4_83xx_enable_mbox_intrs(struct scsi_qla_host *ha);
+int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha);
 
 extern int ql4xextended_error_logging;
 extern int ql4xdontresethba;
diff --git a/drivers/scsi/qla4xxx/ql4_init.c b/drivers/scsi/qla4xxx/ql4_init.c
index 1aca1b4f70b8..8fc8548ba4ba 100644
--- a/drivers/scsi/qla4xxx/ql4_init.c
+++ b/drivers/scsi/qla4xxx/ql4_init.c
@@ -195,12 +195,10 @@ exit_get_sys_info_no_free:
  * @ha: pointer to host adapter structure.
  *
  **/
-static int qla4xxx_init_local_data(struct scsi_qla_host *ha)
+static void qla4xxx_init_local_data(struct scsi_qla_host *ha)
 {
 	/* Initialize aen queue */
 	ha->aen_q_count = MAX_AEN_ENTRIES;
-
-	return qla4xxx_get_firmware_status(ha);
 }
 
 static uint8_t
@@ -935,14 +933,23 @@ int qla4xxx_initialize_adapter(struct scsi_qla_host *ha, int is_reset)
 	if (ha->isp_ops->start_firmware(ha) == QLA_ERROR)
 		goto exit_init_hba;
 
+	/*
+	 * For ISP83XX, mailbox and IOCB interrupts are enabled separately.
+	 * Mailbox interrupts must be enabled prior to issuing any mailbox
+	 * command in order to prevent the possibility of losing interrupts
+	 * while switching from polling to interrupt mode. IOCB interrupts are
+	 * enabled via isp_ops->enable_intrs.
+	 */
+	if (is_qla8032(ha))
+		qla4_83xx_enable_mbox_intrs(ha);
+
 	if (qla4xxx_about_firmware(ha) == QLA_ERROR)
 		goto exit_init_hba;
 
 	if (ha->isp_ops->get_sys_info(ha) == QLA_ERROR)
 		goto exit_init_hba;
 
-	if (qla4xxx_init_local_data(ha) == QLA_ERROR)
-		goto exit_init_hba;
+	qla4xxx_init_local_data(ha);
 
 	status = qla4xxx_init_firmware(ha);
 	if (status == QLA_ERROR)
diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c
index f48f37a281d1..14fec976f634 100644
--- a/drivers/scsi/qla4xxx/ql4_iocb.c
+++ b/drivers/scsi/qla4xxx/ql4_iocb.c
@@ -316,7 +316,7 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb)
 		goto queuing_error;
 
 	/* total iocbs active */
-	if ((ha->iocb_cnt + req_cnt) >= REQUEST_QUEUE_DEPTH)
+	if ((ha->iocb_cnt + req_cnt) >= ha->iocb_hiwat)
 		goto queuing_error;
 
 	/* Build command packet */
diff --git a/drivers/scsi/qla4xxx/ql4_isr.c b/drivers/scsi/qla4xxx/ql4_isr.c
index 15ea81465ce4..1b83dc283d2e 100644
--- a/drivers/scsi/qla4xxx/ql4_isr.c
+++ b/drivers/scsi/qla4xxx/ql4_isr.c
@@ -582,6 +582,33 @@ exit_prq_error:
 }
 
 /**
+ * qla4_83xx_loopback_in_progress: Is loopback in progress?
+ * @ha: Pointer to host adapter structure.
+ * @ret: 1 = loopback in progress, 0 = loopback not in progress
+ **/
+static int qla4_83xx_loopback_in_progress(struct scsi_qla_host *ha)
+{
+	int rval = 1;
+
+	if (is_qla8032(ha)) {
+		if ((ha->idc_info.info2 & ENABLE_INTERNAL_LOOPBACK) ||
+		    (ha->idc_info.info2 & ENABLE_EXTERNAL_LOOPBACK)) {
+			DEBUG2(ql4_printk(KERN_INFO, ha,
+					  "%s: Loopback diagnostics in progress\n",
+					  __func__));
+			rval = 1;
+		} else {
+			DEBUG2(ql4_printk(KERN_INFO, ha,
+					  "%s: Loopback diagnostics not in progress\n",
+					  __func__));
+			rval = 0;
+		}
+	}
+
+	return rval;
+}
+
+/**
  * qla4xxx_isr_decode_mailbox - decodes mailbox status
  * @ha: Pointer to host adapter structure.
  * @mailbox_status: Mailbox status.
@@ -676,8 +703,10 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha,
 
 		case MBOX_ASTS_LINK_DOWN:
 			clear_bit(AF_LINK_UP, &ha->flags);
-			if (test_bit(AF_INIT_DONE, &ha->flags))
+			if (test_bit(AF_INIT_DONE, &ha->flags)) {
 				set_bit(DPC_LINK_CHANGED, &ha->dpc_flags);
+				qla4xxx_wake_dpc(ha);
+			}
 
 			ql4_printk(KERN_INFO, ha, "%s: LINK DOWN\n", __func__);
 			qla4xxx_post_aen_work(ha, ISCSI_EVENT_LINKDOWN,
@@ -806,7 +835,7 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha,
 			    " removed\n",  ha->host_no, mbox_sts[0]));
 			break;
 
-		case MBOX_ASTS_IDC_NOTIFY:
+		case MBOX_ASTS_IDC_REQUEST_NOTIFICATION:
 		{
 			uint32_t opcode;
 			if (is_qla8032(ha)) {
@@ -840,6 +869,11 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha,
 				DEBUG2(ql4_printk(KERN_INFO, ha,
 						  "scsi:%ld: AEN %04x IDC Complete notification\n",
 						  ha->host_no, mbox_sts[0]));
+
+				if (qla4_83xx_loopback_in_progress(ha))
+					set_bit(AF_LOOPBACK, &ha->flags);
+				else
+					clear_bit(AF_LOOPBACK, &ha->flags);
 			}
 			break;
 
@@ -1124,17 +1158,18 @@ irqreturn_t qla4_83xx_intr_handler(int irq, void *dev_id)
 
 	/* Legacy interrupt is valid if bit31 of leg_int_ptr is set */
 	if (!(leg_int_ptr & LEG_INT_PTR_B31)) {
-		ql4_printk(KERN_ERR, ha,
-			   "%s: Legacy Interrupt Bit 31 not set, spurious interrupt!\n",
-			   __func__);
+		DEBUG2(ql4_printk(KERN_ERR, ha,
+				  "%s: Legacy Interrupt Bit 31 not set, spurious interrupt!\n",
+				  __func__));
 		return IRQ_NONE;
 	}
 
 	/* Validate the PCIE function ID set in leg_int_ptr bits [19..16] */
 	if ((leg_int_ptr & PF_BITS_MASK) != ha->pf_bit) {
-		ql4_printk(KERN_ERR, ha,
-			   "%s: Incorrect function ID 0x%x in legacy interrupt register, ha->pf_bit = 0x%x\n",
-			   __func__, (leg_int_ptr & PF_BITS_MASK), ha->pf_bit);
+		DEBUG2(ql4_printk(KERN_ERR, ha,
+				  "%s: Incorrect function ID 0x%x in legacy interrupt register, ha->pf_bit = 0x%x\n",
+				  __func__, (leg_int_ptr & PF_BITS_MASK),
+				  ha->pf_bit));
 		return IRQ_NONE;
 	}
 
@@ -1437,11 +1472,14 @@ irq_not_attached:
 
 void qla4xxx_free_irqs(struct scsi_qla_host *ha)
 {
-	if (test_bit(AF_MSIX_ENABLED, &ha->flags))
-		qla4_8xxx_disable_msix(ha);
-	else if (test_and_clear_bit(AF_MSI_ENABLED, &ha->flags)) {
-		free_irq(ha->pdev->irq, ha);
-		pci_disable_msi(ha->pdev);
-	} else if (test_and_clear_bit(AF_INTx_ENABLED, &ha->flags))
-		free_irq(ha->pdev->irq, ha);
+	if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags)) {
+		if (test_bit(AF_MSIX_ENABLED, &ha->flags)) {
+			qla4_8xxx_disable_msix(ha);
+		} else if (test_and_clear_bit(AF_MSI_ENABLED, &ha->flags)) {
+			free_irq(ha->pdev->irq, ha);
+			pci_disable_msi(ha->pdev);
+		} else if (test_and_clear_bit(AF_INTx_ENABLED, &ha->flags)) {
+			free_irq(ha->pdev->irq, ha);
+		}
+	}
 }
diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c
index 3d41034191f0..160d33697216 100644
--- a/drivers/scsi/qla4xxx/ql4_mbx.c
+++ b/drivers/scsi/qla4xxx/ql4_mbx.c
@@ -44,6 +44,30 @@ void qla4xxx_process_mbox_intr(struct scsi_qla_host *ha, int out_count)
 }
 
 /**
+ * qla4xxx_is_intr_poll_mode – Are we allowed to poll for interrupts?
+ * @ha: Pointer to host adapter structure.
+ * @ret: 1=polling mode, 0=non-polling mode
+ **/
+static int qla4xxx_is_intr_poll_mode(struct scsi_qla_host *ha)
+{
+	int rval = 1;
+
+	if (is_qla8032(ha)) {
+		if (test_bit(AF_IRQ_ATTACHED, &ha->flags) &&
+		    test_bit(AF_83XX_MBOX_INTR_ON, &ha->flags))
+			rval = 0;
+	} else {
+		if (test_bit(AF_IRQ_ATTACHED, &ha->flags) &&
+		    test_bit(AF_INTERRUPTS_ON, &ha->flags) &&
+		    test_bit(AF_ONLINE, &ha->flags) &&
+		    !test_bit(AF_HA_REMOVAL, &ha->flags))
+			rval = 0;
+	}
+
+	return rval;
+}
+
+/**
  * qla4xxx_mailbox_command - issues mailbox commands
  * @ha: Pointer to host adapter structure.
  * @inCount: number of mailbox registers to load.
@@ -153,33 +177,28 @@ int qla4xxx_mailbox_command(struct scsi_qla_host *ha, uint8_t inCount,
 	/*
 	 * Wait for completion: Poll or completion queue
 	 */
-	if (test_bit(AF_IRQ_ATTACHED, &ha->flags) &&
-	    test_bit(AF_INTERRUPTS_ON, &ha->flags) &&
-	    test_bit(AF_ONLINE, &ha->flags) &&
-	    !test_bit(AF_HA_REMOVAL, &ha->flags)) {
-		/* Do not poll for completion. Use completion queue */
-		set_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags);
-		wait_for_completion_timeout(&ha->mbx_intr_comp, MBOX_TOV * HZ);
-		clear_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags);
-	} else {
+	if (qla4xxx_is_intr_poll_mode(ha)) {
 		/* Poll for command to complete */
 		wait_count = jiffies + MBOX_TOV * HZ;
 		while (test_bit(AF_MBOX_COMMAND_DONE, &ha->flags) == 0) {
 			if (time_after_eq(jiffies, wait_count))
 				break;
-
 			/*
 			 * Service the interrupt.
 			 * The ISR will save the mailbox status registers
 			 * to a temporary storage location in the adapter
 			 * structure.
 			 */
-
 			spin_lock_irqsave(&ha->hardware_lock, flags);
 			ha->isp_ops->process_mailbox_interrupt(ha, outCount);
 			spin_unlock_irqrestore(&ha->hardware_lock, flags);
 			msleep(10);
 		}
+	} else {
+		/* Do not poll for completion. Use completion queue */
+		set_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags);
+		wait_for_completion_timeout(&ha->mbx_intr_comp, MBOX_TOV * HZ);
+		clear_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags);
 	}
 
 	/* Check for mailbox timeout. */
@@ -678,8 +697,24 @@ int qla4xxx_get_firmware_status(struct scsi_qla_host * ha)
 		return QLA_ERROR;
 	}
 
-	ql4_printk(KERN_INFO, ha, "%ld firmware IOCBs available (%d).\n",
-	    ha->host_no, mbox_sts[2]);
+	/* High-water mark of IOCBs */
+	ha->iocb_hiwat = mbox_sts[2];
+	DEBUG2(ql4_printk(KERN_INFO, ha,
+			  "%s: firmware IOCBs available = %d\n", __func__,
+			  ha->iocb_hiwat));
+
+	if (ha->iocb_hiwat > IOCB_HIWAT_CUSHION)
+		ha->iocb_hiwat -= IOCB_HIWAT_CUSHION;
+
+	/* Ideally, we should not enter this code, as the # of firmware
+	 * IOCBs is hard-coded in the firmware. We set a default
+	 * iocb_hiwat here just in case */
+	if (ha->iocb_hiwat == 0) {
+		ha->iocb_hiwat = REQUEST_QUEUE_DEPTH / 4;
+		DEBUG2(ql4_printk(KERN_WARNING, ha,
+				  "%s: Setting IOCB's to = %d\n", __func__,
+				  ha->iocb_hiwat));
+	}
 
 	return QLA_SUCCESS;
 }
@@ -1385,10 +1420,8 @@ int qla4xxx_get_chap(struct scsi_qla_host *ha, char *username, char *password,
 	dma_addr_t chap_dma;
 
 	chap_table = dma_pool_alloc(ha->chap_dma_pool, GFP_KERNEL, &chap_dma);
-	if (chap_table == NULL) {
-		ret = -ENOMEM;
-		goto exit_get_chap;
-	}
+	if (chap_table == NULL)
+		return -ENOMEM;
 
 	chap_size = sizeof(struct ql4_chap_table);
 	memset(chap_table, 0, chap_size);
diff --git a/drivers/scsi/qla4xxx/ql4_nx.c b/drivers/scsi/qla4xxx/ql4_nx.c
index 499a92db1cf6..71d3d234f526 100644
--- a/drivers/scsi/qla4xxx/ql4_nx.c
+++ b/drivers/scsi/qla4xxx/ql4_nx.c
@@ -2986,7 +2986,7 @@ int qla4_8xxx_load_risc(struct scsi_qla_host *ha)
 
 	retval = qla4_8xxx_device_state_handler(ha);
 
-	if (retval == QLA_SUCCESS && !test_bit(AF_INIT_DONE, &ha->flags))
+	if (retval == QLA_SUCCESS && !test_bit(AF_IRQ_ATTACHED, &ha->flags))
 		retval = qla4xxx_request_irqs(ha);
 
 	return retval;
@@ -3427,11 +3427,11 @@ int qla4_8xxx_get_sys_info(struct scsi_qla_host *ha)
 	}
 
 	/* Make sure we receive the minimum required data to cache internally */
-	if (mbox_sts[4] < offsetof(struct mbx_sys_info, reserved)) {
+	if ((is_qla8032(ha) ? mbox_sts[3] : mbox_sts[4]) <
+	    offsetof(struct mbx_sys_info, reserved)) {
 		DEBUG2(printk("scsi%ld: %s: GET_SYS_INFO data receive"
 		    " error (%x)\n", ha->host_no, __func__, mbox_sts[4]));
 		goto exit_validate_mac82;
-
 	}
 
 	/* Save M.A.C. address & serial_number */
@@ -3463,7 +3463,7 @@ exit_validate_mac82:
 
 /* Interrupt handling helpers. */
 
-int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha)
+int qla4_8xxx_intr_enable(struct scsi_qla_host *ha)
 {
 	uint32_t mbox_cmd[MBOX_REG_COUNT];
 	uint32_t mbox_sts[MBOX_REG_COUNT];
@@ -3484,7 +3484,7 @@ int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha)
 	return QLA_SUCCESS;
 }
 
-int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha)
+int qla4_8xxx_intr_disable(struct scsi_qla_host *ha)
 {
 	uint32_t mbox_cmd[MBOX_REG_COUNT];
 	uint32_t mbox_sts[MBOX_REG_COUNT];
@@ -3509,7 +3509,7 @@ int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha)
 void
 qla4_82xx_enable_intrs(struct scsi_qla_host *ha)
 {
-	qla4_8xxx_mbx_intr_enable(ha);
+	qla4_8xxx_intr_enable(ha);
 
 	spin_lock_irq(&ha->hardware_lock);
 	/* BIT 10 - reset */
@@ -3522,7 +3522,7 @@ void
 qla4_82xx_disable_intrs(struct scsi_qla_host *ha)
 {
 	if (test_and_clear_bit(AF_INTERRUPTS_ON, &ha->flags))
-		qla4_8xxx_mbx_intr_disable(ha);
+		qla4_8xxx_intr_disable(ha);
 
 	spin_lock_irq(&ha->hardware_lock);
 	/* BIT 10 - set */
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 4cec123a6a6a..6142729167f4 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -1337,18 +1337,18 @@ static int qla4xxx_session_get_param(struct iscsi_cls_session *cls_sess,
 					      sess->password_in, BIDI_CHAP,
 					      &idx);
 		if (rval)
-			return -EINVAL;
-
-		len = sprintf(buf, "%hu\n", idx);
+			len = sprintf(buf, "\n");
+		else
+			len = sprintf(buf, "%hu\n", idx);
 		break;
 	case ISCSI_PARAM_CHAP_OUT_IDX:
 		rval = qla4xxx_get_chap_index(ha, sess->username,
 					      sess->password, LOCAL_CHAP,
 					      &idx);
 		if (rval)
-			return -EINVAL;
-
-		len = sprintf(buf, "%hu\n", idx);
+			len = sprintf(buf, "\n");
+		else
+			len = sprintf(buf, "%hu\n", idx);
 		break;
 	default:
 		return iscsi_session_get_param(cls_sess, param, buf);
@@ -2242,6 +2242,7 @@ static int qla4xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 	    test_bit(DPC_HA_NEED_QUIESCENT, &ha->dpc_flags) ||
 	    !test_bit(AF_ONLINE, &ha->flags) ||
 	    !test_bit(AF_LINK_UP, &ha->flags) ||
+	    test_bit(AF_LOOPBACK, &ha->flags) ||
 	    test_bit(DPC_RESET_HA_FW_CONTEXT, &ha->dpc_flags))
 		goto qc_host_busy;
 
@@ -2978,6 +2979,7 @@ static int qla4xxx_recover_adapter(struct scsi_qla_host *ha)
 		if (status == QLA_SUCCESS) {
 			if (!test_bit(AF_FW_RECOVERY, &ha->flags))
 				qla4xxx_cmd_wait(ha);
+
 			ha->isp_ops->disable_intrs(ha);
 			qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS);
 			qla4xxx_abort_active_cmds(ha, DID_RESET << 16);
@@ -3479,7 +3481,8 @@ dpc_post_reset_ha:
 	}
 
 	/* ---- link change? --- */
-	if (test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) {
+	if (!test_bit(AF_LOOPBACK, &ha->flags) &&
+	    test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) {
 		if (!test_bit(AF_LINK_UP, &ha->flags)) {
 			/* ---- link down? --- */
 			qla4xxx_mark_all_devices_missing(ha);
@@ -3508,10 +3511,8 @@ static void qla4xxx_free_adapter(struct scsi_qla_host *ha)
 {
 	qla4xxx_abort_active_cmds(ha, DID_NO_CONNECT << 16);
 
-	if (test_bit(AF_INTERRUPTS_ON, &ha->flags)) {
-		/* Turn-off interrupts on the card. */
-		ha->isp_ops->disable_intrs(ha);
-	}
+	/* Turn-off interrupts on the card. */
+	ha->isp_ops->disable_intrs(ha);
 
 	if (is_qla40XX(ha)) {
 		writel(set_rmask(CSR_SCSI_PROCESSOR_INTR),
@@ -3547,8 +3548,7 @@ static void qla4xxx_free_adapter(struct scsi_qla_host *ha)
 	}
 
 	/* Detach interrupts */
-	if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags))
-		qla4xxx_free_irqs(ha);
+	qla4xxx_free_irqs(ha);
 
 	/* free extra memory */
 	qla4xxx_mem_free(ha);
@@ -4687,7 +4687,8 @@ static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha,
 	struct iscsi_endpoint *ep;
 	struct sockaddr_in *addr;
 	struct sockaddr_in6 *addr6;
-	struct sockaddr *dst_addr;
+	struct sockaddr *t_addr;
+	struct sockaddr_storage *dst_addr;
 	char *ip;
 
 	/* TODO: need to destroy on unload iscsi_endpoint*/
@@ -4696,21 +4697,23 @@ static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha,
 		return NULL;
 
 	if (fw_ddb_entry->options & DDB_OPT_IPV6_DEVICE) {
-		dst_addr->sa_family = AF_INET6;
+		t_addr = (struct sockaddr *)dst_addr;
+		t_addr->sa_family = AF_INET6;
 		addr6 = (struct sockaddr_in6 *)dst_addr;
 		ip = (char *)&addr6->sin6_addr;
 		memcpy(ip, fw_ddb_entry->ip_addr, IPv6_ADDR_LEN);
 		addr6->sin6_port = htons(le16_to_cpu(fw_ddb_entry->port));
 
 	} else {
-		dst_addr->sa_family = AF_INET;
+		t_addr = (struct sockaddr *)dst_addr;
+		t_addr->sa_family = AF_INET;
 		addr = (struct sockaddr_in *)dst_addr;
 		ip = (char *)&addr->sin_addr;
 		memcpy(ip, fw_ddb_entry->ip_addr, IP_ADDR_LEN);
 		addr->sin_port = htons(le16_to_cpu(fw_ddb_entry->port));
 	}
 
-	ep = qla4xxx_ep_connect(ha->host, dst_addr, 0);
+	ep = qla4xxx_ep_connect(ha->host, (struct sockaddr *)dst_addr, 0);
 	vfree(dst_addr);
 	return ep;
 }
@@ -4725,7 +4728,8 @@ static int qla4xxx_verify_boot_idx(struct scsi_qla_host *ha, uint16_t idx)
 }
 
 static void qla4xxx_setup_flash_ddb_entry(struct scsi_qla_host *ha,
-					  struct ddb_entry *ddb_entry)
+					  struct ddb_entry *ddb_entry,
+					  uint16_t idx)
 {
 	uint16_t def_timeout;
 
@@ -4745,6 +4749,10 @@ static void qla4xxx_setup_flash_ddb_entry(struct scsi_qla_host *ha,
 		def_timeout : LOGIN_TOV;
 	ddb_entry->default_time2wait =
 		le16_to_cpu(ddb_entry->fw_ddb_entry.iscsi_def_time2wait);
+
+	if (ql4xdisablesysfsboot &&
+	    (idx == ha->pri_ddb_idx || idx == ha->sec_ddb_idx))
+		set_bit(DF_BOOT_TGT, &ddb_entry->flags);
 }
 
 static void qla4xxx_wait_for_ip_configuration(struct scsi_qla_host *ha)
@@ -4881,7 +4889,7 @@ static void qla4xxx_remove_failed_ddb(struct scsi_qla_host *ha,
 
 static int qla4xxx_sess_conn_setup(struct scsi_qla_host *ha,
 				   struct dev_db_entry *fw_ddb_entry,
-				   int is_reset)
+				   int is_reset, uint16_t idx)
 {
 	struct iscsi_cls_session *cls_sess;
 	struct iscsi_session *sess;
@@ -4919,7 +4927,7 @@ static int qla4xxx_sess_conn_setup(struct scsi_qla_host *ha,
 	memcpy(&ddb_entry->fw_ddb_entry, fw_ddb_entry,
 	       sizeof(struct dev_db_entry));
 
-	qla4xxx_setup_flash_ddb_entry(ha, ddb_entry);
+	qla4xxx_setup_flash_ddb_entry(ha, ddb_entry, idx);
 
 	cls_conn = iscsi_conn_setup(cls_sess, sizeof(struct qla_conn), conn_id);
 
@@ -5036,7 +5044,7 @@ static void qla4xxx_build_nt_list(struct scsi_qla_host *ha,
 				goto continue_next_nt;
 		}
 
-		ret = qla4xxx_sess_conn_setup(ha, fw_ddb_entry, is_reset);
+		ret = qla4xxx_sess_conn_setup(ha, fw_ddb_entry, is_reset, idx);
 		if (ret == QLA_ERROR)
 			goto exit_nt_list;
 
@@ -5116,6 +5124,78 @@ void qla4xxx_build_ddb_list(struct scsi_qla_host *ha, int is_reset)
 }
 
 /**
+ * qla4xxx_wait_login_resp_boot_tgt -  Wait for iSCSI boot target login
+ * response.
+ * @ha: pointer to adapter structure
+ *
+ * When the boot entry is normal iSCSI target then DF_BOOT_TGT flag will be
+ * set in DDB and we will wait for login response of boot targets during
+ * probe.
+ **/
+static void qla4xxx_wait_login_resp_boot_tgt(struct scsi_qla_host *ha)
+{
+	struct ddb_entry *ddb_entry;
+	struct dev_db_entry *fw_ddb_entry = NULL;
+	dma_addr_t fw_ddb_entry_dma;
+	unsigned long wtime;
+	uint32_t ddb_state;
+	int max_ddbs, idx, ret;
+
+	max_ddbs =  is_qla40XX(ha) ? MAX_DEV_DB_ENTRIES_40XX :
+				     MAX_DEV_DB_ENTRIES;
+
+	fw_ddb_entry = dma_alloc_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry),
+					  &fw_ddb_entry_dma, GFP_KERNEL);
+	if (!fw_ddb_entry) {
+		ql4_printk(KERN_ERR, ha,
+			   "%s: Unable to allocate dma buffer\n", __func__);
+		goto exit_login_resp;
+	}
+
+	wtime = jiffies + (HZ * BOOT_LOGIN_RESP_TOV);
+
+	for (idx = 0; idx < max_ddbs; idx++) {
+		ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, idx);
+		if (ddb_entry == NULL)
+			continue;
+
+		if (test_bit(DF_BOOT_TGT, &ddb_entry->flags)) {
+			DEBUG2(ql4_printk(KERN_INFO, ha,
+					  "%s: DDB index [%d]\n", __func__,
+					  ddb_entry->fw_ddb_index));
+			do {
+				ret = qla4xxx_get_fwddb_entry(ha,
+						ddb_entry->fw_ddb_index,
+						fw_ddb_entry, fw_ddb_entry_dma,
+						NULL, NULL, &ddb_state, NULL,
+						NULL, NULL);
+				if (ret == QLA_ERROR)
+					goto exit_login_resp;
+
+				if ((ddb_state == DDB_DS_SESSION_ACTIVE) ||
+				    (ddb_state == DDB_DS_SESSION_FAILED))
+					break;
+
+				schedule_timeout_uninterruptible(HZ);
+
+			} while ((time_after(wtime, jiffies)));
+
+			if (!time_after(wtime, jiffies)) {
+				DEBUG2(ql4_printk(KERN_INFO, ha,
+						  "%s: Login response wait timer expired\n",
+						  __func__));
+				 goto exit_login_resp;
+			}
+		}
+	}
+
+exit_login_resp:
+	if (fw_ddb_entry)
+		dma_free_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry),
+				  fw_ddb_entry, fw_ddb_entry_dma);
+}
+
+/**
  * qla4xxx_probe_adapter - callback function to probe HBA
  * @pdev: pointer to pci_dev structure
  * @pci_device_id: pointer to pci_device entry
@@ -5270,7 +5350,7 @@ static int qla4xxx_probe_adapter(struct pci_dev *pdev,
 		if (is_qla80XX(ha)) {
 			ha->isp_ops->idc_lock(ha);
 			dev_state = qla4_8xxx_rd_direct(ha,
-							QLA82XX_CRB_DEV_STATE);
+							QLA8XXX_CRB_DEV_STATE);
 			ha->isp_ops->idc_unlock(ha);
 			if (dev_state == QLA8XXX_DEV_FAILED) {
 				ql4_printk(KERN_WARNING, ha, "%s: don't retry "
@@ -5368,6 +5448,7 @@ skip_retry_init:
 		/* Perform the build ddb list and login to each */
 	qla4xxx_build_ddb_list(ha, INIT_ADAPTER);
 	iscsi_host_for_each_session(ha->host, qla4xxx_login_flash_ddb);
+	qla4xxx_wait_login_resp_boot_tgt(ha);
 
 	qla4xxx_create_chap_list(ha);
 
@@ -6008,14 +6089,6 @@ static int qla4xxx_host_reset(struct Scsi_Host *shost, int reset_type)
 		goto exit_host_reset;
 	}
 
-	rval = qla4xxx_wait_for_hba_online(ha);
-	if (rval != QLA_SUCCESS) {
-		DEBUG2(ql4_printk(KERN_INFO, ha, "%s: Unable to reset host "
-				  "adapter\n", __func__));
-		rval = -EIO;
-		goto exit_host_reset;
-	}
-
 	if (test_bit(DPC_RESET_HA, &ha->dpc_flags))
 		goto recover_adapter;
 
@@ -6115,7 +6188,6 @@ qla4xxx_pci_mmio_enabled(struct pci_dev *pdev)
 static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha)
 {
 	uint32_t rval = QLA_ERROR;
-	uint32_t ret = 0;
 	int fn;
 	struct pci_dev *other_pdev = NULL;
 
@@ -6201,16 +6273,7 @@ static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha)
 			qla4_8xxx_wr_direct(ha, QLA8XXX_CRB_DRV_STATE, 0);
 			qla4_8xxx_set_drv_active(ha);
 			ha->isp_ops->idc_unlock(ha);
-			ret = qla4xxx_request_irqs(ha);
-			if (ret) {
-				ql4_printk(KERN_WARNING, ha, "Failed to "
-				    "reserve interrupt %d already in use.\n",
-				    ha->pdev->irq);
-				rval = QLA_ERROR;
-			} else {
-				ha->isp_ops->enable_intrs(ha);
-				rval = QLA_SUCCESS;
-			}
+			ha->isp_ops->enable_intrs(ha);
 		}
 	} else {
 		ql4_printk(KERN_INFO, ha, "scsi%ld: %s: devfn 0x%x is not "
@@ -6220,18 +6283,9 @@ static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha)
 		     QLA8XXX_DEV_READY)) {
 			clear_bit(AF_FW_RECOVERY, &ha->flags);
 			rval = qla4xxx_initialize_adapter(ha, RESET_ADAPTER);
-			if (rval == QLA_SUCCESS) {
-				ret = qla4xxx_request_irqs(ha);
-				if (ret) {
-					ql4_printk(KERN_WARNING, ha, "Failed to"
-					    " reserve interrupt %d already in"
-					    " use.\n", ha->pdev->irq);
-					rval = QLA_ERROR;
-				} else {
-					ha->isp_ops->enable_intrs(ha);
-					rval = QLA_SUCCESS;
-				}
-			}
+			if (rval == QLA_SUCCESS)
+				ha->isp_ops->enable_intrs(ha);
+
 			ha->isp_ops->idc_lock(ha);
 			qla4_8xxx_set_drv_active(ha);
 			ha->isp_ops->idc_unlock(ha);
diff --git a/drivers/scsi/qla4xxx/ql4_version.h b/drivers/scsi/qla4xxx/ql4_version.h
index f6df2ea91ab5..6775a45af315 100644
--- a/drivers/scsi/qla4xxx/ql4_version.h
+++ b/drivers/scsi/qla4xxx/ql4_version.h
@@ -5,4 +5,4 @@
  * See LICENSE.qla4xxx for copyright and licensing details.
  */
 
-#define QLA4XXX_DRIVER_VERSION	"5.03.00-k1"
+#define QLA4XXX_DRIVER_VERSION	"5.03.00-k4"
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 59d427bf08e2..0a74b975efdf 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -2503,6 +2503,15 @@ show_priv_session_creator(struct device *dev, struct device_attribute *attr,
 }
 static ISCSI_CLASS_ATTR(priv_sess, creator, S_IRUGO, show_priv_session_creator,
 			NULL);
+static ssize_t
+show_priv_session_target_id(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
+	return sprintf(buf, "%d\n", session->target_id);
+}
+static ISCSI_CLASS_ATTR(priv_sess, target_id, S_IRUGO,
+			show_priv_session_target_id, NULL);
 
 #define iscsi_priv_session_attr_show(field, format)			\
 static ssize_t								\
@@ -2575,6 +2584,7 @@ static struct attribute *iscsi_session_attrs[] = {
 	&dev_attr_priv_sess_creator.attr,
 	&dev_attr_sess_chap_out_idx.attr,
 	&dev_attr_sess_chap_in_idx.attr,
+	&dev_attr_priv_sess_target_id.attr,
 	NULL,
 };
 
@@ -2638,6 +2648,8 @@ static umode_t iscsi_session_attr_is_visible(struct kobject *kobj,
 		return S_IRUGO;
 	else if (attr == &dev_attr_priv_sess_creator.attr)
 		return S_IRUGO;
+	else if (attr == &dev_attr_priv_sess_target_id.attr)
+		return S_IRUGO;
 	else {
 		WARN_ONCE(1, "Invalid session attr");
 		return 0;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index be2c9a6561ff..9f0c46547459 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1391,24 +1391,23 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	if (!idr_pre_get(&sg_index_idr, GFP_KERNEL)) {
-		printk(KERN_WARNING "idr expansion Sg_device failure\n");
-		error = -ENOMEM;
-		goto out;
-	}
-
+	idr_preload(GFP_KERNEL);
 	write_lock_irqsave(&sg_index_lock, iflags);
 
-	error = idr_get_new(&sg_index_idr, sdp, &k);
-	if (error) {
-		write_unlock_irqrestore(&sg_index_lock, iflags);
-		printk(KERN_WARNING "idr allocation Sg_device failure: %d\n",
-		       error);
-		goto out;
+	error = idr_alloc(&sg_index_idr, sdp, 0, SG_MAX_DEVS, GFP_NOWAIT);
+	if (error < 0) {
+		if (error == -ENOSPC) {
+			sdev_printk(KERN_WARNING, scsidp,
+				    "Unable to attach sg device type=%d, minor number exceeds %d\n",
+				    scsidp->type, SG_MAX_DEVS - 1);
+			error = -ENODEV;
+		} else {
+			printk(KERN_WARNING
+			       "idr allocation Sg_device failure: %d\n", error);
+		}
+		goto out_unlock;
 	}
-
-	if (unlikely(k >= SG_MAX_DEVS))
-		goto overflow;
+	k = error;
 
 	SCSI_LOG_TIMEOUT(3, printk("sg_alloc: dev=%d \n", k));
 	sprintf(disk->disk_name, "sg%d", k);
@@ -1420,25 +1419,17 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp)
 	sdp->sg_tablesize = queue_max_segments(q);
 	sdp->index = k;
 	kref_init(&sdp->d_ref);
+	error = 0;
 
+out_unlock:
 	write_unlock_irqrestore(&sg_index_lock, iflags);
+	idr_preload_end();
 
-	error = 0;
- out:
 	if (error) {
 		kfree(sdp);
 		return ERR_PTR(error);
 	}
 	return sdp;
-
- overflow:
-	idr_remove(&sg_index_idr, k);
-	write_unlock_irqrestore(&sg_index_lock, iflags);
-	sdev_printk(KERN_WARNING, scsidp,
-		    "Unable to attach sg device type=%d, minor "
-		    "number exceeds %d\n", scsidp->type, SG_MAX_DEVS - 1);
-	error = -ENODEV;
-	goto out;
 }
 
 static int
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 3e2b3717cb5c..86974471af68 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -4076,7 +4076,7 @@ static int st_probe(struct device *dev)
 	struct st_modedef *STm;
 	struct st_partstat *STps;
 	struct st_buffer *buffer;
-	int i, dev_num, error;
+	int i, error;
 	char *stp;
 
 	if (SDp->type != TYPE_TAPE)
@@ -4178,27 +4178,17 @@ static int st_probe(struct device *dev)
 	    tpnt->blksize_changed = 0;
 	mutex_init(&tpnt->lock);
 
-	if (!idr_pre_get(&st_index_idr, GFP_KERNEL)) {
-		pr_warn("st: idr expansion failed\n");
-		error = -ENOMEM;
-		goto out_put_disk;
-	}
-
+	idr_preload(GFP_KERNEL);
 	spin_lock(&st_index_lock);
-	error = idr_get_new(&st_index_idr, tpnt, &dev_num);
+	error = idr_alloc(&st_index_idr, tpnt, 0, ST_MAX_TAPES + 1, GFP_NOWAIT);
 	spin_unlock(&st_index_lock);
-	if (error) {
+	idr_preload_end();
+	if (error < 0) {
 		pr_warn("st: idr allocation failed: %d\n", error);
 		goto out_put_disk;
 	}
-
-	if (dev_num > ST_MAX_TAPES) {
-		pr_err("st: Too many tape devices (max. %d).\n", ST_MAX_TAPES);
-		goto out_put_index;
-	}
-
-	tpnt->index = dev_num;
-	sprintf(disk->disk_name, "st%d", dev_num);
+	tpnt->index = error;
+	sprintf(disk->disk_name, "st%d", tpnt->index);
 
 	dev_set_drvdata(dev, tpnt);
 
@@ -4218,9 +4208,8 @@ static int st_probe(struct device *dev)
 
 out_remove_devs:
 	remove_cdevs(tpnt);
-out_put_index:
 	spin_lock(&st_index_lock);
-	idr_remove(&st_index_idr, dev_num);
+	idr_remove(&st_index_idr, tpnt->index);
 	spin_unlock(&st_index_lock);
 out_put_disk:
 	put_disk(disk);
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 270b3cf6f372..16a3a0cc9672 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -201,6 +201,7 @@ enum storvsc_request_type {
 #define SRB_STATUS_AUTOSENSE_VALID	0x80
 #define SRB_STATUS_INVALID_LUN	0x20
 #define SRB_STATUS_SUCCESS	0x01
+#define SRB_STATUS_ABORTED	0x02
 #define SRB_STATUS_ERROR	0x04
 
 /*
@@ -295,6 +296,25 @@ struct storvsc_scan_work {
 	uint lun;
 };
 
+static void storvsc_device_scan(struct work_struct *work)
+{
+	struct storvsc_scan_work *wrk;
+	uint lun;
+	struct scsi_device *sdev;
+
+	wrk = container_of(work, struct storvsc_scan_work, work);
+	lun = wrk->lun;
+
+	sdev = scsi_device_lookup(wrk->host, 0, 0, lun);
+	if (!sdev)
+		goto done;
+	scsi_rescan_device(&sdev->sdev_gendev);
+	scsi_device_put(sdev);
+
+done:
+	kfree(wrk);
+}
+
 static void storvsc_bus_scan(struct work_struct *work)
 {
 	struct storvsc_scan_work *wrk;
@@ -467,6 +487,7 @@ static struct scatterlist *create_bounce_buffer(struct scatterlist *sgl,
 	if (!bounce_sgl)
 		return NULL;
 
+	sg_init_table(bounce_sgl, num_pages);
 	for (i = 0; i < num_pages; i++) {
 		page_buf = alloc_page(GFP_ATOMIC);
 		if (!page_buf)
@@ -760,6 +781,66 @@ cleanup:
 	return ret;
 }
 
+static void storvsc_handle_error(struct vmscsi_request *vm_srb,
+				struct scsi_cmnd *scmnd,
+				struct Scsi_Host *host,
+				u8 asc, u8 ascq)
+{
+	struct storvsc_scan_work *wrk;
+	void (*process_err_fn)(struct work_struct *work);
+	bool do_work = false;
+
+	switch (vm_srb->srb_status) {
+	case SRB_STATUS_ERROR:
+		/*
+		 * If there is an error; offline the device since all
+		 * error recovery strategies would have already been
+		 * deployed on the host side. However, if the command
+		 * were a pass-through command deal with it appropriately.
+		 */
+		switch (scmnd->cmnd[0]) {
+		case ATA_16:
+		case ATA_12:
+			set_host_byte(scmnd, DID_PASSTHROUGH);
+			break;
+		default:
+			set_host_byte(scmnd, DID_TARGET_FAILURE);
+		}
+		break;
+	case SRB_STATUS_INVALID_LUN:
+		do_work = true;
+		process_err_fn = storvsc_remove_lun;
+		break;
+	case (SRB_STATUS_ABORTED | SRB_STATUS_AUTOSENSE_VALID):
+		if ((asc == 0x2a) && (ascq == 0x9)) {
+			do_work = true;
+			process_err_fn = storvsc_device_scan;
+			/*
+			 * Retry the I/O that trigerred this.
+			 */
+			set_host_byte(scmnd, DID_REQUEUE);
+		}
+		break;
+	}
+
+	if (!do_work)
+		return;
+
+	/*
+	 * We need to schedule work to process this error; schedule it.
+	 */
+	wrk = kmalloc(sizeof(struct storvsc_scan_work), GFP_ATOMIC);
+	if (!wrk) {
+		set_host_byte(scmnd, DID_TARGET_FAILURE);
+		return;
+	}
+
+	wrk->host = host;
+	wrk->lun = vm_srb->lun;
+	INIT_WORK(&wrk->work, process_err_fn);
+	schedule_work(&wrk->work);
+}
+
 
 static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request)
 {
@@ -768,8 +849,13 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request)
 	void (*scsi_done_fn)(struct scsi_cmnd *);
 	struct scsi_sense_hdr sense_hdr;
 	struct vmscsi_request *vm_srb;
-	struct storvsc_scan_work *wrk;
 	struct stor_mem_pools *memp = scmnd->device->hostdata;
+	struct Scsi_Host *host;
+	struct storvsc_device *stor_dev;
+	struct hv_device *dev = host_dev->dev;
+
+	stor_dev = get_in_stor_device(dev);
+	host = stor_dev->host;
 
 	vm_srb = &cmd_request->vstor_packet.vm_srb;
 	if (cmd_request->bounce_sgl_count) {
@@ -782,55 +868,18 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request)
 					cmd_request->bounce_sgl_count);
 	}
 
-	/*
-	 * If there is an error; offline the device since all
-	 * error recovery strategies would have already been
-	 * deployed on the host side. However, if the command
-	 * were a pass-through command deal with it appropriately.
-	 */
 	scmnd->result = vm_srb->scsi_status;
 
-	if (vm_srb->srb_status == SRB_STATUS_ERROR) {
-		switch (scmnd->cmnd[0]) {
-		case ATA_16:
-		case ATA_12:
-			set_host_byte(scmnd, DID_PASSTHROUGH);
-			break;
-		default:
-			set_host_byte(scmnd, DID_TARGET_FAILURE);
-		}
-	}
-
-
-	/*
-	 * If the LUN is invalid; remove the device.
-	 */
-	if (vm_srb->srb_status == SRB_STATUS_INVALID_LUN) {
-		struct storvsc_device *stor_dev;
-		struct hv_device *dev = host_dev->dev;
-		struct Scsi_Host *host;
-
-		stor_dev = get_in_stor_device(dev);
-		host = stor_dev->host;
-
-		wrk = kmalloc(sizeof(struct storvsc_scan_work),
-				GFP_ATOMIC);
-		if (!wrk) {
-			scmnd->result = DID_TARGET_FAILURE << 16;
-		} else {
-			wrk->host = host;
-			wrk->lun = vm_srb->lun;
-			INIT_WORK(&wrk->work, storvsc_remove_lun);
-			schedule_work(&wrk->work);
-		}
-	}
-
 	if (scmnd->result) {
 		if (scsi_normalize_sense(scmnd->sense_buffer,
 				SCSI_SENSE_BUFFERSIZE, &sense_hdr))
 			scsi_print_sense_hdr("storvsc", &sense_hdr);
 	}
 
+	if (vm_srb->srb_status != SRB_STATUS_SUCCESS)
+		storvsc_handle_error(vm_srb, scmnd, host, sense_hdr.asc,
+					 sense_hdr.ascq);
+
 	scsi_set_resid(scmnd,
 		cmd_request->data_buffer.len -
 		vm_srb->data_transfer_length);
@@ -1155,6 +1204,8 @@ static int storvsc_device_configure(struct scsi_device *sdevice)
 
 	blk_queue_bounce_limit(sdevice->request_queue, BLK_BOUNCE_ANY);
 
+	sdevice->no_write_same = 1;
+
 	return 0;
 }
 
@@ -1237,6 +1288,8 @@ static bool storvsc_scsi_cmd_ok(struct scsi_cmnd *scmnd)
 	u8 scsi_op = scmnd->cmnd[0];
 
 	switch (scsi_op) {
+	/* the host does not handle WRITE_SAME, log accident usage */
+	case WRITE_SAME:
 	/*
 	 * smartd sends this command and the host does not handle
 	 * this. So, don't send it.
diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig
index 8f27f9d6f91d..0371047c5922 100644
--- a/drivers/scsi/ufs/Kconfig
+++ b/drivers/scsi/ufs/Kconfig
@@ -2,48 +2,58 @@
 # Kernel configuration file for the UFS Host Controller
 #
 # This code is based on drivers/scsi/ufs/Kconfig
-# Copyright (C) 2011  Samsung Samsung India Software Operations
+# Copyright (C) 2011-2013 Samsung India Software Operations
+#
+# Authors:
+#	Santosh Yaraganavi <santosh.sy@samsung.com>
+#	Vinayak Holikatti <h.vinayak@samsung.com>
 #
-# Santosh Yaraganavi <santosh.sy@samsung.com>
-# Vinayak Holikatti <h.vinayak@samsung.com>
-
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
-
+# See the COPYING file in the top-level directory or visit
+# <http://www.gnu.org/licenses/gpl-2.0.html>
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
+#
+# This program is provided "AS IS" and "WITH ALL FAULTS" and
+# without warranty of any kind. You are solely responsible for
+# determining the appropriateness of using and distributing
+# the program and assume all risks associated with your exercise
+# of rights with respect to the program, including but not limited
+# to infringement of third party rights, the risks and costs of
+# program errors, damage to or loss of data, programs or equipment,
+# and unavailability or interruption of operations. Under no
+# circumstances will the contributor of this Program be liable for
+# any damages of any kind arising from your use or distribution of
+# this program.
 
-# NO WARRANTY
-# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
-# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
-# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
-# solely responsible for determining the appropriateness of using and
-# distributing the Program and assumes all risks associated with its
-# exercise of rights under this Agreement, including but not limited to
-# the risks and costs of program errors, damage to or loss of data,
-# programs or equipment, and unavailability or interruption of operations.
-
-# DISCLAIMER OF LIABILITY
-# NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
-# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
-# USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
-# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES
+config SCSI_UFSHCD
+	tristate "Universal Flash Storage Controller Driver Core"
+	depends on SCSI
+	---help---
+	This selects the support for UFS devices in Linux, say Y and make
+	  sure that you know the name of your UFS host adapter (the card
+	  inside your computer that "speaks" the UFS protocol, also
+	  called UFS Host Controller), because you will be asked for it.
+	  The module will be called ufshcd.
 
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
-# USA.
+	  To compile this driver as a module, choose M here and read
+	  <file:Documentation/scsi/ufs.txt>.
+	  However, do not compile this as a module if your root file system
+	  (the one containing the directory /) is located on a UFS device.
 
-config SCSI_UFSHCD
-	tristate "Universal Flash Storage host controller driver"
-	depends on PCI && SCSI
+config SCSI_UFSHCD_PCI
+	tristate "PCI bus based UFS Controller support"
+	depends on SCSI_UFSHCD && PCI
 	---help---
-	This is a generic driver which supports PCIe UFS Host controllers.
+	This selects the PCI UFS Host Controller Interface. Select this if
+	you have UFS Host Controller with PCI Interface.
+
+	  If you have a controller with this interface, say Y or M here.
+
+	  If unsure, say N.
diff --git a/drivers/scsi/ufs/Makefile b/drivers/scsi/ufs/Makefile
index adf7895a6a91..9eda0dfbd6df 100644
--- a/drivers/scsi/ufs/Makefile
+++ b/drivers/scsi/ufs/Makefile
@@ -1,2 +1,3 @@
 # UFSHCD makefile
 obj-$(CONFIG_SCSI_UFSHCD) += ufshcd.o
+obj-$(CONFIG_SCSI_UFSHCD_PCI) += ufshcd-pci.o
diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h
index b207529f8d54..139bc0647b41 100644
--- a/drivers/scsi/ufs/ufs.h
+++ b/drivers/scsi/ufs/ufs.h
@@ -2,45 +2,35 @@
  * Universal Flash Storage Host controller driver
  *
  * This code is based on drivers/scsi/ufs/ufs.h
- * Copyright (C) 2011-2012 Samsung India Software Operations
+ * Copyright (C) 2011-2013 Samsung India Software Operations
  *
- * Santosh Yaraganavi <santosh.sy@samsung.com>
- * Vinayak Holikatti <h.vinayak@samsung.com>
+ * Authors:
+ *	Santosh Yaraganavi <santosh.sy@samsung.com>
+ *	Vinayak Holikatti <h.vinayak@samsung.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version 2
  * of the License, or (at your option) any later version.
+ * See the COPYING file in the top-level directory or visit
+ * <http://www.gnu.org/licenses/gpl-2.0.html>
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
- * NO WARRANTY
- * THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
- * LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
- * solely responsible for determining the appropriateness of using and
- * distributing the Program and assumes all risks associated with its
- * exercise of rights under this Agreement, including but not limited to
- * the risks and costs of program errors, damage to or loss of data,
- * programs or equipment, and unavailability or interruption of operations.
-
- * DISCLAIMER OF LIABILITY
- * NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
- * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
- * HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES
-
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
- * USA.
+ * This program is provided "AS IS" and "WITH ALL FAULTS" and
+ * without warranty of any kind. You are solely responsible for
+ * determining the appropriateness of using and distributing
+ * the program and assume all risks associated with your exercise
+ * of rights with respect to the program, including but not limited
+ * to infringement of third party rights, the risks and costs of
+ * program errors, damage to or loss of data, programs or equipment,
+ * and unavailability or interruption of operations. Under no
+ * circumstances will the contributor of this Program be liable for
+ * any damages of any kind arising from your use or distribution of
+ * this program.
  */
 
 #ifndef _UFS_H
diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c
new file mode 100644
index 000000000000..5cb1d75f5868
--- /dev/null
+++ b/drivers/scsi/ufs/ufshcd-pci.c
@@ -0,0 +1,211 @@
+/*
+ * Universal Flash Storage Host controller PCI glue driver
+ *
+ * This code is based on drivers/scsi/ufs/ufshcd-pci.c
+ * Copyright (C) 2011-2013 Samsung India Software Operations
+ *
+ * Authors:
+ *	Santosh Yaraganavi <santosh.sy@samsung.com>
+ *	Vinayak Holikatti <h.vinayak@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * See the COPYING file in the top-level directory or visit
+ * <http://www.gnu.org/licenses/gpl-2.0.html>
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This program is provided "AS IS" and "WITH ALL FAULTS" and
+ * without warranty of any kind. You are solely responsible for
+ * determining the appropriateness of using and distributing
+ * the program and assume all risks associated with your exercise
+ * of rights with respect to the program, including but not limited
+ * to infringement of third party rights, the risks and costs of
+ * program errors, damage to or loss of data, programs or equipment,
+ * and unavailability or interruption of operations. Under no
+ * circumstances will the contributor of this Program be liable for
+ * any damages of any kind arising from your use or distribution of
+ * this program.
+ */
+
+#include "ufshcd.h"
+#include <linux/pci.h>
+
+#ifdef CONFIG_PM
+/**
+ * ufshcd_pci_suspend - suspend power management function
+ * @pdev: pointer to PCI device handle
+ * @state: power state
+ *
+ * Returns -ENOSYS
+ */
+static int ufshcd_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	/*
+	 * TODO:
+	 * 1. Call ufshcd_suspend
+	 * 2. Do bus specific power management
+	 */
+
+	return -ENOSYS;
+}
+
+/**
+ * ufshcd_pci_resume - resume power management function
+ * @pdev: pointer to PCI device handle
+ *
+ * Returns -ENOSYS
+ */
+static int ufshcd_pci_resume(struct pci_dev *pdev)
+{
+	/*
+	 * TODO:
+	 * 1. Call ufshcd_resume.
+	 * 2. Do bus specific wake up
+	 */
+
+	return -ENOSYS;
+}
+#endif /* CONFIG_PM */
+
+/**
+ * ufshcd_pci_shutdown - main function to put the controller in reset state
+ * @pdev: pointer to PCI device handle
+ */
+static void ufshcd_pci_shutdown(struct pci_dev *pdev)
+{
+	ufshcd_hba_stop((struct ufs_hba *)pci_get_drvdata(pdev));
+}
+
+/**
+ * ufshcd_pci_remove - de-allocate PCI/SCSI host and host memory space
+ *		data structure memory
+ * @pdev - pointer to PCI handle
+ */
+static void ufshcd_pci_remove(struct pci_dev *pdev)
+{
+	struct ufs_hba *hba = pci_get_drvdata(pdev);
+
+	disable_irq(pdev->irq);
+	free_irq(pdev->irq, hba);
+	ufshcd_remove(hba);
+	pci_release_regions(pdev);
+	pci_set_drvdata(pdev, NULL);
+	pci_clear_master(pdev);
+	pci_disable_device(pdev);
+}
+
+/**
+ * ufshcd_set_dma_mask - Set dma mask based on the controller
+ *			 addressing capability
+ * @pdev: PCI device structure
+ *
+ * Returns 0 for success, non-zero for failure
+ */
+static int ufshcd_set_dma_mask(struct pci_dev *pdev)
+{
+	int err;
+
+	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))
+		&& !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)))
+		return 0;
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (!err)
+		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+	return err;
+}
+
+/**
+ * ufshcd_pci_probe - probe routine of the driver
+ * @pdev: pointer to PCI device handle
+ * @id: PCI device id
+ *
+ * Returns 0 on success, non-zero value on failure
+ */
+static int
+ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct ufs_hba *hba;
+	void __iomem *mmio_base;
+	int err;
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "pci_enable_device failed\n");
+		goto out_error;
+	}
+
+	pci_set_master(pdev);
+
+
+	err = pci_request_regions(pdev, UFSHCD);
+	if (err < 0) {
+		dev_err(&pdev->dev, "request regions failed\n");
+		goto out_disable;
+	}
+
+	mmio_base = pci_ioremap_bar(pdev, 0);
+	if (!mmio_base) {
+		dev_err(&pdev->dev, "memory map failed\n");
+		err = -ENOMEM;
+		goto out_release_regions;
+	}
+
+	err = ufshcd_set_dma_mask(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "set dma mask failed\n");
+		goto out_iounmap;
+	}
+
+	err = ufshcd_init(&pdev->dev, &hba, mmio_base, pdev->irq);
+	if (err) {
+		dev_err(&pdev->dev, "Initialization failed\n");
+		goto out_iounmap;
+	}
+
+	pci_set_drvdata(pdev, hba);
+
+	return 0;
+
+out_iounmap:
+	iounmap(mmio_base);
+out_release_regions:
+	pci_release_regions(pdev);
+out_disable:
+	pci_clear_master(pdev);
+	pci_disable_device(pdev);
+out_error:
+	return err;
+}
+
+static DEFINE_PCI_DEVICE_TABLE(ufshcd_pci_tbl) = {
+	{ PCI_VENDOR_ID_SAMSUNG, 0xC00C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
+	{ }	/* terminate list */
+};
+
+MODULE_DEVICE_TABLE(pci, ufshcd_pci_tbl);
+
+static struct pci_driver ufshcd_pci_driver = {
+	.name = UFSHCD,
+	.id_table = ufshcd_pci_tbl,
+	.probe = ufshcd_pci_probe,
+	.remove = ufshcd_pci_remove,
+	.shutdown = ufshcd_pci_shutdown,
+#ifdef CONFIG_PM
+	.suspend = ufshcd_pci_suspend,
+	.resume = ufshcd_pci_resume,
+#endif
+};
+
+module_pci_driver(ufshcd_pci_driver);
+
+MODULE_AUTHOR("Santosh Yaragnavi <santosh.sy@samsung.com>");
+MODULE_AUTHOR("Vinayak Holikatti <h.vinayak@samsung.com>");
+MODULE_DESCRIPTION("UFS host controller PCI glue driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(UFSHCD_DRIVER_VERSION);
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 91a4046ca9ba..60fd40c4e4c2 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -1,77 +1,39 @@
 /*
- * Universal Flash Storage Host controller driver
+ * Universal Flash Storage Host controller driver Core
  *
  * This code is based on drivers/scsi/ufs/ufshcd.c
- * Copyright (C) 2011-2012 Samsung India Software Operations
+ * Copyright (C) 2011-2013 Samsung India Software Operations
  *
- * Santosh Yaraganavi <santosh.sy@samsung.com>
- * Vinayak Holikatti <h.vinayak@samsung.com>
+ * Authors:
+ *	Santosh Yaraganavi <santosh.sy@samsung.com>
+ *	Vinayak Holikatti <h.vinayak@samsung.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version 2
  * of the License, or (at your option) any later version.
+ * See the COPYING file in the top-level directory or visit
+ * <http://www.gnu.org/licenses/gpl-2.0.html>
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
- * NO WARRANTY
- * THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
- * LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
- * solely responsible for determining the appropriateness of using and
- * distributing the Program and assumes all risks associated with its
- * exercise of rights under this Agreement, including but not limited to
- * the risks and costs of program errors, damage to or loss of data,
- * programs or equipment, and unavailability or interruption of operations.
-
- * DISCLAIMER OF LIABILITY
- * NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
- * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
- * HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES
-
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
- * USA.
+ * This program is provided "AS IS" and "WITH ALL FAULTS" and
+ * without warranty of any kind. You are solely responsible for
+ * determining the appropriateness of using and distributing
+ * the program and assume all risks associated with your exercise
+ * of rights with respect to the program, including but not limited
+ * to infringement of third party rights, the risks and costs of
+ * program errors, damage to or loss of data, programs or equipment,
+ * and unavailability or interruption of operations. Under no
+ * circumstances will the contributor of this Program be liable for
+ * any damages of any kind arising from your use or distribution of
+ * this program.
  */
 
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/workqueue.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/wait.h>
-#include <linux/bitops.h>
-
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-#include <scsi/scsi.h>
-#include <scsi/scsi_cmnd.h>
-#include <scsi/scsi_host.h>
-#include <scsi/scsi_tcq.h>
-#include <scsi/scsi_dbg.h>
-#include <scsi/scsi_eh.h>
-
-#include "ufs.h"
-#include "ufshci.h"
-
-#define UFSHCD "ufshcd"
-#define UFSHCD_DRIVER_VERSION "0.1"
+#include "ufshcd.h"
 
 enum {
 	UFSHCD_MAX_CHANNEL	= 0,
@@ -102,121 +64,6 @@ enum {
 };
 
 /**
- * struct uic_command - UIC command structure
- * @command: UIC command
- * @argument1: UIC command argument 1
- * @argument2: UIC command argument 2
- * @argument3: UIC command argument 3
- * @cmd_active: Indicate if UIC command is outstanding
- * @result: UIC command result
- */
-struct uic_command {
-	u32 command;
-	u32 argument1;
-	u32 argument2;
-	u32 argument3;
-	int cmd_active;
-	int result;
-};
-
-/**
- * struct ufs_hba - per adapter private structure
- * @mmio_base: UFSHCI base register address
- * @ucdl_base_addr: UFS Command Descriptor base address
- * @utrdl_base_addr: UTP Transfer Request Descriptor base address
- * @utmrdl_base_addr: UTP Task Management Descriptor base address
- * @ucdl_dma_addr: UFS Command Descriptor DMA address
- * @utrdl_dma_addr: UTRDL DMA address
- * @utmrdl_dma_addr: UTMRDL DMA address
- * @host: Scsi_Host instance of the driver
- * @pdev: PCI device handle
- * @lrb: local reference block
- * @outstanding_tasks: Bits representing outstanding task requests
- * @outstanding_reqs: Bits representing outstanding transfer requests
- * @capabilities: UFS Controller Capabilities
- * @nutrs: Transfer Request Queue depth supported by controller
- * @nutmrs: Task Management Queue depth supported by controller
- * @active_uic_cmd: handle of active UIC command
- * @ufshcd_tm_wait_queue: wait queue for task management
- * @tm_condition: condition variable for task management
- * @ufshcd_state: UFSHCD states
- * @int_enable_mask: Interrupt Mask Bits
- * @uic_workq: Work queue for UIC completion handling
- * @feh_workq: Work queue for fatal controller error handling
- * @errors: HBA errors
- */
-struct ufs_hba {
-	void __iomem *mmio_base;
-
-	/* Virtual memory reference */
-	struct utp_transfer_cmd_desc *ucdl_base_addr;
-	struct utp_transfer_req_desc *utrdl_base_addr;
-	struct utp_task_req_desc *utmrdl_base_addr;
-
-	/* DMA memory reference */
-	dma_addr_t ucdl_dma_addr;
-	dma_addr_t utrdl_dma_addr;
-	dma_addr_t utmrdl_dma_addr;
-
-	struct Scsi_Host *host;
-	struct pci_dev *pdev;
-
-	struct ufshcd_lrb *lrb;
-
-	unsigned long outstanding_tasks;
-	unsigned long outstanding_reqs;
-
-	u32 capabilities;
-	int nutrs;
-	int nutmrs;
-	u32 ufs_version;
-
-	struct uic_command active_uic_cmd;
-	wait_queue_head_t ufshcd_tm_wait_queue;
-	unsigned long tm_condition;
-
-	u32 ufshcd_state;
-	u32 int_enable_mask;
-
-	/* Work Queues */
-	struct work_struct uic_workq;
-	struct work_struct feh_workq;
-
-	/* HBA Errors */
-	u32 errors;
-};
-
-/**
- * struct ufshcd_lrb - local reference block
- * @utr_descriptor_ptr: UTRD address of the command
- * @ucd_cmd_ptr: UCD address of the command
- * @ucd_rsp_ptr: Response UPIU address for this command
- * @ucd_prdt_ptr: PRDT address of the command
- * @cmd: pointer to SCSI command
- * @sense_buffer: pointer to sense buffer address of the SCSI command
- * @sense_bufflen: Length of the sense buffer
- * @scsi_status: SCSI status of the command
- * @command_type: SCSI, UFS, Query.
- * @task_tag: Task tag of the command
- * @lun: LUN of the command
- */
-struct ufshcd_lrb {
-	struct utp_transfer_req_desc *utr_descriptor_ptr;
-	struct utp_upiu_cmd *ucd_cmd_ptr;
-	struct utp_upiu_rsp *ucd_rsp_ptr;
-	struct ufshcd_sg_entry *ucd_prdt_ptr;
-
-	struct scsi_cmnd *cmd;
-	u8 *sense_buffer;
-	unsigned int sense_bufflen;
-	int scsi_status;
-
-	int command_type;
-	int task_tag;
-	unsigned int lun;
-};
-
-/**
  * ufshcd_get_ufs_version - Get the UFS version supported by the HBA
  * @hba - Pointer to adapter instance
  *
@@ -335,21 +182,21 @@ static inline void ufshcd_free_hba_memory(struct ufs_hba *hba)
 
 	if (hba->utmrdl_base_addr) {
 		utmrdl_size = sizeof(struct utp_task_req_desc) * hba->nutmrs;
-		dma_free_coherent(&hba->pdev->dev, utmrdl_size,
+		dma_free_coherent(hba->dev, utmrdl_size,
 				  hba->utmrdl_base_addr, hba->utmrdl_dma_addr);
 	}
 
 	if (hba->utrdl_base_addr) {
 		utrdl_size =
 		(sizeof(struct utp_transfer_req_desc) * hba->nutrs);
-		dma_free_coherent(&hba->pdev->dev, utrdl_size,
+		dma_free_coherent(hba->dev, utrdl_size,
 				  hba->utrdl_base_addr, hba->utrdl_dma_addr);
 	}
 
 	if (hba->ucdl_base_addr) {
 		ucdl_size =
 		(sizeof(struct utp_transfer_cmd_desc) * hba->nutrs);
-		dma_free_coherent(&hba->pdev->dev, ucdl_size,
+		dma_free_coherent(hba->dev, ucdl_size,
 				  hba->ucdl_base_addr, hba->ucdl_dma_addr);
 	}
 }
@@ -429,15 +276,6 @@ static void ufshcd_enable_run_stop_reg(struct ufs_hba *hba)
 }
 
 /**
- * ufshcd_hba_stop - Send controller to reset state
- * @hba: per adapter instance
- */
-static inline void ufshcd_hba_stop(struct ufs_hba *hba)
-{
-	writel(CONTROLLER_DISABLE, (hba->mmio_base + REG_CONTROLLER_ENABLE));
-}
-
-/**
  * ufshcd_hba_start - Start controller initialization sequence
  * @hba: per adapter instance
  */
@@ -724,7 +562,7 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba)
 
 	/* Allocate memory for UTP command descriptors */
 	ucdl_size = (sizeof(struct utp_transfer_cmd_desc) * hba->nutrs);
-	hba->ucdl_base_addr = dma_alloc_coherent(&hba->pdev->dev,
+	hba->ucdl_base_addr = dma_alloc_coherent(hba->dev,
 						 ucdl_size,
 						 &hba->ucdl_dma_addr,
 						 GFP_KERNEL);
@@ -737,7 +575,7 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba)
 	 */
 	if (!hba->ucdl_base_addr ||
 	    WARN_ON(hba->ucdl_dma_addr & (PAGE_SIZE - 1))) {
-		dev_err(&hba->pdev->dev,
+		dev_err(hba->dev,
 			"Command Descriptor Memory allocation failed\n");
 		goto out;
 	}
@@ -747,13 +585,13 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba)
 	 * UFSHCI requires 1024 byte alignment of UTRD
 	 */
 	utrdl_size = (sizeof(struct utp_transfer_req_desc) * hba->nutrs);
-	hba->utrdl_base_addr = dma_alloc_coherent(&hba->pdev->dev,
+	hba->utrdl_base_addr = dma_alloc_coherent(hba->dev,
 						  utrdl_size,
 						  &hba->utrdl_dma_addr,
 						  GFP_KERNEL);
 	if (!hba->utrdl_base_addr ||
 	    WARN_ON(hba->utrdl_dma_addr & (PAGE_SIZE - 1))) {
-		dev_err(&hba->pdev->dev,
+		dev_err(hba->dev,
 			"Transfer Descriptor Memory allocation failed\n");
 		goto out;
 	}
@@ -763,13 +601,13 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba)
 	 * UFSHCI requires 1024 byte alignment of UTMRD
 	 */
 	utmrdl_size = sizeof(struct utp_task_req_desc) * hba->nutmrs;
-	hba->utmrdl_base_addr = dma_alloc_coherent(&hba->pdev->dev,
+	hba->utmrdl_base_addr = dma_alloc_coherent(hba->dev,
 						   utmrdl_size,
 						   &hba->utmrdl_dma_addr,
 						   GFP_KERNEL);
 	if (!hba->utmrdl_base_addr ||
 	    WARN_ON(hba->utmrdl_dma_addr & (PAGE_SIZE - 1))) {
-		dev_err(&hba->pdev->dev,
+		dev_err(hba->dev,
 		"Task Management Descriptor Memory allocation failed\n");
 		goto out;
 	}
@@ -777,7 +615,7 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba)
 	/* Allocate memory for local reference block */
 	hba->lrb = kcalloc(hba->nutrs, sizeof(struct ufshcd_lrb), GFP_KERNEL);
 	if (!hba->lrb) {
-		dev_err(&hba->pdev->dev, "LRB Memory allocation failed\n");
+		dev_err(hba->dev, "LRB Memory allocation failed\n");
 		goto out;
 	}
 	return 0;
@@ -867,7 +705,7 @@ static int ufshcd_dme_link_startup(struct ufs_hba *hba)
 	/* check if controller is ready to accept UIC commands */
 	if (((readl(hba->mmio_base + REG_CONTROLLER_STATUS)) &
 	    UIC_COMMAND_READY) == 0x0) {
-		dev_err(&hba->pdev->dev,
+		dev_err(hba->dev,
 			"Controller not ready"
 			" to accept UIC commands\n");
 		return -EIO;
@@ -912,7 +750,7 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba)
 	/* check if device present */
 	reg = readl((hba->mmio_base + REG_CONTROLLER_STATUS));
 	if (!ufshcd_is_device_present(reg)) {
-		dev_err(&hba->pdev->dev, "cc: Device not present\n");
+		dev_err(hba->dev, "cc: Device not present\n");
 		err = -ENXIO;
 		goto out;
 	}
@@ -924,7 +762,7 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba)
 	if (!(ufshcd_get_lists_status(reg))) {
 		ufshcd_enable_run_stop_reg(hba);
 	} else {
-		dev_err(&hba->pdev->dev,
+		dev_err(hba->dev,
 			"Host controller not ready to process requests");
 		err = -EIO;
 		goto out;
@@ -1005,7 +843,7 @@ static int ufshcd_hba_enable(struct ufs_hba *hba)
 		if (retry) {
 			retry--;
 		} else {
-			dev_err(&hba->pdev->dev,
+			dev_err(hba->dev,
 				"Controller enable failed\n");
 			return -EIO;
 		}
@@ -1084,7 +922,7 @@ static int ufshcd_do_reset(struct ufs_hba *hba)
 
 	/* start the initialization process */
 	if (ufshcd_initialize_hba(hba)) {
-		dev_err(&hba->pdev->dev,
+		dev_err(hba->dev,
 			"Reset: Controller initialization failed\n");
 		return FAILED;
 	}
@@ -1167,7 +1005,7 @@ static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index)
 			task_result = SUCCESS;
 	} else {
 		task_result = FAILED;
-		dev_err(&hba->pdev->dev,
+		dev_err(hba->dev,
 			"trc: Invalid ocs = %x\n", ocs_value);
 	}
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
@@ -1281,7 +1119,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
 		/* check if the returned transfer response is valid */
 		result = ufshcd_is_valid_req_rsp(lrbp->ucd_rsp_ptr);
 		if (result) {
-			dev_err(&hba->pdev->dev,
+			dev_err(hba->dev,
 				"Invalid response = %x\n", result);
 			break;
 		}
@@ -1310,7 +1148,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
 	case OCS_FATAL_ERROR:
 	default:
 		result |= DID_ERROR << 16;
-		dev_err(&hba->pdev->dev,
+		dev_err(hba->dev,
 		"OCS error from controller = %x\n", ocs);
 		break;
 	} /* end of switch */
@@ -1374,7 +1212,7 @@ static void ufshcd_uic_cc_handler (struct work_struct *work)
 	    !(ufshcd_get_uic_cmd_result(hba))) {
 
 		if (ufshcd_make_hba_operational(hba))
-			dev_err(&hba->pdev->dev,
+			dev_err(hba->dev,
 				"cc: hba not operational state\n");
 		return;
 	}
@@ -1509,7 +1347,7 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 	free_slot = ufshcd_get_tm_free_slot(hba);
 	if (free_slot >= hba->nutmrs) {
 		spin_unlock_irqrestore(host->host_lock, flags);
-		dev_err(&hba->pdev->dev, "Task management queue full\n");
+		dev_err(hba->dev, "Task management queue full\n");
 		err = FAILED;
 		goto out;
 	}
@@ -1552,7 +1390,7 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 					 &hba->tm_condition) != 0),
 					 60 * HZ);
 	if (!err) {
-		dev_err(&hba->pdev->dev,
+		dev_err(hba->dev,
 			"Task management command timed-out\n");
 		err = FAILED;
 		goto out;
@@ -1688,23 +1526,13 @@ static struct scsi_host_template ufshcd_driver_template = {
 };
 
 /**
- * ufshcd_shutdown - main function to put the controller in reset state
- * @pdev: pointer to PCI device handle
- */
-static void ufshcd_shutdown(struct pci_dev *pdev)
-{
-	ufshcd_hba_stop((struct ufs_hba *)pci_get_drvdata(pdev));
-}
-
-#ifdef CONFIG_PM
-/**
  * ufshcd_suspend - suspend power management function
- * @pdev: pointer to PCI device handle
+ * @hba: per adapter instance
  * @state: power state
  *
  * Returns -ENOSYS
  */
-static int ufshcd_suspend(struct pci_dev *pdev, pm_message_t state)
+int ufshcd_suspend(struct ufs_hba *hba, pm_message_t state)
 {
 	/*
 	 * TODO:
@@ -1717,14 +1545,15 @@ static int ufshcd_suspend(struct pci_dev *pdev, pm_message_t state)
 
 	return -ENOSYS;
 }
+EXPORT_SYMBOL_GPL(ufshcd_suspend);
 
 /**
  * ufshcd_resume - resume power management function
- * @pdev: pointer to PCI device handle
+ * @hba: per adapter instance
  *
  * Returns -ENOSYS
  */
-static int ufshcd_resume(struct pci_dev *pdev)
+int ufshcd_resume(struct ufs_hba *hba)
 {
 	/*
 	 * TODO:
@@ -1737,7 +1566,7 @@ static int ufshcd_resume(struct pci_dev *pdev)
 
 	return -ENOSYS;
 }
-#endif /* CONFIG_PM */
+EXPORT_SYMBOL_GPL(ufshcd_resume);
 
 /**
  * ufshcd_hba_free - free allocated memory for
@@ -1748,107 +1577,67 @@ static void ufshcd_hba_free(struct ufs_hba *hba)
 {
 	iounmap(hba->mmio_base);
 	ufshcd_free_hba_memory(hba);
-	pci_release_regions(hba->pdev);
 }
 
 /**
- * ufshcd_remove - de-allocate PCI/SCSI host and host memory space
+ * ufshcd_remove - de-allocate SCSI host and host memory space
  *		data structure memory
- * @pdev - pointer to PCI handle
+ * @hba - per adapter instance
  */
-static void ufshcd_remove(struct pci_dev *pdev)
+void ufshcd_remove(struct ufs_hba *hba)
 {
-	struct ufs_hba *hba = pci_get_drvdata(pdev);
-
 	/* disable interrupts */
 	ufshcd_int_config(hba, UFSHCD_INT_DISABLE);
-	free_irq(pdev->irq, hba);
 
 	ufshcd_hba_stop(hba);
 	ufshcd_hba_free(hba);
 
 	scsi_remove_host(hba->host);
 	scsi_host_put(hba->host);
-	pci_set_drvdata(pdev, NULL);
-	pci_clear_master(pdev);
-	pci_disable_device(pdev);
-}
-
-/**
- * ufshcd_set_dma_mask - Set dma mask based on the controller
- *			 addressing capability
- * @pdev: PCI device structure
- *
- * Returns 0 for success, non-zero for failure
- */
-static int ufshcd_set_dma_mask(struct ufs_hba *hba)
-{
-	int err;
-	u64 dma_mask;
-
-	/*
-	 * If controller supports 64 bit addressing mode, then set the DMA
-	 * mask to 64-bit, else set the DMA mask to 32-bit
-	 */
-	if (hba->capabilities & MASK_64_ADDRESSING_SUPPORT)
-		dma_mask = DMA_BIT_MASK(64);
-	else
-		dma_mask = DMA_BIT_MASK(32);
-
-	err = pci_set_dma_mask(hba->pdev, dma_mask);
-	if (err)
-		return err;
-
-	err = pci_set_consistent_dma_mask(hba->pdev, dma_mask);
-
-	return err;
 }
+EXPORT_SYMBOL_GPL(ufshcd_remove);
 
 /**
- * ufshcd_probe - probe routine of the driver
- * @pdev: pointer to PCI device handle
- * @id: PCI device id
- *
+ * ufshcd_init - Driver initialization routine
+ * @dev: pointer to device handle
+ * @hba_handle: driver private handle
+ * @mmio_base: base register address
+ * @irq: Interrupt line of device
  * Returns 0 on success, non-zero value on failure
  */
-static int ufshcd_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+int ufshcd_init(struct device *dev, struct ufs_hba **hba_handle,
+		 void __iomem *mmio_base, unsigned int irq)
 {
 	struct Scsi_Host *host;
 	struct ufs_hba *hba;
 	int err;
 
-	err = pci_enable_device(pdev);
-	if (err) {
-		dev_err(&pdev->dev, "pci_enable_device failed\n");
+	if (!dev) {
+		dev_err(dev,
+		"Invalid memory reference for dev is NULL\n");
+		err = -ENODEV;
 		goto out_error;
 	}
 
-	pci_set_master(pdev);
+	if (!mmio_base) {
+		dev_err(dev,
+		"Invalid memory reference for mmio_base is NULL\n");
+		err = -ENODEV;
+		goto out_error;
+	}
 
 	host = scsi_host_alloc(&ufshcd_driver_template,
 				sizeof(struct ufs_hba));
 	if (!host) {
-		dev_err(&pdev->dev, "scsi_host_alloc failed\n");
+		dev_err(dev, "scsi_host_alloc failed\n");
 		err = -ENOMEM;
-		goto out_disable;
+		goto out_error;
 	}
 	hba = shost_priv(host);
-
-	err = pci_request_regions(pdev, UFSHCD);
-	if (err < 0) {
-		dev_err(&pdev->dev, "request regions failed\n");
-		goto out_host_put;
-	}
-
-	hba->mmio_base = pci_ioremap_bar(pdev, 0);
-	if (!hba->mmio_base) {
-		dev_err(&pdev->dev, "memory map failed\n");
-		err = -ENOMEM;
-		goto out_release_regions;
-	}
-
 	hba->host = host;
-	hba->pdev = pdev;
+	hba->dev = dev;
+	hba->mmio_base = mmio_base;
+	hba->irq = irq;
 
 	/* Read capabilities registers */
 	ufshcd_hba_capabilities(hba);
@@ -1856,17 +1645,11 @@ static int ufshcd_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	/* Get UFS version supported by the controller */
 	hba->ufs_version = ufshcd_get_ufs_version(hba);
 
-	err = ufshcd_set_dma_mask(hba);
-	if (err) {
-		dev_err(&pdev->dev, "set dma mask failed\n");
-		goto out_iounmap;
-	}
-
 	/* Allocate memory for host memory space */
 	err = ufshcd_memory_alloc(hba);
 	if (err) {
-		dev_err(&pdev->dev, "Memory allocation failed\n");
-		goto out_iounmap;
+		dev_err(hba->dev, "Memory allocation failed\n");
+		goto out_disable;
 	}
 
 	/* Configure LRB */
@@ -1888,76 +1671,50 @@ static int ufshcd_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	INIT_WORK(&hba->feh_workq, ufshcd_fatal_err_handler);
 
 	/* IRQ registration */
-	err = request_irq(pdev->irq, ufshcd_intr, IRQF_SHARED, UFSHCD, hba);
+	err = request_irq(irq, ufshcd_intr, IRQF_SHARED, UFSHCD, hba);
 	if (err) {
-		dev_err(&pdev->dev, "request irq failed\n");
+		dev_err(hba->dev, "request irq failed\n");
 		goto out_lrb_free;
 	}
 
 	/* Enable SCSI tag mapping */
 	err = scsi_init_shared_tag_map(host, host->can_queue);
 	if (err) {
-		dev_err(&pdev->dev, "init shared queue failed\n");
+		dev_err(hba->dev, "init shared queue failed\n");
 		goto out_free_irq;
 	}
 
-	pci_set_drvdata(pdev, hba);
-
-	err = scsi_add_host(host, &pdev->dev);
+	err = scsi_add_host(host, hba->dev);
 	if (err) {
-		dev_err(&pdev->dev, "scsi_add_host failed\n");
+		dev_err(hba->dev, "scsi_add_host failed\n");
 		goto out_free_irq;
 	}
 
 	/* Initialization routine */
 	err = ufshcd_initialize_hba(hba);
 	if (err) {
-		dev_err(&pdev->dev, "Initialization failed\n");
-		goto out_free_irq;
+		dev_err(hba->dev, "Initialization failed\n");
+		goto out_remove_scsi_host;
 	}
+	*hba_handle = hba;
 
 	return 0;
 
+out_remove_scsi_host:
+	scsi_remove_host(hba->host);
 out_free_irq:
-	free_irq(pdev->irq, hba);
+	free_irq(irq, hba);
 out_lrb_free:
 	ufshcd_free_hba_memory(hba);
-out_iounmap:
-	iounmap(hba->mmio_base);
-out_release_regions:
-	pci_release_regions(pdev);
-out_host_put:
-	scsi_host_put(host);
 out_disable:
-	pci_clear_master(pdev);
-	pci_disable_device(pdev);
+	scsi_host_put(host);
 out_error:
 	return err;
 }
+EXPORT_SYMBOL_GPL(ufshcd_init);
 
-static DEFINE_PCI_DEVICE_TABLE(ufshcd_pci_tbl) = {
-	{ PCI_VENDOR_ID_SAMSUNG, 0xC00C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
-	{ }	/* terminate list */
-};
-
-MODULE_DEVICE_TABLE(pci, ufshcd_pci_tbl);
-
-static struct pci_driver ufshcd_pci_driver = {
-	.name = UFSHCD,
-	.id_table = ufshcd_pci_tbl,
-	.probe = ufshcd_probe,
-	.remove = ufshcd_remove,
-	.shutdown = ufshcd_shutdown,
-#ifdef CONFIG_PM
-	.suspend = ufshcd_suspend,
-	.resume = ufshcd_resume,
-#endif
-};
-
-module_pci_driver(ufshcd_pci_driver);
-
-MODULE_AUTHOR("Santosh Yaragnavi <santosh.sy@samsung.com>, "
-	      "Vinayak Holikatti <h.vinayak@samsung.com>");
-MODULE_DESCRIPTION("Generic UFS host controller driver");
+MODULE_AUTHOR("Santosh Yaragnavi <santosh.sy@samsung.com>");
+MODULE_AUTHOR("Vinayak Holikatti <h.vinayak@samsung.com>");
+MODULE_DESCRIPTION("Generic UFS host controller driver Core");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(UFSHCD_DRIVER_VERSION);
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
new file mode 100644
index 000000000000..6b99a42f5819
--- /dev/null
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -0,0 +1,202 @@
+/*
+ * Universal Flash Storage Host controller driver
+ *
+ * This code is based on drivers/scsi/ufs/ufshcd.h
+ * Copyright (C) 2011-2013 Samsung India Software Operations
+ *
+ * Authors:
+ *	Santosh Yaraganavi <santosh.sy@samsung.com>
+ *	Vinayak Holikatti <h.vinayak@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * See the COPYING file in the top-level directory or visit
+ * <http://www.gnu.org/licenses/gpl-2.0.html>
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This program is provided "AS IS" and "WITH ALL FAULTS" and
+ * without warranty of any kind. You are solely responsible for
+ * determining the appropriateness of using and distributing
+ * the program and assume all risks associated with your exercise
+ * of rights with respect to the program, including but not limited
+ * to infringement of third party rights, the risks and costs of
+ * program errors, damage to or loss of data, programs or equipment,
+ * and unavailability or interruption of operations. Under no
+ * circumstances will the contributor of this Program be liable for
+ * any damages of any kind arising from your use or distribution of
+ * this program.
+ */
+
+#ifndef _UFSHCD_H
+#define _UFSHCD_H
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <linux/bitops.h>
+#include <linux/pm_runtime.h>
+#include <linux/clk.h>
+
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/scsi_dbg.h>
+#include <scsi/scsi_eh.h>
+
+#include "ufs.h"
+#include "ufshci.h"
+
+#define UFSHCD "ufshcd"
+#define UFSHCD_DRIVER_VERSION "0.2"
+
+/**
+ * struct uic_command - UIC command structure
+ * @command: UIC command
+ * @argument1: UIC command argument 1
+ * @argument2: UIC command argument 2
+ * @argument3: UIC command argument 3
+ * @cmd_active: Indicate if UIC command is outstanding
+ * @result: UIC command result
+ */
+struct uic_command {
+	u32 command;
+	u32 argument1;
+	u32 argument2;
+	u32 argument3;
+	int cmd_active;
+	int result;
+};
+
+/**
+ * struct ufshcd_lrb - local reference block
+ * @utr_descriptor_ptr: UTRD address of the command
+ * @ucd_cmd_ptr: UCD address of the command
+ * @ucd_rsp_ptr: Response UPIU address for this command
+ * @ucd_prdt_ptr: PRDT address of the command
+ * @cmd: pointer to SCSI command
+ * @sense_buffer: pointer to sense buffer address of the SCSI command
+ * @sense_bufflen: Length of the sense buffer
+ * @scsi_status: SCSI status of the command
+ * @command_type: SCSI, UFS, Query.
+ * @task_tag: Task tag of the command
+ * @lun: LUN of the command
+ */
+struct ufshcd_lrb {
+	struct utp_transfer_req_desc *utr_descriptor_ptr;
+	struct utp_upiu_cmd *ucd_cmd_ptr;
+	struct utp_upiu_rsp *ucd_rsp_ptr;
+	struct ufshcd_sg_entry *ucd_prdt_ptr;
+
+	struct scsi_cmnd *cmd;
+	u8 *sense_buffer;
+	unsigned int sense_bufflen;
+	int scsi_status;
+
+	int command_type;
+	int task_tag;
+	unsigned int lun;
+};
+
+
+/**
+ * struct ufs_hba - per adapter private structure
+ * @mmio_base: UFSHCI base register address
+ * @ucdl_base_addr: UFS Command Descriptor base address
+ * @utrdl_base_addr: UTP Transfer Request Descriptor base address
+ * @utmrdl_base_addr: UTP Task Management Descriptor base address
+ * @ucdl_dma_addr: UFS Command Descriptor DMA address
+ * @utrdl_dma_addr: UTRDL DMA address
+ * @utmrdl_dma_addr: UTMRDL DMA address
+ * @host: Scsi_Host instance of the driver
+ * @dev: device handle
+ * @lrb: local reference block
+ * @outstanding_tasks: Bits representing outstanding task requests
+ * @outstanding_reqs: Bits representing outstanding transfer requests
+ * @capabilities: UFS Controller Capabilities
+ * @nutrs: Transfer Request Queue depth supported by controller
+ * @nutmrs: Task Management Queue depth supported by controller
+ * @ufs_version: UFS Version to which controller complies
+ * @irq: Irq number of the controller
+ * @active_uic_cmd: handle of active UIC command
+ * @ufshcd_tm_wait_queue: wait queue for task management
+ * @tm_condition: condition variable for task management
+ * @ufshcd_state: UFSHCD states
+ * @int_enable_mask: Interrupt Mask Bits
+ * @uic_workq: Work queue for UIC completion handling
+ * @feh_workq: Work queue for fatal controller error handling
+ * @errors: HBA errors
+ */
+struct ufs_hba {
+	void __iomem *mmio_base;
+
+	/* Virtual memory reference */
+	struct utp_transfer_cmd_desc *ucdl_base_addr;
+	struct utp_transfer_req_desc *utrdl_base_addr;
+	struct utp_task_req_desc *utmrdl_base_addr;
+
+	/* DMA memory reference */
+	dma_addr_t ucdl_dma_addr;
+	dma_addr_t utrdl_dma_addr;
+	dma_addr_t utmrdl_dma_addr;
+
+	struct Scsi_Host *host;
+	struct device *dev;
+
+	struct ufshcd_lrb *lrb;
+
+	unsigned long outstanding_tasks;
+	unsigned long outstanding_reqs;
+
+	u32 capabilities;
+	int nutrs;
+	int nutmrs;
+	u32 ufs_version;
+	unsigned int irq;
+
+	struct uic_command active_uic_cmd;
+	wait_queue_head_t ufshcd_tm_wait_queue;
+	unsigned long tm_condition;
+
+	u32 ufshcd_state;
+	u32 int_enable_mask;
+
+	/* Work Queues */
+	struct work_struct uic_workq;
+	struct work_struct feh_workq;
+
+	/* HBA Errors */
+	u32 errors;
+};
+
+int ufshcd_init(struct device *, struct ufs_hba ** , void __iomem * ,
+			unsigned int);
+void ufshcd_remove(struct ufs_hba *);
+
+/**
+ * ufshcd_hba_stop - Send controller to reset state
+ * @hba: per adapter instance
+ */
+static inline void ufshcd_hba_stop(struct ufs_hba *hba)
+{
+	writel(CONTROLLER_DISABLE, (hba->mmio_base + REG_CONTROLLER_ENABLE));
+}
+
+#endif /* End of Header */
diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h
index 6e3510f71167..0c164847a3ef 100644
--- a/drivers/scsi/ufs/ufshci.h
+++ b/drivers/scsi/ufs/ufshci.h
@@ -2,45 +2,35 @@
  * Universal Flash Storage Host controller driver
  *
  * This code is based on drivers/scsi/ufs/ufshci.h
- * Copyright (C) 2011-2012 Samsung India Software Operations
+ * Copyright (C) 2011-2013 Samsung India Software Operations
  *
- * Santosh Yaraganavi <santosh.sy@samsung.com>
- * Vinayak Holikatti <h.vinayak@samsung.com>
+ * Authors:
+ *	Santosh Yaraganavi <santosh.sy@samsung.com>
+ *	Vinayak Holikatti <h.vinayak@samsung.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version 2
  * of the License, or (at your option) any later version.
+ * See the COPYING file in the top-level directory or visit
+ * <http://www.gnu.org/licenses/gpl-2.0.html>
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
- * NO WARRANTY
- * THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
- * LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
- * solely responsible for determining the appropriateness of using and
- * distributing the Program and assumes all risks associated with its
- * exercise of rights under this Agreement, including but not limited to
- * the risks and costs of program errors, damage to or loss of data,
- * programs or equipment, and unavailability or interruption of operations.
-
- * DISCLAIMER OF LIABILITY
- * NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
- * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
- * HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES
-
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
- * USA.
+ * This program is provided "AS IS" and "WITH ALL FAULTS" and
+ * without warranty of any kind. You are solely responsible for
+ * determining the appropriateness of using and distributing
+ * the program and assume all risks associated with your exercise
+ * of rights with respect to the program, including but not limited
+ * to infringement of third party rights, the risks and costs of
+ * program errors, damage to or loss of data, programs or equipment,
+ * and unavailability or interruption of operations. Under no
+ * circumstances will the contributor of this Program be liable for
+ * any damages of any kind arising from your use or distribution of
+ * this program.
  */
 
 #ifndef _UFSHCI_H
diff --git a/drivers/ssb/driver_chipcommon_pmu.c b/drivers/ssb/driver_chipcommon_pmu.c
index a43415a7fbed..4c0f6d883dd3 100644
--- a/drivers/ssb/driver_chipcommon_pmu.c
+++ b/drivers/ssb/driver_chipcommon_pmu.c
@@ -14,7 +14,7 @@
 #include <linux/delay.h>
 #include <linux/export.h>
 #ifdef CONFIG_BCM47XX
-#include <asm/mach-bcm47xx/nvram.h>
+#include <bcm47xx_nvram.h>
 #endif
 
 #include "ssb_private.h"
@@ -322,7 +322,7 @@ static void ssb_pmu_pll_init(struct ssb_chipcommon *cc)
 	if (bus->bustype == SSB_BUSTYPE_SSB) {
 #ifdef CONFIG_BCM47XX
 		char buf[20];
-		if (nvram_getenv("xtalfreq", buf, sizeof(buf)) >= 0)
+		if (bcm47xx_nvram_getenv("xtalfreq", buf, sizeof(buf)) >= 0)
 			crystalfreq = simple_strtoul(buf, NULL, 0);
 #endif
 	}
diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c
index 538ebe213129..24456a0de6b2 100644
--- a/drivers/staging/android/binder.c
+++ b/drivers/staging/android/binder.c
@@ -2880,7 +2880,6 @@ static int binder_release(struct inode *nodp, struct file *filp)
 
 static void binder_deferred_release(struct binder_proc *proc)
 {
-	struct hlist_node *pos;
 	struct binder_transaction *t;
 	struct rb_node *n;
 	int threads, nodes, incoming_refs, outgoing_refs, buffers, active_transactions, page_count;
@@ -2924,7 +2923,7 @@ static void binder_deferred_release(struct binder_proc *proc)
 			node->local_weak_refs = 0;
 			hlist_add_head(&node->dead_node, &binder_dead_nodes);
 
-			hlist_for_each_entry(ref, pos, &node->refs, node_entry) {
+			hlist_for_each_entry(ref, &node->refs, node_entry) {
 				incoming_refs++;
 				if (ref->death) {
 					death++;
@@ -3156,12 +3155,11 @@ static void print_binder_thread(struct seq_file *m,
 static void print_binder_node(struct seq_file *m, struct binder_node *node)
 {
 	struct binder_ref *ref;
-	struct hlist_node *pos;
 	struct binder_work *w;
 	int count;
 
 	count = 0;
-	hlist_for_each_entry(ref, pos, &node->refs, node_entry)
+	hlist_for_each_entry(ref, &node->refs, node_entry)
 		count++;
 
 	seq_printf(m, "  node %d: u%p c%p hs %d hw %d ls %d lw %d is %d iw %d",
@@ -3171,7 +3169,7 @@ static void print_binder_node(struct seq_file *m, struct binder_node *node)
 		   node->internal_strong_refs, count);
 	if (count) {
 		seq_puts(m, " proc");
-		hlist_for_each_entry(ref, pos, &node->refs, node_entry)
+		hlist_for_each_entry(ref, &node->refs, node_entry)
 			seq_printf(m, " %d", ref->proc->pid);
 	}
 	seq_puts(m, "\n");
@@ -3369,7 +3367,6 @@ static void print_binder_proc_stats(struct seq_file *m,
 static int binder_state_show(struct seq_file *m, void *unused)
 {
 	struct binder_proc *proc;
-	struct hlist_node *pos;
 	struct binder_node *node;
 	int do_lock = !binder_debug_no_lock;
 
@@ -3380,10 +3377,10 @@ static int binder_state_show(struct seq_file *m, void *unused)
 
 	if (!hlist_empty(&binder_dead_nodes))
 		seq_puts(m, "dead nodes:\n");
-	hlist_for_each_entry(node, pos, &binder_dead_nodes, dead_node)
+	hlist_for_each_entry(node, &binder_dead_nodes, dead_node)
 		print_binder_node(m, node);
 
-	hlist_for_each_entry(proc, pos, &binder_procs, proc_node)
+	hlist_for_each_entry(proc, &binder_procs, proc_node)
 		print_binder_proc(m, proc, 1);
 	if (do_lock)
 		binder_unlock(__func__);
@@ -3393,7 +3390,6 @@ static int binder_state_show(struct seq_file *m, void *unused)
 static int binder_stats_show(struct seq_file *m, void *unused)
 {
 	struct binder_proc *proc;
-	struct hlist_node *pos;
 	int do_lock = !binder_debug_no_lock;
 
 	if (do_lock)
@@ -3403,7 +3399,7 @@ static int binder_stats_show(struct seq_file *m, void *unused)
 
 	print_binder_stats(m, "", &binder_stats);
 
-	hlist_for_each_entry(proc, pos, &binder_procs, proc_node)
+	hlist_for_each_entry(proc, &binder_procs, proc_node)
 		print_binder_proc_stats(m, proc);
 	if (do_lock)
 		binder_unlock(__func__);
@@ -3413,14 +3409,13 @@ static int binder_stats_show(struct seq_file *m, void *unused)
 static int binder_transactions_show(struct seq_file *m, void *unused)
 {
 	struct binder_proc *proc;
-	struct hlist_node *pos;
 	int do_lock = !binder_debug_no_lock;
 
 	if (do_lock)
 		binder_lock(__func__);
 
 	seq_puts(m, "binder transactions:\n");
-	hlist_for_each_entry(proc, pos, &binder_procs, proc_node)
+	hlist_for_each_entry(proc, &binder_procs, proc_node)
 		print_binder_proc(m, proc, 0);
 	if (do_lock)
 		binder_unlock(__func__);
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index 195d56d8a1ee..e336b281b847 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -580,7 +580,7 @@ static int do_devinfo_ioctl(struct comedi_device *dev,
 			    struct comedi_devinfo __user *arg,
 			    struct file *file)
 {
-	const unsigned minor = iminor(file->f_dentry->d_inode);
+	const unsigned minor = iminor(file_inode(file));
 	struct comedi_file_info *info = comedi_file_info_from_minor(minor);
 	struct comedi_subdevice *s;
 	struct comedi_devinfo devinfo;
@@ -1615,7 +1615,7 @@ static int do_poll_ioctl(struct comedi_device *dev, unsigned int arg,
 static long comedi_unlocked_ioctl(struct file *file, unsigned int cmd,
 				  unsigned long arg)
 {
-	const unsigned minor = iminor(file->f_dentry->d_inode);
+	const unsigned minor = iminor(file_inode(file));
 	struct comedi_file_info *info = comedi_file_info_from_minor(minor);
 	struct comedi_device *dev = comedi_dev_from_file_info(info);
 	int rc;
@@ -1743,7 +1743,7 @@ static struct vm_operations_struct comedi_vm_ops = {
 
 static int comedi_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	const unsigned minor = iminor(file->f_dentry->d_inode);
+	const unsigned minor = iminor(file_inode(file));
 	struct comedi_file_info *info = comedi_file_info_from_minor(minor);
 	struct comedi_device *dev = comedi_dev_from_file_info(info);
 	struct comedi_subdevice *s;
@@ -1823,7 +1823,7 @@ done:
 static unsigned int comedi_poll(struct file *file, poll_table *wait)
 {
 	unsigned int mask = 0;
-	const unsigned minor = iminor(file->f_dentry->d_inode);
+	const unsigned minor = iminor(file_inode(file));
 	struct comedi_file_info *info = comedi_file_info_from_minor(minor);
 	struct comedi_device *dev = comedi_dev_from_file_info(info);
 	struct comedi_subdevice *s;
@@ -1869,7 +1869,7 @@ static ssize_t comedi_write(struct file *file, const char __user *buf,
 	struct comedi_async *async;
 	int n, m, count = 0, retval = 0;
 	DECLARE_WAITQUEUE(wait, current);
-	const unsigned minor = iminor(file->f_dentry->d_inode);
+	const unsigned minor = iminor(file_inode(file));
 	struct comedi_file_info *info = comedi_file_info_from_minor(minor);
 	struct comedi_device *dev = comedi_dev_from_file_info(info);
 
@@ -1964,7 +1964,7 @@ static ssize_t comedi_read(struct file *file, char __user *buf, size_t nbytes,
 	struct comedi_async *async;
 	int n, m, count = 0, retval = 0;
 	DECLARE_WAITQUEUE(wait, current);
-	const unsigned minor = iminor(file->f_dentry->d_inode);
+	const unsigned minor = iminor(file_inode(file));
 	struct comedi_file_info *info = comedi_file_info_from_minor(minor);
 	struct comedi_device *dev = comedi_dev_from_file_info(info);
 
@@ -2133,7 +2133,7 @@ ok:
 
 static int comedi_fasync(int fd, struct file *file, int on)
 {
-	const unsigned minor = iminor(file->f_dentry->d_inode);
+	const unsigned minor = iminor(file_inode(file));
 	struct comedi_device *dev = comedi_dev_from_minor(minor);
 
 	if (!dev)
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 23a98e658306..7ea246a07731 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -144,23 +144,24 @@ struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf)
 	spin_lock_init(&tiqn->login_stats.lock);
 	spin_lock_init(&tiqn->logout_stats.lock);
 
-	if (!idr_pre_get(&tiqn_idr, GFP_KERNEL)) {
-		pr_err("idr_pre_get() for tiqn_idr failed\n");
-		kfree(tiqn);
-		return ERR_PTR(-ENOMEM);
-	}
 	tiqn->tiqn_state = TIQN_STATE_ACTIVE;
 
+	idr_preload(GFP_KERNEL);
 	spin_lock(&tiqn_lock);
-	ret = idr_get_new(&tiqn_idr, NULL, &tiqn->tiqn_index);
+
+	ret = idr_alloc(&tiqn_idr, NULL, 0, 0, GFP_NOWAIT);
 	if (ret < 0) {
-		pr_err("idr_get_new() failed for tiqn->tiqn_index\n");
+		pr_err("idr_alloc() failed for tiqn->tiqn_index\n");
 		spin_unlock(&tiqn_lock);
+		idr_preload_end();
 		kfree(tiqn);
 		return ERR_PTR(ret);
 	}
+	tiqn->tiqn_index = ret;
 	list_add_tail(&tiqn->tiqn_list, &g_tiqn_list);
+
 	spin_unlock(&tiqn_lock);
+	idr_preload_end();
 
 	pr_debug("CORE[0] - Added iSCSI Target IQN: %s\n", tiqn->tiqn);
 
@@ -3583,6 +3584,10 @@ check_rsp_state:
 				spin_lock_bh(&cmd->istate_lock);
 				cmd->i_state = ISTATE_SENT_STATUS;
 				spin_unlock_bh(&cmd->istate_lock);
+
+				if (atomic_read(&conn->check_immediate_queue))
+					return 1;
+
 				continue;
 			} else if (ret == 2) {
 				/* Still must send status,
@@ -3672,7 +3677,7 @@ check_rsp_state:
 		}
 
 		if (atomic_read(&conn->check_immediate_queue))
-			break;
+			return 1;
 	}
 
 	return 0;
@@ -3716,12 +3721,15 @@ restart:
 		     signal_pending(current))
 			goto transport_err;
 
+get_immediate:
 		ret = handle_immediate_queue(conn);
 		if (ret < 0)
 			goto transport_err;
 
 		ret = handle_response_queue(conn);
-		if (ret == -EAGAIN)
+		if (ret == 1)
+			goto get_immediate;
+		else if (ret == -EAGAIN)
 			goto restart;
 		else if (ret < 0)
 			goto transport_err;
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index fdb632f0ab85..2535d4d46c0e 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -247,19 +247,16 @@ static int iscsi_login_zero_tsih_s1(
 	spin_lock_init(&sess->session_usage_lock);
 	spin_lock_init(&sess->ttt_lock);
 
-	if (!idr_pre_get(&sess_idr, GFP_KERNEL)) {
-		pr_err("idr_pre_get() for sess_idr failed\n");
-		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
-				ISCSI_LOGIN_STATUS_NO_RESOURCES);
-		kfree(sess);
-		return -ENOMEM;
-	}
+	idr_preload(GFP_KERNEL);
 	spin_lock_bh(&sess_idr_lock);
-	ret = idr_get_new(&sess_idr, NULL, &sess->session_index);
+	ret = idr_alloc(&sess_idr, NULL, 0, 0, GFP_NOWAIT);
+	if (ret >= 0)
+		sess->session_index = ret;
 	spin_unlock_bh(&sess_idr_lock);
+	idr_preload_end();
 
 	if (ret < 0) {
-		pr_err("idr_get_new() for sess_idr failed\n");
+		pr_err("idr_alloc() for sess_idr failed\n");
 		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
 				ISCSI_LOGIN_STATUS_NO_RESOURCES);
 		kfree(sess);
diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c
index 6917a9e938e7..d3536f57444f 100644
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c
@@ -2598,7 +2598,7 @@ static int __init sbp_init(void)
 	return 0;
 };
 
-static void sbp_exit(void)
+static void __exit sbp_exit(void)
 {
 	sbp_deregister_configfs();
 };
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index d226c10a985b..17a6acbc3ab0 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -631,7 +631,7 @@ static int __init fileio_module_init(void)
 	return transport_subsystem_register(&fileio_template);
 }
 
-static void fileio_module_exit(void)
+static void __exit fileio_module_exit(void)
 {
 	transport_subsystem_release(&fileio_template);
 }
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index c73f4a950e23..8bcc514ec8b6 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -821,7 +821,7 @@ static int __init iblock_module_init(void)
 	return transport_subsystem_register(&iblock_template);
 }
 
-static void iblock_module_exit(void)
+static void __exit iblock_module_exit(void)
 {
 	transport_subsystem_release(&iblock_template);
 }
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 2bcfd79cf595..82e78d72fdb6 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -840,14 +840,14 @@ static void pscsi_bi_endio(struct bio *bio, int error)
 	bio_put(bio);
 }
 
-static inline struct bio *pscsi_get_bio(int sg_num)
+static inline struct bio *pscsi_get_bio(int nr_vecs)
 {
 	struct bio *bio;
 	/*
 	 * Use bio_malloc() following the comment in for bio -> struct request
 	 * in block/blk-core.c:blk_make_request()
 	 */
-	bio = bio_kmalloc(GFP_KERNEL, sg_num);
+	bio = bio_kmalloc(GFP_KERNEL, nr_vecs);
 	if (!bio) {
 		pr_err("PSCSI: bio_kmalloc() failed\n");
 		return NULL;
@@ -940,7 +940,6 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 				bio = NULL;
 			}
 
-			page++;
 			len -= bytes;
 			data_len -= bytes;
 			off = 0;
@@ -952,7 +951,6 @@ fail:
 	while (*hbio) {
 		bio = *hbio;
 		*hbio = (*hbio)->bi_next;
-		bio->bi_next = NULL;
 		bio_endio(bio, 0);	/* XXX: should be error */
 	}
 	return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -1092,7 +1090,6 @@ fail_free_bio:
 	while (hbio) {
 		struct bio *bio = hbio;
 		hbio = hbio->bi_next;
-		bio->bi_next = NULL;
 		bio_endio(bio, 0);	/* XXX: should be error */
 	}
 	ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -1178,7 +1175,7 @@ static int __init pscsi_module_init(void)
 	return transport_subsystem_register(&pscsi_template);
 }
 
-static void pscsi_module_exit(void)
+static void __exit pscsi_module_exit(void)
 {
 	transport_subsystem_release(&pscsi_template);
 }
diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c
index 6659dd36e806..113f33598b9f 100644
--- a/drivers/target/tcm_fc/tfc_sess.c
+++ b/drivers/target/tcm_fc/tfc_sess.c
@@ -169,7 +169,6 @@ static struct ft_sess *ft_sess_get(struct fc_lport *lport, u32 port_id)
 {
 	struct ft_tport *tport;
 	struct hlist_head *head;
-	struct hlist_node *pos;
 	struct ft_sess *sess;
 
 	rcu_read_lock();
@@ -178,7 +177,7 @@ static struct ft_sess *ft_sess_get(struct fc_lport *lport, u32 port_id)
 		goto out;
 
 	head = &tport->hash[ft_sess_hash(port_id)];
-	hlist_for_each_entry_rcu(sess, pos, head, hash) {
+	hlist_for_each_entry_rcu(sess, head, hash) {
 		if (sess->port_id == port_id) {
 			kref_get(&sess->kref);
 			rcu_read_unlock();
@@ -201,10 +200,9 @@ static struct ft_sess *ft_sess_create(struct ft_tport *tport, u32 port_id,
 {
 	struct ft_sess *sess;
 	struct hlist_head *head;
-	struct hlist_node *pos;
 
 	head = &tport->hash[ft_sess_hash(port_id)];
-	hlist_for_each_entry_rcu(sess, pos, head, hash)
+	hlist_for_each_entry_rcu(sess, head, hash)
 		if (sess->port_id == port_id)
 			return sess;
 
@@ -253,11 +251,10 @@ static void ft_sess_unhash(struct ft_sess *sess)
 static struct ft_sess *ft_sess_delete(struct ft_tport *tport, u32 port_id)
 {
 	struct hlist_head *head;
-	struct hlist_node *pos;
 	struct ft_sess *sess;
 
 	head = &tport->hash[ft_sess_hash(port_id)];
-	hlist_for_each_entry_rcu(sess, pos, head, hash) {
+	hlist_for_each_entry_rcu(sess, head, hash) {
 		if (sess->port_id == port_id) {
 			ft_sess_unhash(sess);
 			return sess;
@@ -273,12 +270,11 @@ static struct ft_sess *ft_sess_delete(struct ft_tport *tport, u32 port_id)
 static void ft_sess_delete_all(struct ft_tport *tport)
 {
 	struct hlist_head *head;
-	struct hlist_node *pos;
 	struct ft_sess *sess;
 
 	for (head = tport->hash;
 	     head < &tport->hash[FT_SESS_HASH_SIZE]; head++) {
-		hlist_for_each_entry_rcu(sess, pos, head, hash) {
+		hlist_for_each_entry_rcu(sess, head, hash) {
 			ft_sess_unhash(sess);
 			transport_deregister_session_configfs(sess->se_sess);
 			ft_sess_put(sess);	/* release from table */
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index c2c77d1ac499..a764f165b589 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -29,14 +29,14 @@ choice
 
 config THERMAL_DEFAULT_GOV_STEP_WISE
 	bool "step_wise"
-	select STEP_WISE
+	select THERMAL_GOV_STEP_WISE
 	help
 	  Use the step_wise governor as default. This throttles the
 	  devices one step at a time.
 
 config THERMAL_DEFAULT_GOV_FAIR_SHARE
 	bool "fair_share"
-	select FAIR_SHARE
+	select THERMAL_GOV_FAIR_SHARE
 	help
 	  Use the fair_share governor as default. This throttles the
 	  devices based on their 'contribution' to a zone. The
@@ -44,24 +44,24 @@ config THERMAL_DEFAULT_GOV_FAIR_SHARE
 
 config THERMAL_DEFAULT_GOV_USER_SPACE
 	bool "user_space"
-	select USER_SPACE
+	select THERMAL_GOV_USER_SPACE
 	help
 	  Select this if you want to let the user space manage the
 	  lpatform thermals.
 
 endchoice
 
-config FAIR_SHARE
+config THERMAL_GOV_FAIR_SHARE
 	bool "Fair-share thermal governor"
 	help
 	  Enable this to manage platform thermals using fair-share governor.
 
-config STEP_WISE
+config THERMAL_GOV_STEP_WISE
 	bool "Step_wise thermal governor"
 	help
 	  Enable this to manage platform thermals using a simple linear
 
-config USER_SPACE
+config THERMAL_GOV_USER_SPACE
 	bool "User_space thermal governor"
 	help
 	  Enable this to let the user space manage the platform thermals.
@@ -78,6 +78,14 @@ config CPU_THERMAL
 	  and not the ACPI interface.
 	  If you want this support, you should say Y here.
 
+config THERMAL_EMULATION
+	bool "Thermal emulation mode support"
+	help
+	  Enable this option to make a emul_temp sysfs node in thermal zone
+	  directory to support temperature emulation. With emulation sysfs node,
+	  user can manually input temperature and test the different trip
+	  threshold behaviour for simulation purpose.
+
 config SPEAR_THERMAL
 	bool "SPEAr thermal sensor driver"
 	depends on PLAT_SPEAR
@@ -93,6 +101,14 @@ config RCAR_THERMAL
 	  Enable this to plug the R-Car thermal sensor driver into the Linux
 	  thermal framework
 
+config KIRKWOOD_THERMAL
+	tristate "Temperature sensor on Marvell Kirkwood SoCs"
+	depends on ARCH_KIRKWOOD
+	depends on OF
+	help
+	  Support for the Kirkwood thermal sensor driver into the Linux thermal
+	  framework. Only kirkwood 88F6282 and 88F6283 have this sensor.
+
 config EXYNOS_THERMAL
 	tristate "Temperature sensor on Samsung EXYNOS"
 	depends on (ARCH_EXYNOS4 || ARCH_EXYNOS5)
@@ -101,6 +117,23 @@ config EXYNOS_THERMAL
 	  If you say yes here you get support for TMU (Thermal Management
 	  Unit) on SAMSUNG EXYNOS series of SoC.
 
+config EXYNOS_THERMAL_EMUL
+	bool "EXYNOS TMU emulation mode support"
+	depends on EXYNOS_THERMAL
+	help
+	  Exynos 4412 and 4414 and 5 series has emulation mode on TMU.
+	  Enable this option will be make sysfs node in exynos thermal platform
+	  device directory to support emulation mode. With emulation mode sysfs
+	  node, you can manually input temperature to TMU for simulation purpose.
+
+config DOVE_THERMAL
+	tristate "Temperature sensor on Marvell Dove SoCs"
+	depends on ARCH_DOVE
+	depends on OF
+	help
+	  Support for the Dove thermal sensor driver in the Linux thermal
+	  framework.
+
 config DB8500_THERMAL
 	bool "DB8500 thermal management"
 	depends on ARCH_U8500
@@ -122,4 +155,14 @@ config DB8500_CPUFREQ_COOLING
 	  bound cpufreq cooling device turns active to set CPU frequency low to
 	  cool down the CPU.
 
+config INTEL_POWERCLAMP
+	tristate "Intel PowerClamp idle injection driver"
+	depends on THERMAL
+	depends on X86
+	depends on CPU_SUP_INTEL
+	help
+	  Enable this to enable Intel PowerClamp idle injection driver. This
+	  enforce idle time which results in more package C-state residency. The
+	  user interface is exposed via generic thermal framework.
+
 endif
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index d8da683245fc..d3a2b38c31e8 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -5,9 +5,9 @@
 obj-$(CONFIG_THERMAL)		+= thermal_sys.o
 
 # governors
-obj-$(CONFIG_FAIR_SHARE)	+= fair_share.o
-obj-$(CONFIG_STEP_WISE)		+= step_wise.o
-obj-$(CONFIG_USER_SPACE)	+= user_space.o
+obj-$(CONFIG_THERMAL_GOV_FAIR_SHARE)	+= fair_share.o
+obj-$(CONFIG_THERMAL_GOV_STEP_WISE)	+= step_wise.o
+obj-$(CONFIG_THERMAL_GOV_USER_SPACE)	+= user_space.o
 
 # cpufreq cooling
 obj-$(CONFIG_CPU_THERMAL)	+= cpu_cooling.o
@@ -15,6 +15,10 @@ obj-$(CONFIG_CPU_THERMAL)	+= cpu_cooling.o
 # platform thermal drivers
 obj-$(CONFIG_SPEAR_THERMAL)	+= spear_thermal.o
 obj-$(CONFIG_RCAR_THERMAL)	+= rcar_thermal.o
+obj-$(CONFIG_KIRKWOOD_THERMAL)  += kirkwood_thermal.o
 obj-$(CONFIG_EXYNOS_THERMAL)	+= exynos_thermal.o
+obj-$(CONFIG_DOVE_THERMAL)  	+= dove_thermal.o
 obj-$(CONFIG_DB8500_THERMAL)	+= db8500_thermal.o
 obj-$(CONFIG_DB8500_CPUFREQ_COOLING)	+= db8500_cpufreq_cooling.o
+obj-$(CONFIG_INTEL_POWERCLAMP)	+= intel_powerclamp.o
+
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 836828e29a87..8dc44cbb3e09 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -73,21 +73,14 @@ static struct cpufreq_cooling_device *notify_device;
  */
 static int get_idr(struct idr *idr, int *id)
 {
-	int err;
-again:
-	if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0))
-		return -ENOMEM;
+	int ret;
 
 	mutex_lock(&cooling_cpufreq_lock);
-	err = idr_get_new(idr, NULL, id);
+	ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL);
 	mutex_unlock(&cooling_cpufreq_lock);
-
-	if (unlikely(err == -EAGAIN))
-		goto again;
-	else if (unlikely(err))
-		return err;
-
-	*id = *id & MAX_IDR_MASK;
+	if (unlikely(ret < 0))
+		return ret;
+	*id = ret;
 	return 0;
 }
 
@@ -118,8 +111,8 @@ static int is_cpufreq_valid(int cpu)
 /**
  * get_cpu_frequency - get the absolute value of frequency from level.
  * @cpu: cpu for which frequency is fetched.
- * @level: level of frequency of the CPU
- *	e.g level=1 --> 1st MAX FREQ, LEVEL=2 ---> 2nd MAX FREQ, .... etc
+ * @level: level of frequency, equals cooling state of cpu cooling device
+ *	e.g level=0 --> 1st MAX FREQ, level=1 ---> 2nd MAX FREQ, .... etc
  */
 static unsigned int get_cpu_frequency(unsigned int cpu, unsigned long level)
 {
diff --git a/drivers/thermal/db8500_cpufreq_cooling.c b/drivers/thermal/db8500_cpufreq_cooling.c
index 4cf8e72af90a..21419851fc02 100644
--- a/drivers/thermal/db8500_cpufreq_cooling.c
+++ b/drivers/thermal/db8500_cpufreq_cooling.c
@@ -21,6 +21,7 @@
 #include <linux/cpufreq.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
@@ -73,15 +74,13 @@ static const struct of_device_id db8500_cpufreq_cooling_match[] = {
 	{ .compatible = "stericsson,db8500-cpufreq-cooling" },
 	{},
 };
-#else
-#define db8500_cpufreq_cooling_match NULL
 #endif
 
 static struct platform_driver db8500_cpufreq_cooling_driver = {
 	.driver = {
 		.owner = THIS_MODULE,
 		.name = "db8500-cpufreq-cooling",
-		.of_match_table = db8500_cpufreq_cooling_match,
+		.of_match_table = of_match_ptr(db8500_cpufreq_cooling_match),
 	},
 	.probe = db8500_cpufreq_cooling_probe,
 	.suspend = db8500_cpufreq_cooling_suspend,
diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c
index ec71ade3e317..61ce60a35921 100644
--- a/drivers/thermal/db8500_thermal.c
+++ b/drivers/thermal/db8500_thermal.c
@@ -508,15 +508,13 @@ static const struct of_device_id db8500_thermal_match[] = {
 	{ .compatible = "stericsson,db8500-thermal" },
 	{},
 };
-#else
-#define db8500_thermal_match NULL
 #endif
 
 static struct platform_driver db8500_thermal_driver = {
 	.driver = {
 		.owner = THIS_MODULE,
 		.name = "db8500-thermal",
-		.of_match_table = db8500_thermal_match,
+		.of_match_table = of_match_ptr(db8500_thermal_match),
 	},
 	.probe = db8500_thermal_probe,
 	.suspend = db8500_thermal_suspend,
diff --git a/drivers/thermal/dove_thermal.c b/drivers/thermal/dove_thermal.c
new file mode 100644
index 000000000000..7b0bfa0e7a9c
--- /dev/null
+++ b/drivers/thermal/dove_thermal.c
@@ -0,0 +1,209 @@
+/*
+ * Dove thermal sensor driver
+ *
+ * Copyright (C) 2013 Andrew Lunn <andrew@lunn.ch>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/thermal.h>
+
+#define DOVE_THERMAL_TEMP_OFFSET	1
+#define DOVE_THERMAL_TEMP_MASK		0x1FF
+
+/* Dove Thermal Manager Control and Status Register */
+#define PMU_TM_DISABLE_OFFS		0
+#define PMU_TM_DISABLE_MASK		(0x1 << PMU_TM_DISABLE_OFFS)
+
+/* Dove Theraml Diode Control 0 Register */
+#define PMU_TDC0_SW_RST_MASK		(0x1 << 1)
+#define PMU_TDC0_SEL_VCAL_OFFS		5
+#define PMU_TDC0_SEL_VCAL_MASK		(0x3 << PMU_TDC0_SEL_VCAL_OFFS)
+#define PMU_TDC0_REF_CAL_CNT_OFFS	11
+#define PMU_TDC0_REF_CAL_CNT_MASK	(0x1FF << PMU_TDC0_REF_CAL_CNT_OFFS)
+#define PMU_TDC0_AVG_NUM_OFFS		25
+#define PMU_TDC0_AVG_NUM_MASK		(0x7 << PMU_TDC0_AVG_NUM_OFFS)
+
+/* Dove Thermal Diode Control 1 Register */
+#define PMU_TEMP_DIOD_CTRL1_REG		0x04
+#define PMU_TDC1_TEMP_VALID_MASK	(0x1 << 10)
+
+/* Dove Thermal Sensor Dev Structure */
+struct dove_thermal_priv {
+	void __iomem *sensor;
+	void __iomem *control;
+};
+
+static int dove_init_sensor(const struct dove_thermal_priv *priv)
+{
+	u32 reg;
+	u32 i;
+
+	/* Configure the Diode Control Register #0 */
+	reg = readl_relaxed(priv->control);
+
+	/* Use average of 2 */
+	reg &= ~PMU_TDC0_AVG_NUM_MASK;
+	reg |= (0x1 << PMU_TDC0_AVG_NUM_OFFS);
+
+	/* Reference calibration value */
+	reg &= ~PMU_TDC0_REF_CAL_CNT_MASK;
+	reg |= (0x0F1 << PMU_TDC0_REF_CAL_CNT_OFFS);
+
+	/* Set the high level reference for calibration */
+	reg &= ~PMU_TDC0_SEL_VCAL_MASK;
+	reg |= (0x2 << PMU_TDC0_SEL_VCAL_OFFS);
+	writel(reg, priv->control);
+
+	/* Reset the sensor */
+	reg = readl_relaxed(priv->control);
+	writel((reg | PMU_TDC0_SW_RST_MASK), priv->control);
+	writel(reg, priv->control);
+
+	/* Enable the sensor */
+	reg = readl_relaxed(priv->sensor);
+	reg &= ~PMU_TM_DISABLE_MASK;
+	writel(reg, priv->sensor);
+
+	/* Poll the sensor for the first reading */
+	for (i = 0; i < 1000000; i++) {
+		reg = readl_relaxed(priv->sensor);
+		if (reg & DOVE_THERMAL_TEMP_MASK)
+			break;
+	}
+
+	if (i == 1000000)
+		return -EIO;
+
+	return 0;
+}
+
+static int dove_get_temp(struct thermal_zone_device *thermal,
+			  unsigned long *temp)
+{
+	unsigned long reg;
+	struct dove_thermal_priv *priv = thermal->devdata;
+
+	/* Valid check */
+	reg = readl_relaxed(priv->control + PMU_TEMP_DIOD_CTRL1_REG);
+	if ((reg & PMU_TDC1_TEMP_VALID_MASK) == 0x0) {
+		dev_err(&thermal->device,
+			"Temperature sensor reading not valid\n");
+		return -EIO;
+	}
+
+	/*
+	 * Calculate temperature. See Section 8.10.1 of 88AP510,
+	 * Documentation/arm/Marvell/README
+	 */
+	reg = readl_relaxed(priv->sensor);
+	reg = (reg >> DOVE_THERMAL_TEMP_OFFSET) & DOVE_THERMAL_TEMP_MASK;
+	*temp = ((2281638UL - (7298*reg)) / 10);
+
+	return 0;
+}
+
+static struct thermal_zone_device_ops ops = {
+	.get_temp = dove_get_temp,
+};
+
+static const struct of_device_id dove_thermal_id_table[] = {
+	{ .compatible = "marvell,dove-thermal" },
+	{}
+};
+
+static int dove_thermal_probe(struct platform_device *pdev)
+{
+	struct thermal_zone_device *thermal = NULL;
+	struct dove_thermal_priv *priv;
+	struct resource *res;
+	int ret;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "Failed to get platform resource\n");
+		return -ENODEV;
+	}
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->sensor = devm_request_and_ioremap(&pdev->dev, res);
+	if (!priv->sensor) {
+		dev_err(&pdev->dev, "Failed to request_ioremap memory\n");
+		return -EADDRNOTAVAIL;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!res) {
+		dev_err(&pdev->dev, "Failed to get platform resource\n");
+		return -ENODEV;
+	}
+	priv->control = devm_request_and_ioremap(&pdev->dev, res);
+	if (!priv->control) {
+		dev_err(&pdev->dev, "Failed to request_ioremap memory\n");
+		return -EADDRNOTAVAIL;
+	}
+
+	ret = dove_init_sensor(priv);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to initialize sensor\n");
+		return ret;
+	}
+
+	thermal = thermal_zone_device_register("dove_thermal", 0, 0,
+					       priv, &ops, NULL, 0, 0);
+	if (IS_ERR(thermal)) {
+		dev_err(&pdev->dev,
+			"Failed to register thermal zone device\n");
+		return PTR_ERR(thermal);
+	}
+
+	platform_set_drvdata(pdev, thermal);
+
+	return 0;
+}
+
+static int dove_thermal_exit(struct platform_device *pdev)
+{
+	struct thermal_zone_device *dove_thermal =
+		platform_get_drvdata(pdev);
+
+	thermal_zone_device_unregister(dove_thermal);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+MODULE_DEVICE_TABLE(of, dove_thermal_id_table);
+
+static struct platform_driver dove_thermal_driver = {
+	.probe = dove_thermal_probe,
+	.remove = dove_thermal_exit,
+	.driver = {
+		.name = "dove_thermal",
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(dove_thermal_id_table),
+	},
+};
+
+module_platform_driver(dove_thermal_driver);
+
+MODULE_AUTHOR("Andrew Lunn <andrew@lunn.ch>");
+MODULE_DESCRIPTION("Dove thermal driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c
index bada1308318b..e04ebd8671ac 100644
--- a/drivers/thermal/exynos_thermal.c
+++ b/drivers/thermal/exynos_thermal.c
@@ -82,7 +82,7 @@
 
 #define EXYNOS_TRIMINFO_RELOAD		0x1
 #define EXYNOS_TMU_CLEAR_RISE_INT	0x111
-#define EXYNOS_TMU_CLEAR_FALL_INT	(0x111 << 16)
+#define EXYNOS_TMU_CLEAR_FALL_INT	(0x111 << 12)
 #define EXYNOS_MUX_ADDR_VALUE		6
 #define EXYNOS_MUX_ADDR_SHIFT		20
 #define EXYNOS_TMU_TRIP_MODE_SHIFT	13
@@ -94,11 +94,20 @@
 #define SENSOR_NAME_LEN	16
 #define MAX_TRIP_COUNT	8
 #define MAX_COOLING_DEVICE 4
+#define MAX_THRESHOLD_LEVS 4
 
 #define ACTIVE_INTERVAL 500
 #define IDLE_INTERVAL 10000
 #define MCELSIUS	1000
 
+#ifdef CONFIG_EXYNOS_THERMAL_EMUL
+#define EXYNOS_EMUL_TIME	0x57F0
+#define EXYNOS_EMUL_TIME_SHIFT	16
+#define EXYNOS_EMUL_DATA_SHIFT	8
+#define EXYNOS_EMUL_DATA_MASK	0xFF
+#define EXYNOS_EMUL_ENABLE	0x1
+#endif /* CONFIG_EXYNOS_THERMAL_EMUL */
+
 /* CPU Zone information */
 #define PANIC_ZONE      4
 #define WARN_ZONE       3
@@ -125,6 +134,7 @@ struct exynos_tmu_data {
 struct	thermal_trip_point_conf {
 	int trip_val[MAX_TRIP_COUNT];
 	int trip_count;
+	u8 trigger_falling;
 };
 
 struct	thermal_cooling_conf {
@@ -174,7 +184,8 @@ static int exynos_set_mode(struct thermal_zone_device *thermal,
 
 	mutex_lock(&th_zone->therm_dev->lock);
 
-	if (mode == THERMAL_DEVICE_ENABLED)
+	if (mode == THERMAL_DEVICE_ENABLED &&
+		!th_zone->sensor_conf->trip_data.trigger_falling)
 		th_zone->therm_dev->polling_delay = IDLE_INTERVAL;
 	else
 		th_zone->therm_dev->polling_delay = 0;
@@ -284,7 +295,7 @@ static int exynos_bind(struct thermal_zone_device *thermal,
 		case MONITOR_ZONE:
 		case WARN_ZONE:
 			if (thermal_zone_bind_cooling_device(thermal, i, cdev,
-								level, level)) {
+								level, 0)) {
 				pr_err("error binding cdev inst %d\n", i);
 				ret = -EINVAL;
 			}
@@ -362,10 +373,17 @@ static int exynos_get_temp(struct thermal_zone_device *thermal,
 static int exynos_get_trend(struct thermal_zone_device *thermal,
 			int trip, enum thermal_trend *trend)
 {
-	if (thermal->temperature >= trip)
-		*trend = THERMAL_TREND_RAISING;
+	int ret;
+	unsigned long trip_temp;
+
+	ret = exynos_get_trip_temp(thermal, trip, &trip_temp);
+	if (ret < 0)
+		return ret;
+
+	if (thermal->temperature >= trip_temp)
+		*trend = THERMAL_TREND_RAISE_FULL;
 	else
-		*trend = THERMAL_TREND_DROPPING;
+		*trend = THERMAL_TREND_DROP_FULL;
 
 	return 0;
 }
@@ -413,7 +431,8 @@ static void exynos_report_trigger(void)
 			break;
 	}
 
-	if (th_zone->mode == THERMAL_DEVICE_ENABLED) {
+	if (th_zone->mode == THERMAL_DEVICE_ENABLED &&
+		!th_zone->sensor_conf->trip_data.trigger_falling) {
 		if (i > 0)
 			th_zone->therm_dev->polling_delay = ACTIVE_INTERVAL;
 		else
@@ -452,7 +471,8 @@ static int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf)
 
 	th_zone->therm_dev = thermal_zone_device_register(sensor_conf->name,
 			EXYNOS_ZONE_COUNT, 0, NULL, &exynos_dev_ops, NULL, 0,
-			IDLE_INTERVAL);
+			sensor_conf->trip_data.trigger_falling ?
+			0 : IDLE_INTERVAL);
 
 	if (IS_ERR(th_zone->therm_dev)) {
 		pr_err("Failed to register thermal zone device\n");
@@ -559,8 +579,9 @@ static int exynos_tmu_initialize(struct platform_device *pdev)
 {
 	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
 	struct exynos_tmu_platform_data *pdata = data->pdata;
-	unsigned int status, trim_info, rising_threshold;
-	int ret = 0, threshold_code;
+	unsigned int status, trim_info;
+	unsigned int rising_threshold = 0, falling_threshold = 0;
+	int ret = 0, threshold_code, i, trigger_levs = 0;
 
 	mutex_lock(&data->lock);
 	clk_enable(data->clk);
@@ -585,6 +606,11 @@ static int exynos_tmu_initialize(struct platform_device *pdev)
 			(data->temp_error2 != 0))
 		data->temp_error1 = pdata->efuse_value;
 
+	/* Count trigger levels to be enabled */
+	for (i = 0; i < MAX_THRESHOLD_LEVS; i++)
+		if (pdata->trigger_levels[i])
+			trigger_levs++;
+
 	if (data->soc == SOC_ARCH_EXYNOS4210) {
 		/* Write temperature code for threshold */
 		threshold_code = temp_to_code(data, pdata->threshold);
@@ -594,44 +620,38 @@ static int exynos_tmu_initialize(struct platform_device *pdev)
 		}
 		writeb(threshold_code,
 			data->base + EXYNOS4210_TMU_REG_THRESHOLD_TEMP);
-
-		writeb(pdata->trigger_levels[0],
-			data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0);
-		writeb(pdata->trigger_levels[1],
-			data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL1);
-		writeb(pdata->trigger_levels[2],
-			data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL2);
-		writeb(pdata->trigger_levels[3],
-			data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL3);
+		for (i = 0; i < trigger_levs; i++)
+			writeb(pdata->trigger_levels[i],
+			data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0 + i * 4);
 
 		writel(EXYNOS4210_TMU_INTCLEAR_VAL,
 			data->base + EXYNOS_TMU_REG_INTCLEAR);
 	} else if (data->soc == SOC_ARCH_EXYNOS) {
-		/* Write temperature code for threshold */
-		threshold_code = temp_to_code(data, pdata->trigger_levels[0]);
-		if (threshold_code < 0) {
-			ret = threshold_code;
-			goto out;
-		}
-		rising_threshold = threshold_code;
-		threshold_code = temp_to_code(data, pdata->trigger_levels[1]);
-		if (threshold_code < 0) {
-			ret = threshold_code;
-			goto out;
-		}
-		rising_threshold |= (threshold_code << 8);
-		threshold_code = temp_to_code(data, pdata->trigger_levels[2]);
-		if (threshold_code < 0) {
-			ret = threshold_code;
-			goto out;
+		/* Write temperature code for rising and falling threshold */
+		for (i = 0; i < trigger_levs; i++) {
+			threshold_code = temp_to_code(data,
+						pdata->trigger_levels[i]);
+			if (threshold_code < 0) {
+				ret = threshold_code;
+				goto out;
+			}
+			rising_threshold |= threshold_code << 8 * i;
+			if (pdata->threshold_falling) {
+				threshold_code = temp_to_code(data,
+						pdata->trigger_levels[i] -
+						pdata->threshold_falling);
+				if (threshold_code > 0)
+					falling_threshold |=
+						threshold_code << 8 * i;
+			}
 		}
-		rising_threshold |= (threshold_code << 16);
 
 		writel(rising_threshold,
 				data->base + EXYNOS_THD_TEMP_RISE);
-		writel(0, data->base + EXYNOS_THD_TEMP_FALL);
+		writel(falling_threshold,
+				data->base + EXYNOS_THD_TEMP_FALL);
 
-		writel(EXYNOS_TMU_CLEAR_RISE_INT|EXYNOS_TMU_CLEAR_FALL_INT,
+		writel(EXYNOS_TMU_CLEAR_RISE_INT | EXYNOS_TMU_CLEAR_FALL_INT,
 				data->base + EXYNOS_TMU_REG_INTCLEAR);
 	}
 out:
@@ -664,6 +684,8 @@ static void exynos_tmu_control(struct platform_device *pdev, bool on)
 			pdata->trigger_level2_en << 8 |
 			pdata->trigger_level1_en << 4 |
 			pdata->trigger_level0_en;
+		if (pdata->threshold_falling)
+			interrupt_en |= interrupt_en << 16;
 	} else {
 		con |= EXYNOS_TMU_CORE_OFF;
 		interrupt_en = 0; /* Disable all interrupts */
@@ -697,20 +719,19 @@ static void exynos_tmu_work(struct work_struct *work)
 	struct exynos_tmu_data *data = container_of(work,
 			struct exynos_tmu_data, irq_work);
 
+	exynos_report_trigger();
 	mutex_lock(&data->lock);
 	clk_enable(data->clk);
-
-
 	if (data->soc == SOC_ARCH_EXYNOS)
-		writel(EXYNOS_TMU_CLEAR_RISE_INT,
+		writel(EXYNOS_TMU_CLEAR_RISE_INT |
+				EXYNOS_TMU_CLEAR_FALL_INT,
 				data->base + EXYNOS_TMU_REG_INTCLEAR);
 	else
 		writel(EXYNOS4210_TMU_INTCLEAR_VAL,
 				data->base + EXYNOS_TMU_REG_INTCLEAR);
-
 	clk_disable(data->clk);
 	mutex_unlock(&data->lock);
-	exynos_report_trigger();
+
 	enable_irq(data->irq);
 }
 
@@ -759,6 +780,7 @@ static struct exynos_tmu_platform_data const exynos4210_default_tmu_data = {
 
 #if defined(CONFIG_SOC_EXYNOS5250) || defined(CONFIG_SOC_EXYNOS4412)
 static struct exynos_tmu_platform_data const exynos_default_tmu_data = {
+	.threshold_falling = 10,
 	.trigger_levels[0] = 85,
 	.trigger_levels[1] = 103,
 	.trigger_levels[2] = 110,
@@ -800,8 +822,6 @@ static const struct of_device_id exynos_tmu_match[] = {
 	{},
 };
 MODULE_DEVICE_TABLE(of, exynos_tmu_match);
-#else
-#define  exynos_tmu_match NULL
 #endif
 
 static struct platform_device_id exynos_tmu_driver_ids[] = {
@@ -832,6 +852,94 @@ static inline struct  exynos_tmu_platform_data *exynos_get_driver_data(
 	return (struct exynos_tmu_platform_data *)
 			platform_get_device_id(pdev)->driver_data;
 }
+
+#ifdef CONFIG_EXYNOS_THERMAL_EMUL
+static ssize_t exynos_tmu_emulation_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct platform_device *pdev = container_of(dev,
+					struct platform_device, dev);
+	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
+	unsigned int reg;
+	u8 temp_code;
+	int temp = 0;
+
+	if (data->soc == SOC_ARCH_EXYNOS4210)
+		goto out;
+
+	mutex_lock(&data->lock);
+	clk_enable(data->clk);
+	reg = readl(data->base + EXYNOS_EMUL_CON);
+	clk_disable(data->clk);
+	mutex_unlock(&data->lock);
+
+	if (reg & EXYNOS_EMUL_ENABLE) {
+		reg >>= EXYNOS_EMUL_DATA_SHIFT;
+		temp_code = reg & EXYNOS_EMUL_DATA_MASK;
+		temp = code_to_temp(data, temp_code);
+	}
+out:
+	return sprintf(buf, "%d\n", temp * MCELSIUS);
+}
+
+static ssize_t exynos_tmu_emulation_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct platform_device *pdev = container_of(dev,
+					struct platform_device, dev);
+	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
+	unsigned int reg;
+	int temp;
+
+	if (data->soc == SOC_ARCH_EXYNOS4210)
+		goto out;
+
+	if (!sscanf(buf, "%d\n", &temp) || temp < 0)
+		return -EINVAL;
+
+	mutex_lock(&data->lock);
+	clk_enable(data->clk);
+
+	reg = readl(data->base + EXYNOS_EMUL_CON);
+
+	if (temp) {
+		/* Both CELSIUS and MCELSIUS type are available for input */
+		if (temp > MCELSIUS)
+			temp /= MCELSIUS;
+
+		reg = (EXYNOS_EMUL_TIME << EXYNOS_EMUL_TIME_SHIFT) |
+			(temp_to_code(data, (temp / MCELSIUS))
+			 << EXYNOS_EMUL_DATA_SHIFT) | EXYNOS_EMUL_ENABLE;
+	} else {
+		reg &= ~EXYNOS_EMUL_ENABLE;
+	}
+
+	writel(reg, data->base + EXYNOS_EMUL_CON);
+
+	clk_disable(data->clk);
+	mutex_unlock(&data->lock);
+
+out:
+	return count;
+}
+
+static DEVICE_ATTR(emulation, 0644, exynos_tmu_emulation_show,
+					exynos_tmu_emulation_store);
+static int create_emulation_sysfs(struct device *dev)
+{
+	return device_create_file(dev, &dev_attr_emulation);
+}
+static void remove_emulation_sysfs(struct device *dev)
+{
+	device_remove_file(dev, &dev_attr_emulation);
+}
+#else
+static inline int create_emulation_sysfs(struct device *dev) { return 0; }
+static inline void remove_emulation_sysfs(struct device *dev) {}
+#endif
+
 static int exynos_tmu_probe(struct platform_device *pdev)
 {
 	struct exynos_tmu_data *data;
@@ -914,6 +1022,8 @@ static int exynos_tmu_probe(struct platform_device *pdev)
 		exynos_sensor_conf.trip_data.trip_val[i] =
 			pdata->threshold + pdata->trigger_levels[i];
 
+	exynos_sensor_conf.trip_data.trigger_falling = pdata->threshold_falling;
+
 	exynos_sensor_conf.cooling_data.freq_clip_count =
 						pdata->freq_tab_count;
 	for (i = 0; i < pdata->freq_tab_count; i++) {
@@ -928,6 +1038,11 @@ static int exynos_tmu_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "Failed to register thermal interface\n");
 		goto err_clk;
 	}
+
+	ret = create_emulation_sysfs(&pdev->dev);
+	if (ret)
+		dev_err(&pdev->dev, "Failed to create emulation mode sysfs node\n");
+
 	return 0;
 err_clk:
 	platform_set_drvdata(pdev, NULL);
@@ -939,6 +1054,8 @@ static int exynos_tmu_remove(struct platform_device *pdev)
 {
 	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
 
+	remove_emulation_sysfs(&pdev->dev);
+
 	exynos_tmu_control(pdev, false);
 
 	exynos_unregister_thermal();
@@ -980,7 +1097,7 @@ static struct platform_driver exynos_tmu_driver = {
 		.name   = "exynos-tmu",
 		.owner  = THIS_MODULE,
 		.pm     = EXYNOS_TMU_PM,
-		.of_match_table = exynos_tmu_match,
+		.of_match_table = of_match_ptr(exynos_tmu_match),
 	},
 	.probe = exynos_tmu_probe,
 	.remove	= exynos_tmu_remove,
diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c
new file mode 100644
index 000000000000..b40b37cd25e0
--- /dev/null
+++ b/drivers/thermal/intel_powerclamp.c
@@ -0,0 +1,795 @@
+/*
+ * intel_powerclamp.c - package c-state idle injection
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ *
+ * Authors:
+ *     Arjan van de Ven <arjan@linux.intel.com>
+ *     Jacob Pan <jacob.jun.pan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *
+ *	TODO:
+ *           1. better handle wakeup from external interrupts, currently a fixed
+ *              compensation is added to clamping duration when excessive amount
+ *              of wakeups are observed during idle time. the reason is that in
+ *              case of external interrupts without need for ack, clamping down
+ *              cpu in non-irq context does not reduce irq. for majority of the
+ *              cases, clamping down cpu does help reduce irq as well, we should
+ *              be able to differenciate the two cases and give a quantitative
+ *              solution for the irqs that we can control. perhaps based on
+ *              get_cpu_iowait_time_us()
+ *
+ *	     2. synchronization with other hw blocks
+ *
+ *
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/cpu.h>
+#include <linux/thermal.h>
+#include <linux/slab.h>
+#include <linux/tick.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/sched/rt.h>
+
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/mwait.h>
+#include <asm/cpu_device_id.h>
+#include <asm/idle.h>
+#include <asm/hardirq.h>
+
+#define MAX_TARGET_RATIO (50U)
+/* For each undisturbed clamping period (no extra wake ups during idle time),
+ * we increment the confidence counter for the given target ratio.
+ * CONFIDENCE_OK defines the level where runtime calibration results are
+ * valid.
+ */
+#define CONFIDENCE_OK (3)
+/* Default idle injection duration, driver adjust sleep time to meet target
+ * idle ratio. Similar to frequency modulation.
+ */
+#define DEFAULT_DURATION_JIFFIES (6)
+
+static unsigned int target_mwait;
+static struct dentry *debug_dir;
+
+/* user selected target */
+static unsigned int set_target_ratio;
+static unsigned int current_ratio;
+static bool should_skip;
+static bool reduce_irq;
+static atomic_t idle_wakeup_counter;
+static unsigned int control_cpu; /* The cpu assigned to collect stat and update
+				  * control parameters. default to BSP but BSP
+				  * can be offlined.
+				  */
+static bool clamping;
+
+
+static struct task_struct * __percpu *powerclamp_thread;
+static struct thermal_cooling_device *cooling_dev;
+static unsigned long *cpu_clamping_mask;  /* bit map for tracking per cpu
+					   * clamping thread
+					   */
+
+static unsigned int duration;
+static unsigned int pkg_cstate_ratio_cur;
+static unsigned int window_size;
+
+static int duration_set(const char *arg, const struct kernel_param *kp)
+{
+	int ret = 0;
+	unsigned long new_duration;
+
+	ret = kstrtoul(arg, 10, &new_duration);
+	if (ret)
+		goto exit;
+	if (new_duration > 25 || new_duration < 6) {
+		pr_err("Out of recommended range %lu, between 6-25ms\n",
+			new_duration);
+		ret = -EINVAL;
+	}
+
+	duration = clamp(new_duration, 6ul, 25ul);
+	smp_mb();
+
+exit:
+
+	return ret;
+}
+
+static struct kernel_param_ops duration_ops = {
+	.set = duration_set,
+	.get = param_get_int,
+};
+
+
+module_param_cb(duration, &duration_ops, &duration, 0644);
+MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");
+
+struct powerclamp_calibration_data {
+	unsigned long confidence;  /* used for calibration, basically a counter
+				    * gets incremented each time a clamping
+				    * period is completed without extra wakeups
+				    * once that counter is reached given level,
+				    * compensation is deemed usable.
+				    */
+	unsigned long steady_comp; /* steady state compensation used when
+				    * no extra wakeups occurred.
+				    */
+	unsigned long dynamic_comp; /* compensate excessive wakeup from idle
+				     * mostly from external interrupts.
+				     */
+};
+
+static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO];
+
+static int window_size_set(const char *arg, const struct kernel_param *kp)
+{
+	int ret = 0;
+	unsigned long new_window_size;
+
+	ret = kstrtoul(arg, 10, &new_window_size);
+	if (ret)
+		goto exit_win;
+	if (new_window_size > 10 || new_window_size < 2) {
+		pr_err("Out of recommended window size %lu, between 2-10\n",
+			new_window_size);
+		ret = -EINVAL;
+	}
+
+	window_size = clamp(new_window_size, 2ul, 10ul);
+	smp_mb();
+
+exit_win:
+
+	return ret;
+}
+
+static struct kernel_param_ops window_size_ops = {
+	.set = window_size_set,
+	.get = param_get_int,
+};
+
+module_param_cb(window_size, &window_size_ops, &window_size, 0644);
+MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
+	"\tpowerclamp controls idle ratio within this window. larger\n"
+	"\twindow size results in slower response time but more smooth\n"
+	"\tclamping results. default to 2.");
+
+static void find_target_mwait(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+	unsigned int highest_cstate = 0;
+	unsigned int highest_subcstate = 0;
+	int i;
+
+	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+		return;
+
+	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
+
+	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
+	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
+		return;
+
+	edx >>= MWAIT_SUBSTATE_SIZE;
+	for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
+		if (edx & MWAIT_SUBSTATE_MASK) {
+			highest_cstate = i;
+			highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
+		}
+	}
+	target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
+		(highest_subcstate - 1);
+
+}
+
+static u64 pkg_state_counter(void)
+{
+	u64 val;
+	u64 count = 0;
+
+	static bool skip_c2;
+	static bool skip_c3;
+	static bool skip_c6;
+	static bool skip_c7;
+
+	if (!skip_c2) {
+		if (!rdmsrl_safe(MSR_PKG_C2_RESIDENCY, &val))
+			count += val;
+		else
+			skip_c2 = true;
+	}
+
+	if (!skip_c3) {
+		if (!rdmsrl_safe(MSR_PKG_C3_RESIDENCY, &val))
+			count += val;
+		else
+			skip_c3 = true;
+	}
+
+	if (!skip_c6) {
+		if (!rdmsrl_safe(MSR_PKG_C6_RESIDENCY, &val))
+			count += val;
+		else
+			skip_c6 = true;
+	}
+
+	if (!skip_c7) {
+		if (!rdmsrl_safe(MSR_PKG_C7_RESIDENCY, &val))
+			count += val;
+		else
+			skip_c7 = true;
+	}
+
+	return count;
+}
+
+static void noop_timer(unsigned long foo)
+{
+	/* empty... just the fact that we get the interrupt wakes us up */
+}
+
+static unsigned int get_compensation(int ratio)
+{
+	unsigned int comp = 0;
+
+	/* we only use compensation if all adjacent ones are good */
+	if (ratio == 1 &&
+		cal_data[ratio].confidence >= CONFIDENCE_OK &&
+		cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
+		cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
+		comp = (cal_data[ratio].steady_comp +
+			cal_data[ratio + 1].steady_comp +
+			cal_data[ratio + 2].steady_comp) / 3;
+	} else if (ratio == MAX_TARGET_RATIO - 1 &&
+		cal_data[ratio].confidence >= CONFIDENCE_OK &&
+		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
+		cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
+		comp = (cal_data[ratio].steady_comp +
+			cal_data[ratio - 1].steady_comp +
+			cal_data[ratio - 2].steady_comp) / 3;
+	} else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
+		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
+		cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
+		comp = (cal_data[ratio].steady_comp +
+			cal_data[ratio - 1].steady_comp +
+			cal_data[ratio + 1].steady_comp) / 3;
+	}
+
+	/* REVISIT: simple penalty of double idle injection */
+	if (reduce_irq)
+		comp = ratio;
+	/* do not exceed limit */
+	if (comp + ratio >= MAX_TARGET_RATIO)
+		comp = MAX_TARGET_RATIO - ratio - 1;
+
+	return comp;
+}
+
+static void adjust_compensation(int target_ratio, unsigned int win)
+{
+	int delta;
+	struct powerclamp_calibration_data *d = &cal_data[target_ratio];
+
+	/*
+	 * adjust compensations if confidence level has not been reached or
+	 * there are too many wakeups during the last idle injection period, we
+	 * cannot trust the data for compensation.
+	 */
+	if (d->confidence >= CONFIDENCE_OK ||
+		atomic_read(&idle_wakeup_counter) >
+		win * num_online_cpus())
+		return;
+
+	delta = set_target_ratio - current_ratio;
+	/* filter out bad data */
+	if (delta >= 0 && delta <= (1+target_ratio/10)) {
+		if (d->steady_comp)
+			d->steady_comp =
+				roundup(delta+d->steady_comp, 2)/2;
+		else
+			d->steady_comp = delta;
+		d->confidence++;
+	}
+}
+
+static bool powerclamp_adjust_controls(unsigned int target_ratio,
+				unsigned int guard, unsigned int win)
+{
+	static u64 msr_last, tsc_last;
+	u64 msr_now, tsc_now;
+	u64 val64;
+
+	/* check result for the last window */
+	msr_now = pkg_state_counter();
+	rdtscll(tsc_now);
+
+	/* calculate pkg cstate vs tsc ratio */
+	if (!msr_last || !tsc_last)
+		current_ratio = 1;
+	else if (tsc_now-tsc_last) {
+		val64 = 100*(msr_now-msr_last);
+		do_div(val64, (tsc_now-tsc_last));
+		current_ratio = val64;
+	}
+
+	/* update record */
+	msr_last = msr_now;
+	tsc_last = tsc_now;
+
+	adjust_compensation(target_ratio, win);
+	/*
+	 * too many external interrupts, set flag such
+	 * that we can take measure later.
+	 */
+	reduce_irq = atomic_read(&idle_wakeup_counter) >=
+		2 * win * num_online_cpus();
+
+	atomic_set(&idle_wakeup_counter, 0);
+	/* if we are above target+guard, skip */
+	return set_target_ratio + guard <= current_ratio;
+}
+
+static int clamp_thread(void *arg)
+{
+	int cpunr = (unsigned long)arg;
+	DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0);
+	static const struct sched_param param = {
+		.sched_priority = MAX_USER_RT_PRIO/2,
+	};
+	unsigned int count = 0;
+	unsigned int target_ratio;
+
+	set_bit(cpunr, cpu_clamping_mask);
+	set_freezable();
+	init_timer_on_stack(&wakeup_timer);
+	sched_setscheduler(current, SCHED_FIFO, &param);
+
+	while (true == clamping && !kthread_should_stop() &&
+		cpu_online(cpunr)) {
+		int sleeptime;
+		unsigned long target_jiffies;
+		unsigned int guard;
+		unsigned int compensation = 0;
+		int interval; /* jiffies to sleep for each attempt */
+		unsigned int duration_jiffies = msecs_to_jiffies(duration);
+		unsigned int window_size_now;
+
+		try_to_freeze();
+		/*
+		 * make sure user selected ratio does not take effect until
+		 * the next round. adjust target_ratio if user has changed
+		 * target such that we can converge quickly.
+		 */
+		target_ratio = set_target_ratio;
+		guard = 1 + target_ratio/20;
+		window_size_now = window_size;
+		count++;
+
+		/*
+		 * systems may have different ability to enter package level
+		 * c-states, thus we need to compensate the injected idle ratio
+		 * to achieve the actual target reported by the HW.
+		 */
+		compensation = get_compensation(target_ratio);
+		interval = duration_jiffies*100/(target_ratio+compensation);
+
+		/* align idle time */
+		target_jiffies = roundup(jiffies, interval);
+		sleeptime = target_jiffies - jiffies;
+		if (sleeptime <= 0)
+			sleeptime = 1;
+		schedule_timeout_interruptible(sleeptime);
+		/*
+		 * only elected controlling cpu can collect stats and update
+		 * control parameters.
+		 */
+		if (cpunr == control_cpu && !(count%window_size_now)) {
+			should_skip =
+				powerclamp_adjust_controls(target_ratio,
+							guard, window_size_now);
+			smp_mb();
+		}
+
+		if (should_skip)
+			continue;
+
+		target_jiffies = jiffies + duration_jiffies;
+		mod_timer(&wakeup_timer, target_jiffies);
+		if (unlikely(local_softirq_pending()))
+			continue;
+		/*
+		 * stop tick sched during idle time, interrupts are still
+		 * allowed. thus jiffies are updated properly.
+		 */
+		preempt_disable();
+		tick_nohz_idle_enter();
+		/* mwait until target jiffies is reached */
+		while (time_before(jiffies, target_jiffies)) {
+			unsigned long ecx = 1;
+			unsigned long eax = target_mwait;
+
+			/*
+			 * REVISIT: may call enter_idle() to notify drivers who
+			 * can save power during cpu idle. same for exit_idle()
+			 */
+			local_touch_nmi();
+			stop_critical_timings();
+			__monitor((void *)&current_thread_info()->flags, 0, 0);
+			cpu_relax(); /* allow HT sibling to run */
+			__mwait(eax, ecx);
+			start_critical_timings();
+			atomic_inc(&idle_wakeup_counter);
+		}
+		tick_nohz_idle_exit();
+		preempt_enable_no_resched();
+	}
+	del_timer_sync(&wakeup_timer);
+	clear_bit(cpunr, cpu_clamping_mask);
+
+	return 0;
+}
+
+/*
+ * 1 HZ polling while clamping is active, useful for userspace
+ * to monitor actual idle ratio.
+ */
+static void poll_pkg_cstate(struct work_struct *dummy);
+static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
+static void poll_pkg_cstate(struct work_struct *dummy)
+{
+	static u64 msr_last;
+	static u64 tsc_last;
+	static unsigned long jiffies_last;
+
+	u64 msr_now;
+	unsigned long jiffies_now;
+	u64 tsc_now;
+	u64 val64;
+
+	msr_now = pkg_state_counter();
+	rdtscll(tsc_now);
+	jiffies_now = jiffies;
+
+	/* calculate pkg cstate vs tsc ratio */
+	if (!msr_last || !tsc_last)
+		pkg_cstate_ratio_cur = 1;
+	else {
+		if (tsc_now - tsc_last) {
+			val64 = 100 * (msr_now - msr_last);
+			do_div(val64, (tsc_now - tsc_last));
+			pkg_cstate_ratio_cur = val64;
+		}
+	}
+
+	/* update record */
+	msr_last = msr_now;
+	jiffies_last = jiffies_now;
+	tsc_last = tsc_now;
+
+	if (true == clamping)
+		schedule_delayed_work(&poll_pkg_cstate_work, HZ);
+}
+
+static int start_power_clamp(void)
+{
+	unsigned long cpu;
+	struct task_struct *thread;
+
+	/* check if pkg cstate counter is completely 0, abort in this case */
+	if (!pkg_state_counter()) {
+		pr_err("pkg cstate counter not functional, abort\n");
+		return -EINVAL;
+	}
+
+	set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
+	/* prevent cpu hotplug */
+	get_online_cpus();
+
+	/* prefer BSP */
+	control_cpu = 0;
+	if (!cpu_online(control_cpu))
+		control_cpu = smp_processor_id();
+
+	clamping = true;
+	schedule_delayed_work(&poll_pkg_cstate_work, 0);
+
+	/* start one thread per online cpu */
+	for_each_online_cpu(cpu) {
+		struct task_struct **p =
+			per_cpu_ptr(powerclamp_thread, cpu);
+
+		thread = kthread_create_on_node(clamp_thread,
+						(void *) cpu,
+						cpu_to_node(cpu),
+						"kidle_inject/%ld", cpu);
+		/* bind to cpu here */
+		if (likely(!IS_ERR(thread))) {
+			kthread_bind(thread, cpu);
+			wake_up_process(thread);
+			*p = thread;
+		}
+
+	}
+	put_online_cpus();
+
+	return 0;
+}
+
+static void end_power_clamp(void)
+{
+	int i;
+	struct task_struct *thread;
+
+	clamping = false;
+	/*
+	 * make clamping visible to other cpus and give per cpu clamping threads
+	 * sometime to exit, or gets killed later.
+	 */
+	smp_mb();
+	msleep(20);
+	if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
+		for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
+			pr_debug("clamping thread for cpu %d alive, kill\n", i);
+			thread = *per_cpu_ptr(powerclamp_thread, i);
+			kthread_stop(thread);
+		}
+	}
+}
+
+static int powerclamp_cpu_callback(struct notifier_block *nfb,
+				unsigned long action, void *hcpu)
+{
+	unsigned long cpu = (unsigned long)hcpu;
+	struct task_struct *thread;
+	struct task_struct **percpu_thread =
+		per_cpu_ptr(powerclamp_thread, cpu);
+
+	if (false == clamping)
+		goto exit_ok;
+
+	switch (action) {
+	case CPU_ONLINE:
+		thread = kthread_create_on_node(clamp_thread,
+						(void *) cpu,
+						cpu_to_node(cpu),
+						"kidle_inject/%lu", cpu);
+		if (likely(!IS_ERR(thread))) {
+			kthread_bind(thread, cpu);
+			wake_up_process(thread);
+			*percpu_thread = thread;
+		}
+		/* prefer BSP as controlling CPU */
+		if (cpu == 0) {
+			control_cpu = 0;
+			smp_mb();
+		}
+		break;
+	case CPU_DEAD:
+		if (test_bit(cpu, cpu_clamping_mask)) {
+			pr_err("cpu %lu dead but powerclamping thread is not\n",
+				cpu);
+			kthread_stop(*percpu_thread);
+		}
+		if (cpu == control_cpu) {
+			control_cpu = smp_processor_id();
+			smp_mb();
+		}
+	}
+
+exit_ok:
+	return NOTIFY_OK;
+}
+
+static struct notifier_block powerclamp_cpu_notifier = {
+	.notifier_call = powerclamp_cpu_callback,
+};
+
+static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
+				 unsigned long *state)
+{
+	*state = MAX_TARGET_RATIO;
+
+	return 0;
+}
+
+static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
+				 unsigned long *state)
+{
+	if (true == clamping)
+		*state = pkg_cstate_ratio_cur;
+	else
+		/* to save power, do not poll idle ratio while not clamping */
+		*state = -1; /* indicates invalid state */
+
+	return 0;
+}
+
+static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
+				 unsigned long new_target_ratio)
+{
+	int ret = 0;
+
+	new_target_ratio = clamp(new_target_ratio, 0UL,
+				(unsigned long) (MAX_TARGET_RATIO-1));
+	if (set_target_ratio == 0 && new_target_ratio > 0) {
+		pr_info("Start idle injection to reduce power\n");
+		set_target_ratio = new_target_ratio;
+		ret = start_power_clamp();
+		goto exit_set;
+	} else	if (set_target_ratio > 0 && new_target_ratio == 0) {
+		pr_info("Stop forced idle injection\n");
+		set_target_ratio = 0;
+		end_power_clamp();
+	} else	/* adjust currently running */ {
+		set_target_ratio = new_target_ratio;
+		/* make new set_target_ratio visible to other cpus */
+		smp_mb();
+	}
+
+exit_set:
+	return ret;
+}
+
+/* bind to generic thermal layer as cooling device*/
+static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
+	.get_max_state = powerclamp_get_max_state,
+	.get_cur_state = powerclamp_get_cur_state,
+	.set_cur_state = powerclamp_set_cur_state,
+};
+
+/* runs on Nehalem and later */
+static const struct x86_cpu_id intel_powerclamp_ids[] = {
+	{ X86_VENDOR_INTEL, 6, 0x1a},
+	{ X86_VENDOR_INTEL, 6, 0x1c},
+	{ X86_VENDOR_INTEL, 6, 0x1e},
+	{ X86_VENDOR_INTEL, 6, 0x1f},
+	{ X86_VENDOR_INTEL, 6, 0x25},
+	{ X86_VENDOR_INTEL, 6, 0x26},
+	{ X86_VENDOR_INTEL, 6, 0x2a},
+	{ X86_VENDOR_INTEL, 6, 0x2c},
+	{ X86_VENDOR_INTEL, 6, 0x2d},
+	{ X86_VENDOR_INTEL, 6, 0x2e},
+	{ X86_VENDOR_INTEL, 6, 0x2f},
+	{ X86_VENDOR_INTEL, 6, 0x3a},
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
+
+static int powerclamp_probe(void)
+{
+	if (!x86_match_cpu(intel_powerclamp_ids)) {
+		pr_err("Intel powerclamp does not run on family %d model %d\n",
+				boot_cpu_data.x86, boot_cpu_data.x86_model);
+		return -ENODEV;
+	}
+	if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
+		!boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ||
+		!boot_cpu_has(X86_FEATURE_MWAIT) ||
+		!boot_cpu_has(X86_FEATURE_ARAT))
+		return -ENODEV;
+
+	/* find the deepest mwait value */
+	find_target_mwait();
+
+	return 0;
+}
+
+static int powerclamp_debug_show(struct seq_file *m, void *unused)
+{
+	int i = 0;
+
+	seq_printf(m, "controlling cpu: %d\n", control_cpu);
+	seq_printf(m, "pct confidence steady dynamic (compensation)\n");
+	for (i = 0; i < MAX_TARGET_RATIO; i++) {
+		seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
+			i,
+			cal_data[i].confidence,
+			cal_data[i].steady_comp,
+			cal_data[i].dynamic_comp);
+	}
+
+	return 0;
+}
+
+static int powerclamp_debug_open(struct inode *inode,
+			struct file *file)
+{
+	return single_open(file, powerclamp_debug_show, inode->i_private);
+}
+
+static const struct file_operations powerclamp_debug_fops = {
+	.open		= powerclamp_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.owner		= THIS_MODULE,
+};
+
+static inline void powerclamp_create_debug_files(void)
+{
+	debug_dir = debugfs_create_dir("intel_powerclamp", NULL);
+	if (!debug_dir)
+		return;
+
+	if (!debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir,
+					cal_data, &powerclamp_debug_fops))
+		goto file_error;
+
+	return;
+
+file_error:
+	debugfs_remove_recursive(debug_dir);
+}
+
+static int powerclamp_init(void)
+{
+	int retval;
+	int bitmap_size;
+
+	bitmap_size = BITS_TO_LONGS(num_possible_cpus()) * sizeof(long);
+	cpu_clamping_mask = kzalloc(bitmap_size, GFP_KERNEL);
+	if (!cpu_clamping_mask)
+		return -ENOMEM;
+
+	/* probe cpu features and ids here */
+	retval = powerclamp_probe();
+	if (retval)
+		return retval;
+	/* set default limit, maybe adjusted during runtime based on feedback */
+	window_size = 2;
+	register_hotcpu_notifier(&powerclamp_cpu_notifier);
+	powerclamp_thread = alloc_percpu(struct task_struct *);
+	cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
+						&powerclamp_cooling_ops);
+	if (IS_ERR(cooling_dev))
+		return -ENODEV;
+
+	if (!duration)
+		duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);
+	powerclamp_create_debug_files();
+
+	return 0;
+}
+module_init(powerclamp_init);
+
+static void powerclamp_exit(void)
+{
+	unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
+	end_power_clamp();
+	free_percpu(powerclamp_thread);
+	thermal_cooling_device_unregister(cooling_dev);
+	kfree(cpu_clamping_mask);
+
+	cancel_delayed_work_sync(&poll_pkg_cstate_work);
+	debugfs_remove_recursive(debug_dir);
+}
+module_exit(powerclamp_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
+MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
+MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs");
diff --git a/drivers/thermal/kirkwood_thermal.c b/drivers/thermal/kirkwood_thermal.c
new file mode 100644
index 000000000000..65cb4f09e8f6
--- /dev/null
+++ b/drivers/thermal/kirkwood_thermal.c
@@ -0,0 +1,134 @@
+/*
+ * Kirkwood thermal sensor driver
+ *
+ * Copyright (C) 2012 Nobuhiro Iwamatsu <iwamatsu@nigauri.org>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/thermal.h>
+
+#define KIRKWOOD_THERMAL_VALID_OFFSET	9
+#define KIRKWOOD_THERMAL_VALID_MASK	0x1
+#define KIRKWOOD_THERMAL_TEMP_OFFSET	10
+#define KIRKWOOD_THERMAL_TEMP_MASK	0x1FF
+
+/* Kirkwood Thermal Sensor Dev Structure */
+struct kirkwood_thermal_priv {
+	void __iomem *sensor;
+};
+
+static int kirkwood_get_temp(struct thermal_zone_device *thermal,
+			  unsigned long *temp)
+{
+	unsigned long reg;
+	struct kirkwood_thermal_priv *priv = thermal->devdata;
+
+	reg = readl_relaxed(priv->sensor);
+
+	/* Valid check */
+	if (!(reg >> KIRKWOOD_THERMAL_VALID_OFFSET) &
+	    KIRKWOOD_THERMAL_VALID_MASK) {
+		dev_err(&thermal->device,
+			"Temperature sensor reading not valid\n");
+		return -EIO;
+	}
+
+	/*
+	 * Calculate temperature. See Section 8.10.1 of the 88AP510,
+	 * datasheet, which has the same sensor.
+	 * Documentation/arm/Marvell/README
+	 */
+	reg = (reg >> KIRKWOOD_THERMAL_TEMP_OFFSET) &
+		KIRKWOOD_THERMAL_TEMP_MASK;
+	*temp = ((2281638UL - (7298*reg)) / 10);
+
+	return 0;
+}
+
+static struct thermal_zone_device_ops ops = {
+	.get_temp = kirkwood_get_temp,
+};
+
+static const struct of_device_id kirkwood_thermal_id_table[] = {
+	{ .compatible = "marvell,kirkwood-thermal" },
+	{}
+};
+
+static int kirkwood_thermal_probe(struct platform_device *pdev)
+{
+	struct thermal_zone_device *thermal = NULL;
+	struct kirkwood_thermal_priv *priv;
+	struct resource *res;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "Failed to get platform resource\n");
+		return -ENODEV;
+	}
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->sensor = devm_request_and_ioremap(&pdev->dev, res);
+	if (!priv->sensor) {
+		dev_err(&pdev->dev, "Failed to request_ioremap memory\n");
+		return -EADDRNOTAVAIL;
+	}
+
+	thermal = thermal_zone_device_register("kirkwood_thermal", 0, 0,
+					       priv, &ops, NULL, 0, 0);
+	if (IS_ERR(thermal)) {
+		dev_err(&pdev->dev,
+			"Failed to register thermal zone device\n");
+		return PTR_ERR(thermal);
+	}
+
+	platform_set_drvdata(pdev, thermal);
+
+	return 0;
+}
+
+static int kirkwood_thermal_exit(struct platform_device *pdev)
+{
+	struct thermal_zone_device *kirkwood_thermal =
+		platform_get_drvdata(pdev);
+
+	thermal_zone_device_unregister(kirkwood_thermal);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+MODULE_DEVICE_TABLE(of, kirkwood_thermal_id_table);
+
+static struct platform_driver kirkwood_thermal_driver = {
+	.probe = kirkwood_thermal_probe,
+	.remove = kirkwood_thermal_exit,
+	.driver = {
+		.name = "kirkwood_thermal",
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(kirkwood_thermal_id_table),
+	},
+};
+
+module_platform_driver(kirkwood_thermal_driver);
+
+MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu@nigauri.org>");
+MODULE_DESCRIPTION("kirkwood thermal driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index 90db951725da..28f091994013 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -19,225 +19,473 @@
  */
 #include <linux/delay.h>
 #include <linux/err.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/reboot.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/thermal.h>
 
-#define THSCR	0x2c
-#define THSSR	0x30
+#define IDLE_INTERVAL	5000
+
+#define COMMON_STR	0x00
+#define COMMON_ENR	0x04
+#define COMMON_INTMSK	0x0c
+
+#define REG_POSNEG	0x20
+#define REG_FILONOFF	0x28
+#define REG_THSCR	0x2c
+#define REG_THSSR	0x30
+#define REG_INTCTRL	0x34
 
 /* THSCR */
-#define CPTAP	0xf
+#define CPCTL	(1 << 12)
 
 /* THSSR */
 #define CTEMP	0x3f
 
-
-struct rcar_thermal_priv {
+struct rcar_thermal_common {
 	void __iomem *base;
 	struct device *dev;
+	struct list_head head;
 	spinlock_t lock;
-	u32 comp;
 };
 
+struct rcar_thermal_priv {
+	void __iomem *base;
+	struct rcar_thermal_common *common;
+	struct thermal_zone_device *zone;
+	struct delayed_work work;
+	struct mutex lock;
+	struct list_head list;
+	int id;
+	int ctemp;
+};
+
+#define rcar_thermal_for_each_priv(pos, common)	\
+	list_for_each_entry(pos, &common->head, list)
+
 #define MCELSIUS(temp)			((temp) * 1000)
-#define rcar_zone_to_priv(zone)		(zone->devdata)
+#define rcar_zone_to_priv(zone)		((zone)->devdata)
+#define rcar_priv_to_dev(priv)		((priv)->common->dev)
+#define rcar_has_irq_support(priv)	((priv)->common->base)
+#define rcar_id_to_shift(priv)		((priv)->id * 8)
+
+#ifdef DEBUG
+# define rcar_force_update_temp(priv)	1
+#else
+# define rcar_force_update_temp(priv)	0
+#endif
 
 /*
  *		basic functions
  */
-static u32 rcar_thermal_read(struct rcar_thermal_priv *priv, u32 reg)
+#define rcar_thermal_common_read(c, r) \
+	_rcar_thermal_common_read(c, COMMON_ ##r)
+static u32 _rcar_thermal_common_read(struct rcar_thermal_common *common,
+				     u32 reg)
 {
-	unsigned long flags;
-	u32 ret;
-
-	spin_lock_irqsave(&priv->lock, flags);
+	return ioread32(common->base + reg);
+}
 
-	ret = ioread32(priv->base + reg);
+#define rcar_thermal_common_write(c, r, d) \
+	_rcar_thermal_common_write(c, COMMON_ ##r, d)
+static void _rcar_thermal_common_write(struct rcar_thermal_common *common,
+				       u32 reg, u32 data)
+{
+	iowrite32(data, common->base + reg);
+}
 
-	spin_unlock_irqrestore(&priv->lock, flags);
+#define rcar_thermal_common_bset(c, r, m, d) \
+	_rcar_thermal_common_bset(c, COMMON_ ##r, m, d)
+static void _rcar_thermal_common_bset(struct rcar_thermal_common *common,
+				      u32 reg, u32 mask, u32 data)
+{
+	u32 val;
 
-	return ret;
+	val = ioread32(common->base + reg);
+	val &= ~mask;
+	val |= (data & mask);
+	iowrite32(val, common->base + reg);
 }
 
-#if 0 /* no user at this point */
-static void rcar_thermal_write(struct rcar_thermal_priv *priv,
-			       u32 reg, u32 data)
+#define rcar_thermal_read(p, r) _rcar_thermal_read(p, REG_ ##r)
+static u32 _rcar_thermal_read(struct rcar_thermal_priv *priv, u32 reg)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&priv->lock, flags);
+	return ioread32(priv->base + reg);
+}
 
+#define rcar_thermal_write(p, r, d) _rcar_thermal_write(p, REG_ ##r, d)
+static void _rcar_thermal_write(struct rcar_thermal_priv *priv,
+				u32 reg, u32 data)
+{
 	iowrite32(data, priv->base + reg);
-
-	spin_unlock_irqrestore(&priv->lock, flags);
 }
-#endif
 
-static void rcar_thermal_bset(struct rcar_thermal_priv *priv, u32 reg,
-			      u32 mask, u32 data)
+#define rcar_thermal_bset(p, r, m, d) _rcar_thermal_bset(p, REG_ ##r, m, d)
+static void _rcar_thermal_bset(struct rcar_thermal_priv *priv, u32 reg,
+			       u32 mask, u32 data)
 {
-	unsigned long flags;
 	u32 val;
 
-	spin_lock_irqsave(&priv->lock, flags);
-
 	val = ioread32(priv->base + reg);
 	val &= ~mask;
 	val |= (data & mask);
 	iowrite32(val, priv->base + reg);
-
-	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
 /*
  *		zone device functions
  */
-static int rcar_thermal_get_temp(struct thermal_zone_device *zone,
-			   unsigned long *temp)
+static int rcar_thermal_update_temp(struct rcar_thermal_priv *priv)
 {
-	struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
-	int val, min, max, tmp;
-
-	tmp = -200; /* default */
-	while (1) {
-		if (priv->comp < 1 || priv->comp > 12) {
-			dev_err(priv->dev,
-				"THSSR invalid data (%d)\n", priv->comp);
-			priv->comp = 4; /* for next thermal */
-			return -EINVAL;
-		}
+	struct device *dev = rcar_priv_to_dev(priv);
+	int i;
+	int ctemp, old, new;
 
-		/*
-		 * THS comparator offset and the reference temperature
-		 *
-		 * Comparator	| reference	| Temperature field
-		 * offset	| temperature	| measurement
-		 *		| (degrees C)	| (degrees C)
-		 * -------------+---------------+-------------------
-		 *  1		|  -45		|  -45 to  -30
-		 *  2		|  -30		|  -30 to  -15
-		 *  3		|  -15		|  -15 to    0
-		 *  4		|    0		|    0 to  +15
-		 *  5		|  +15		|  +15 to  +30
-		 *  6		|  +30		|  +30 to  +45
-		 *  7		|  +45		|  +45 to  +60
-		 *  8		|  +60		|  +60 to  +75
-		 *  9		|  +75		|  +75 to  +90
-		 * 10		|  +90		|  +90 to +105
-		 * 11		| +105		| +105 to +120
-		 * 12		| +120		| +120 to +135
-		 */
+	mutex_lock(&priv->lock);
 
-		/* calculate thermal limitation */
-		min = (priv->comp * 15) - 60;
-		max = min + 15;
+	/*
+	 * TSC decides a value of CPTAP automatically,
+	 * and this is the conditions which validate interrupt.
+	 */
+	rcar_thermal_bset(priv, THSCR, CPCTL, CPCTL);
 
+	ctemp = 0;
+	old = ~0;
+	for (i = 0; i < 128; i++) {
 		/*
 		 * we need to wait 300us after changing comparator offset
 		 * to get stable temperature.
 		 * see "Usage Notes" on datasheet
 		 */
-		rcar_thermal_bset(priv, THSCR, CPTAP, priv->comp);
 		udelay(300);
 
-		/* calculate current temperature */
-		val = rcar_thermal_read(priv, THSSR) & CTEMP;
-		val = (val * 5) - 65;
+		new = rcar_thermal_read(priv, THSSR) & CTEMP;
+		if (new == old) {
+			ctemp = new;
+			break;
+		}
+		old = new;
+	}
 
-		dev_dbg(priv->dev, "comp/min/max/val = %d/%d/%d/%d\n",
-			priv->comp, min, max, val);
+	if (!ctemp) {
+		dev_err(dev, "thermal sensor was broken\n");
+		return -EINVAL;
+	}
 
-		/*
-		 * If val is same as min/max, then,
-		 * it should try again on next comparator.
-		 * But the val might be correct temperature.
-		 * Keep it on "tmp" and compare with next val.
-		 */
-		if (tmp == val)
-			break;
+	/*
+	 * enable IRQ
+	 */
+	if (rcar_has_irq_support(priv)) {
+		rcar_thermal_write(priv, FILONOFF, 0);
 
-		if (val <= min) {
-			tmp = min;
-			priv->comp--; /* try again */
-		} else if (val >= max) {
-			tmp = max;
-			priv->comp++; /* try again */
-		} else {
-			tmp = val;
-			break;
-		}
+		/* enable Rising/Falling edge interrupt */
+		rcar_thermal_write(priv, POSNEG,  0x1);
+		rcar_thermal_write(priv, INTCTRL, (((ctemp - 0) << 8) |
+						   ((ctemp - 1) << 0)));
+	}
+
+	dev_dbg(dev, "thermal%d  %d -> %d\n", priv->id, priv->ctemp, ctemp);
+
+	priv->ctemp = ctemp;
+
+	mutex_unlock(&priv->lock);
+
+	return 0;
+}
+
+static int rcar_thermal_get_temp(struct thermal_zone_device *zone,
+				 unsigned long *temp)
+{
+	struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
+
+	if (!rcar_has_irq_support(priv) || rcar_force_update_temp(priv))
+		rcar_thermal_update_temp(priv);
+
+	mutex_lock(&priv->lock);
+	*temp =  MCELSIUS((priv->ctemp * 5) - 65);
+	mutex_unlock(&priv->lock);
+
+	return 0;
+}
+
+static int rcar_thermal_get_trip_type(struct thermal_zone_device *zone,
+				      int trip, enum thermal_trip_type *type)
+{
+	struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
+	struct device *dev = rcar_priv_to_dev(priv);
+
+	/* see rcar_thermal_get_temp() */
+	switch (trip) {
+	case 0: /* +90 <= temp */
+		*type = THERMAL_TRIP_CRITICAL;
+		break;
+	default:
+		dev_err(dev, "rcar driver trip error\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rcar_thermal_get_trip_temp(struct thermal_zone_device *zone,
+				      int trip, unsigned long *temp)
+{
+	struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
+	struct device *dev = rcar_priv_to_dev(priv);
+
+	/* see rcar_thermal_get_temp() */
+	switch (trip) {
+	case 0: /* +90 <= temp */
+		*temp = MCELSIUS(90);
+		break;
+	default:
+		dev_err(dev, "rcar driver trip error\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rcar_thermal_notify(struct thermal_zone_device *zone,
+			       int trip, enum thermal_trip_type type)
+{
+	struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
+	struct device *dev = rcar_priv_to_dev(priv);
+
+	switch (type) {
+	case THERMAL_TRIP_CRITICAL:
+		/* FIXME */
+		dev_warn(dev, "Thermal reached to critical temperature\n");
+		break;
+	default:
+		break;
 	}
 
-	*temp = MCELSIUS(tmp);
 	return 0;
 }
 
 static struct thermal_zone_device_ops rcar_thermal_zone_ops = {
-	.get_temp = rcar_thermal_get_temp,
+	.get_temp	= rcar_thermal_get_temp,
+	.get_trip_type	= rcar_thermal_get_trip_type,
+	.get_trip_temp	= rcar_thermal_get_trip_temp,
+	.notify		= rcar_thermal_notify,
 };
 
 /*
- *		platform functions
+ *		interrupt
  */
-static int rcar_thermal_probe(struct platform_device *pdev)
+#define rcar_thermal_irq_enable(p)	_rcar_thermal_irq_ctrl(p, 1)
+#define rcar_thermal_irq_disable(p)	_rcar_thermal_irq_ctrl(p, 0)
+static void _rcar_thermal_irq_ctrl(struct rcar_thermal_priv *priv, int enable)
+{
+	struct rcar_thermal_common *common = priv->common;
+	unsigned long flags;
+	u32 mask = 0x3 << rcar_id_to_shift(priv); /* enable Rising/Falling */
+
+	spin_lock_irqsave(&common->lock, flags);
+
+	rcar_thermal_common_bset(common, INTMSK, mask, enable ? 0 : mask);
+
+	spin_unlock_irqrestore(&common->lock, flags);
+}
+
+static void rcar_thermal_work(struct work_struct *work)
 {
-	struct thermal_zone_device *zone;
 	struct rcar_thermal_priv *priv;
-	struct resource *res;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "Could not get platform resource\n");
-		return -ENODEV;
+	priv = container_of(work, struct rcar_thermal_priv, work.work);
+
+	rcar_thermal_update_temp(priv);
+	rcar_thermal_irq_enable(priv);
+	thermal_zone_device_update(priv->zone);
+}
+
+static u32 rcar_thermal_had_changed(struct rcar_thermal_priv *priv, u32 status)
+{
+	struct device *dev = rcar_priv_to_dev(priv);
+
+	status = (status >> rcar_id_to_shift(priv)) & 0x3;
+
+	if (status & 0x3) {
+		dev_dbg(dev, "thermal%d %s%s\n",
+			priv->id,
+			(status & 0x2) ? "Rising " : "",
+			(status & 0x1) ? "Falling" : "");
 	}
 
-	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv) {
-		dev_err(&pdev->dev, "Could not allocate priv\n");
-		return -ENOMEM;
+	return status;
+}
+
+static irqreturn_t rcar_thermal_irq(int irq, void *data)
+{
+	struct rcar_thermal_common *common = data;
+	struct rcar_thermal_priv *priv;
+	unsigned long flags;
+	u32 status, mask;
+
+	spin_lock_irqsave(&common->lock, flags);
+
+	mask	= rcar_thermal_common_read(common, INTMSK);
+	status	= rcar_thermal_common_read(common, STR);
+	rcar_thermal_common_write(common, STR, 0x000F0F0F & mask);
+
+	spin_unlock_irqrestore(&common->lock, flags);
+
+	status = status & ~mask;
+
+	/*
+	 * check the status
+	 */
+	rcar_thermal_for_each_priv(priv, common) {
+		if (rcar_thermal_had_changed(priv, status)) {
+			rcar_thermal_irq_disable(priv);
+			schedule_delayed_work(&priv->work,
+					      msecs_to_jiffies(300));
+		}
 	}
 
-	priv->comp = 4; /* basic setup */
-	priv->dev = &pdev->dev;
-	spin_lock_init(&priv->lock);
-	priv->base = devm_ioremap_nocache(&pdev->dev,
-					  res->start, resource_size(res));
-	if (!priv->base) {
-		dev_err(&pdev->dev, "Unable to ioremap thermal register\n");
+	return IRQ_HANDLED;
+}
+
+/*
+ *		platform functions
+ */
+static int rcar_thermal_probe(struct platform_device *pdev)
+{
+	struct rcar_thermal_common *common;
+	struct rcar_thermal_priv *priv;
+	struct device *dev = &pdev->dev;
+	struct resource *res, *irq;
+	int mres = 0;
+	int i;
+	int idle = IDLE_INTERVAL;
+
+	common = devm_kzalloc(dev, sizeof(*common), GFP_KERNEL);
+	if (!common) {
+		dev_err(dev, "Could not allocate common\n");
 		return -ENOMEM;
 	}
 
-	zone = thermal_zone_device_register("rcar_thermal", 0, 0, priv,
-				    &rcar_thermal_zone_ops, NULL, 0, 0);
-	if (IS_ERR(zone)) {
-		dev_err(&pdev->dev, "thermal zone device is NULL\n");
-		return PTR_ERR(zone);
+	INIT_LIST_HEAD(&common->head);
+	spin_lock_init(&common->lock);
+	common->dev = dev;
+
+	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (irq) {
+		int ret;
+
+		/*
+		 * platform has IRQ support.
+		 * Then, drier use common register
+		 */
+		res = platform_get_resource(pdev, IORESOURCE_MEM, mres++);
+		if (!res) {
+			dev_err(dev, "Could not get platform resource\n");
+			return -ENODEV;
+		}
+
+		ret = devm_request_irq(dev, irq->start, rcar_thermal_irq, 0,
+				       dev_name(dev), common);
+		if (ret) {
+			dev_err(dev, "irq request failed\n ");
+			return ret;
+		}
+
+		/*
+		 * rcar_has_irq_support() will be enabled
+		 */
+		common->base = devm_request_and_ioremap(dev, res);
+		if (!common->base) {
+			dev_err(dev, "Unable to ioremap thermal register\n");
+			return -ENOMEM;
+		}
+
+		/* enable temperature comparation */
+		rcar_thermal_common_write(common, ENR, 0x00030303);
+
+		idle = 0; /* polling delaye is not needed */
 	}
 
-	platform_set_drvdata(pdev, zone);
+	for (i = 0;; i++) {
+		res = platform_get_resource(pdev, IORESOURCE_MEM, mres++);
+		if (!res)
+			break;
+
+		priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+		if (!priv) {
+			dev_err(dev, "Could not allocate priv\n");
+			return -ENOMEM;
+		}
+
+		priv->base = devm_request_and_ioremap(dev, res);
+		if (!priv->base) {
+			dev_err(dev, "Unable to ioremap priv register\n");
+			return -ENOMEM;
+		}
 
-	dev_info(&pdev->dev, "proved\n");
+		priv->common = common;
+		priv->id = i;
+		mutex_init(&priv->lock);
+		INIT_LIST_HEAD(&priv->list);
+		INIT_DELAYED_WORK(&priv->work, rcar_thermal_work);
+		rcar_thermal_update_temp(priv);
+
+		priv->zone = thermal_zone_device_register("rcar_thermal",
+						1, 0, priv,
+						&rcar_thermal_zone_ops, NULL, 0,
+						idle);
+		if (IS_ERR(priv->zone)) {
+			dev_err(dev, "can't register thermal zone\n");
+			goto error_unregister;
+		}
+
+		list_move_tail(&priv->list, &common->head);
+
+		if (rcar_has_irq_support(priv))
+			rcar_thermal_irq_enable(priv);
+	}
+
+	platform_set_drvdata(pdev, common);
+
+	dev_info(dev, "%d sensor proved\n", i);
 
 	return 0;
+
+error_unregister:
+	rcar_thermal_for_each_priv(priv, common)
+		thermal_zone_device_unregister(priv->zone);
+
+	return -ENODEV;
 }
 
 static int rcar_thermal_remove(struct platform_device *pdev)
 {
-	struct thermal_zone_device *zone = platform_get_drvdata(pdev);
+	struct rcar_thermal_common *common = platform_get_drvdata(pdev);
+	struct rcar_thermal_priv *priv;
+
+	rcar_thermal_for_each_priv(priv, common)
+		thermal_zone_device_unregister(priv->zone);
 
-	thermal_zone_device_unregister(zone);
 	platform_set_drvdata(pdev, NULL);
 
 	return 0;
 }
 
+static const struct of_device_id rcar_thermal_dt_ids[] = {
+	{ .compatible = "renesas,rcar-thermal", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids);
+
 static struct platform_driver rcar_thermal_driver = {
 	.driver	= {
 		.name	= "rcar_thermal",
+		.of_match_table = rcar_thermal_dt_ids,
 	},
 	.probe		= rcar_thermal_probe,
 	.remove		= rcar_thermal_remove,
diff --git a/drivers/thermal/spear_thermal.c b/drivers/thermal/spear_thermal.c
index 6b2d8b21aaee..3c5ee5607977 100644
--- a/drivers/thermal/spear_thermal.c
+++ b/drivers/thermal/spear_thermal.c
@@ -131,7 +131,7 @@ static int spear_thermal_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
-	stdev->clk = clk_get(&pdev->dev, NULL);
+	stdev->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(stdev->clk)) {
 		dev_err(&pdev->dev, "Can't get clock\n");
 		return PTR_ERR(stdev->clk);
@@ -140,7 +140,7 @@ static int spear_thermal_probe(struct platform_device *pdev)
 	ret = clk_enable(stdev->clk);
 	if (ret) {
 		dev_err(&pdev->dev, "Can't enable clock\n");
-		goto put_clk;
+		return ret;
 	}
 
 	stdev->flags = val;
@@ -163,8 +163,6 @@ static int spear_thermal_probe(struct platform_device *pdev)
 
 disable_clk:
 	clk_disable(stdev->clk);
-put_clk:
-	clk_put(stdev->clk);
 
 	return ret;
 }
@@ -183,7 +181,6 @@ static int spear_thermal_exit(struct platform_device *pdev)
 	writel_relaxed(actual_mask & ~stdev->flags, stdev->thermal_base);
 
 	clk_disable(stdev->clk);
-	clk_put(stdev->clk);
 
 	return 0;
 }
diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index 0cd5e9fbab1c..407cde3211c1 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -35,21 +35,54 @@
  *       state for this trip point
  *    b. if the trend is THERMAL_TREND_DROPPING, use lower cooling
  *       state for this trip point
+ *    c. if the trend is THERMAL_TREND_RAISE_FULL, use upper limit
+ *       for this trip point
+ *    d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit
+ *       for this trip point
+ * If the temperature is lower than a trip point,
+ *    a. if the trend is THERMAL_TREND_RAISING, do nothing
+ *    b. if the trend is THERMAL_TREND_DROPPING, use lower cooling
+ *       state for this trip point, if the cooling state already
+ *       equals lower limit, deactivate the thermal instance
+ *    c. if the trend is THERMAL_TREND_RAISE_FULL, do nothing
+ *    d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit,
+ *       if the cooling state already equals lower limit,
+ *       deactive the thermal instance
  */
 static unsigned long get_target_state(struct thermal_instance *instance,
-					enum thermal_trend trend)
+				enum thermal_trend trend, bool throttle)
 {
 	struct thermal_cooling_device *cdev = instance->cdev;
 	unsigned long cur_state;
 
 	cdev->ops->get_cur_state(cdev, &cur_state);
 
-	if (trend == THERMAL_TREND_RAISING) {
-		cur_state = cur_state < instance->upper ?
-			    (cur_state + 1) : instance->upper;
-	} else if (trend == THERMAL_TREND_DROPPING) {
-		cur_state = cur_state > instance->lower ?
-			    (cur_state - 1) : instance->lower;
+	switch (trend) {
+	case THERMAL_TREND_RAISING:
+		if (throttle)
+			cur_state = cur_state < instance->upper ?
+				    (cur_state + 1) : instance->upper;
+		break;
+	case THERMAL_TREND_RAISE_FULL:
+		if (throttle)
+			cur_state = instance->upper;
+		break;
+	case THERMAL_TREND_DROPPING:
+		if (cur_state == instance->lower) {
+			if (!throttle)
+				cur_state = -1;
+		} else
+			cur_state -= 1;
+		break;
+	case THERMAL_TREND_DROP_FULL:
+		if (cur_state == instance->lower) {
+			if (!throttle)
+				cur_state = -1;
+		} else
+			cur_state = instance->lower;
+		break;
+	default:
+		break;
 	}
 
 	return cur_state;
@@ -66,57 +99,14 @@ static void update_passive_instance(struct thermal_zone_device *tz,
 		tz->passive += value;
 }
 
-static void update_instance_for_throttle(struct thermal_zone_device *tz,
-				int trip, enum thermal_trip_type trip_type,
-				enum thermal_trend trend)
-{
-	struct thermal_instance *instance;
-
-	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
-		if (instance->trip != trip)
-			continue;
-
-		instance->target = get_target_state(instance, trend);
-
-		/* Activate a passive thermal instance */
-		if (instance->target == THERMAL_NO_TARGET)
-			update_passive_instance(tz, trip_type, 1);
-
-		instance->cdev->updated = false; /* cdev needs update */
-	}
-}
-
-static void update_instance_for_dethrottle(struct thermal_zone_device *tz,
-				int trip, enum thermal_trip_type trip_type)
-{
-	struct thermal_instance *instance;
-	struct thermal_cooling_device *cdev;
-	unsigned long cur_state;
-
-	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
-		if (instance->trip != trip ||
-			instance->target == THERMAL_NO_TARGET)
-			continue;
-
-		cdev = instance->cdev;
-		cdev->ops->get_cur_state(cdev, &cur_state);
-
-		instance->target = cur_state > instance->lower ?
-			    (cur_state - 1) : THERMAL_NO_TARGET;
-
-		/* Deactivate a passive thermal instance */
-		if (instance->target == THERMAL_NO_TARGET)
-			update_passive_instance(tz, trip_type, -1);
-
-		cdev->updated = false; /* cdev needs update */
-	}
-}
-
 static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
 {
 	long trip_temp;
 	enum thermal_trip_type trip_type;
 	enum thermal_trend trend;
+	struct thermal_instance *instance;
+	bool throttle = false;
+	int old_target;
 
 	if (trip == THERMAL_TRIPS_NONE) {
 		trip_temp = tz->forced_passive;
@@ -128,12 +118,30 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
 
 	trend = get_tz_trend(tz, trip);
 
+	if (tz->temperature >= trip_temp)
+		throttle = true;
+
 	mutex_lock(&tz->lock);
 
-	if (tz->temperature >= trip_temp)
-		update_instance_for_throttle(tz, trip, trip_type, trend);
-	else
-		update_instance_for_dethrottle(tz, trip, trip_type);
+	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+		if (instance->trip != trip)
+			continue;
+
+		old_target = instance->target;
+		instance->target = get_target_state(instance, trend, throttle);
+
+		/* Activate a passive thermal instance */
+		if (old_target == THERMAL_NO_TARGET &&
+			instance->target != THERMAL_NO_TARGET)
+			update_passive_instance(tz, trip_type, 1);
+		/* Deactivate a passive thermal instance */
+		else if (old_target != THERMAL_NO_TARGET &&
+			instance->target == THERMAL_NO_TARGET)
+			update_passive_instance(tz, trip_type, -1);
+
+
+		instance->cdev->updated = false; /* cdev needs update */
+	}
 
 	mutex_unlock(&tz->lock);
 }
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 8c8ce806180f..5b7863a03f98 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -32,7 +32,6 @@
 #include <linux/kdev_t.h>
 #include <linux/idr.h>
 #include <linux/thermal.h>
-#include <linux/spinlock.h>
 #include <linux/reboot.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
@@ -132,23 +131,16 @@ EXPORT_SYMBOL_GPL(thermal_unregister_governor);
 
 static int get_idr(struct idr *idr, struct mutex *lock, int *id)
 {
-	int err;
-
-again:
-	if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0))
-		return -ENOMEM;
+	int ret;
 
 	if (lock)
 		mutex_lock(lock);
-	err = idr_get_new(idr, NULL, id);
+	ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL);
 	if (lock)
 		mutex_unlock(lock);
-	if (unlikely(err == -EAGAIN))
-		goto again;
-	else if (unlikely(err))
-		return err;
-
-	*id = *id & MAX_IDR_MASK;
+	if (unlikely(ret < 0))
+		return ret;
+	*id = ret;
 	return 0;
 }
 
@@ -355,8 +347,9 @@ static void handle_critical_trips(struct thermal_zone_device *tz,
 		tz->ops->notify(tz, trip, trip_type);
 
 	if (trip_type == THERMAL_TRIP_CRITICAL) {
-		pr_emerg("Critical temperature reached(%d C),shutting down\n",
-			 tz->temperature / 1000);
+		dev_emerg(&tz->device,
+			  "critical temperature reached(%d C),shutting down\n",
+			  tz->temperature / 1000);
 		orderly_poweroff(true);
 	}
 }
@@ -378,23 +371,57 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip)
 	monitor_thermal_zone(tz);
 }
 
+static int thermal_zone_get_temp(struct thermal_zone_device *tz,
+				unsigned long *temp)
+{
+	int ret = 0;
+#ifdef CONFIG_THERMAL_EMULATION
+	int count;
+	unsigned long crit_temp = -1UL;
+	enum thermal_trip_type type;
+#endif
+
+	mutex_lock(&tz->lock);
+
+	ret = tz->ops->get_temp(tz, temp);
+#ifdef CONFIG_THERMAL_EMULATION
+	if (!tz->emul_temperature)
+		goto skip_emul;
+
+	for (count = 0; count < tz->trips; count++) {
+		ret = tz->ops->get_trip_type(tz, count, &type);
+		if (!ret && type == THERMAL_TRIP_CRITICAL) {
+			ret = tz->ops->get_trip_temp(tz, count, &crit_temp);
+			break;
+		}
+	}
+
+	if (ret)
+		goto skip_emul;
+
+	if (*temp < crit_temp)
+		*temp = tz->emul_temperature;
+skip_emul:
+#endif
+	mutex_unlock(&tz->lock);
+	return ret;
+}
+
 static void update_temperature(struct thermal_zone_device *tz)
 {
 	long temp;
 	int ret;
 
-	mutex_lock(&tz->lock);
-
-	ret = tz->ops->get_temp(tz, &temp);
+	ret = thermal_zone_get_temp(tz, &temp);
 	if (ret) {
-		pr_warn("failed to read out thermal zone %d\n", tz->id);
-		goto exit;
+		dev_warn(&tz->device, "failed to read out thermal zone %d\n",
+			 tz->id);
+		return;
 	}
 
+	mutex_lock(&tz->lock);
 	tz->last_temperature = tz->temperature;
 	tz->temperature = temp;
-
-exit:
 	mutex_unlock(&tz->lock);
 }
 
@@ -437,10 +464,7 @@ temp_show(struct device *dev, struct device_attribute *attr, char *buf)
 	long temperature;
 	int ret;
 
-	if (!tz->ops->get_temp)
-		return -EPERM;
-
-	ret = tz->ops->get_temp(tz, &temperature);
+	ret = thermal_zone_get_temp(tz, &temperature);
 
 	if (ret)
 		return ret;
@@ -700,6 +724,31 @@ policy_show(struct device *dev, struct device_attribute *devattr, char *buf)
 	return sprintf(buf, "%s\n", tz->governor->name);
 }
 
+#ifdef CONFIG_THERMAL_EMULATION
+static ssize_t
+emul_temp_store(struct device *dev, struct device_attribute *attr,
+		     const char *buf, size_t count)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	int ret = 0;
+	unsigned long temperature;
+
+	if (kstrtoul(buf, 10, &temperature))
+		return -EINVAL;
+
+	if (!tz->ops->set_emul_temp) {
+		mutex_lock(&tz->lock);
+		tz->emul_temperature = temperature;
+		mutex_unlock(&tz->lock);
+	} else {
+		ret = tz->ops->set_emul_temp(tz, temperature);
+	}
+
+	return ret ? ret : count;
+}
+static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store);
+#endif/*CONFIG_THERMAL_EMULATION*/
+
 static DEVICE_ATTR(type, 0444, type_show, NULL);
 static DEVICE_ATTR(temp, 0444, temp_show, NULL);
 static DEVICE_ATTR(mode, 0644, mode_show, mode_store);
@@ -842,7 +891,7 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf)
 				       temp_input);
 	struct thermal_zone_device *tz = temp->tz;
 
-	ret = tz->ops->get_temp(tz, &temperature);
+	ret = thermal_zone_get_temp(tz, &temperature);
 
 	if (ret)
 		return ret;
@@ -1529,6 +1578,9 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 	if (!ops || !ops->get_temp)
 		return ERR_PTR(-EINVAL);
 
+	if (trips > 0 && !ops->get_trip_type)
+		return ERR_PTR(-EINVAL);
+
 	tz = kzalloc(sizeof(struct thermal_zone_device), GFP_KERNEL);
 	if (!tz)
 		return ERR_PTR(-ENOMEM);
@@ -1592,6 +1644,11 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 			goto unregister;
 	}
 
+#ifdef CONFIG_THERMAL_EMULATION
+	result = device_create_file(&tz->device, &dev_attr_emul_temp);
+	if (result)
+		goto unregister;
+#endif
 	/* Create policy attribute */
 	result = device_create_file(&tz->device, &dev_attr_policy);
 	if (result)
@@ -1711,7 +1768,8 @@ static struct genl_multicast_group thermal_event_mcgrp = {
 	.name = THERMAL_GENL_MCAST_GROUP_NAME,
 };
 
-int thermal_generate_netlink_event(u32 orig, enum events event)
+int thermal_generate_netlink_event(struct thermal_zone_device *tz,
+					enum events event)
 {
 	struct sk_buff *skb;
 	struct nlattr *attr;
@@ -1721,6 +1779,9 @@ int thermal_generate_netlink_event(u32 orig, enum events event)
 	int result;
 	static unsigned int thermal_event_seqnum;
 
+	if (!tz)
+		return -EINVAL;
+
 	/* allocate memory */
 	size = nla_total_size(sizeof(struct thermal_genl_event)) +
 	       nla_total_size(0);
@@ -1755,7 +1816,7 @@ int thermal_generate_netlink_event(u32 orig, enum events event)
 
 	memset(thermal_event, 0, sizeof(struct thermal_genl_event));
 
-	thermal_event->orig = orig;
+	thermal_event->orig = tz->id;
 	thermal_event->event = event;
 
 	/* send multicast genetlink message */
@@ -1767,7 +1828,7 @@ int thermal_generate_netlink_event(u32 orig, enum events event)
 
 	result = genlmsg_multicast(skb, 0, thermal_event_mcgrp.id, GFP_ATOMIC);
 	if (result)
-		pr_info("failed to send netlink event:%d\n", result);
+		dev_err(&tz->device, "Failed to send netlink event:%d", result);
 
 	return result;
 }
@@ -1807,6 +1868,7 @@ static int __init thermal_init(void)
 		idr_destroy(&thermal_cdev_idr);
 		mutex_destroy(&thermal_idr_lock);
 		mutex_destroy(&thermal_list_lock);
+		return result;
 	}
 	result = genetlink_init();
 	return result;
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index a0162cbf0557..cf9210db9fa9 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -729,19 +729,19 @@ config SERIAL_SH_SCI_DMA
 
 config SERIAL_PNX8XXX
 	bool "Enable PNX8XXX SoCs' UART Support"
-	depends on SOC_PNX8550 || SOC_PNX833X
+	depends on SOC_PNX833X
 	select SERIAL_CORE
 	help
-	  If you have a MIPS-based Philips SoC such as PNX8550 or PNX8330
-	  and you want to use serial ports, say Y.  Otherwise, say N.
+	  If you have a MIPS-based Philips SoC such as PNX8330 and you want
+	  to use serial ports, say Y.  Otherwise, say N.
 
 config SERIAL_PNX8XXX_CONSOLE
 	bool "Enable PNX8XX0 serial console"
 	depends on SERIAL_PNX8XXX
 	select SERIAL_CORE_CONSOLE
 	help
-	  If you have a MIPS-based Philips SoC such as PNX8550 or PNX8330
-	  and you want to use serial console, say Y. Otherwise, say N.
+	  If you have a MIPS-based Philips SoC such as PNX8330 and you want
+	  to use serial console, say Y. Otherwise, say N.
 
 config SERIAL_HS_LPC32XX
 	tristate "LPC32XX high speed serial port support"
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index fd473639ab70..05400acbc456 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -960,11 +960,10 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count,
 			loff_t *ppos)
 {
 	int i;
-	struct inode *inode = file->f_path.dentry->d_inode;
 	struct tty_struct *tty = file_tty(file);
 	struct tty_ldisc *ld;
 
-	if (tty_paranoia_check(tty, inode, "tty_read"))
+	if (tty_paranoia_check(tty, file_inode(file), "tty_read"))
 		return -EIO;
 	if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags)))
 		return -EIO;
@@ -1132,12 +1131,11 @@ void tty_write_message(struct tty_struct *tty, char *msg)
 static ssize_t tty_write(struct file *file, const char __user *buf,
 						size_t count, loff_t *ppos)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
 	struct tty_struct *tty = file_tty(file);
  	struct tty_ldisc *ld;
 	ssize_t ret;
 
-	if (tty_paranoia_check(tty, inode, "tty_write"))
+	if (tty_paranoia_check(tty, file_inode(file), "tty_write"))
 		return -EIO;
 	if (!tty || !tty->ops->write ||
 		(test_bit(TTY_IO_ERROR, &tty->flags)))
@@ -2047,7 +2045,7 @@ static unsigned int tty_poll(struct file *filp, poll_table *wait)
 	struct tty_ldisc *ld;
 	int ret = 0;
 
-	if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_poll"))
+	if (tty_paranoia_check(tty, file_inode(filp), "tty_poll"))
 		return 0;
 
 	ld = tty_ldisc_ref_wait(tty);
@@ -2063,7 +2061,7 @@ static int __tty_fasync(int fd, struct file *filp, int on)
 	unsigned long flags;
 	int retval = 0;
 
-	if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_fasync"))
+	if (tty_paranoia_check(tty, file_inode(filp), "tty_fasync"))
 		goto out;
 
 	retval = fasync_helper(fd, filp, on, &tty->fasync);
@@ -2637,9 +2635,8 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	void __user *p = (void __user *)arg;
 	int retval;
 	struct tty_ldisc *ld;
-	struct inode *inode = file->f_dentry->d_inode;
 
-	if (tty_paranoia_check(tty, inode, "tty_ioctl"))
+	if (tty_paranoia_check(tty, file_inode(file), "tty_ioctl"))
 		return -EINVAL;
 
 	real_tty = tty_pair_get_tty(tty);
@@ -2780,12 +2777,11 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 static long tty_compat_ioctl(struct file *file, unsigned int cmd,
 				unsigned long arg)
 {
-	struct inode *inode = file->f_dentry->d_inode;
 	struct tty_struct *tty = file_tty(file);
 	struct tty_ldisc *ld;
 	int retval = -ENOIOCTLCMD;
 
-	if (tty_paranoia_check(tty, inode, "tty_ioctl"))
+	if (tty_paranoia_check(tty, file_inode(file), "tty_ioctl"))
 		return -EINVAL;
 
 	if (tty->ops->compat_ioctl) {
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 5110f367f1f1..c8b926291e28 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -369,26 +369,15 @@ static void uio_dev_del_attributes(struct uio_device *idev)
 static int uio_get_minor(struct uio_device *idev)
 {
 	int retval = -ENOMEM;
-	int id;
 
 	mutex_lock(&minor_lock);
-	if (idr_pre_get(&uio_idr, GFP_KERNEL) == 0)
-		goto exit;
-
-	retval = idr_get_new(&uio_idr, idev, &id);
-	if (retval < 0) {
-		if (retval == -EAGAIN)
-			retval = -ENOMEM;
-		goto exit;
-	}
-	if (id < UIO_MAX_DEVICES) {
-		idev->minor = id;
-	} else {
+	retval = idr_alloc(&uio_idr, idev, 0, UIO_MAX_DEVICES, GFP_KERNEL);
+	if (retval >= 0) {
+		idev->minor = retval;
+	} else if (retval == -ENOSPC) {
 		dev_err(idev->dev, "too many uio devices\n");
 		retval = -EINVAL;
-		idr_remove(&uio_idr, id);
 	}
-exit:
 	mutex_unlock(&minor_lock);
 	return retval;
 }
diff --git a/drivers/usb/host/ehci-timer.c b/drivers/usb/host/ehci-timer.c
index f904071d70df..20dbdcbe9b0f 100644
--- a/drivers/usb/host/ehci-timer.c
+++ b/drivers/usb/host/ehci-timer.c
@@ -113,15 +113,14 @@ static void ehci_poll_ASS(struct ehci_hcd *ehci)
 
 	if (want != actual) {
 
-		/* Poll again later */
-		ehci_enable_event(ehci, EHCI_HRTIMER_POLL_ASS, true);
-		++ehci->ASS_poll_count;
-		return;
+		/* Poll again later, but give up after about 20 ms */
+		if (ehci->ASS_poll_count++ < 20) {
+			ehci_enable_event(ehci, EHCI_HRTIMER_POLL_ASS, true);
+			return;
+		}
+		ehci_dbg(ehci, "Waited too long for the async schedule status (%x/%x), giving up\n",
+				want, actual);
 	}
-
-	if (ehci->ASS_poll_count > 20)
-		ehci_dbg(ehci, "ASS poll count reached %d\n",
-				ehci->ASS_poll_count);
 	ehci->ASS_poll_count = 0;
 
 	/* The status is up-to-date; restart or stop the schedule as needed */
@@ -160,14 +159,14 @@ static void ehci_poll_PSS(struct ehci_hcd *ehci)
 
 	if (want != actual) {
 
-		/* Poll again later */
-		ehci_enable_event(ehci, EHCI_HRTIMER_POLL_PSS, true);
-		return;
+		/* Poll again later, but give up after about 20 ms */
+		if (ehci->PSS_poll_count++ < 20) {
+			ehci_enable_event(ehci, EHCI_HRTIMER_POLL_PSS, true);
+			return;
+		}
+		ehci_dbg(ehci, "Waited too long for the periodic schedule status (%x/%x), giving up\n",
+				want, actual);
 	}
-
-	if (ehci->PSS_poll_count > 20)
-		ehci_dbg(ehci, "PSS poll count reached %d\n",
-				ehci->PSS_poll_count);
 	ehci->PSS_poll_count = 0;
 
 	/* The status is up-to-date; restart or stop the schedule as needed */
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 28e2d5b2c0c7..fcc12f3e60a3 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -139,23 +139,8 @@ EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
  */
 static int vfio_alloc_group_minor(struct vfio_group *group)
 {
-	int ret, minor;
-
-again:
-	if (unlikely(idr_pre_get(&vfio.group_idr, GFP_KERNEL) == 0))
-		return -ENOMEM;
-
 	/* index 0 is used by /dev/vfio/vfio */
-	ret = idr_get_new_above(&vfio.group_idr, group, 1, &minor);
-	if (ret == -EAGAIN)
-		goto again;
-	if (ret || minor > MINORMASK) {
-		if (minor > MINORMASK)
-			idr_remove(&vfio.group_idr, minor);
-		return -ENOSPC;
-	}
-
-	return minor;
+	return idr_alloc(&vfio.group_idr, group, 1, MINORMASK + 1, GFP_KERNEL);
 }
 
 static void vfio_free_group_minor(int minor)
diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index be27b551473f..db10d0120d2b 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -384,6 +384,12 @@ config BACKLIGHT_LP855X
 	  This supports TI LP8550, LP8551, LP8552, LP8553, LP8556 and LP8557
 	  backlight driver.
 
+config BACKLIGHT_LP8788
+	tristate "Backlight driver for TI LP8788 MFD"
+	depends on BACKLIGHT_CLASS_DEVICE && MFD_LP8788
+	help
+	  This supports TI LP8788 backlight driver.
+
 config BACKLIGHT_OT200
 	tristate "Backlight driver for ot200 visualisation device"
 	depends on BACKLIGHT_CLASS_DEVICE && CS5535_MFGPT && GPIO_CS5535
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index 4606c218e8e4..96c4d620c5ce 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_BACKLIGHT_LM3630)		+= lm3630_bl.o
 obj-$(CONFIG_BACKLIGHT_LM3639)		+= lm3639_bl.o
 obj-$(CONFIG_BACKLIGHT_LOCOMO)		+= locomolcd.o
 obj-$(CONFIG_BACKLIGHT_LP855X)		+= lp855x_bl.o
+obj-$(CONFIG_BACKLIGHT_LP8788)		+= lp8788_bl.o
 obj-$(CONFIG_BACKLIGHT_MAX8925)		+= max8925_bl.o
 obj-$(CONFIG_BACKLIGHT_OMAP1)		+= omap1_bl.o
 obj-$(CONFIG_BACKLIGHT_OT200)		+= ot200_bl.o
diff --git a/drivers/video/backlight/ams369fg06.c b/drivers/video/backlight/ams369fg06.c
index d29e49443f29..c02aa2c2575a 100644
--- a/drivers/video/backlight/ams369fg06.c
+++ b/drivers/video/backlight/ams369fg06.c
@@ -317,10 +317,7 @@ static int ams369fg06_power_on(struct ams369fg06 *lcd)
 	pd = lcd->lcd_pd;
 	bd = lcd->bd;
 
-	if (!pd->power_on) {
-		dev_err(lcd->dev, "power_on is NULL.\n");
-		return -EINVAL;
-	} else {
+	if (pd->power_on) {
 		pd->power_on(lcd->ld, 1);
 		msleep(pd->power_on_delay);
 	}
@@ -370,7 +367,8 @@ static int ams369fg06_power_off(struct ams369fg06 *lcd)
 
 	msleep(pd->power_off_delay);
 
-	pd->power_on(lcd->ld, 0);
+	if (pd->power_on)
+		pd->power_on(lcd->ld, 0);
 
 	return 0;
 }
diff --git a/drivers/video/backlight/lp8788_bl.c b/drivers/video/backlight/lp8788_bl.c
new file mode 100644
index 000000000000..4bb8b4f140cf
--- /dev/null
+++ b/drivers/video/backlight/lp8788_bl.c
@@ -0,0 +1,333 @@
+/*
+ * TI LP8788 MFD - backlight driver
+ *
+ * Copyright 2012 Texas Instruments
+ *
+ * Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/backlight.h>
+#include <linux/err.h>
+#include <linux/mfd/lp8788.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+#include <linux/slab.h>
+
+/* Register address */
+#define LP8788_BL_CONFIG		0x96
+#define LP8788_BL_EN			BIT(0)
+#define LP8788_BL_PWM_INPUT_EN		BIT(5)
+#define LP8788_BL_FULLSCALE_SHIFT	2
+#define LP8788_BL_DIM_MODE_SHIFT	1
+#define LP8788_BL_PWM_POLARITY_SHIFT	6
+
+#define LP8788_BL_BRIGHTNESS		0x97
+
+#define LP8788_BL_RAMP			0x98
+#define LP8788_BL_RAMP_RISE_SHIFT	4
+
+#define MAX_BRIGHTNESS			127
+#define DEFAULT_BL_NAME			"lcd-backlight"
+
+struct lp8788_bl_config {
+	enum lp8788_bl_ctrl_mode bl_mode;
+	enum lp8788_bl_dim_mode dim_mode;
+	enum lp8788_bl_full_scale_current full_scale;
+	enum lp8788_bl_ramp_step rise_time;
+	enum lp8788_bl_ramp_step fall_time;
+	enum pwm_polarity pwm_pol;
+};
+
+struct lp8788_bl {
+	struct lp8788 *lp;
+	struct backlight_device *bl_dev;
+	struct lp8788_backlight_platform_data *pdata;
+	enum lp8788_bl_ctrl_mode mode;
+	struct pwm_device *pwm;
+};
+
+struct lp8788_bl_config default_bl_config = {
+	.bl_mode    = LP8788_BL_REGISTER_ONLY,
+	.dim_mode   = LP8788_DIM_EXPONENTIAL,
+	.full_scale = LP8788_FULLSCALE_1900uA,
+	.rise_time  = LP8788_RAMP_8192us,
+	.fall_time  = LP8788_RAMP_8192us,
+	.pwm_pol    = PWM_POLARITY_NORMAL,
+};
+
+static inline bool is_brightness_ctrl_by_pwm(enum lp8788_bl_ctrl_mode mode)
+{
+	return (mode == LP8788_BL_COMB_PWM_BASED);
+}
+
+static inline bool is_brightness_ctrl_by_register(enum lp8788_bl_ctrl_mode mode)
+{
+	return (mode == LP8788_BL_REGISTER_ONLY ||
+		mode == LP8788_BL_COMB_REGISTER_BASED);
+}
+
+static int lp8788_backlight_configure(struct lp8788_bl *bl)
+{
+	struct lp8788_backlight_platform_data *pdata = bl->pdata;
+	struct lp8788_bl_config *cfg = &default_bl_config;
+	int ret;
+	u8 val;
+
+	/*
+	 * Update chip configuration if platform data exists,
+	 * otherwise use the default settings.
+	 */
+	if (pdata) {
+		cfg->bl_mode    = pdata->bl_mode;
+		cfg->dim_mode   = pdata->dim_mode;
+		cfg->full_scale = pdata->full_scale;
+		cfg->rise_time  = pdata->rise_time;
+		cfg->fall_time  = pdata->fall_time;
+		cfg->pwm_pol    = pdata->pwm_pol;
+	}
+
+	/* Brightness ramp up/down */
+	val = (cfg->rise_time << LP8788_BL_RAMP_RISE_SHIFT) | cfg->fall_time;
+	ret = lp8788_write_byte(bl->lp, LP8788_BL_RAMP, val);
+	if (ret)
+		return ret;
+
+	/* Fullscale current setting */
+	val = (cfg->full_scale << LP8788_BL_FULLSCALE_SHIFT) |
+		(cfg->dim_mode << LP8788_BL_DIM_MODE_SHIFT);
+
+	/* Brightness control mode */
+	switch (cfg->bl_mode) {
+	case LP8788_BL_REGISTER_ONLY:
+		val |= LP8788_BL_EN;
+		break;
+	case LP8788_BL_COMB_PWM_BASED:
+	case LP8788_BL_COMB_REGISTER_BASED:
+		val |= LP8788_BL_EN | LP8788_BL_PWM_INPUT_EN |
+			(cfg->pwm_pol << LP8788_BL_PWM_POLARITY_SHIFT);
+		break;
+	default:
+		dev_err(bl->lp->dev, "invalid mode: %d\n", cfg->bl_mode);
+		return -EINVAL;
+	}
+
+	bl->mode = cfg->bl_mode;
+
+	return lp8788_write_byte(bl->lp, LP8788_BL_CONFIG, val);
+}
+
+static void lp8788_pwm_ctrl(struct lp8788_bl *bl, int br, int max_br)
+{
+	unsigned int period;
+	unsigned int duty;
+	struct device *dev;
+	struct pwm_device *pwm;
+
+	if (!bl->pdata)
+		return;
+
+	period = bl->pdata->period_ns;
+	duty = br * period / max_br;
+	dev = bl->lp->dev;
+
+	/* request PWM device with the consumer name */
+	if (!bl->pwm) {
+		pwm = devm_pwm_get(dev, LP8788_DEV_BACKLIGHT);
+		if (IS_ERR(pwm)) {
+			dev_err(dev, "can not get PWM device\n");
+			return;
+		}
+
+		bl->pwm = pwm;
+	}
+
+	pwm_config(bl->pwm, duty, period);
+	if (duty)
+		pwm_enable(bl->pwm);
+	else
+		pwm_disable(bl->pwm);
+}
+
+static int lp8788_bl_update_status(struct backlight_device *bl_dev)
+{
+	struct lp8788_bl *bl = bl_get_data(bl_dev);
+	enum lp8788_bl_ctrl_mode mode = bl->mode;
+
+	if (bl_dev->props.state & BL_CORE_SUSPENDED)
+		bl_dev->props.brightness = 0;
+
+	if (is_brightness_ctrl_by_pwm(mode)) {
+		int brt = bl_dev->props.brightness;
+		int max = bl_dev->props.max_brightness;
+
+		lp8788_pwm_ctrl(bl, brt, max);
+	} else if (is_brightness_ctrl_by_register(mode)) {
+		u8 brt = bl_dev->props.brightness;
+
+		lp8788_write_byte(bl->lp, LP8788_BL_BRIGHTNESS, brt);
+	}
+
+	return 0;
+}
+
+static int lp8788_bl_get_brightness(struct backlight_device *bl_dev)
+{
+	return bl_dev->props.brightness;
+}
+
+static const struct backlight_ops lp8788_bl_ops = {
+	.options = BL_CORE_SUSPENDRESUME,
+	.update_status = lp8788_bl_update_status,
+	.get_brightness = lp8788_bl_get_brightness,
+};
+
+static int lp8788_backlight_register(struct lp8788_bl *bl)
+{
+	struct backlight_device *bl_dev;
+	struct backlight_properties props;
+	struct lp8788_backlight_platform_data *pdata = bl->pdata;
+	int init_brt;
+	char *name;
+
+	props.type = BACKLIGHT_PLATFORM;
+	props.max_brightness = MAX_BRIGHTNESS;
+
+	/* Initial brightness */
+	if (pdata)
+		init_brt = min_t(int, pdata->initial_brightness,
+				props.max_brightness);
+	else
+		init_brt = 0;
+
+	props.brightness = init_brt;
+
+	/* Backlight device name */
+	if (!pdata || !pdata->name)
+		name = DEFAULT_BL_NAME;
+	else
+		name = pdata->name;
+
+	bl_dev = backlight_device_register(name, bl->lp->dev, bl,
+				       &lp8788_bl_ops, &props);
+	if (IS_ERR(bl_dev))
+		return PTR_ERR(bl_dev);
+
+	bl->bl_dev = bl_dev;
+
+	return 0;
+}
+
+static void lp8788_backlight_unregister(struct lp8788_bl *bl)
+{
+	struct backlight_device *bl_dev = bl->bl_dev;
+
+	if (bl_dev)
+		backlight_device_unregister(bl_dev);
+}
+
+static ssize_t lp8788_get_bl_ctl_mode(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct lp8788_bl *bl = dev_get_drvdata(dev);
+	enum lp8788_bl_ctrl_mode mode = bl->mode;
+	char *strmode;
+
+	if (is_brightness_ctrl_by_pwm(mode))
+		strmode = "PWM based";
+	else if (is_brightness_ctrl_by_register(mode))
+		strmode = "Register based";
+	else
+		strmode = "Invalid mode";
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", strmode);
+}
+
+static DEVICE_ATTR(bl_ctl_mode, S_IRUGO, lp8788_get_bl_ctl_mode, NULL);
+
+static struct attribute *lp8788_attributes[] = {
+	&dev_attr_bl_ctl_mode.attr,
+	NULL,
+};
+
+static const struct attribute_group lp8788_attr_group = {
+	.attrs = lp8788_attributes,
+};
+
+static int lp8788_backlight_probe(struct platform_device *pdev)
+{
+	struct lp8788 *lp = dev_get_drvdata(pdev->dev.parent);
+	struct lp8788_bl *bl;
+	int ret;
+
+	bl = devm_kzalloc(lp->dev, sizeof(struct lp8788_bl), GFP_KERNEL);
+	if (!bl)
+		return -ENOMEM;
+
+	bl->lp = lp;
+	if (lp->pdata)
+		bl->pdata = lp->pdata->bl_pdata;
+
+	platform_set_drvdata(pdev, bl);
+
+	ret = lp8788_backlight_configure(bl);
+	if (ret) {
+		dev_err(lp->dev, "backlight config err: %d\n", ret);
+		goto err_dev;
+	}
+
+	ret = lp8788_backlight_register(bl);
+	if (ret) {
+		dev_err(lp->dev, "register backlight err: %d\n", ret);
+		goto err_dev;
+	}
+
+	ret = sysfs_create_group(&pdev->dev.kobj, &lp8788_attr_group);
+	if (ret) {
+		dev_err(lp->dev, "register sysfs err: %d\n", ret);
+		goto err_sysfs;
+	}
+
+	backlight_update_status(bl->bl_dev);
+
+	return 0;
+
+err_sysfs:
+	lp8788_backlight_unregister(bl);
+err_dev:
+	return ret;
+}
+
+static int lp8788_backlight_remove(struct platform_device *pdev)
+{
+	struct lp8788_bl *bl = platform_get_drvdata(pdev);
+	struct backlight_device *bl_dev = bl->bl_dev;
+
+	bl_dev->props.brightness = 0;
+	backlight_update_status(bl_dev);
+	sysfs_remove_group(&pdev->dev.kobj, &lp8788_attr_group);
+	lp8788_backlight_unregister(bl);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver lp8788_bl_driver = {
+	.probe = lp8788_backlight_probe,
+	.remove = lp8788_backlight_remove,
+	.driver = {
+		.name = LP8788_DEV_BACKLIGHT,
+		.owner = THIS_MODULE,
+	},
+};
+module_platform_driver(lp8788_bl_driver);
+
+MODULE_DESCRIPTION("Texas Instruments LP8788 Backlight Driver");
+MODULE_AUTHOR("Milo Kim");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:lp8788-backlight");
diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c
index 372c8c0d54a0..950d354d50e2 100644
--- a/drivers/w1/masters/mxc_w1.c
+++ b/drivers/w1/masters/mxc_w1.c
@@ -157,9 +157,16 @@ static int mxc_w1_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static struct of_device_id mxc_w1_dt_ids[] = {
+	{ .compatible = "fsl,imx21-owire" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mxc_w1_dt_ids);
+
 static struct platform_driver mxc_w1_driver = {
 	.driver = {
-		   .name = "mxc_w1",
+		.name = "mxc_w1",
+		.of_match_table = mxc_w1_dt_ids,
 	},
 	.probe = mxc_w1_probe,
 	.remove = mxc_w1_remove,
diff --git a/drivers/w1/slaves/Kconfig b/drivers/w1/slaves/Kconfig
index 67526690acbc..762561fbabbf 100644
--- a/drivers/w1/slaves/Kconfig
+++ b/drivers/w1/slaves/Kconfig
@@ -17,11 +17,16 @@ config W1_SLAVE_SMEM
 	  simple 64bit memory rom(ds2401/ds2411/ds1990*) to your wire.
 
 config W1_SLAVE_DS2408
-        tristate "8-Channel Addressable Switch (IO Expander) 0x29 family support (DS2408)"
-        help
-          Say Y here if you want to use a 1-wire
+	tristate "8-Channel Addressable Switch (IO Expander) 0x29 family support (DS2408)"
+	help
+	  Say Y here if you want to use a 1-wire
+	  DS2408 8-Channel Addressable Switch device support
 
-		  DS2408 8-Channel Addressable Switch device support
+config W1_SLAVE_DS2413
+	tristate "Dual Channel Addressable Switch 0x3a family support (DS2413)"
+	help
+	  Say Y here if you want to use a 1-wire
+	  DS2413 Dual Channel Addressable Switch device support
 
 config W1_SLAVE_DS2423
 	tristate "Counter 1-wire device (DS2423)"
diff --git a/drivers/w1/slaves/Makefile b/drivers/w1/slaves/Makefile
index 05188f6aab5a..06529f3157ab 100644
--- a/drivers/w1/slaves/Makefile
+++ b/drivers/w1/slaves/Makefile
@@ -4,7 +4,8 @@
 
 obj-$(CONFIG_W1_SLAVE_THERM)	+= w1_therm.o
 obj-$(CONFIG_W1_SLAVE_SMEM)	+= w1_smem.o
-obj-$(CONFIG_W1_SLAVE_DS2408)   += w1_ds2408.o
+obj-$(CONFIG_W1_SLAVE_DS2408)	+= w1_ds2408.o
+obj-$(CONFIG_W1_SLAVE_DS2413)	+= w1_ds2413.o
 obj-$(CONFIG_W1_SLAVE_DS2423)	+= w1_ds2423.o
 obj-$(CONFIG_W1_SLAVE_DS2431)	+= w1_ds2431.o
 obj-$(CONFIG_W1_SLAVE_DS2433)	+= w1_ds2433.o
diff --git a/drivers/w1/slaves/w1_ds2413.c b/drivers/w1/slaves/w1_ds2413.c
new file mode 100644
index 000000000000..829786252c6b
--- /dev/null
+++ b/drivers/w1/slaves/w1_ds2413.c
@@ -0,0 +1,177 @@
+/*
+ * w1_ds2413.c - w1 family 3a (DS2413) driver
+ * based on w1_ds2408.c by Jean-Francois Dagenais <dagenaisj@sonatest.com>
+ *
+ * Copyright (c) 2013 Mariusz Bialonczyk <manio@skyboo.net>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/device.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+
+#include "../w1.h"
+#include "../w1_int.h"
+#include "../w1_family.h"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mariusz Bialonczyk <manio@skyboo.net>");
+MODULE_DESCRIPTION("w1 family 3a driver for DS2413 2 Pin IO");
+
+#define W1_F3A_RETRIES                     3
+#define W1_F3A_FUNC_PIO_ACCESS_READ        0xF5
+#define W1_F3A_FUNC_PIO_ACCESS_WRITE       0x5A
+#define W1_F3A_SUCCESS_CONFIRM_BYTE        0xAA
+
+static ssize_t w1_f3a_read_state(
+	struct file *filp, struct kobject *kobj,
+	struct bin_attribute *bin_attr,
+	char *buf, loff_t off, size_t count)
+{
+	struct w1_slave *sl = kobj_to_w1_slave(kobj);
+	dev_dbg(&sl->dev,
+		"Reading %s kobj: %p, off: %0#10x, count: %zu, buff addr: %p",
+		bin_attr->attr.name, kobj, (unsigned int)off, count, buf);
+
+	if (off != 0)
+		return 0;
+	if (!buf)
+		return -EINVAL;
+
+	mutex_lock(&sl->master->bus_mutex);
+	dev_dbg(&sl->dev, "mutex locked");
+
+	if (w1_reset_select_slave(sl)) {
+		mutex_unlock(&sl->master->bus_mutex);
+		return -EIO;
+	}
+
+	w1_write_8(sl->master, W1_F3A_FUNC_PIO_ACCESS_READ);
+	*buf = w1_read_8(sl->master);
+
+	mutex_unlock(&sl->master->bus_mutex);
+	dev_dbg(&sl->dev, "mutex unlocked");
+
+	/* check for correct complement */
+	if ((*buf & 0x0F) != ((~*buf >> 4) & 0x0F))
+		return -EIO;
+	else
+		return 1;
+}
+
+static ssize_t w1_f3a_write_output(
+	struct file *filp, struct kobject *kobj,
+	struct bin_attribute *bin_attr,
+	char *buf, loff_t off, size_t count)
+{
+	struct w1_slave *sl = kobj_to_w1_slave(kobj);
+	u8 w1_buf[3];
+	unsigned int retries = W1_F3A_RETRIES;
+
+	if (count != 1 || off != 0)
+		return -EFAULT;
+
+	dev_dbg(&sl->dev, "locking mutex for write_output");
+	mutex_lock(&sl->master->bus_mutex);
+	dev_dbg(&sl->dev, "mutex locked");
+
+	if (w1_reset_select_slave(sl))
+		goto error;
+
+	/* according to the DS2413 datasheet the most significant 6 bits
+	   should be set to "1"s, so do it now */
+	*buf = *buf | 0xFC;
+
+	while (retries--) {
+		w1_buf[0] = W1_F3A_FUNC_PIO_ACCESS_WRITE;
+		w1_buf[1] = *buf;
+		w1_buf[2] = ~(*buf);
+		w1_write_block(sl->master, w1_buf, 3);
+
+		if (w1_read_8(sl->master) == W1_F3A_SUCCESS_CONFIRM_BYTE) {
+			mutex_unlock(&sl->master->bus_mutex);
+			dev_dbg(&sl->dev, "mutex unlocked, retries:%d", retries);
+			return 1;
+		}
+		if (w1_reset_resume_command(sl->master))
+			goto error;
+	}
+
+error:
+	mutex_unlock(&sl->master->bus_mutex);
+	dev_dbg(&sl->dev, "mutex unlocked in error, retries:%d", retries);
+	return -EIO;
+}
+
+#define NB_SYSFS_BIN_FILES 2
+static struct bin_attribute w1_f3a_sysfs_bin_files[NB_SYSFS_BIN_FILES] = {
+	{
+		.attr = {
+			.name = "state",
+			.mode = S_IRUGO,
+		},
+		.size = 1,
+		.read = w1_f3a_read_state,
+	},
+	{
+		.attr = {
+			.name = "output",
+			.mode = S_IRUGO | S_IWUSR | S_IWGRP,
+		},
+		.size = 1,
+		.write = w1_f3a_write_output,
+	}
+};
+
+static int w1_f3a_add_slave(struct w1_slave *sl)
+{
+	int err = 0;
+	int i;
+
+	for (i = 0; i < NB_SYSFS_BIN_FILES && !err; ++i)
+		err = sysfs_create_bin_file(
+			&sl->dev.kobj,
+			&(w1_f3a_sysfs_bin_files[i]));
+	if (err)
+		while (--i >= 0)
+			sysfs_remove_bin_file(&sl->dev.kobj,
+				&(w1_f3a_sysfs_bin_files[i]));
+	return err;
+}
+
+static void w1_f3a_remove_slave(struct w1_slave *sl)
+{
+	int i;
+	for (i = NB_SYSFS_BIN_FILES - 1; i >= 0; --i)
+		sysfs_remove_bin_file(&sl->dev.kobj,
+			&(w1_f3a_sysfs_bin_files[i]));
+}
+
+static struct w1_family_ops w1_f3a_fops = {
+	.add_slave      = w1_f3a_add_slave,
+	.remove_slave   = w1_f3a_remove_slave,
+};
+
+static struct w1_family w1_family_3a = {
+	.fid = W1_FAMILY_DS2413,
+	.fops = &w1_f3a_fops,
+};
+
+static int __init w1_f3a_init(void)
+{
+	return w1_register_family(&w1_family_3a);
+}
+
+static void __exit w1_f3a_exit(void)
+{
+	w1_unregister_family(&w1_family_3a);
+}
+
+module_init(w1_f3a_init);
+module_exit(w1_f3a_exit);
diff --git a/drivers/w1/w1_family.h b/drivers/w1/w1_family.h
index a1f0ce151d53..625dd08f775f 100644
--- a/drivers/w1/w1_family.h
+++ b/drivers/w1/w1_family.h
@@ -39,6 +39,7 @@
 #define W1_EEPROM_DS2431	0x2D
 #define W1_FAMILY_DS2760	0x30
 #define W1_FAMILY_DS2780	0x32
+#define W1_FAMILY_DS2413	0x3A
 #define W1_THERM_DS1825		0x3B
 #define W1_FAMILY_DS2781	0x3D
 #define W1_THERM_DS28EA00	0x42
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 26e1fdbddf69..9fcc70c11cea 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -79,6 +79,7 @@ config DA9052_WATCHDOG
 config DA9055_WATCHDOG
 	tristate "Dialog Semiconductor DA9055 Watchdog"
 	depends on MFD_DA9055
+	select WATCHDOG_CORE
 	help
 	  If you say yes here you get support for watchdog on the Dialog
 	  Semiconductor DA9055 PMIC.
@@ -108,7 +109,7 @@ config WM8350_WATCHDOG
 
 config ARM_SP805_WATCHDOG
 	tristate "ARM SP805 Watchdog"
-	depends on ARM_AMBA
+	depends on ARM && ARM_AMBA
 	select WATCHDOG_CORE
 	help
 	  ARM Primecell SP805 Watchdog timer. This will reboot your system when
@@ -116,7 +117,7 @@ config ARM_SP805_WATCHDOG
 
 config AT91RM9200_WATCHDOG
 	tristate "AT91RM9200 watchdog"
-	depends on ARCH_AT91RM9200
+	depends on ARCH_AT91
 	help
 	  Watchdog timer embedded into AT91RM9200 chips. This will reboot your
 	  system when the timeout is reached.
@@ -124,6 +125,7 @@ config AT91RM9200_WATCHDOG
 config AT91SAM9X_WATCHDOG
 	tristate "AT91SAM9X / AT91CAP9 watchdog"
 	depends on ARCH_AT91 && !ARCH_AT91RM9200
+	select WATCHDOG_CORE
 	help
 	  Watchdog timer embedded into AT91SAM9X and AT91CAP9 chips. This will
 	  reboot your system when the timeout is reached.
@@ -316,14 +318,15 @@ config TWL4030_WATCHDOG
 	  Support for TI TWL4030 watchdog.  Say 'Y' here to enable the
 	  watchdog timer support for TWL4030 chips.
 
-config STMP3XXX_WATCHDOG
-	tristate "Freescale STMP3XXX watchdog"
-	depends on ARCH_STMP3XXX
+config STMP3XXX_RTC_WATCHDOG
+	tristate "Freescale STMP3XXX & i.MX23/28 watchdog"
+	depends on RTC_DRV_STMP
+	select WATCHDOG_CORE
 	help
-	  Say Y here if to include support for the watchdog timer
-	  for the Sigmatel STMP37XX/378X SoC.
+	  Say Y here to include support for the watchdog timer inside
+	  the RTC for the STMP37XX/378X or i.MX23/28 SoC.
 	  To compile this driver as a module, choose M here: the
-	  module will be called stmp3xxx_wdt.
+	  module will be called stmp3xxx_rtc_wdt.
 
 config NUC900_WATCHDOG
 	tristate "Nuvoton NUC900 watchdog"
@@ -376,6 +379,18 @@ config UX500_WATCHDOG
 	  To compile this driver as a module, choose M here: the
 	  module will be called ux500_wdt.
 
+config RETU_WATCHDOG
+	tristate "Retu watchdog"
+	depends on MFD_RETU
+	select WATCHDOG_CORE
+	help
+	  Retu watchdog driver for Nokia Internet Tablets (770, N800,
+	  N810). At least on N800 the watchdog cannot be disabled, so
+	  this driver is essential and you should enable it.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called retu_wdt.
+
 # AVR32 Architecture
 
 config AT32AP700X_WDT
@@ -593,7 +608,7 @@ config IE6XX_WDT
 
 config INTEL_SCU_WATCHDOG
 	bool "Intel SCU Watchdog for Mobile Platforms"
-	depends on X86_MRST
+	depends on X86_INTEL_MID
 	---help---
 	  Hardware driver for the watchdog time built into the Intel SCU
 	  for Intel Mobile Platforms.
@@ -983,6 +998,7 @@ config ATH79_WDT
 config BCM47XX_WDT
 	tristate "Broadcom BCM47xx Watchdog Timer"
 	depends on BCM47XX
+	select WATCHDOG_CORE
 	help
 	  Hardware driver for the Broadcom BCM47xx Watchdog Timer.
 
@@ -1131,6 +1147,7 @@ config PIKA_WDT
 config BOOKE_WDT
 	tristate "PowerPC Book-E Watchdog Timer"
 	depends on BOOKE || 4xx
+	select WATCHDOG_CORE
 	---help---
 	  Watchdog driver for PowerPC Book-E chips, such as the Freescale
 	  MPC85xx SOCs and the IBM PowerPC 440.
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index bec86ee6e9e3..a300b948f254 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -48,11 +48,12 @@ obj-$(CONFIG_IOP_WATCHDOG) += iop_wdt.o
 obj-$(CONFIG_DAVINCI_WATCHDOG) += davinci_wdt.o
 obj-$(CONFIG_ORION_WATCHDOG) += orion_wdt.o
 obj-$(CONFIG_COH901327_WATCHDOG) += coh901327_wdt.o
-obj-$(CONFIG_STMP3XXX_WATCHDOG) += stmp3xxx_wdt.o
+obj-$(CONFIG_STMP3XXX_RTC_WATCHDOG) += stmp3xxx_rtc_wdt.o
 obj-$(CONFIG_NUC900_WATCHDOG) += nuc900_wdt.o
 obj-$(CONFIG_TS72XX_WATCHDOG) += ts72xx_wdt.o
 obj-$(CONFIG_IMX2_WDT) += imx2_wdt.o
 obj-$(CONFIG_UX500_WATCHDOG) += ux500_wdt.o
+obj-$(CONFIG_RETU_WATCHDOG) += retu_wdt.o
 
 # AVR32 Architecture
 obj-$(CONFIG_AT32AP700X_WDT) += at32ap700x_wdt.o
diff --git a/drivers/watchdog/at91rm9200_wdt.c b/drivers/watchdog/at91rm9200_wdt.c
index 89831ed24a4f..1c75260b987c 100644
--- a/drivers/watchdog/at91rm9200_wdt.c
+++ b/drivers/watchdog/at91rm9200_wdt.c
@@ -24,6 +24,8 @@
 #include <linux/types.h>
 #include <linux/watchdog.h>
 #include <linux/uaccess.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 #include <mach/at91_st.h>
 
 #define WDT_DEFAULT_TIME	5	/* seconds */
@@ -252,6 +254,12 @@ static int at91wdt_resume(struct platform_device *pdev)
 #define at91wdt_resume	NULL
 #endif
 
+static const struct of_device_id at91_wdt_dt_ids[] = {
+	{ .compatible = "atmel,at91rm9200-wdt" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, at91_wdt_dt_ids);
+
 static struct platform_driver at91wdt_driver = {
 	.probe		= at91wdt_probe,
 	.remove		= at91wdt_remove,
@@ -261,6 +269,7 @@ static struct platform_driver at91wdt_driver = {
 	.driver		= {
 		.name	= "at91_wdt",
 		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(at91_wdt_dt_ids),
 	},
 };
 
diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c
index c08933cc565e..be37dde4f864 100644
--- a/drivers/watchdog/at91sam9_wdt.c
+++ b/drivers/watchdog/at91sam9_wdt.c
@@ -18,11 +18,9 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/errno.h>
-#include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
-#include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/platform_device.h>
@@ -58,7 +56,7 @@
 
 /* User land timeout */
 #define WDT_HEARTBEAT 15
-static int heartbeat = WDT_HEARTBEAT;
+static int heartbeat;
 module_param(heartbeat, int, 0);
 MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds. "
 	"(default = " __MODULE_STRING(WDT_HEARTBEAT) ")");
@@ -68,19 +66,17 @@ module_param(nowayout, bool, 0);
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
 	"(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
 
+static struct watchdog_device at91_wdt_dev;
 static void at91_ping(unsigned long data);
 
 static struct {
 	void __iomem *base;
 	unsigned long next_heartbeat;	/* the next_heartbeat for the timer */
-	unsigned long open;
-	char expect_close;
 	struct timer_list timer;	/* The timer that pings the watchdog */
 } at91wdt_private;
 
 /* ......................................................................... */
 
-
 /*
  * Reload the watchdog timer.  (ie, pat the watchdog)
  */
@@ -95,39 +91,37 @@ static inline void at91_wdt_reset(void)
 static void at91_ping(unsigned long data)
 {
 	if (time_before(jiffies, at91wdt_private.next_heartbeat) ||
-			(!nowayout && !at91wdt_private.open)) {
+	    (!watchdog_active(&at91_wdt_dev))) {
 		at91_wdt_reset();
 		mod_timer(&at91wdt_private.timer, jiffies + WDT_TIMEOUT);
 	} else
 		pr_crit("I will reset your machine !\n");
 }
 
-/*
- * Watchdog device is opened, and watchdog starts running.
- */
-static int at91_wdt_open(struct inode *inode, struct file *file)
+static int at91_wdt_ping(struct watchdog_device *wdd)
 {
-	if (test_and_set_bit(0, &at91wdt_private.open))
-		return -EBUSY;
+	/* calculate when the next userspace timeout will be */
+	at91wdt_private.next_heartbeat = jiffies + wdd->timeout * HZ;
+	return 0;
+}
 
-	at91wdt_private.next_heartbeat = jiffies + heartbeat * HZ;
+static int at91_wdt_start(struct watchdog_device *wdd)
+{
+	/* calculate the next userspace timeout and modify the timer */
+	at91_wdt_ping(wdd);
 	mod_timer(&at91wdt_private.timer, jiffies + WDT_TIMEOUT);
-
-	return nonseekable_open(inode, file);
+	return 0;
 }
 
-/*
- * Close the watchdog device.
- */
-static int at91_wdt_close(struct inode *inode, struct file *file)
+static int at91_wdt_stop(struct watchdog_device *wdd)
 {
-	clear_bit(0, &at91wdt_private.open);
-
-	/* stop internal ping */
-	if (!at91wdt_private.expect_close)
-		del_timer(&at91wdt_private.timer);
+	/* The watchdog timer hardware can not be stopped... */
+	return 0;
+}
 
-	at91wdt_private.expect_close = 0;
+static int at91_wdt_set_timeout(struct watchdog_device *wdd, unsigned int new_timeout)
+{
+	wdd->timeout = new_timeout;
 	return 0;
 }
 
@@ -163,96 +157,28 @@ static int at91_wdt_settimeout(unsigned int timeout)
 	return 0;
 }
 
+/* ......................................................................... */
+
 static const struct watchdog_info at91_wdt_info = {
 	.identity	= DRV_NAME,
 	.options	= WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING |
 						WDIOF_MAGICCLOSE,
 };
 
-/*
- * Handle commands from user-space.
- */
-static long at91_wdt_ioctl(struct file *file,
-		unsigned int cmd, unsigned long arg)
-{
-	void __user *argp = (void __user *)arg;
-	int __user *p = argp;
-	int new_value;
-
-	switch (cmd) {
-	case WDIOC_GETSUPPORT:
-		return copy_to_user(argp, &at91_wdt_info,
-				    sizeof(at91_wdt_info)) ? -EFAULT : 0;
-
-	case WDIOC_GETSTATUS:
-	case WDIOC_GETBOOTSTATUS:
-		return put_user(0, p);
-
-	case WDIOC_KEEPALIVE:
-		at91wdt_private.next_heartbeat = jiffies + heartbeat * HZ;
-		return 0;
-
-	case WDIOC_SETTIMEOUT:
-		if (get_user(new_value, p))
-			return -EFAULT;
-
-		heartbeat = new_value;
-		at91wdt_private.next_heartbeat = jiffies + heartbeat * HZ;
-
-		return put_user(new_value, p);  /* return current value */
-
-	case WDIOC_GETTIMEOUT:
-		return put_user(heartbeat, p);
-	}
-	return -ENOTTY;
-}
-
-/*
- * Pat the watchdog whenever device is written to.
- */
-static ssize_t at91_wdt_write(struct file *file, const char *data, size_t len,
-								loff_t *ppos)
-{
-	if (!len)
-		return 0;
-
-	/* Scan for magic character */
-	if (!nowayout) {
-		size_t i;
-
-		at91wdt_private.expect_close = 0;
-
-		for (i = 0; i < len; i++) {
-			char c;
-			if (get_user(c, data + i))
-				return -EFAULT;
-			if (c == 'V') {
-				at91wdt_private.expect_close = 42;
-				break;
-			}
-		}
-	}
-
-	at91wdt_private.next_heartbeat = jiffies + heartbeat * HZ;
-
-	return len;
-}
-
-/* ......................................................................... */
-
-static const struct file_operations at91wdt_fops = {
-	.owner			= THIS_MODULE,
-	.llseek			= no_llseek,
-	.unlocked_ioctl	= at91_wdt_ioctl,
-	.open			= at91_wdt_open,
-	.release		= at91_wdt_close,
-	.write			= at91_wdt_write,
+static const struct watchdog_ops at91_wdt_ops = {
+	.owner =	THIS_MODULE,
+	.start =	at91_wdt_start,
+	.stop =		at91_wdt_stop,
+	.ping =		at91_wdt_ping,
+	.set_timeout =	at91_wdt_set_timeout,
 };
 
-static struct miscdevice at91wdt_miscdev = {
-	.minor		= WATCHDOG_MINOR,
-	.name		= "watchdog",
-	.fops		= &at91wdt_fops,
+static struct watchdog_device at91_wdt_dev = {
+	.info =		&at91_wdt_info,
+	.ops =		&at91_wdt_ops,
+	.timeout =	WDT_HEARTBEAT,
+	.min_timeout =	1,
+	.max_timeout =	0xFFFF,
 };
 
 static int __init at91wdt_probe(struct platform_device *pdev)
@@ -260,10 +186,6 @@ static int __init at91wdt_probe(struct platform_device *pdev)
 	struct resource	*r;
 	int res;
 
-	if (at91wdt_miscdev.parent)
-		return -EBUSY;
-	at91wdt_miscdev.parent = &pdev->dev;
-
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!r)
 		return -ENODEV;
@@ -273,38 +195,41 @@ static int __init at91wdt_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
+	at91_wdt_dev.parent = &pdev->dev;
+	watchdog_init_timeout(&at91_wdt_dev, heartbeat, &pdev->dev);
+	watchdog_set_nowayout(&at91_wdt_dev, nowayout);
+
 	/* Set watchdog */
 	res = at91_wdt_settimeout(ms_to_ticks(WDT_HW_TIMEOUT * 1000));
 	if (res)
 		return res;
 
-	res = misc_register(&at91wdt_miscdev);
+	res = watchdog_register_device(&at91_wdt_dev);
 	if (res)
 		return res;
 
-	at91wdt_private.next_heartbeat = jiffies + heartbeat * HZ;
+	at91wdt_private.next_heartbeat = jiffies + at91_wdt_dev.timeout * HZ;
 	setup_timer(&at91wdt_private.timer, at91_ping, 0);
 	mod_timer(&at91wdt_private.timer, jiffies + WDT_TIMEOUT);
 
 	pr_info("enabled (heartbeat=%d sec, nowayout=%d)\n",
-		heartbeat, nowayout);
+		at91_wdt_dev.timeout, nowayout);
 
 	return 0;
 }
 
 static int __exit at91wdt_remove(struct platform_device *pdev)
 {
-	int res;
+	watchdog_unregister_device(&at91_wdt_dev);
 
-	res = misc_deregister(&at91wdt_miscdev);
-	if (!res)
-		at91wdt_miscdev.parent = NULL;
+	pr_warn("I quit now, hardware will probably reboot!\n");
+	del_timer(&at91wdt_private.timer);
 
-	return res;
+	return 0;
 }
 
 #if defined(CONFIG_OF)
-static const struct of_device_id at91_wdt_dt_ids[] __initconst = {
+static const struct of_device_id at91_wdt_dt_ids[] = {
 	{ .compatible = "atmel,at91sam9260-wdt" },
 	{ /* sentinel */ }
 };
@@ -326,4 +251,3 @@ module_platform_driver_probe(at91wdt_driver, at91wdt_probe);
 MODULE_AUTHOR("Renaud CERRATO <r.cerrato@til-technologies.fr>");
 MODULE_DESCRIPTION("Watchdog driver for Atmel AT91SAM9x processors");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c
index 38a999e60c0d..898799074a13 100644
--- a/drivers/watchdog/ath79_wdt.c
+++ b/drivers/watchdog/ath79_wdt.c
@@ -23,6 +23,7 @@
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/init.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/miscdevice.h>
 #include <linux/module.h>
@@ -32,14 +33,16 @@
 #include <linux/watchdog.h>
 #include <linux/clk.h>
 #include <linux/err.h>
-
-#include <asm/mach-ath79/ath79.h>
-#include <asm/mach-ath79/ar71xx_regs.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
 
 #define DRIVER_NAME	"ath79-wdt"
 
 #define WDT_TIMEOUT	15	/* seconds */
 
+#define WDOG_REG_CTRL		0x00
+#define WDOG_REG_TIMER		0x04
+
 #define WDOG_CTRL_LAST_RESET	BIT(31)
 #define WDOG_CTRL_ACTION_MASK	3
 #define WDOG_CTRL_ACTION_NONE	0	/* no action */
@@ -66,27 +69,38 @@ static struct clk *wdt_clk;
 static unsigned long wdt_freq;
 static int boot_status;
 static int max_timeout;
+static void __iomem *wdt_base;
+
+static inline void ath79_wdt_wr(unsigned reg, u32 val)
+{
+	iowrite32(val, wdt_base + reg);
+}
+
+static inline u32 ath79_wdt_rr(unsigned reg)
+{
+	return ioread32(wdt_base + reg);
+}
 
 static inline void ath79_wdt_keepalive(void)
 {
-	ath79_reset_wr(AR71XX_RESET_REG_WDOG, wdt_freq * timeout);
+	ath79_wdt_wr(WDOG_REG_TIMER, wdt_freq * timeout);
 	/* flush write */
-	ath79_reset_rr(AR71XX_RESET_REG_WDOG);
+	ath79_wdt_rr(WDOG_REG_TIMER);
 }
 
 static inline void ath79_wdt_enable(void)
 {
 	ath79_wdt_keepalive();
-	ath79_reset_wr(AR71XX_RESET_REG_WDOG_CTRL, WDOG_CTRL_ACTION_FCR);
+	ath79_wdt_wr(WDOG_REG_CTRL, WDOG_CTRL_ACTION_FCR);
 	/* flush write */
-	ath79_reset_rr(AR71XX_RESET_REG_WDOG_CTRL);
+	ath79_wdt_rr(WDOG_REG_CTRL);
 }
 
 static inline void ath79_wdt_disable(void)
 {
-	ath79_reset_wr(AR71XX_RESET_REG_WDOG_CTRL, WDOG_CTRL_ACTION_NONE);
+	ath79_wdt_wr(WDOG_REG_CTRL, WDOG_CTRL_ACTION_NONE);
 	/* flush write */
-	ath79_reset_rr(AR71XX_RESET_REG_WDOG_CTRL);
+	ath79_wdt_rr(WDOG_REG_CTRL);
 }
 
 static int ath79_wdt_set_timeout(int val)
@@ -226,16 +240,32 @@ static struct miscdevice ath79_wdt_miscdev = {
 
 static int ath79_wdt_probe(struct platform_device *pdev)
 {
+	struct resource *res;
 	u32 ctrl;
 	int err;
 
-	wdt_clk = clk_get(&pdev->dev, "wdt");
+	if (wdt_base)
+		return -EBUSY;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "no memory resource found\n");
+		return -EINVAL;
+	}
+
+	wdt_base = devm_request_and_ioremap(&pdev->dev, res);
+	if (!wdt_base) {
+		dev_err(&pdev->dev, "unable to remap memory region\n");
+		return -ENOMEM;
+	}
+
+	wdt_clk = devm_clk_get(&pdev->dev, "wdt");
 	if (IS_ERR(wdt_clk))
 		return PTR_ERR(wdt_clk);
 
 	err = clk_enable(wdt_clk);
 	if (err)
-		goto err_clk_put;
+		return err;
 
 	wdt_freq = clk_get_rate(wdt_clk);
 	if (!wdt_freq) {
@@ -251,7 +281,7 @@ static int ath79_wdt_probe(struct platform_device *pdev)
 			max_timeout, timeout);
 	}
 
-	ctrl = ath79_reset_rr(AR71XX_RESET_REG_WDOG_CTRL);
+	ctrl = ath79_wdt_rr(WDOG_REG_CTRL);
 	boot_status = (ctrl & WDOG_CTRL_LAST_RESET) ? WDIOF_CARDRESET : 0;
 
 	err = misc_register(&ath79_wdt_miscdev);
@@ -265,8 +295,6 @@ static int ath79_wdt_probe(struct platform_device *pdev)
 
 err_clk_disable:
 	clk_disable(wdt_clk);
-err_clk_put:
-	clk_put(wdt_clk);
 	return err;
 }
 
@@ -274,7 +302,6 @@ static int ath79_wdt_remove(struct platform_device *pdev)
 {
 	misc_deregister(&ath79_wdt_miscdev);
 	clk_disable(wdt_clk);
-	clk_put(wdt_clk);
 	return 0;
 }
 
@@ -283,6 +310,14 @@ static void ath97_wdt_shutdown(struct platform_device *pdev)
 	ath79_wdt_disable();
 }
 
+#ifdef CONFIG_OF
+static const struct of_device_id ath79_wdt_match[] = {
+	{ .compatible = "qca,ar7130-wdt" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, ath79_wdt_match);
+#endif
+
 static struct platform_driver ath79_wdt_driver = {
 	.probe		= ath79_wdt_probe,
 	.remove		= ath79_wdt_remove,
@@ -290,6 +325,7 @@ static struct platform_driver ath79_wdt_driver = {
 	.driver		= {
 		.name	= DRIVER_NAME,
 		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(ath79_wdt_match),
 	},
 };
 
diff --git a/drivers/watchdog/bcm47xx_wdt.c b/drivers/watchdog/bcm47xx_wdt.c
index bc0e91e78e86..b4021a2b459b 100644
--- a/drivers/watchdog/bcm47xx_wdt.c
+++ b/drivers/watchdog/bcm47xx_wdt.c
@@ -3,6 +3,7 @@
  *
  *  Copyright (C) 2008 Aleksandar Radovanovic <biblbroks@sezampro.rs>
  *  Copyright (C) 2009 Matthieu CASTET <castet.matthieu@free.fr>
+ *  Copyright (C) 2012-2013 Hauke Mehrtens <hauke@hauke-m.de>
  *
  *  This program is free software; you can redistribute it and/or
  *  modify it under the terms of the GNU General Public License
@@ -12,165 +13,143 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/bcm47xx_wdt.h>
 #include <linux/bitops.h>
 #include <linux/errno.h>
-#include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/platform_device.h>
 #include <linux/reboot.h>
 #include <linux/types.h>
-#include <linux/uaccess.h>
 #include <linux/watchdog.h>
 #include <linux/timer.h>
 #include <linux/jiffies.h>
-#include <linux/ssb/ssb_embedded.h>
-#include <asm/mach-bcm47xx/bcm47xx.h>
 
 #define DRV_NAME		"bcm47xx_wdt"
 
 #define WDT_DEFAULT_TIME	30	/* seconds */
-#define WDT_MAX_TIME		255	/* seconds */
+#define WDT_SOFTTIMER_MAX	255	/* seconds */
+#define WDT_SOFTTIMER_THRESHOLD	60	/* seconds */
 
-static int wdt_time = WDT_DEFAULT_TIME;
+static int timeout = WDT_DEFAULT_TIME;
 static bool nowayout = WATCHDOG_NOWAYOUT;
 
-module_param(wdt_time, int, 0);
-MODULE_PARM_DESC(wdt_time, "Watchdog time in seconds. (default="
+module_param(timeout, int, 0);
+MODULE_PARM_DESC(timeout, "Watchdog time in seconds. (default="
 				__MODULE_STRING(WDT_DEFAULT_TIME) ")");
 
-#ifdef CONFIG_WATCHDOG_NOWAYOUT
 module_param(nowayout, bool, 0);
 MODULE_PARM_DESC(nowayout,
 		"Watchdog cannot be stopped once started (default="
 				__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-#endif
 
-static unsigned long bcm47xx_wdt_busy;
-static char expect_release;
-static struct timer_list wdt_timer;
-static atomic_t ticks;
-
-static inline void bcm47xx_wdt_hw_start(void)
+static inline struct bcm47xx_wdt *bcm47xx_wdt_get(struct watchdog_device *wdd)
 {
-	/* this is 2,5s on 100Mhz clock  and 2s on 133 Mhz */
-	switch (bcm47xx_bus_type) {
-#ifdef CONFIG_BCM47XX_SSB
-	case BCM47XX_BUS_TYPE_SSB:
-		ssb_watchdog_timer_set(&bcm47xx_bus.ssb, 0xfffffff);
-		break;
-#endif
-#ifdef CONFIG_BCM47XX_BCMA
-	case BCM47XX_BUS_TYPE_BCMA:
-		bcma_chipco_watchdog_timer_set(&bcm47xx_bus.bcma.bus.drv_cc,
-					       0xfffffff);
-		break;
-#endif
-	}
+	return container_of(wdd, struct bcm47xx_wdt, wdd);
 }
 
-static inline int bcm47xx_wdt_hw_stop(void)
+static int bcm47xx_wdt_hard_keepalive(struct watchdog_device *wdd)
 {
-	switch (bcm47xx_bus_type) {
-#ifdef CONFIG_BCM47XX_SSB
-	case BCM47XX_BUS_TYPE_SSB:
-		return ssb_watchdog_timer_set(&bcm47xx_bus.ssb, 0);
-#endif
-#ifdef CONFIG_BCM47XX_BCMA
-	case BCM47XX_BUS_TYPE_BCMA:
-		bcma_chipco_watchdog_timer_set(&bcm47xx_bus.bcma.bus.drv_cc, 0);
-		return 0;
-#endif
-	}
-	return -EINVAL;
-}
+	struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd);
 
-static void bcm47xx_timer_tick(unsigned long unused)
-{
-	if (!atomic_dec_and_test(&ticks)) {
-		bcm47xx_wdt_hw_start();
-		mod_timer(&wdt_timer, jiffies + HZ);
-	} else {
-		pr_crit("Watchdog will fire soon!!!\n");
-	}
+	wdt->timer_set_ms(wdt, wdd->timeout * 1000);
+
+	return 0;
 }
 
-static inline void bcm47xx_wdt_pet(void)
+static int bcm47xx_wdt_hard_start(struct watchdog_device *wdd)
 {
-	atomic_set(&ticks, wdt_time);
+	return 0;
 }
 
-static void bcm47xx_wdt_start(void)
+static int bcm47xx_wdt_hard_stop(struct watchdog_device *wdd)
 {
-	bcm47xx_wdt_pet();
-	bcm47xx_timer_tick(0);
+	struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd);
+
+	wdt->timer_set(wdt, 0);
+
+	return 0;
 }
 
-static void bcm47xx_wdt_pause(void)
+static int bcm47xx_wdt_hard_set_timeout(struct watchdog_device *wdd,
+					unsigned int new_time)
 {
-	del_timer_sync(&wdt_timer);
-	bcm47xx_wdt_hw_stop();
+	struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd);
+	u32 max_timer = wdt->max_timer_ms;
+
+	if (new_time < 1 || new_time > max_timer / 1000) {
+		pr_warn("timeout value must be 1<=x<=%d, using %d\n",
+			max_timer / 1000, new_time);
+		return -EINVAL;
+	}
+
+	wdd->timeout = new_time;
+	return 0;
 }
 
-static void bcm47xx_wdt_stop(void)
+static struct watchdog_ops bcm47xx_wdt_hard_ops = {
+	.owner		= THIS_MODULE,
+	.start		= bcm47xx_wdt_hard_start,
+	.stop		= bcm47xx_wdt_hard_stop,
+	.ping		= bcm47xx_wdt_hard_keepalive,
+	.set_timeout	= bcm47xx_wdt_hard_set_timeout,
+};
+
+static void bcm47xx_wdt_soft_timer_tick(unsigned long data)
 {
-	bcm47xx_wdt_pause();
+	struct bcm47xx_wdt *wdt = (struct bcm47xx_wdt *)data;
+	u32 next_tick = min(wdt->wdd.timeout * 1000, wdt->max_timer_ms);
+
+	if (!atomic_dec_and_test(&wdt->soft_ticks)) {
+		wdt->timer_set_ms(wdt, next_tick);
+		mod_timer(&wdt->soft_timer, jiffies + HZ);
+	} else {
+		pr_crit("Watchdog will fire soon!!!\n");
+	}
 }
 
-static int bcm47xx_wdt_settimeout(int new_time)
+static int bcm47xx_wdt_soft_keepalive(struct watchdog_device *wdd)
 {
-	if ((new_time <= 0) || (new_time > WDT_MAX_TIME))
-		return -EINVAL;
+	struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd);
+
+	atomic_set(&wdt->soft_ticks, wdd->timeout);
 
-	wdt_time = new_time;
 	return 0;
 }
 
-static int bcm47xx_wdt_open(struct inode *inode, struct file *file)
+static int bcm47xx_wdt_soft_start(struct watchdog_device *wdd)
 {
-	if (test_and_set_bit(0, &bcm47xx_wdt_busy))
-		return -EBUSY;
+	struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd);
+
+	bcm47xx_wdt_soft_keepalive(wdd);
+	bcm47xx_wdt_soft_timer_tick((unsigned long)wdt);
 
-	bcm47xx_wdt_start();
-	return nonseekable_open(inode, file);
+	return 0;
 }
 
-static int bcm47xx_wdt_release(struct inode *inode, struct file *file)
+static int bcm47xx_wdt_soft_stop(struct watchdog_device *wdd)
 {
-	if (expect_release == 42) {
-		bcm47xx_wdt_stop();
-	} else {
-		pr_crit("Unexpected close, not stopping watchdog!\n");
-		bcm47xx_wdt_start();
-	}
+	struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd);
+
+	del_timer_sync(&wdt->soft_timer);
+	wdt->timer_set(wdt, 0);
 
-	clear_bit(0, &bcm47xx_wdt_busy);
-	expect_release = 0;
 	return 0;
 }
 
-static ssize_t bcm47xx_wdt_write(struct file *file, const char __user *data,
-				size_t len, loff_t *ppos)
+static int bcm47xx_wdt_soft_set_timeout(struct watchdog_device *wdd,
+					unsigned int new_time)
 {
-	if (len) {
-		if (!nowayout) {
-			size_t i;
-
-			expect_release = 0;
-
-			for (i = 0; i != len; i++) {
-				char c;
-				if (get_user(c, data + i))
-					return -EFAULT;
-				if (c == 'V')
-					expect_release = 42;
-			}
-		}
-		bcm47xx_wdt_pet();
+	if (new_time < 1 || new_time > WDT_SOFTTIMER_MAX) {
+		pr_warn("timeout value must be 1<=x<=%d, using %d\n",
+			WDT_SOFTTIMER_MAX, new_time);
+		return -EINVAL;
 	}
-	return len;
+
+	wdd->timeout = new_time;
+	return 0;
 }
 
 static const struct watchdog_info bcm47xx_wdt_info = {
@@ -180,130 +159,100 @@ static const struct watchdog_info bcm47xx_wdt_info = {
 				WDIOF_MAGICCLOSE,
 };
 
-static long bcm47xx_wdt_ioctl(struct file *file,
-					unsigned int cmd, unsigned long arg)
-{
-	void __user *argp = (void __user *)arg;
-	int __user *p = argp;
-	int new_value, retval = -EINVAL;
-
-	switch (cmd) {
-	case WDIOC_GETSUPPORT:
-		return copy_to_user(argp, &bcm47xx_wdt_info,
-				sizeof(bcm47xx_wdt_info)) ? -EFAULT : 0;
-
-	case WDIOC_GETSTATUS:
-	case WDIOC_GETBOOTSTATUS:
-		return put_user(0, p);
-
-	case WDIOC_SETOPTIONS:
-		if (get_user(new_value, p))
-			return -EFAULT;
-
-		if (new_value & WDIOS_DISABLECARD) {
-			bcm47xx_wdt_stop();
-			retval = 0;
-		}
-
-		if (new_value & WDIOS_ENABLECARD) {
-			bcm47xx_wdt_start();
-			retval = 0;
-		}
-
-		return retval;
-
-	case WDIOC_KEEPALIVE:
-		bcm47xx_wdt_pet();
-		return 0;
-
-	case WDIOC_SETTIMEOUT:
-		if (get_user(new_value, p))
-			return -EFAULT;
-
-		if (bcm47xx_wdt_settimeout(new_value))
-			return -EINVAL;
-
-		bcm47xx_wdt_pet();
-
-	case WDIOC_GETTIMEOUT:
-		return put_user(wdt_time, p);
-
-	default:
-		return -ENOTTY;
-	}
-}
-
 static int bcm47xx_wdt_notify_sys(struct notifier_block *this,
-	unsigned long code, void *unused)
+				  unsigned long code, void *unused)
 {
+	struct bcm47xx_wdt *wdt;
+
+	wdt = container_of(this, struct bcm47xx_wdt, notifier);
 	if (code == SYS_DOWN || code == SYS_HALT)
-		bcm47xx_wdt_stop();
+		wdt->wdd.ops->stop(&wdt->wdd);
 	return NOTIFY_DONE;
 }
 
-static const struct file_operations bcm47xx_wdt_fops = {
+static struct watchdog_ops bcm47xx_wdt_soft_ops = {
 	.owner		= THIS_MODULE,
-	.llseek		= no_llseek,
-	.unlocked_ioctl	= bcm47xx_wdt_ioctl,
-	.open		= bcm47xx_wdt_open,
-	.release	= bcm47xx_wdt_release,
-	.write		= bcm47xx_wdt_write,
-};
-
-static struct miscdevice bcm47xx_wdt_miscdev = {
-	.minor		= WATCHDOG_MINOR,
-	.name		= "watchdog",
-	.fops		= &bcm47xx_wdt_fops,
-};
-
-static struct notifier_block bcm47xx_wdt_notifier = {
-	.notifier_call = bcm47xx_wdt_notify_sys,
+	.start		= bcm47xx_wdt_soft_start,
+	.stop		= bcm47xx_wdt_soft_stop,
+	.ping		= bcm47xx_wdt_soft_keepalive,
+	.set_timeout	= bcm47xx_wdt_soft_set_timeout,
 };
 
-static int __init bcm47xx_wdt_init(void)
+static int bcm47xx_wdt_probe(struct platform_device *pdev)
 {
 	int ret;
+	bool soft;
+	struct bcm47xx_wdt *wdt = dev_get_platdata(&pdev->dev);
 
-	if (bcm47xx_wdt_hw_stop() < 0)
-		return -ENODEV;
+	if (!wdt)
+		return -ENXIO;
 
-	setup_timer(&wdt_timer, bcm47xx_timer_tick, 0L);
+	soft = wdt->max_timer_ms < WDT_SOFTTIMER_THRESHOLD * 1000;
 
-	if (bcm47xx_wdt_settimeout(wdt_time)) {
-		bcm47xx_wdt_settimeout(WDT_DEFAULT_TIME);
-		pr_info("wdt_time value must be 0 < wdt_time < %d, using %d\n",
-			(WDT_MAX_TIME + 1), wdt_time);
+	if (soft) {
+		wdt->wdd.ops = &bcm47xx_wdt_soft_ops;
+		setup_timer(&wdt->soft_timer, bcm47xx_wdt_soft_timer_tick,
+			    (long unsigned int)wdt);
+	} else {
+		wdt->wdd.ops = &bcm47xx_wdt_hard_ops;
 	}
 
-	ret = register_reboot_notifier(&bcm47xx_wdt_notifier);
+	wdt->wdd.info = &bcm47xx_wdt_info;
+	wdt->wdd.timeout = WDT_DEFAULT_TIME;
+	ret = wdt->wdd.ops->set_timeout(&wdt->wdd, timeout);
 	if (ret)
-		return ret;
+		goto err_timer;
+	watchdog_set_nowayout(&wdt->wdd, nowayout);
 
-	ret = misc_register(&bcm47xx_wdt_miscdev);
-	if (ret) {
-		unregister_reboot_notifier(&bcm47xx_wdt_notifier);
-		return ret;
-	}
+	wdt->notifier.notifier_call = &bcm47xx_wdt_notify_sys;
+
+	ret = register_reboot_notifier(&wdt->notifier);
+	if (ret)
+		goto err_timer;
 
-	pr_info("BCM47xx Watchdog Timer enabled (%d seconds%s)\n",
-		wdt_time, nowayout ? ", nowayout" : "");
+	ret = watchdog_register_device(&wdt->wdd);
+	if (ret)
+		goto err_notifier;
+
+	dev_info(&pdev->dev, "BCM47xx Watchdog Timer enabled (%d seconds%s%s)\n",
+		timeout, nowayout ? ", nowayout" : "",
+		soft ? ", Software Timer" : "");
 	return 0;
+
+err_notifier:
+	unregister_reboot_notifier(&wdt->notifier);
+err_timer:
+	if (soft)
+		del_timer_sync(&wdt->soft_timer);
+
+	return ret;
 }
 
-static void __exit bcm47xx_wdt_exit(void)
+static int bcm47xx_wdt_remove(struct platform_device *pdev)
 {
-	if (!nowayout)
-		bcm47xx_wdt_stop();
+	struct bcm47xx_wdt *wdt = dev_get_platdata(&pdev->dev);
 
-	misc_deregister(&bcm47xx_wdt_miscdev);
+	if (!wdt)
+		return -ENXIO;
 
-	unregister_reboot_notifier(&bcm47xx_wdt_notifier);
+	watchdog_unregister_device(&wdt->wdd);
+	unregister_reboot_notifier(&wdt->notifier);
+
+	return 0;
 }
 
-module_init(bcm47xx_wdt_init);
-module_exit(bcm47xx_wdt_exit);
+static struct platform_driver bcm47xx_wdt_driver = {
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "bcm47xx-wdt",
+	},
+	.probe		= bcm47xx_wdt_probe,
+	.remove		= bcm47xx_wdt_remove,
+};
+
+module_platform_driver(bcm47xx_wdt_driver);
 
 MODULE_AUTHOR("Aleksandar Radovanovic");
+MODULE_AUTHOR("Hauke Mehrtens <hauke@hauke-m.de>");
 MODULE_DESCRIPTION("Watchdog driver for Broadcom BCM47xx");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c
index c0bc92d8e438..a8dbceb32914 100644
--- a/drivers/watchdog/booke_wdt.c
+++ b/drivers/watchdog/booke_wdt.c
@@ -15,12 +15,8 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/module.h>
-#include <linux/fs.h>
 #include <linux/smp.h>
-#include <linux/miscdevice.h>
-#include <linux/notifier.h>
 #include <linux/watchdog.h>
-#include <linux/uaccess.h>
 
 #include <asm/reg_booke.h>
 #include <asm/time.h>
@@ -45,7 +41,7 @@ u32 booke_wdt_period = CONFIG_BOOKE_WDT_DEFAULT_TIMEOUT;
 #define WDTP_MASK	(TCR_WP_MASK)
 #endif
 
-static DEFINE_SPINLOCK(booke_wdt_lock);
+#ifdef CONFIG_PPC_FSL_BOOK3E
 
 /* For the specified period, determine the number of seconds
  * corresponding to the reset time.  There will be a watchdog
@@ -86,6 +82,24 @@ static unsigned int sec_to_period(unsigned int secs)
 	return 0;
 }
 
+#define MAX_WDT_TIMEOUT		period_to_sec(1)
+
+#else /* CONFIG_PPC_FSL_BOOK3E */
+
+static unsigned long long period_to_sec(unsigned int period)
+{
+	return period;
+}
+
+static unsigned int sec_to_period(unsigned int secs)
+{
+	return secs;
+}
+
+#define MAX_WDT_TIMEOUT		3	/* from Kconfig */
+
+#endif /* !CONFIG_PPC_FSL_BOOK3E */
+
 static void __booke_wdt_set(void *data)
 {
 	u32 val;
@@ -107,9 +121,11 @@ static void __booke_wdt_ping(void *data)
 	mtspr(SPRN_TSR, TSR_ENW|TSR_WIS);
 }
 
-static void booke_wdt_ping(void)
+static int booke_wdt_ping(struct watchdog_device *wdog)
 {
 	on_each_cpu(__booke_wdt_ping, NULL, 0);
+
+	return 0;
 }
 
 static void __booke_wdt_enable(void *data)
@@ -146,152 +162,81 @@ static void __booke_wdt_disable(void *data)
 
 }
 
-static ssize_t booke_wdt_write(struct file *file, const char __user *buf,
-				size_t count, loff_t *ppos)
+static void __booke_wdt_start(struct watchdog_device *wdog)
 {
-	booke_wdt_ping();
-	return count;
+	on_each_cpu(__booke_wdt_enable, NULL, 0);
+	pr_debug("watchdog enabled (timeout = %u sec)\n", wdog->timeout);
 }
 
-static struct watchdog_info ident = {
-	.options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
-	.identity = "PowerPC Book-E Watchdog",
-};
-
-static long booke_wdt_ioctl(struct file *file,
-				unsigned int cmd, unsigned long arg)
+static int booke_wdt_start(struct watchdog_device *wdog)
 {
-	u32 tmp = 0;
-	u32 __user *p = (u32 __user *)arg;
-
-	switch (cmd) {
-	case WDIOC_GETSUPPORT:
-		return copy_to_user(p, &ident, sizeof(ident)) ? -EFAULT : 0;
-	case WDIOC_GETSTATUS:
-		return put_user(0, p);
-	case WDIOC_GETBOOTSTATUS:
-		/* XXX: something is clearing TSR */
-		tmp = mfspr(SPRN_TSR) & TSR_WRS(3);
-		/* returns CARDRESET if last reset was caused by the WDT */
-		return put_user((tmp ? WDIOF_CARDRESET : 0), p);
-	case WDIOC_SETOPTIONS:
-		if (get_user(tmp, p))
-			return -EFAULT;
-		if (tmp == WDIOS_ENABLECARD) {
-			booke_wdt_ping();
-			break;
-		} else
-			return -EINVAL;
-		return 0;
-	case WDIOC_KEEPALIVE:
-		booke_wdt_ping();
-		return 0;
-	case WDIOC_SETTIMEOUT:
-		if (get_user(tmp, p))
-			return -EFAULT;
-#ifdef	CONFIG_PPC_FSL_BOOK3E
-		/* period of 1 gives the largest possible timeout */
-		if (tmp > period_to_sec(1))
-			return -EINVAL;
-		booke_wdt_period = sec_to_period(tmp);
-#else
-		booke_wdt_period = tmp;
-#endif
-		booke_wdt_set();
-		/* Fall */
-	case WDIOC_GETTIMEOUT:
-#ifdef	CONFIG_FSL_BOOKE
-		return put_user(period_to_sec(booke_wdt_period), p);
-#else
-		return put_user(booke_wdt_period, p);
-#endif
-	default:
-		return -ENOTTY;
-	}
-
-	return 0;
-}
-
-/* wdt_is_active stores whether or not the /dev/watchdog device is opened */
-static unsigned long wdt_is_active;
-
-static int booke_wdt_open(struct inode *inode, struct file *file)
-{
-	/* /dev/watchdog can only be opened once */
-	if (test_and_set_bit(0, &wdt_is_active))
-		return -EBUSY;
-
-	spin_lock(&booke_wdt_lock);
 	if (booke_wdt_enabled == 0) {
 		booke_wdt_enabled = 1;
-		on_each_cpu(__booke_wdt_enable, NULL, 0);
-		pr_debug("watchdog enabled (timeout = %llu sec)\n",
-			 period_to_sec(booke_wdt_period));
+		__booke_wdt_start(wdog);
 	}
-	spin_unlock(&booke_wdt_lock);
-
-	return nonseekable_open(inode, file);
+	return 0;
 }
 
-static int booke_wdt_release(struct inode *inode, struct file *file)
+static int booke_wdt_stop(struct watchdog_device *wdog)
 {
-#ifndef CONFIG_WATCHDOG_NOWAYOUT
-	/* Normally, the watchdog is disabled when /dev/watchdog is closed, but
-	 * if CONFIG_WATCHDOG_NOWAYOUT is defined, then it means that the
-	 * watchdog should remain enabled.  So we disable it only if
-	 * CONFIG_WATCHDOG_NOWAYOUT is not defined.
-	 */
 	on_each_cpu(__booke_wdt_disable, NULL, 0);
 	booke_wdt_enabled = 0;
 	pr_debug("watchdog disabled\n");
-#endif
 
-	clear_bit(0, &wdt_is_active);
+	return 0;
+}
+
+static int booke_wdt_set_timeout(struct watchdog_device *wdt_dev,
+				 unsigned int timeout)
+{
+	if (timeout > MAX_WDT_TIMEOUT)
+		return -EINVAL;
+	booke_wdt_period = sec_to_period(timeout);
+	wdt_dev->timeout = timeout;
+	booke_wdt_set();
 
 	return 0;
 }
 
-static const struct file_operations booke_wdt_fops = {
+static struct watchdog_info booke_wdt_info = {
+	.options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
+	.identity = "PowerPC Book-E Watchdog",
+};
+
+static struct watchdog_ops booke_wdt_ops = {
 	.owner = THIS_MODULE,
-	.llseek = no_llseek,
-	.write = booke_wdt_write,
-	.unlocked_ioctl = booke_wdt_ioctl,
-	.open = booke_wdt_open,
-	.release = booke_wdt_release,
+	.start = booke_wdt_start,
+	.stop = booke_wdt_stop,
+	.ping = booke_wdt_ping,
+	.set_timeout = booke_wdt_set_timeout,
 };
 
-static struct miscdevice booke_wdt_miscdev = {
-	.minor = WATCHDOG_MINOR,
-	.name = "watchdog",
-	.fops = &booke_wdt_fops,
+static struct watchdog_device booke_wdt_dev = {
+	.info = &booke_wdt_info,
+	.ops = &booke_wdt_ops,
+	.min_timeout = 1,
+	.max_timeout = 0xFFFF
 };
 
 static void __exit booke_wdt_exit(void)
 {
-	misc_deregister(&booke_wdt_miscdev);
+	watchdog_unregister_device(&booke_wdt_dev);
 }
 
 static int __init booke_wdt_init(void)
 {
 	int ret = 0;
+	bool nowayout = WATCHDOG_NOWAYOUT;
 
 	pr_info("powerpc book-e watchdog driver loaded\n");
-	ident.firmware_version = cur_cpu_spec->pvr_value;
-
-	ret = misc_register(&booke_wdt_miscdev);
-	if (ret) {
-		pr_err("cannot register device (minor=%u, ret=%i)\n",
-		       WATCHDOG_MINOR, ret);
-		return ret;
-	}
-
-	spin_lock(&booke_wdt_lock);
-	if (booke_wdt_enabled == 1) {
-		pr_info("watchdog enabled (timeout = %llu sec)\n",
-			period_to_sec(booke_wdt_period));
-		on_each_cpu(__booke_wdt_enable, NULL, 0);
-	}
-	spin_unlock(&booke_wdt_lock);
+	booke_wdt_info.firmware_version = cur_cpu_spec->pvr_value;
+	booke_wdt_set_timeout(&booke_wdt_dev,
+			      period_to_sec(CONFIG_BOOKE_WDT_DEFAULT_TIMEOUT));
+	watchdog_set_nowayout(&booke_wdt_dev, nowayout);
+	if (booke_wdt_enabled)
+		__booke_wdt_start(&booke_wdt_dev);
+
+	ret = watchdog_register_device(&booke_wdt_dev);
 
 	return ret;
 }
diff --git a/drivers/watchdog/davinci_wdt.c b/drivers/watchdog/davinci_wdt.c
index e8e87246ea6d..7df1fdca9e78 100644
--- a/drivers/watchdog/davinci_wdt.c
+++ b/drivers/watchdog/davinci_wdt.c
@@ -69,7 +69,6 @@ static unsigned long wdt_status;
 #define WDT_REGION_INITED 2
 #define WDT_DEVICE_INITED 3
 
-static struct resource	*wdt_mem;
 static void __iomem	*wdt_base;
 struct clk		*wdt_clk;
 
@@ -201,10 +200,11 @@ static struct miscdevice davinci_wdt_miscdev = {
 
 static int davinci_wdt_probe(struct platform_device *pdev)
 {
-	int ret = 0, size;
+	int ret = 0;
 	struct device *dev = &pdev->dev;
+	struct resource  *wdt_mem;
 
-	wdt_clk = clk_get(dev, NULL);
+	wdt_clk = devm_clk_get(dev, NULL);
 	if (WARN_ON(IS_ERR(wdt_clk)))
 		return PTR_ERR(wdt_clk);
 
@@ -221,43 +221,26 @@ static int davinci_wdt_probe(struct platform_device *pdev)
 		return -ENOENT;
 	}
 
-	size = resource_size(wdt_mem);
-	if (!request_mem_region(wdt_mem->start, size, pdev->name)) {
-		dev_err(dev, "failed to get memory region\n");
-		return -ENOENT;
-	}
-
-	wdt_base = ioremap(wdt_mem->start, size);
+	wdt_base = devm_request_and_ioremap(dev, wdt_mem);
 	if (!wdt_base) {
-		dev_err(dev, "failed to map memory region\n");
-		release_mem_region(wdt_mem->start, size);
-		wdt_mem = NULL;
-		return -ENOMEM;
+		dev_err(dev, "ioremap failed\n");
+		return -EADDRNOTAVAIL;
 	}
 
 	ret = misc_register(&davinci_wdt_miscdev);
 	if (ret < 0) {
 		dev_err(dev, "cannot register misc device\n");
-		release_mem_region(wdt_mem->start, size);
-		wdt_mem = NULL;
 	} else {
 		set_bit(WDT_DEVICE_INITED, &wdt_status);
 	}
 
-	iounmap(wdt_base);
 	return ret;
 }
 
 static int davinci_wdt_remove(struct platform_device *pdev)
 {
 	misc_deregister(&davinci_wdt_miscdev);
-	if (wdt_mem) {
-		release_mem_region(wdt_mem->start, resource_size(wdt_mem));
-		wdt_mem = NULL;
-	}
-
 	clk_disable_unprepare(wdt_clk);
-	clk_put(wdt_clk);
 
 	return 0;
 }
diff --git a/drivers/watchdog/gef_wdt.c b/drivers/watchdog/gef_wdt.c
index b9c5b58e59d3..257cfbad21da 100644
--- a/drivers/watchdog/gef_wdt.c
+++ b/drivers/watchdog/gef_wdt.c
@@ -310,6 +310,7 @@ static struct platform_driver gef_wdt_driver = {
 		.of_match_table = gef_wdt_ids,
 	},
 	.probe		= gef_wdt_probe,
+	.remove		= gef_wdt_remove,
 };
 
 static int __init gef_wdt_init(void)
diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c
index b0e541d022e6..af88ffd1068f 100644
--- a/drivers/watchdog/omap_wdt.c
+++ b/drivers/watchdog/omap_wdt.c
@@ -45,6 +45,11 @@
 
 #include "omap_wdt.h"
 
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, 0);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
+	"(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
 static unsigned timer_margin;
 module_param(timer_margin, uint, 0);
 MODULE_PARM_DESC(timer_margin, "initial watchdog timeout (in seconds)");
@@ -201,7 +206,6 @@ static const struct watchdog_ops omap_wdt_ops = {
 static int omap_wdt_probe(struct platform_device *pdev)
 {
 	struct omap_wd_timer_platform_data *pdata = pdev->dev.platform_data;
-	bool nowayout = WATCHDOG_NOWAYOUT;
 	struct watchdog_device *omap_wdt;
 	struct resource *res, *mem;
 	struct omap_wdt_dev *wdev;
diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c
index 7c18b3bffcf7..da577980d390 100644
--- a/drivers/watchdog/orion_wdt.c
+++ b/drivers/watchdog/orion_wdt.c
@@ -140,6 +140,7 @@ static const struct watchdog_ops orion_wdt_ops = {
 static struct watchdog_device orion_wdt = {
 	.info = &orion_wdt_info,
 	.ops = &orion_wdt_ops,
+	.min_timeout = 1,
 };
 
 static int orion_wdt_probe(struct platform_device *pdev)
@@ -164,12 +165,9 @@ static int orion_wdt_probe(struct platform_device *pdev)
 
 	wdt_max_duration = WDT_MAX_CYCLE_COUNT / wdt_tclk;
 
-	if ((heartbeat < 1) || (heartbeat > wdt_max_duration))
-		heartbeat = wdt_max_duration;
-
-	orion_wdt.timeout = heartbeat;
-	orion_wdt.min_timeout = 1;
+	orion_wdt.timeout = wdt_max_duration;
 	orion_wdt.max_timeout = wdt_max_duration;
+	watchdog_init_timeout(&orion_wdt, heartbeat, &pdev->dev);
 
 	watchdog_set_nowayout(&orion_wdt, nowayout);
 	ret = watchdog_register_device(&orion_wdt);
@@ -179,7 +177,7 @@ static int orion_wdt_probe(struct platform_device *pdev)
 	}
 
 	pr_info("Initial timeout %d sec%s\n",
-		heartbeat, nowayout ? ", nowayout" : "");
+		orion_wdt.timeout, nowayout ? ", nowayout" : "");
 	return 0;
 }
 
@@ -225,4 +223,5 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
 				__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
 
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:orion_wdt");
 MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
diff --git a/drivers/watchdog/pnx4008_wdt.c b/drivers/watchdog/pnx4008_wdt.c
index de1f3fa1d787..a3684a30eb69 100644
--- a/drivers/watchdog/pnx4008_wdt.c
+++ b/drivers/watchdog/pnx4008_wdt.c
@@ -142,6 +142,7 @@ static const struct watchdog_ops pnx4008_wdt_ops = {
 static struct watchdog_device pnx4008_wdd = {
 	.info = &pnx4008_wdt_ident,
 	.ops = &pnx4008_wdt_ops,
+	.timeout = DEFAULT_HEARTBEAT,
 	.min_timeout = 1,
 	.max_timeout = MAX_HEARTBEAT,
 };
@@ -151,8 +152,7 @@ static int pnx4008_wdt_probe(struct platform_device *pdev)
 	struct resource *r;
 	int ret = 0;
 
-	if (heartbeat < 1 || heartbeat > MAX_HEARTBEAT)
-		heartbeat = DEFAULT_HEARTBEAT;
+	watchdog_init_timeout(&pnx4008_wdd, heartbeat, &pdev->dev);
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	wdt_base = devm_ioremap_resource(&pdev->dev, r);
@@ -167,7 +167,6 @@ static int pnx4008_wdt_probe(struct platform_device *pdev)
 	if (ret)
 		goto out;
 
-	pnx4008_wdd.timeout = heartbeat;
 	pnx4008_wdd.bootstatus = (readl(WDTIM_RES(wdt_base)) & WDOG_RESET) ?
 			WDIOF_CARDRESET : 0;
 	watchdog_set_nowayout(&pnx4008_wdd, nowayout);
@@ -181,7 +180,7 @@ static int pnx4008_wdt_probe(struct platform_device *pdev)
 	}
 
 	dev_info(&pdev->dev, "PNX4008 Watchdog Timer: heartbeat %d sec\n",
-			heartbeat);
+		 pnx4008_wdd.timeout);
 
 	return 0;
 
diff --git a/drivers/watchdog/retu_wdt.c b/drivers/watchdog/retu_wdt.c
new file mode 100644
index 000000000000..f53615dc633d
--- /dev/null
+++ b/drivers/watchdog/retu_wdt.c
@@ -0,0 +1,178 @@
+/*
+ * Retu watchdog driver
+ *
+ * Copyright (C) 2004, 2005 Nokia Corporation
+ *
+ * Based on code written by Amit Kucheria and Michael Buesch.
+ * Rewritten by Aaro Koskinen.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mfd/retu.h>
+#include <linux/watchdog.h>
+#include <linux/platform_device.h>
+
+/* Watchdog timer values in seconds */
+#define RETU_WDT_MAX_TIMER	63
+
+struct retu_wdt_dev {
+	struct retu_dev		*rdev;
+	struct device		*dev;
+	struct delayed_work	ping_work;
+};
+
+/*
+ * Since Retu watchdog cannot be disabled in hardware, we must kick it
+ * with a timer until userspace watchdog software takes over. If
+ * CONFIG_WATCHDOG_NOWAYOUT is set, we never start the feeding.
+ */
+static void retu_wdt_ping_enable(struct retu_wdt_dev *wdev)
+{
+	retu_write(wdev->rdev, RETU_REG_WATCHDOG, RETU_WDT_MAX_TIMER);
+	schedule_delayed_work(&wdev->ping_work,
+			round_jiffies_relative(RETU_WDT_MAX_TIMER * HZ / 2));
+}
+
+static void retu_wdt_ping_disable(struct retu_wdt_dev *wdev)
+{
+	retu_write(wdev->rdev, RETU_REG_WATCHDOG, RETU_WDT_MAX_TIMER);
+	cancel_delayed_work_sync(&wdev->ping_work);
+}
+
+static void retu_wdt_ping_work(struct work_struct *work)
+{
+	struct retu_wdt_dev *wdev = container_of(to_delayed_work(work),
+						struct retu_wdt_dev, ping_work);
+	retu_wdt_ping_enable(wdev);
+}
+
+static int retu_wdt_start(struct watchdog_device *wdog)
+{
+	struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog);
+
+	retu_wdt_ping_disable(wdev);
+
+	return retu_write(wdev->rdev, RETU_REG_WATCHDOG, wdog->timeout);
+}
+
+static int retu_wdt_stop(struct watchdog_device *wdog)
+{
+	struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog);
+
+	retu_wdt_ping_enable(wdev);
+
+	return 0;
+}
+
+static int retu_wdt_ping(struct watchdog_device *wdog)
+{
+	struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog);
+
+	return retu_write(wdev->rdev, RETU_REG_WATCHDOG, wdog->timeout);
+}
+
+static int retu_wdt_set_timeout(struct watchdog_device *wdog,
+				unsigned int timeout)
+{
+	struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog);
+
+	wdog->timeout = timeout;
+	return retu_write(wdev->rdev, RETU_REG_WATCHDOG, wdog->timeout);
+}
+
+static const struct watchdog_info retu_wdt_info = {
+	.options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
+	.identity = "Retu watchdog",
+};
+
+static const struct watchdog_ops retu_wdt_ops = {
+	.owner		= THIS_MODULE,
+	.start		= retu_wdt_start,
+	.stop		= retu_wdt_stop,
+	.ping		= retu_wdt_ping,
+	.set_timeout	= retu_wdt_set_timeout,
+};
+
+static int retu_wdt_probe(struct platform_device *pdev)
+{
+	struct retu_dev *rdev = dev_get_drvdata(pdev->dev.parent);
+	bool nowayout = WATCHDOG_NOWAYOUT;
+	struct watchdog_device *retu_wdt;
+	struct retu_wdt_dev *wdev;
+	int ret;
+
+	retu_wdt = devm_kzalloc(&pdev->dev, sizeof(*retu_wdt), GFP_KERNEL);
+	if (!retu_wdt)
+		return -ENOMEM;
+
+	wdev = devm_kzalloc(&pdev->dev, sizeof(*wdev), GFP_KERNEL);
+	if (!wdev)
+		return -ENOMEM;
+
+	retu_wdt->info		= &retu_wdt_info;
+	retu_wdt->ops		= &retu_wdt_ops;
+	retu_wdt->timeout	= RETU_WDT_MAX_TIMER;
+	retu_wdt->min_timeout	= 0;
+	retu_wdt->max_timeout	= RETU_WDT_MAX_TIMER;
+
+	watchdog_set_drvdata(retu_wdt, wdev);
+	watchdog_set_nowayout(retu_wdt, nowayout);
+
+	wdev->rdev		= rdev;
+	wdev->dev		= &pdev->dev;
+
+	INIT_DELAYED_WORK(&wdev->ping_work, retu_wdt_ping_work);
+
+	ret = watchdog_register_device(retu_wdt);
+	if (ret < 0)
+		return ret;
+
+	if (nowayout)
+		retu_wdt_ping(retu_wdt);
+	else
+		retu_wdt_ping_enable(wdev);
+
+	platform_set_drvdata(pdev, retu_wdt);
+
+	return 0;
+}
+
+static int retu_wdt_remove(struct platform_device *pdev)
+{
+	struct watchdog_device *wdog = platform_get_drvdata(pdev);
+	struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog);
+
+	watchdog_unregister_device(wdog);
+	cancel_delayed_work_sync(&wdev->ping_work);
+
+	return 0;
+}
+
+static struct platform_driver retu_wdt_driver = {
+	.probe		= retu_wdt_probe,
+	.remove		= retu_wdt_remove,
+	.driver		= {
+		.name	= "retu-wdt",
+	},
+};
+module_platform_driver(retu_wdt_driver);
+
+MODULE_ALIAS("platform:retu-wdt");
+MODULE_DESCRIPTION("Retu watchdog");
+MODULE_AUTHOR("Amit Kucheria");
+MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c
index 27bcd4e2c4a4..c1a221cbeae4 100644
--- a/drivers/watchdog/s3c2410_wdt.c
+++ b/drivers/watchdog/s3c2410_wdt.c
@@ -53,7 +53,7 @@
 #define CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME	(15)
 
 static bool nowayout	= WATCHDOG_NOWAYOUT;
-static int tmr_margin	= CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME;
+static int tmr_margin;
 static int tmr_atboot	= CONFIG_S3C2410_WATCHDOG_ATBOOT;
 static int soft_noboot;
 static int debug;
@@ -226,6 +226,7 @@ static struct watchdog_ops s3c2410wdt_ops = {
 static struct watchdog_device s3c2410_wdd = {
 	.info = &s3c2410_wdt_ident,
 	.ops = &s3c2410wdt_ops,
+	.timeout = CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME,
 };
 
 /* interrupt handler code */
@@ -309,7 +310,6 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
 	unsigned int wtcon;
 	int started = 0;
 	int ret;
-	int size;
 
 	DBG("%s: probe=%p\n", __func__, pdev);
 
@@ -330,28 +330,20 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
 	}
 
 	/* get the memory region for the watchdog timer */
-
-	size = resource_size(wdt_mem);
-	if (!request_mem_region(wdt_mem->start, size, pdev->name)) {
-		dev_err(dev, "failed to get memory region\n");
-		ret = -EBUSY;
-		goto err;
-	}
-
-	wdt_base = ioremap(wdt_mem->start, size);
+	wdt_base = devm_request_and_ioremap(dev, wdt_mem);
 	if (wdt_base == NULL) {
-		dev_err(dev, "failed to ioremap() region\n");
-		ret = -EINVAL;
-		goto err_req;
+		dev_err(dev, "failed to devm_request_and_ioremap() region\n");
+		ret = -ENOMEM;
+		goto err;
 	}
 
 	DBG("probe: mapped wdt_base=%p\n", wdt_base);
 
-	wdt_clock = clk_get(&pdev->dev, "watchdog");
+	wdt_clock = devm_clk_get(dev, "watchdog");
 	if (IS_ERR(wdt_clock)) {
 		dev_err(dev, "failed to find watchdog clock source\n");
 		ret = PTR_ERR(wdt_clock);
-		goto err_map;
+		goto err;
 	}
 
 	clk_prepare_enable(wdt_clock);
@@ -365,7 +357,8 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
 	/* see if we can actually set the requested timer margin, and if
 	 * not, try the default value */
 
-	if (s3c2410wdt_set_heartbeat(&s3c2410_wdd, tmr_margin)) {
+	watchdog_init_timeout(&s3c2410_wdd, tmr_margin,  &pdev->dev);
+	if (s3c2410wdt_set_heartbeat(&s3c2410_wdd, s3c2410_wdd.timeout)) {
 		started = s3c2410wdt_set_heartbeat(&s3c2410_wdd,
 					CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME);
 
@@ -378,7 +371,8 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
 							"cannot start\n");
 	}
 
-	ret = request_irq(wdt_irq->start, s3c2410wdt_irq, 0, pdev->name, pdev);
+	ret = devm_request_irq(dev, wdt_irq->start, s3c2410wdt_irq, 0,
+				pdev->name, pdev);
 	if (ret != 0) {
 		dev_err(dev, "failed to install irq (%d)\n", ret);
 		goto err_cpufreq;
@@ -389,7 +383,7 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
 	ret = watchdog_register_device(&s3c2410_wdd);
 	if (ret) {
 		dev_err(dev, "cannot register watchdog (%d)\n", ret);
-		goto err_irq;
+		goto err_cpufreq;
 	}
 
 	if (tmr_atboot && started == 0) {
@@ -414,23 +408,13 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
 
 	return 0;
 
- err_irq:
-	free_irq(wdt_irq->start, pdev);
-
  err_cpufreq:
 	s3c2410wdt_cpufreq_deregister();
 
  err_clk:
 	clk_disable_unprepare(wdt_clock);
-	clk_put(wdt_clock);
 	wdt_clock = NULL;
 
- err_map:
-	iounmap(wdt_base);
-
- err_req:
-	release_mem_region(wdt_mem->start, size);
-
  err:
 	wdt_irq = NULL;
 	wdt_mem = NULL;
@@ -441,17 +425,11 @@ static int s3c2410wdt_remove(struct platform_device *dev)
 {
 	watchdog_unregister_device(&s3c2410_wdd);
 
-	free_irq(wdt_irq->start, dev);
-
 	s3c2410wdt_cpufreq_deregister();
 
 	clk_disable_unprepare(wdt_clock);
-	clk_put(wdt_clock);
 	wdt_clock = NULL;
 
-	iounmap(wdt_base);
-
-	release_mem_region(wdt_mem->start, resource_size(wdt_mem));
 	wdt_irq = NULL;
 	wdt_mem = NULL;
 	return 0;
diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c
index 2b0e000d4377..e3b8f757d2d3 100644
--- a/drivers/watchdog/sp5100_tco.c
+++ b/drivers/watchdog/sp5100_tco.c
@@ -361,7 +361,7 @@ static unsigned char sp5100_tco_setupdevice(void)
 {
 	struct pci_dev *dev = NULL;
 	const char *dev_name = NULL;
-	u32 val;
+	u32 val, tmp_val;
 	u32 index_reg, data_reg, base_addr;
 
 	/* Match the PCI device */
@@ -497,30 +497,19 @@ static unsigned char sp5100_tco_setupdevice(void)
 		pr_debug("Got 0x%04x from resource tree\n", val);
 	}
 
-	/* Restore to the low three bits, if chipset is SB8x0(or later) */
-	if (sp5100_tco_pci->revision >= 0x40) {
-		u8 reserved_bit;
-		reserved_bit = inb(base_addr) & 0x7;
-		val |= (u32)reserved_bit;
-	}
+	/* Restore to the low three bits */
+	outb(base_addr+0, index_reg);
+	tmp_val = val | (inb(data_reg) & 0x7);
 
 	/* Re-programming the watchdog timer base address */
 	outb(base_addr+0, index_reg);
-	/* Low three bits of BASE are reserved */
-	outb((val >>  0) & 0xf8, data_reg);
+	outb((tmp_val >>  0) & 0xff, data_reg);
 	outb(base_addr+1, index_reg);
-	outb((val >>  8) & 0xff, data_reg);
+	outb((tmp_val >>  8) & 0xff, data_reg);
 	outb(base_addr+2, index_reg);
-	outb((val >> 16) & 0xff, data_reg);
+	outb((tmp_val >> 16) & 0xff, data_reg);
 	outb(base_addr+3, index_reg);
-	outb((val >> 24) & 0xff, data_reg);
-
-	/*
-	 * Clear unnecessary the low three bits,
-	 * if chipset is SB8x0(or later)
-	 */
-	if (sp5100_tco_pci->revision >= 0x40)
-		val &= ~0x7;
+	outb((tmp_val >> 24) & 0xff, data_reg);
 
 	if (!request_mem_region_exclusive(val, SP5100_WDT_MEM_MAP_SIZE,
 								   dev_name)) {
diff --git a/drivers/watchdog/stmp3xxx_rtc_wdt.c b/drivers/watchdog/stmp3xxx_rtc_wdt.c
new file mode 100644
index 000000000000..c97e98dcde62
--- /dev/null
+++ b/drivers/watchdog/stmp3xxx_rtc_wdt.c
@@ -0,0 +1,111 @@
+/*
+ * Watchdog driver for the RTC based watchdog in STMP3xxx and i.MX23/28
+ *
+ * Author: Wolfram Sang <w.sang@pengutronix.de>
+ *
+ * Copyright (C) 2011-12 Wolfram Sang, Pengutronix
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/watchdog.h>
+#include <linux/platform_device.h>
+#include <linux/stmp3xxx_rtc_wdt.h>
+
+#define WDOG_TICK_RATE 1000 /* 1 kHz clock */
+#define STMP3XXX_DEFAULT_TIMEOUT 19
+#define STMP3XXX_MAX_TIMEOUT (UINT_MAX / WDOG_TICK_RATE)
+
+static int heartbeat = STMP3XXX_DEFAULT_TIMEOUT;
+module_param(heartbeat, uint, 0);
+MODULE_PARM_DESC(heartbeat, "Watchdog heartbeat period in seconds from 1 to "
+		 __MODULE_STRING(STMP3XXX_MAX_TIMEOUT) ", default "
+		 __MODULE_STRING(STMP3XXX_DEFAULT_TIMEOUT));
+
+static int wdt_start(struct watchdog_device *wdd)
+{
+	struct device *dev = watchdog_get_drvdata(wdd);
+	struct stmp3xxx_wdt_pdata *pdata = dev->platform_data;
+
+	pdata->wdt_set_timeout(dev->parent, wdd->timeout * WDOG_TICK_RATE);
+	return 0;
+}
+
+static int wdt_stop(struct watchdog_device *wdd)
+{
+	struct device *dev = watchdog_get_drvdata(wdd);
+	struct stmp3xxx_wdt_pdata *pdata = dev->platform_data;
+
+	pdata->wdt_set_timeout(dev->parent, 0);
+	return 0;
+}
+
+static int wdt_set_timeout(struct watchdog_device *wdd, unsigned new_timeout)
+{
+	wdd->timeout = new_timeout;
+	return wdt_start(wdd);
+}
+
+static const struct watchdog_info stmp3xxx_wdt_ident = {
+	.options = WDIOF_MAGICCLOSE | WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
+	.identity = "STMP3XXX RTC Watchdog",
+};
+
+static const struct watchdog_ops stmp3xxx_wdt_ops = {
+	.owner = THIS_MODULE,
+	.start = wdt_start,
+	.stop = wdt_stop,
+	.set_timeout = wdt_set_timeout,
+};
+
+static struct watchdog_device stmp3xxx_wdd = {
+	.info = &stmp3xxx_wdt_ident,
+	.ops = &stmp3xxx_wdt_ops,
+	.min_timeout = 1,
+	.max_timeout = STMP3XXX_MAX_TIMEOUT,
+	.status = WATCHDOG_NOWAYOUT_INIT_STATUS,
+};
+
+static int stmp3xxx_wdt_probe(struct platform_device *pdev)
+{
+	int ret;
+
+	watchdog_set_drvdata(&stmp3xxx_wdd, &pdev->dev);
+
+	stmp3xxx_wdd.timeout = clamp_t(unsigned, heartbeat, 1, STMP3XXX_MAX_TIMEOUT);
+
+	ret = watchdog_register_device(&stmp3xxx_wdd);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "cannot register watchdog device\n");
+		return ret;
+	}
+
+	dev_info(&pdev->dev, "initialized watchdog with heartbeat %ds\n",
+			stmp3xxx_wdd.timeout);
+	return 0;
+}
+
+static int stmp3xxx_wdt_remove(struct platform_device *pdev)
+{
+	watchdog_unregister_device(&stmp3xxx_wdd);
+	return 0;
+}
+
+static struct platform_driver stmp3xxx_wdt_driver = {
+	.driver = {
+		.name = "stmp3xxx_rtc_wdt",
+	},
+	.probe = stmp3xxx_wdt_probe,
+	.remove = stmp3xxx_wdt_remove,
+};
+module_platform_driver(stmp3xxx_wdt_driver);
+
+MODULE_DESCRIPTION("STMP3XXX RTC Watchdog Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>");
+MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
diff --git a/drivers/watchdog/stmp3xxx_wdt.c b/drivers/watchdog/stmp3xxx_wdt.c
deleted file mode 100644
index 1f4f69728fee..000000000000
--- a/drivers/watchdog/stmp3xxx_wdt.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Watchdog driver for Freescale STMP37XX/STMP378X
- *
- * Author: Vitaly Wool <vital@embeddedalley.com>
- *
- * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved.
- * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/miscdevice.h>
-#include <linux/watchdog.h>
-#include <linux/platform_device.h>
-#include <linux/spinlock.h>
-#include <linux/uaccess.h>
-#include <linux/module.h>
-
-#include <mach/platform.h>
-#include <mach/regs-rtc.h>
-
-#define DEFAULT_HEARTBEAT	19
-#define MAX_HEARTBEAT		(0x10000000 >> 6)
-
-/* missing bitmask in headers */
-#define BV_RTC_PERSISTENT1_GENERAL__RTC_FORCE_UPDATER     0x80000000
-
-#define WDT_IN_USE		0
-#define WDT_OK_TO_CLOSE		1
-
-#define WDOG_COUNTER_RATE	1000 /* 1 kHz clock */
-
-static DEFINE_SPINLOCK(stmp3xxx_wdt_io_lock);
-static unsigned long wdt_status;
-static const bool nowayout = WATCHDOG_NOWAYOUT;
-static int heartbeat = DEFAULT_HEARTBEAT;
-static unsigned long boot_status;
-
-static void wdt_enable(u32 value)
-{
-	spin_lock(&stmp3xxx_wdt_io_lock);
-	__raw_writel(value, REGS_RTC_BASE + HW_RTC_WATCHDOG);
-	stmp3xxx_setl(BM_RTC_CTRL_WATCHDOGEN, REGS_RTC_BASE + HW_RTC_CTRL);
-	stmp3xxx_setl(BV_RTC_PERSISTENT1_GENERAL__RTC_FORCE_UPDATER,
-			REGS_RTC_BASE + HW_RTC_PERSISTENT1);
-	spin_unlock(&stmp3xxx_wdt_io_lock);
-}
-
-static void wdt_disable(void)
-{
-	spin_lock(&stmp3xxx_wdt_io_lock);
-	stmp3xxx_clearl(BV_RTC_PERSISTENT1_GENERAL__RTC_FORCE_UPDATER,
-			REGS_RTC_BASE + HW_RTC_PERSISTENT1);
-	stmp3xxx_clearl(BM_RTC_CTRL_WATCHDOGEN, REGS_RTC_BASE + HW_RTC_CTRL);
-	spin_unlock(&stmp3xxx_wdt_io_lock);
-}
-
-static void wdt_ping(void)
-{
-	wdt_enable(heartbeat * WDOG_COUNTER_RATE);
-}
-
-static int stmp3xxx_wdt_open(struct inode *inode, struct file *file)
-{
-	if (test_and_set_bit(WDT_IN_USE, &wdt_status))
-		return -EBUSY;
-
-	clear_bit(WDT_OK_TO_CLOSE, &wdt_status);
-	wdt_ping();
-
-	return nonseekable_open(inode, file);
-}
-
-static ssize_t stmp3xxx_wdt_write(struct file *file, const char __user *data,
-	size_t len, loff_t *ppos)
-{
-	if (len) {
-		if (!nowayout) {
-			size_t i;
-
-			clear_bit(WDT_OK_TO_CLOSE, &wdt_status);
-
-			for (i = 0; i != len; i++) {
-				char c;
-
-				if (get_user(c, data + i))
-					return -EFAULT;
-				if (c == 'V')
-					set_bit(WDT_OK_TO_CLOSE, &wdt_status);
-			}
-		}
-		wdt_ping();
-	}
-
-	return len;
-}
-
-static const struct watchdog_info ident = {
-	.options	= WDIOF_CARDRESET |
-			  WDIOF_MAGICCLOSE |
-			  WDIOF_SETTIMEOUT |
-			  WDIOF_KEEPALIVEPING,
-	.identity	= "STMP3XXX Watchdog",
-};
-
-static long stmp3xxx_wdt_ioctl(struct file *file, unsigned int cmd,
-	unsigned long arg)
-{
-	void __user *argp = (void __user *)arg;
-	int __user *p = argp;
-	int new_heartbeat, opts;
-	int ret = -ENOTTY;
-
-	switch (cmd) {
-	case WDIOC_GETSUPPORT:
-		ret = copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0;
-		break;
-
-	case WDIOC_GETSTATUS:
-		ret = put_user(0, p);
-		break;
-
-	case WDIOC_GETBOOTSTATUS:
-		ret = put_user(boot_status, p);
-		break;
-
-	case WDIOC_SETOPTIONS:
-		if (get_user(opts, p)) {
-			ret = -EFAULT;
-			break;
-		}
-		if (opts & WDIOS_DISABLECARD)
-			wdt_disable();
-		else if (opts & WDIOS_ENABLECARD)
-			wdt_ping();
-		else {
-			pr_debug("%s: unknown option 0x%x\n", __func__, opts);
-			ret = -EINVAL;
-			break;
-		}
-		ret = 0;
-		break;
-
-	case WDIOC_KEEPALIVE:
-		wdt_ping();
-		ret = 0;
-		break;
-
-	case WDIOC_SETTIMEOUT:
-		if (get_user(new_heartbeat, p)) {
-			ret = -EFAULT;
-			break;
-		}
-		if (new_heartbeat <= 0 || new_heartbeat > MAX_HEARTBEAT) {
-			ret = -EINVAL;
-			break;
-		}
-
-		heartbeat = new_heartbeat;
-		wdt_ping();
-		/* Fall through */
-
-	case WDIOC_GETTIMEOUT:
-		ret = put_user(heartbeat, p);
-		break;
-	}
-	return ret;
-}
-
-static int stmp3xxx_wdt_release(struct inode *inode, struct file *file)
-{
-	int ret = 0;
-
-	if (!nowayout) {
-		if (!test_bit(WDT_OK_TO_CLOSE, &wdt_status)) {
-			wdt_ping();
-			pr_debug("%s: Device closed unexpectedly\n", __func__);
-			ret = -EINVAL;
-		} else {
-			wdt_disable();
-			clear_bit(WDT_OK_TO_CLOSE, &wdt_status);
-		}
-	}
-	clear_bit(WDT_IN_USE, &wdt_status);
-
-	return ret;
-}
-
-static const struct file_operations stmp3xxx_wdt_fops = {
-	.owner = THIS_MODULE,
-	.llseek = no_llseek,
-	.write = stmp3xxx_wdt_write,
-	.unlocked_ioctl = stmp3xxx_wdt_ioctl,
-	.open = stmp3xxx_wdt_open,
-	.release = stmp3xxx_wdt_release,
-};
-
-static struct miscdevice stmp3xxx_wdt_miscdev = {
-	.minor = WATCHDOG_MINOR,
-	.name = "watchdog",
-	.fops = &stmp3xxx_wdt_fops,
-};
-
-static int stmp3xxx_wdt_probe(struct platform_device *pdev)
-{
-	int ret = 0;
-
-	if (heartbeat < 1 || heartbeat > MAX_HEARTBEAT)
-		heartbeat = DEFAULT_HEARTBEAT;
-
-	boot_status = __raw_readl(REGS_RTC_BASE + HW_RTC_PERSISTENT1) &
-			BV_RTC_PERSISTENT1_GENERAL__RTC_FORCE_UPDATER;
-	boot_status = !!boot_status;
-	stmp3xxx_clearl(BV_RTC_PERSISTENT1_GENERAL__RTC_FORCE_UPDATER,
-			REGS_RTC_BASE + HW_RTC_PERSISTENT1);
-	wdt_disable();		/* disable for now */
-
-	ret = misc_register(&stmp3xxx_wdt_miscdev);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "cannot register misc device\n");
-		return ret;
-	}
-
-	pr_info("initialized, heartbeat %d sec\n", heartbeat);
-
-	return ret;
-}
-
-static int stmp3xxx_wdt_remove(struct platform_device *pdev)
-{
-	misc_deregister(&stmp3xxx_wdt_miscdev);
-	return 0;
-}
-
-#ifdef CONFIG_PM
-static int wdt_suspended;
-static u32 wdt_saved_time;
-
-static int stmp3xxx_wdt_suspend(struct platform_device *pdev,
-				pm_message_t state)
-{
-	if (__raw_readl(REGS_RTC_BASE + HW_RTC_CTRL) &
-		BM_RTC_CTRL_WATCHDOGEN) {
-		wdt_suspended = 1;
-		wdt_saved_time = __raw_readl(REGS_RTC_BASE + HW_RTC_WATCHDOG);
-		wdt_disable();
-	}
-	return 0;
-}
-
-static int stmp3xxx_wdt_resume(struct platform_device *pdev)
-{
-	if (wdt_suspended) {
-		wdt_enable(wdt_saved_time);
-		wdt_suspended = 0;
-	}
-	return 0;
-}
-#else
-#define stmp3xxx_wdt_suspend	NULL
-#define stmp3xxx_wdt_resume	NULL
-#endif
-
-static struct platform_driver platform_wdt_driver = {
-	.driver = {
-		.name = "stmp3xxx_wdt",
-	},
-	.probe = stmp3xxx_wdt_probe,
-	.remove = stmp3xxx_wdt_remove,
-	.suspend = stmp3xxx_wdt_suspend,
-	.resume = stmp3xxx_wdt_resume,
-};
-
-module_platform_driver(platform_wdt_driver);
-
-MODULE_DESCRIPTION("STMP3XXX Watchdog Driver");
-MODULE_LICENSE("GPL");
-
-module_param(heartbeat, int, 0);
-MODULE_PARM_DESC(heartbeat,
-		 "Watchdog heartbeat period in seconds from 1 to "
-		 __MODULE_STRING(MAX_HEARTBEAT) ", default "
-		 __MODULE_STRING(DEFAULT_HEARTBEAT));
-
-MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c
index 3796434991fa..05d18b4c661b 100644
--- a/drivers/watchdog/watchdog_core.c
+++ b/drivers/watchdog/watchdog_core.c
@@ -36,12 +36,68 @@
 #include <linux/init.h>		/* For __init/__exit/... */
 #include <linux/idr.h>		/* For ida_* macros */
 #include <linux/err.h>		/* For IS_ERR macros */
+#include <linux/of.h>		/* For of_get_timeout_sec */
 
 #include "watchdog_core.h"	/* For watchdog_dev_register/... */
 
 static DEFINE_IDA(watchdog_ida);
 static struct class *watchdog_class;
 
+static void watchdog_check_min_max_timeout(struct watchdog_device *wdd)
+{
+	/*
+	 * Check that we have valid min and max timeout values, if
+	 * not reset them both to 0 (=not used or unknown)
+	 */
+	if (wdd->min_timeout > wdd->max_timeout) {
+		pr_info("Invalid min and max timeout values, resetting to 0!\n");
+		wdd->min_timeout = 0;
+		wdd->max_timeout = 0;
+	}
+}
+
+/**
+ * watchdog_init_timeout() - initialize the timeout field
+ * @timeout_parm: timeout module parameter
+ * @dev: Device that stores the timeout-sec property
+ *
+ * Initialize the timeout field of the watchdog_device struct with either the
+ * timeout module parameter (if it is valid value) or the timeout-sec property
+ * (only if it is a valid value and the timeout_parm is out of bounds).
+ * If none of them are valid then we keep the old value (which should normally
+ * be the default timeout value.
+ *
+ * A zero is returned on success and -EINVAL for failure.
+ */
+int watchdog_init_timeout(struct watchdog_device *wdd,
+				unsigned int timeout_parm, struct device *dev)
+{
+	unsigned int t = 0;
+	int ret = 0;
+
+	watchdog_check_min_max_timeout(wdd);
+
+	/* try to get the tiemout module parameter first */
+	if (!watchdog_timeout_invalid(wdd, timeout_parm)) {
+		wdd->timeout = timeout_parm;
+		return ret;
+	}
+	if (timeout_parm)
+		ret = -EINVAL;
+
+	/* try to get the timeout_sec property */
+	if (dev == NULL || dev->of_node == NULL)
+		return ret;
+	of_property_read_u32(dev->of_node, "timeout-sec", &t);
+	if (!watchdog_timeout_invalid(wdd, t))
+		wdd->timeout = t;
+	else
+		ret = -EINVAL;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(watchdog_init_timeout);
+
 /**
  * watchdog_register_device() - register a watchdog device
  * @wdd: watchdog device
@@ -63,15 +119,7 @@ int watchdog_register_device(struct watchdog_device *wdd)
 	if (wdd->ops->start == NULL || wdd->ops->stop == NULL)
 		return -EINVAL;
 
-	/*
-	 * Check that we have valid min and max timeout values, if
-	 * not reset them both to 0 (=not used or unknown)
-	 */
-	if (wdd->min_timeout > wdd->max_timeout) {
-		pr_info("Invalid min and max timeout values, resetting to 0!\n");
-		wdd->min_timeout = 0;
-		wdd->max_timeout = 0;
-	}
+	watchdog_check_min_max_timeout(wdd);
 
 	/*
 	 * Note: now that all watchdog_device data has been verified, we
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index ef8edecfc526..08b48bbf9f4b 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -200,8 +200,7 @@ static int watchdog_set_timeout(struct watchdog_device *wddev,
 	    !(wddev->info->options & WDIOF_SETTIMEOUT))
 		return -EOPNOTSUPP;
 
-	if ((wddev->max_timeout != 0) &&
-	    (timeout < wddev->min_timeout || timeout > wddev->max_timeout))
+	if (watchdog_timeout_invalid(wddev, timeout))
 		return -EINVAL;
 
 	mutex_lock(&wddev->lock);
diff --git a/drivers/xen/xen-acpi-cpuhotplug.c b/drivers/xen/xen-acpi-cpuhotplug.c
index 757827966e34..18c742bec91b 100644
--- a/drivers/xen/xen-acpi-cpuhotplug.c
+++ b/drivers/xen/xen-acpi-cpuhotplug.c
@@ -239,24 +239,6 @@ static acpi_status xen_acpi_cpu_hotadd(struct acpi_processor *pr)
 	return AE_OK;
 }
 
-static
-int acpi_processor_device_add(acpi_handle handle, struct acpi_device **device)
-{
-	acpi_handle phandle;
-	struct acpi_device *pdev;
-
-	if (acpi_get_parent(handle, &phandle))
-		return -ENODEV;
-
-	if (acpi_bus_get_device(phandle, &pdev))
-		return -ENODEV;
-
-	if (acpi_bus_scan(handle))
-		return -ENODEV;
-
-	return 0;
-}
-
 static int acpi_processor_device_remove(struct acpi_device *device)
 {
 	pr_debug(PREFIX "Xen does not support CPU hotremove\n");
@@ -272,6 +254,8 @@ static void acpi_processor_hotplug_notify(acpi_handle handle,
 	u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; /* default */
 	int result;
 
+	acpi_scan_lock_acquire();
+
 	switch (event) {
 	case ACPI_NOTIFY_BUS_CHECK:
 	case ACPI_NOTIFY_DEVICE_CHECK:
@@ -286,12 +270,16 @@ static void acpi_processor_hotplug_notify(acpi_handle handle,
 		if (!acpi_bus_get_device(handle, &device))
 			break;
 
-		result = acpi_processor_device_add(handle, &device);
+		result = acpi_bus_scan(handle);
 		if (result) {
 			pr_err(PREFIX "Unable to add the device\n");
 			break;
 		}
-
+		result = acpi_bus_get_device(handle, &device);
+		if (result) {
+			pr_err(PREFIX "Missing device object\n");
+			break;
+		}
 		ost_code = ACPI_OST_SC_SUCCESS;
 		break;
 
@@ -321,11 +309,13 @@ static void acpi_processor_hotplug_notify(acpi_handle handle,
 				  "Unsupported event [0x%x]\n", event));
 
 		/* non-hotplug event; possibly handled by other handler */
-		return;
+		goto out;
 	}
 
 	(void) acpi_evaluate_hotplug_ost(handle, event, ost_code, NULL);
-	return;
+
+out:
+	acpi_scan_lock_release();
 }
 
 static acpi_status is_processor_device(acpi_handle handle)
diff --git a/drivers/xen/xen-acpi-memhotplug.c b/drivers/xen/xen-acpi-memhotplug.c
index 853b12dba5bb..faef5b396051 100644
--- a/drivers/xen/xen-acpi-memhotplug.c
+++ b/drivers/xen/xen-acpi-memhotplug.c
@@ -158,31 +158,17 @@ acpi_memory_get_device_resources(struct acpi_memory_device *mem_device)
 	return 0;
 }
 
-static int
-acpi_memory_get_device(acpi_handle handle,
-		       struct acpi_memory_device **mem_device)
+static int acpi_memory_get_device(acpi_handle handle,
+				  struct acpi_memory_device **mem_device)
 {
-	acpi_status status;
-	acpi_handle phandle;
 	struct acpi_device *device = NULL;
-	struct acpi_device *pdevice = NULL;
-	int result;
+	int result = 0;
 
-	if (!acpi_bus_get_device(handle, &device) && device)
-		goto end;
+	acpi_scan_lock_acquire();
 
-	status = acpi_get_parent(handle, &phandle);
-	if (ACPI_FAILURE(status)) {
-		pr_warn(PREFIX "Cannot find acpi parent\n");
-		return -EINVAL;
-	}
-
-	/* Get the parent device */
-	result = acpi_bus_get_device(phandle, &pdevice);
-	if (result) {
-		pr_warn(PREFIX "Cannot get acpi bus device\n");
-		return -EINVAL;
-	}
+	acpi_bus_get_device(handle, &device);
+	if (device)
+		goto end;
 
 	/*
 	 * Now add the notified device.  This creates the acpi_device
@@ -190,18 +176,28 @@ acpi_memory_get_device(acpi_handle handle,
 	 */
 	result = acpi_bus_scan(handle);
 	if (result) {
-		pr_warn(PREFIX "Cannot add acpi bus\n");
-		return -EINVAL;
+		pr_warn(PREFIX "ACPI namespace scan failed\n");
+		result = -EINVAL;
+		goto out;
+	}
+	result = acpi_bus_get_device(handle, &device);
+	if (result) {
+		pr_warn(PREFIX "Missing device object\n");
+		result = -EINVAL;
+		goto out;
 	}
 
 end:
 	*mem_device = acpi_driver_data(device);
 	if (!(*mem_device)) {
-		pr_err(PREFIX "Driver data not found\n");
-		return -ENODEV;
+		pr_err(PREFIX "driver data not found\n");
+		result = -ENODEV;
+		goto out;
 	}
 
-	return 0;
+out:
+	acpi_scan_lock_release();
+	return result;
 }
 
 static int acpi_memory_check_device(struct acpi_memory_device *mem_device)
@@ -259,12 +255,15 @@ static void acpi_memory_device_notify(acpi_handle handle, u32 event, void *data)
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 			"\nReceived EJECT REQUEST notification for device\n"));
 
+		acpi_scan_lock_acquire();
 		if (acpi_bus_get_device(handle, &device)) {
+			acpi_scan_lock_release();
 			pr_err(PREFIX "Device doesn't exist\n");
 			break;
 		}
 		mem_device = acpi_driver_data(device);
 		if (!mem_device) {
+			acpi_scan_lock_release();
 			pr_err(PREFIX "Driver Data is NULL\n");
 			break;
 		}
@@ -274,6 +273,7 @@ static void acpi_memory_device_notify(acpi_handle handle, u32 event, void *data)
 		 * acpi_bus_remove if Xen support hotremove in the future
 		 */
 		acpi_memory_disable_device(mem_device);
+		acpi_scan_lock_release();
 		break;
 
 	default:
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index bcf3ba4a6ec1..61786be9138b 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -30,6 +30,7 @@
  * IN THE SOFTWARE.
  */
 
+#include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/spinlock.h>