Merge branch 'master' into for-next

author: Jiri Kosina 2014-02-20 14:54:28 +0100
committer: Jiri Kosina 2014-02-20 14:54:28 +0100
commit: d4263348f796f29546f90802177865dd4379dd0a (patch)
tree: adcbdaebae584eee2f32fab95e826e8e49eef385 /arch/s390
parent: be873ac782f5ff5ee6675f83929f4fe6737eead2 (diff)
parent: 6d0abeca3242a88cab8232e4acd7e2bf088f3bc2 (diff)
61 files changed, 3215 insertions, 373 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 1e1a03d2d19f..65a07750f4f9 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -135,7 +135,6 @@ config S390
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_UID16 if 32BIT
 	select HAVE_VIRT_CPU_ACCOUNTING
-	select INIT_ALL_POSSIBLE
 	select KTIME_SCALAR if 32BIT
 	select MODULES_USE_ELF_RELA
 	select OLD_SIGACTION
@@ -335,10 +334,10 @@ config SMP
 	  a system with only one CPU, like most personal computers, say N. If
 	  you have a system with more than one CPU, say Y.
 
-	  If you say N here, the kernel will run on single and multiprocessor
+	  If you say N here, the kernel will run on uni- and multiprocessor
 	  machines, but will use only one CPU of a multiprocessor machine. If
 	  you say Y here, the kernel will run on many, but not all,
-	  singleprocessor machines. On a singleprocessor machine, the kernel
+	  uniprocessor machines. On a uniprocessor machine, the kernel
 	  will run faster if you say N here.
 
 	  See also the SMP-HOWTO available at
@@ -597,7 +596,7 @@ config CRASH_DUMP
 config ZFCPDUMP
 	def_bool n
 	prompt "zfcpdump support"
-	depends on SMP
+	depends on 64BIT && SMP
 	help
 	  Select this option if you want to build an zfcpdump enabled kernel.
 	  Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index 4c4a1cef5208..47c8630c93cd 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -529,6 +529,7 @@ static int __init appldata_init(void)
 {
 	int rc;
 
+	init_virt_timer(&appldata_timer);
 	appldata_timer.function = appldata_timer_function;
 	appldata_timer.data = (unsigned long) &appldata_work;
 
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index b3feabd39f31..cf3c0089bef2 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -25,6 +25,7 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/spinlock.h>
 #include "crypt_s390.h"
 
 #define AES_KEYLEN_128		1
@@ -32,6 +33,7 @@
 #define AES_KEYLEN_256		4
 
 static u8 *ctrblk;
+static DEFINE_SPINLOCK(ctrblk_lock);
 static char keylen_flag;
 
 struct s390_aes_ctx {
@@ -758,43 +760,67 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	return aes_set_key(tfm, in_key, key_len);
 }
 
+static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
+{
+	unsigned int i, n;
+
+	/* only use complete blocks, max. PAGE_SIZE */
+	n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
+	for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) {
+		memcpy(ctrptr + i, ctrptr + i - AES_BLOCK_SIZE,
+		       AES_BLOCK_SIZE);
+		crypto_inc(ctrptr + i, AES_BLOCK_SIZE);
+	}
+	return n;
+}
+
 static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
 			 struct s390_aes_ctx *sctx, struct blkcipher_walk *walk)
 {
 	int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
-	unsigned int i, n, nbytes;
-	u8 buf[AES_BLOCK_SIZE];
-	u8 *out, *in;
+	unsigned int n, nbytes;
+	u8 buf[AES_BLOCK_SIZE], ctrbuf[AES_BLOCK_SIZE];
+	u8 *out, *in, *ctrptr = ctrbuf;
 
 	if (!walk->nbytes)
 		return ret;
 
-	memcpy(ctrblk, walk->iv, AES_BLOCK_SIZE);
+	if (spin_trylock(&ctrblk_lock))
+		ctrptr = ctrblk;
+
+	memcpy(ctrptr, walk->iv, AES_BLOCK_SIZE);
 	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
 		out = walk->dst.virt.addr;
 		in = walk->src.virt.addr;
 		while (nbytes >= AES_BLOCK_SIZE) {
-			/* only use complete blocks, max. PAGE_SIZE */
-			n = (nbytes > PAGE_SIZE) ? PAGE_SIZE :
-						 nbytes & ~(AES_BLOCK_SIZE - 1);
-			for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) {
-				memcpy(ctrblk + i, ctrblk + i - AES_BLOCK_SIZE,
-				       AES_BLOCK_SIZE);
-				crypto_inc(ctrblk + i, AES_BLOCK_SIZE);
-			}
-			ret = crypt_s390_kmctr(func, sctx->key, out, in, n, ctrblk);
-			if (ret < 0 || ret != n)
+			if (ctrptr == ctrblk)
+				n = __ctrblk_init(ctrptr, nbytes);
+			else
+				n = AES_BLOCK_SIZE;
+			ret = crypt_s390_kmctr(func, sctx->key, out, in,
+					       n, ctrptr);
+			if (ret < 0 || ret != n) {
+				if (ctrptr == ctrblk)
+					spin_unlock(&ctrblk_lock);
 				return -EIO;
+			}
 			if (n > AES_BLOCK_SIZE)
-				memcpy(ctrblk, ctrblk + n - AES_BLOCK_SIZE,
+				memcpy(ctrptr, ctrptr + n - AES_BLOCK_SIZE,
 				       AES_BLOCK_SIZE);
-			crypto_inc(ctrblk, AES_BLOCK_SIZE);
+			crypto_inc(ctrptr, AES_BLOCK_SIZE);
 			out += n;
 			in += n;
 			nbytes -= n;
 		}
 		ret = blkcipher_walk_done(desc, walk, nbytes);
 	}
+	if (ctrptr == ctrblk) {
+		if (nbytes)
+			memcpy(ctrbuf, ctrptr, AES_BLOCK_SIZE);
+		else
+			memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE);
+		spin_unlock(&ctrblk_lock);
+	}
 	/*
 	 * final block may be < AES_BLOCK_SIZE, copy only nbytes
 	 */
@@ -802,14 +828,15 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
 		out = walk->dst.virt.addr;
 		in = walk->src.virt.addr;
 		ret = crypt_s390_kmctr(func, sctx->key, buf, in,
-				       AES_BLOCK_SIZE, ctrblk);
+				       AES_BLOCK_SIZE, ctrbuf);
 		if (ret < 0 || ret != AES_BLOCK_SIZE)
 			return -EIO;
 		memcpy(out, buf, nbytes);
-		crypto_inc(ctrblk, AES_BLOCK_SIZE);
+		crypto_inc(ctrbuf, AES_BLOCK_SIZE);
 		ret = blkcipher_walk_done(desc, walk, 0);
+		memcpy(walk->iv, ctrbuf, AES_BLOCK_SIZE);
 	}
-	memcpy(walk->iv, ctrblk, AES_BLOCK_SIZE);
+
 	return ret;
 }
 
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index bcca01c9989d..0a5aac8a9412 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -25,6 +25,7 @@
 #define DES3_KEY_SIZE	(3 * DES_KEY_SIZE)
 
 static u8 *ctrblk;
+static DEFINE_SPINLOCK(ctrblk_lock);
 
 struct s390_des_ctx {
 	u8 iv[DES_BLOCK_SIZE];
@@ -105,29 +106,35 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
 }
 
 static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
-			    u8 *iv, struct blkcipher_walk *walk)
+			    struct blkcipher_walk *walk)
 {
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	int ret = blkcipher_walk_virt(desc, walk);
 	unsigned int nbytes = walk->nbytes;
+	struct {
+		u8 iv[DES_BLOCK_SIZE];
+		u8 key[DES3_KEY_SIZE];
+	} param;
 
 	if (!nbytes)
 		goto out;
 
-	memcpy(iv, walk->iv, DES_BLOCK_SIZE);
+	memcpy(param.iv, walk->iv, DES_BLOCK_SIZE);
+	memcpy(param.key, ctx->key, DES3_KEY_SIZE);
 	do {
 		/* only use complete blocks */
 		unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
 		u8 *out = walk->dst.virt.addr;
 		u8 *in = walk->src.virt.addr;
 
-		ret = crypt_s390_kmc(func, iv, out, in, n);
+		ret = crypt_s390_kmc(func, &param, out, in, n);
 		if (ret < 0 || ret != n)
 			return -EIO;
 
 		nbytes &= DES_BLOCK_SIZE - 1;
 		ret = blkcipher_walk_done(desc, walk, nbytes);
 	} while ((nbytes = walk->nbytes));
-	memcpy(walk->iv, iv, DES_BLOCK_SIZE);
+	memcpy(walk->iv, param.iv, DES_BLOCK_SIZE);
 
 out:
 	return ret;
@@ -179,22 +186,20 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, ctx->iv, &walk);
+	return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, &walk);
 }
 
 static int cbc_des_decrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, ctx->iv, &walk);
+	return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, &walk);
 }
 
 static struct crypto_alg cbc_des_alg = {
@@ -237,9 +242,9 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 	u32 *flags = &tfm->crt_flags;
 
-	if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
-	    memcmp(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
-		   DES_KEY_SIZE)) &&
+	if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
+	    crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
+			  DES_KEY_SIZE)) &&
 	    (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
 		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
@@ -327,22 +332,20 @@ static int cbc_des3_encrypt(struct blkcipher_desc *desc,
 			    struct scatterlist *dst, struct scatterlist *src,
 			    unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, ctx->iv, &walk);
+	return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, &walk);
 }
 
 static int cbc_des3_decrypt(struct blkcipher_desc *desc,
 			    struct scatterlist *dst, struct scatterlist *src,
 			    unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, ctx->iv, &walk);
+	return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, &walk);
 }
 
 static struct crypto_alg cbc_des3_alg = {
@@ -366,54 +369,80 @@ static struct crypto_alg cbc_des3_alg = {
 	}
 };
 
+static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
+{
+	unsigned int i, n;
+
+	/* align to block size, max. PAGE_SIZE */
+	n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(DES_BLOCK_SIZE - 1);
+	for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) {
+		memcpy(ctrptr + i, ctrptr + i - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+		crypto_inc(ctrptr + i, DES_BLOCK_SIZE);
+	}
+	return n;
+}
+
 static int ctr_desall_crypt(struct blkcipher_desc *desc, long func,
-			    struct s390_des_ctx *ctx, struct blkcipher_walk *walk)
+			    struct s390_des_ctx *ctx,
+			    struct blkcipher_walk *walk)
 {
 	int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
-	unsigned int i, n, nbytes;
-	u8 buf[DES_BLOCK_SIZE];
-	u8 *out, *in;
+	unsigned int n, nbytes;
+	u8 buf[DES_BLOCK_SIZE], ctrbuf[DES_BLOCK_SIZE];
+	u8 *out, *in, *ctrptr = ctrbuf;
+
+	if (!walk->nbytes)
+		return ret;
 
-	memcpy(ctrblk, walk->iv, DES_BLOCK_SIZE);
+	if (spin_trylock(&ctrblk_lock))
+		ctrptr = ctrblk;
+
+	memcpy(ctrptr, walk->iv, DES_BLOCK_SIZE);
 	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
 		out = walk->dst.virt.addr;
 		in = walk->src.virt.addr;
 		while (nbytes >= DES_BLOCK_SIZE) {
-			/* align to block size, max. PAGE_SIZE */
-			n = (nbytes > PAGE_SIZE) ? PAGE_SIZE :
-				nbytes & ~(DES_BLOCK_SIZE - 1);
-			for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) {
-				memcpy(ctrblk + i, ctrblk + i - DES_BLOCK_SIZE,
-				       DES_BLOCK_SIZE);
-				crypto_inc(ctrblk + i, DES_BLOCK_SIZE);
-			}
-			ret = crypt_s390_kmctr(func, ctx->key, out, in, n, ctrblk);
-			if (ret < 0 || ret != n)
+			if (ctrptr == ctrblk)
+				n = __ctrblk_init(ctrptr, nbytes);
+			else
+				n = DES_BLOCK_SIZE;
+			ret = crypt_s390_kmctr(func, ctx->key, out, in,
+					       n, ctrptr);
+			if (ret < 0 || ret != n) {
+				if (ctrptr == ctrblk)
+					spin_unlock(&ctrblk_lock);
 				return -EIO;
+			}
 			if (n > DES_BLOCK_SIZE)
-				memcpy(ctrblk, ctrblk + n - DES_BLOCK_SIZE,
+				memcpy(ctrptr, ctrptr + n - DES_BLOCK_SIZE,
 				       DES_BLOCK_SIZE);
-			crypto_inc(ctrblk, DES_BLOCK_SIZE);
+			crypto_inc(ctrptr, DES_BLOCK_SIZE);
 			out += n;
 			in += n;
 			nbytes -= n;
 		}
 		ret = blkcipher_walk_done(desc, walk, nbytes);
 	}
-
+	if (ctrptr == ctrblk) {
+		if (nbytes)
+			memcpy(ctrbuf, ctrptr, DES_BLOCK_SIZE);
+		else
+			memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE);
+		spin_unlock(&ctrblk_lock);
+	}
 	/* final block may be < DES_BLOCK_SIZE, copy only nbytes */
 	if (nbytes) {
 		out = walk->dst.virt.addr;
 		in = walk->src.virt.addr;
 		ret = crypt_s390_kmctr(func, ctx->key, buf, in,
-				       DES_BLOCK_SIZE, ctrblk);
+				       DES_BLOCK_SIZE, ctrbuf);
 		if (ret < 0 || ret != DES_BLOCK_SIZE)
 			return -EIO;
 		memcpy(out, buf, nbytes);
-		crypto_inc(ctrblk, DES_BLOCK_SIZE);
+		crypto_inc(ctrbuf, DES_BLOCK_SIZE);
 		ret = blkcipher_walk_done(desc, walk, 0);
+		memcpy(walk->iv, ctrbuf, DES_BLOCK_SIZE);
 	}
-	memcpy(walk->iv, ctrblk, DES_BLOCK_SIZE);
 	return ret;
 }
 
diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile
index 2e671d5004ca..06f8d95a16cd 100644
--- a/arch/s390/hypfs/Makefile
+++ b/arch/s390/hypfs/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o
 
-s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o
+s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o hypfs_sprp.o
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
index 79f2ac55253f..b34b5ab90a31 100644
--- a/arch/s390/hypfs/hypfs.h
+++ b/arch/s390/hypfs/hypfs.h
@@ -13,6 +13,7 @@
 #include <linux/debugfs.h>
 #include <linux/workqueue.h>
 #include <linux/kref.h>
+#include <asm/hypfs.h>
 
 #define REG_FILE_MODE    0440
 #define UPDATE_FILE_MODE 0220
@@ -36,6 +37,10 @@ extern int hypfs_vm_init(void);
 extern void hypfs_vm_exit(void);
 extern int hypfs_vm_create_files(struct dentry *root);
 
+/* Set Partition-Resource Parameter */
+int hypfs_sprp_init(void);
+void hypfs_sprp_exit(void);
+
 /* debugfs interface */
 struct hypfs_dbfs_file;
 
@@ -52,6 +57,8 @@ struct hypfs_dbfs_file {
 	int		(*data_create)(void **data, void **data_free_ptr,
 				       size_t *size);
 	void		(*data_free)(const void *buf_free_ptr);
+	long		(*unlocked_ioctl) (struct file *, unsigned int,
+					   unsigned long);
 
 	/* Private data for hypfs_dbfs.c */
 	struct hypfs_dbfs_data	*data;
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
index 17ab8b7b53cc..2badf2bf9cd7 100644
--- a/arch/s390/hypfs/hypfs_dbfs.c
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -81,9 +81,25 @@ static ssize_t dbfs_read(struct file *file, char __user *buf,
 	return rc;
 }
 
+static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct hypfs_dbfs_file *df;
+	long rc;
+
+	df = file->f_path.dentry->d_inode->i_private;
+	mutex_lock(&df->lock);
+	if (df->unlocked_ioctl)
+		rc = df->unlocked_ioctl(file, cmd, arg);
+	else
+		rc = -ENOTTY;
+	mutex_unlock(&df->lock);
+	return rc;
+}
+
 static const struct file_operations dbfs_ops = {
 	.read		= dbfs_read,
 	.llseek		= no_llseek,
+	.unlocked_ioctl = dbfs_ioctl,
 };
 
 int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df)
diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c
new file mode 100644
index 000000000000..f043c3c7e73c
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_sprp.c
@@ -0,0 +1,141 @@
+/*
+ *    Hypervisor filesystem for Linux on s390.
+ *    Set Partition-Resource Parameter interface.
+ *
+ *    Copyright IBM Corp. 2013
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/compat.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <asm/compat.h>
+#include <asm/sclp.h>
+#include "hypfs.h"
+
+#define DIAG304_SET_WEIGHTS	0
+#define DIAG304_QUERY_PRP	1
+#define DIAG304_SET_CAPPING	2
+
+#define DIAG304_CMD_MAX		2
+
+static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd)
+{
+	register unsigned long _data asm("2") = (unsigned long) data;
+	register unsigned long _rc asm("3");
+	register unsigned long _cmd asm("4") = cmd;
+
+	asm volatile("diag %1,%2,0x304\n"
+		     : "=d" (_rc) : "d" (_data), "d" (_cmd) : "memory");
+
+	return _rc;
+}
+
+static void hypfs_sprp_free(const void *data)
+{
+	free_page((unsigned long) data);
+}
+
+static int hypfs_sprp_create(void **data_ptr, void **free_ptr, size_t *size)
+{
+	unsigned long rc;
+	void *data;
+
+	data = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	rc = hypfs_sprp_diag304(data, DIAG304_QUERY_PRP);
+	if (rc != 1) {
+		*data_ptr = *free_ptr = NULL;
+		*size = 0;
+		free_page((unsigned long) data);
+		return -EIO;
+	}
+	*data_ptr = *free_ptr = data;
+	*size = PAGE_SIZE;
+	return 0;
+}
+
+static int __hypfs_sprp_ioctl(void __user *user_area)
+{
+	struct hypfs_diag304 diag304;
+	unsigned long cmd;
+	void __user *udata;
+	void *data;
+	int rc;
+
+	if (copy_from_user(&diag304, user_area, sizeof(diag304)))
+		return -EFAULT;
+	if ((diag304.args[0] >> 8) != 0 || diag304.args[1] > DIAG304_CMD_MAX)
+		return -EINVAL;
+
+	data = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	if (!data)
+		return -ENOMEM;
+
+	udata = (void __user *)(unsigned long) diag304.data;
+	if (diag304.args[1] == DIAG304_SET_WEIGHTS ||
+	    diag304.args[1] == DIAG304_SET_CAPPING)
+		if (copy_from_user(data, udata, PAGE_SIZE)) {
+			rc = -EFAULT;
+			goto out;
+		}
+
+	cmd = *(unsigned long *) &diag304.args[0];
+	diag304.rc = hypfs_sprp_diag304(data, cmd);
+
+	if (diag304.args[1] == DIAG304_QUERY_PRP)
+		if (copy_to_user(udata, data, PAGE_SIZE)) {
+			rc = -EFAULT;
+			goto out;
+		}
+
+	rc = copy_to_user(user_area, &diag304, sizeof(diag304)) ? -EFAULT : 0;
+out:
+	free_page((unsigned long) data);
+	return rc;
+}
+
+static long hypfs_sprp_ioctl(struct file *file, unsigned int cmd,
+			       unsigned long arg)
+{
+	void __user *argp;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (is_compat_task())
+		argp = compat_ptr(arg);
+	else
+		argp = (void __user *) arg;
+	switch (cmd) {
+	case HYPFS_DIAG304:
+		return __hypfs_sprp_ioctl(argp);
+	default: /* unknown ioctl number */
+		return -ENOTTY;
+	}
+	return 0;
+}
+
+static struct hypfs_dbfs_file hypfs_sprp_file = {
+	.name		= "diag_304",
+	.data_create	= hypfs_sprp_create,
+	.data_free	= hypfs_sprp_free,
+	.unlocked_ioctl = hypfs_sprp_ioctl,
+};
+
+int hypfs_sprp_init(void)
+{
+	if (!sclp_has_sprp())
+		return 0;
+	return hypfs_dbfs_create_file(&hypfs_sprp_file);
+}
+
+void hypfs_sprp_exit(void)
+{
+	if (!sclp_has_sprp())
+		return;
+	hypfs_dbfs_remove_file(&hypfs_sprp_file);
+}
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index ddfe09b45134..c952b981e4f2 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -478,10 +478,14 @@ static int __init hypfs_init(void)
 		rc = -ENODATA;
 		goto fail_hypfs_diag_exit;
 	}
+	if (hypfs_sprp_init()) {
+		rc = -ENODATA;
+		goto fail_hypfs_vm_exit;
+	}
 	s390_kobj = kobject_create_and_add("s390", hypervisor_kobj);
 	if (!s390_kobj) {
 		rc = -ENOMEM;
-		goto fail_hypfs_vm_exit;
+		goto fail_hypfs_sprp_exit;
 	}
 	rc = register_filesystem(&hypfs_type);
 	if (rc)
@@ -490,6 +494,8 @@ static int __init hypfs_init(void)
 
 fail_filesystem:
 	kobject_put(s390_kobj);
+fail_hypfs_sprp_exit:
+	hypfs_sprp_exit();
 fail_hypfs_vm_exit:
 	hypfs_vm_exit();
 fail_hypfs_diag_exit:
@@ -502,11 +508,12 @@ fail_dbfs_exit:
 
 static void __exit hypfs_exit(void)
 {
-	hypfs_diag_exit();
-	hypfs_vm_exit();
-	hypfs_dbfs_exit();
 	unregister_filesystem(&hypfs_type);
 	kobject_put(s390_kobj);
+	hypfs_sprp_exit();
+	hypfs_vm_exit();
+	hypfs_diag_exit();
+	hypfs_dbfs_exit();
 }
 
 module_init(hypfs_init)
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 7a5288f3479a..8386a4a1f19a 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -3,3 +3,4 @@
 generic-y += clkdev.h
 generic-y += trace_clock.h
 generic-y += preempt.h
+generic-y += hash.h
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 16760eeb79b0..578680f6207a 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -32,4 +32,19 @@
 
 #define set_mb(var, value)		do { var = value; mb(); } while (0)
 
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	ACCESS_ONCE(*p) = (v);						\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	___p1;								\
+})
+
 #endif /* __ASM_BARRIER_H */
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 0f636cbdf342..4236408070e5 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -185,11 +185,12 @@ static inline unsigned long long __cmpxchg64(void *ptr,
 {
 	register_pair rp_old = {.pair = old};
 	register_pair rp_new = {.pair = new};
+	unsigned long long *ullptr = ptr;
 
 	asm volatile(
 		"	cds	%0,%2,%1"
-		: "+&d" (rp_old), "=Q" (ptr)
-		: "d" (rp_new), "Q" (ptr)
+		: "+d" (rp_old), "+Q" (*ullptr)
+		: "d" (rp_new)
 		: "memory", "cc");
 	return rp_old.pair;
 }
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 4bf9da03591e..5d7e8cf83bd6 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -38,7 +38,8 @@
 
 #define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \
 			 PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \
-			 PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | PSW32_ASC_HOME)
+			 PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \
+			 PSW32_ASC_PRIMARY)
 
 #define COMPAT_USER_HZ		100
 #define COMPAT_UTS_MACHINE	"s390\0\0\0\0"
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index c879fad404c8..cb700d54bd83 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -56,6 +56,96 @@ struct cpumf_ctr_info {
 	u32   reserved2[12];
 } __packed;
 
+/* QUERY SAMPLING INFORMATION block */
+struct hws_qsi_info_block {	    /* Bit(s) */
+	unsigned int b0_13:14;	    /* 0-13: zeros			 */
+	unsigned int as:1;	    /* 14: basic-sampling authorization	 */
+	unsigned int ad:1;	    /* 15: diag-sampling authorization	 */
+	unsigned int b16_21:6;	    /* 16-21: zeros			 */
+	unsigned int es:1;	    /* 22: basic-sampling enable control */
+	unsigned int ed:1;	    /* 23: diag-sampling enable control	 */
+	unsigned int b24_29:6;	    /* 24-29: zeros			 */
+	unsigned int cs:1;	    /* 30: basic-sampling activation control */
+	unsigned int cd:1;	    /* 31: diag-sampling activation control */
+	unsigned int bsdes:16;	    /* 4-5: size of basic sampling entry */
+	unsigned int dsdes:16;	    /* 6-7: size of diagnostic sampling entry */
+	unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
+	unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
+	unsigned long tear;	    /* 24-31: TEAR contents		 */
+	unsigned long dear;	    /* 32-39: DEAR contents		 */
+	unsigned int rsvrd0;	    /* 40-43: reserved			 */
+	unsigned int cpu_speed;     /* 44-47: CPU speed 		 */
+	unsigned long long rsvrd1;  /* 48-55: reserved			 */
+	unsigned long long rsvrd2;  /* 56-63: reserved			 */
+} __packed;
+
+/* SET SAMPLING CONTROLS request block */
+struct hws_lsctl_request_block {
+	unsigned int s:1;	    /* 0: maximum buffer indicator	 */
+	unsigned int h:1;	    /* 1: part. level reserved for VM use*/
+	unsigned long long b2_53:52;/* 2-53: zeros			 */
+	unsigned int es:1;	    /* 54: basic-sampling enable control */
+	unsigned int ed:1;	    /* 55: diag-sampling enable control	 */
+	unsigned int b56_61:6;	    /* 56-61: - zeros			 */
+	unsigned int cs:1;	    /* 62: basic-sampling activation control */
+	unsigned int cd:1;	    /* 63: diag-sampling activation control  */
+	unsigned long interval;     /* 8-15: sampling interval		 */
+	unsigned long tear;	    /* 16-23: TEAR contents		 */
+	unsigned long dear;	    /* 24-31: DEAR contents		 */
+	/* 32-63:							 */
+	unsigned long rsvrd1;	    /* reserved 			 */
+	unsigned long rsvrd2;	    /* reserved 			 */
+	unsigned long rsvrd3;	    /* reserved 			 */
+	unsigned long rsvrd4;	    /* reserved 			 */
+} __packed;
+
+struct hws_basic_entry {
+	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
+	unsigned int R:4;	    /* 16-19 reserved			 */
+	unsigned int U:4;	    /* 20-23 Number of unique instruct.  */
+	unsigned int z:2;	    /* zeros				 */
+	unsigned int T:1;	    /* 26 PSW DAT mode			 */
+	unsigned int W:1;	    /* 27 PSW wait state		 */
+	unsigned int P:1;	    /* 28 PSW Problem state		 */
+	unsigned int AS:2;	    /* 29-30 PSW address-space control	 */
+	unsigned int I:1;	    /* 31 entry valid or invalid	 */
+	unsigned int:16;
+	unsigned int prim_asn:16;   /* primary ASN			 */
+	unsigned long long ia;	    /* Instruction Address		 */
+	unsigned long long gpp;     /* Guest Program Parameter		 */
+	unsigned long long hpp;     /* Host Program Parameter		 */
+} __packed;
+
+struct hws_diag_entry {
+	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
+	unsigned int R:14;	    /* 16-19 and 20-30 reserved		 */
+	unsigned int I:1;	    /* 31 entry valid or invalid	 */
+	u8	     data[];	    /* Machine-dependent sample data	 */
+} __packed;
+
+struct hws_combined_entry {
+	struct hws_basic_entry	basic;	/* Basic-sampling data entry */
+	struct hws_diag_entry	diag;	/* Diagnostic-sampling data entry */
+} __packed;
+
+struct hws_trailer_entry {
+	union {
+		struct {
+			unsigned int f:1;	/* 0 - Block Full Indicator   */
+			unsigned int a:1;	/* 1 - Alert request control  */
+			unsigned int t:1;	/* 2 - Timestamp format	      */
+			unsigned long long:61;	/* 3 - 63: Reserved	      */
+		};
+		unsigned long long flags;	/* 0 - 63: All indicators     */
+	};
+	unsigned long long overflow;	 /* 64 - sample Overflow count	      */
+	unsigned char timestamp[16];	 /* 16 - 31 timestamp		      */
+	unsigned long long reserved1;	 /* 32 -Reserved		      */
+	unsigned long long reserved2;	 /*				      */
+	unsigned long long progusage1;	 /* 48 - reserved for programming use */
+	unsigned long long progusage2;	 /*				      */
+} __packed;
+
 /* Query counter information */
 static inline int qctri(struct cpumf_ctr_info *info)
 {
@@ -99,4 +189,95 @@ static inline int ecctr(u64 ctr, u64 *val)
 	return cc;
 }
 
+/* Query sampling information */
+static inline int qsi(struct hws_qsi_info_block *info)
+{
+	int cc;
+	cc = 1;
+
+	asm volatile(
+		"0:	.insn	s,0xb2860000,0(%1)\n"
+		"1:	lhi	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "=d" (cc), "+a" (info)
+		: "m" (*info)
+		: "cc", "memory");
+
+	return cc ? -EINVAL : 0;
+}
+
+/* Load sampling controls */
+static inline int lsctl(struct hws_lsctl_request_block *req)
+{
+	int cc;
+
+	cc = 1;
+	asm volatile(
+		"0:	.insn	s,0xb2870000,0(%1)\n"
+		"1:	ipm	%0\n"
+		"	srl	%0,28\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "+d" (cc), "+a" (req)
+		: "m" (*req)
+		: "cc", "memory");
+
+	return cc ? -EINVAL : 0;
+}
+
+/* Sampling control helper functions */
+
+#include <linux/time.h>
+
+static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
+						unsigned long freq)
+{
+	return (USEC_PER_SEC / freq) * qsi->cpu_speed;
+}
+
+static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
+						unsigned long rate)
+{
+	return USEC_PER_SEC * qsi->cpu_speed / rate;
+}
+
+#define SDB_TE_ALERT_REQ_MASK	0x4000000000000000UL
+#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
+
+/* Return TOD timestamp contained in an trailer entry */
+static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
+{
+	/* TOD in STCKE format */
+	if (te->t)
+		return *((unsigned long long *) &te->timestamp[1]);
+
+	/* TOD in STCK format */
+	return *((unsigned long long *) &te->timestamp[0]);
+}
+
+/* Return pointer to trailer entry of an sample data block */
+static inline unsigned long *trailer_entry_ptr(unsigned long v)
+{
+	void *ret;
+
+	ret = (void *) v;
+	ret += PAGE_SIZE;
+	ret -= sizeof(struct hws_trailer_entry);
+
+	return (unsigned long *) ret;
+}
+
+/* Return if the entry in the sample data block table (sdbt)
+ * is a link to the next sdbt */
+static inline int is_link_entry(unsigned long *s)
+{
+	return *s & 0x1ul ? 1 : 0;
+}
+
+/* Return pointer to the linked sdbt */
+static inline unsigned long *get_next_sdbt(unsigned long *s)
+{
+	return (unsigned long *) (*s & ~0x1ul);
+}
 #endif /* _ASM_S390_CPU_MF_H */
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index 7e1c917bbba2..09d1dd46bd57 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -29,6 +29,8 @@ struct css_general_char {
 	u32 fcx : 1;	 /* bit 88 */
 	u32 : 19;
 	u32 alt_ssi : 1; /* bit 108 */
+	u32:1;
+	u32 narf:1;	 /* bit 110 */
 } __packed;
 
 extern struct css_general_char css_general_characteristics;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index d5bc3750616e..eef3dd3fd9a9 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -106,9 +106,22 @@ struct kvm_s390_sie_block {
 	__u64	gbea;			/* 0x0180 */
 	__u8	reserved188[24];	/* 0x0188 */
 	__u32	fac;			/* 0x01a0 */
-	__u8	reserved1a4[92];	/* 0x01a4 */
+	__u8	reserved1a4[68];	/* 0x01a4 */
+	__u64	itdba;			/* 0x01e8 */
+	__u8	reserved1f0[16];	/* 0x01f0 */
 } __attribute__((packed));
 
+struct kvm_s390_itdb {
+	__u8	data[256];
+} __packed;
+
+struct sie_page {
+	struct kvm_s390_sie_block sie_block;
+	__u8 reserved200[1024];		/* 0x0200 */
+	struct kvm_s390_itdb itdb;	/* 0x0600 */
+	__u8 reserved700[2304];		/* 0x0700 */
+} __packed;
+
 struct kvm_vcpu_stat {
 	u32 exit_userspace;
 	u32 exit_null;
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index c129ab2ac731..2583466f576b 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -144,6 +144,7 @@ int clp_disable_fh(struct zpci_dev *);
 void zpci_event_error(void *);
 void zpci_event_availability(void *);
 void zpci_rescan(void);
+bool zpci_is_enabled(void);
 #else /* CONFIG_PCI */
 static inline void zpci_event_error(void *e) {}
 static inline void zpci_event_availability(void *e) {}
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 1141fb3e7b21..159a8ec6da9a 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -1,21 +1,40 @@
 /*
  * Performance event support - s390 specific definitions.
  *
- * Copyright IBM Corp. 2009, 2012
+ * Copyright IBM Corp. 2009, 2013
  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  *	      Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  */
 
-#include <asm/cpu_mf.h>
+#ifndef _ASM_S390_PERF_EVENT_H
+#define _ASM_S390_PERF_EVENT_H
 
-/* CPU-measurement counter facility */
-#define PERF_CPUM_CF_MAX_CTR		256
+#ifdef CONFIG_64BIT
+
+#include <linux/perf_event.h>
+#include <linux/device.h>
+#include <asm/cpu_mf.h>
 
 /* Per-CPU flags for PMU states */
 #define PMU_F_RESERVED			0x1000
 #define PMU_F_ENABLED			0x2000
+#define PMU_F_IN_USE			0x4000
+#define PMU_F_ERR_IBE			0x0100
+#define PMU_F_ERR_LSDA			0x0200
+#define PMU_F_ERR_MASK			(PMU_F_ERR_IBE|PMU_F_ERR_LSDA)
+
+/* Perf defintions for PMU event attributes in sysfs */
+extern __init const struct attribute_group **cpumf_cf_event_group(void);
+extern ssize_t cpumf_events_sysfs_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *page);
+#define EVENT_VAR(_cat, _name)		event_attr_##_cat##_##_name
+#define EVENT_PTR(_cat, _name)		(&EVENT_VAR(_cat, _name).attr.attr)
+
+#define CPUMF_EVENT_ATTR(cat, name, id)			\
+	PMU_EVENT_ATTR(name, EVENT_VAR(cat, name), id, cpumf_events_sysfs_show)
+#define CPUMF_EVENT_PTR(cat, name)	EVENT_PTR(cat, name)
 
-#ifdef CONFIG_64BIT
 
 /* Perf callbacks */
 struct pt_regs;
@@ -23,4 +42,55 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs) perf_misc_flags(regs)
 
+/* Perf pt_regs extension for sample-data-entry indicators */
+struct perf_sf_sde_regs {
+	unsigned char in_guest:1;	  /* guest sample */
+	unsigned long reserved:63;	  /* reserved */
+};
+
+/* Perf PMU definitions for the counter facility */
+#define PERF_CPUM_CF_MAX_CTR		256
+
+/* Perf PMU definitions for the sampling facility */
+#define PERF_CPUM_SF_MAX_CTR		2
+#define PERF_EVENT_CPUM_SF		0xB0000UL /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG		0xBD000UL /* Event: Combined-sampling */
+#define PERF_CPUM_SF_BASIC_MODE		0x0001	  /* Basic-sampling flag */
+#define PERF_CPUM_SF_DIAG_MODE		0x0002	  /* Diagnostic-sampling flag */
+#define PERF_CPUM_SF_MODE_MASK		(PERF_CPUM_SF_BASIC_MODE| \
+					 PERF_CPUM_SF_DIAG_MODE)
+#define PERF_CPUM_SF_FULL_BLOCKS	0x0004	  /* Process full SDBs only */
+
+#define REG_NONE		0
+#define REG_OVERFLOW		1
+#define OVERFLOW_REG(hwc)	((hwc)->extra_reg.config)
+#define SFB_ALLOC_REG(hwc)	((hwc)->extra_reg.alloc)
+#define RAWSAMPLE_REG(hwc)	((hwc)->config)
+#define TEAR_REG(hwc)		((hwc)->last_tag)
+#define SAMPL_RATE(hwc)		((hwc)->event_base)
+#define SAMPL_FLAGS(hwc)	((hwc)->config_base)
+#define SAMPL_DIAG_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+#define SDB_FULL_BLOCKS(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
+
+/* Structure for sampling data entries to be passed as perf raw sample data
+ * to user space.  Note that raw sample data must be aligned and, thus, might
+ * be padded with zeros.
+ */
+struct sf_raw_sample {
+#define SF_RAW_SAMPLE_BASIC	PERF_CPUM_SF_BASIC_MODE
+#define SF_RAW_SAMPLE_DIAG	PERF_CPUM_SF_DIAG_MODE
+	u64			format;
+	u32			 size;	  /* Size of sf_raw_sample */
+	u16			bsdes;	  /* Basic-sampling data entry size */
+	u16			dsdes;	  /* Diagnostic-sampling data entry size */
+	struct hws_basic_entry	basic;	  /* Basic-sampling data entry */
+	struct hws_diag_entry	 diag;	  /* Diagnostic-sampling data entry */
+	u8		    padding[];	  /* Padding to next multiple of 8 */
+} __packed;
+
+/* Perf hardware reserve and release functions */
+int perf_reserve_sampling(void);
+void perf_release_sampling(void);
+
 #endif /* CONFIG_64BIT */
+#endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 57d0d7e794b1..d786c634e052 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -336,7 +336,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
 #define QDIO_FLAG_CLEANUP_USING_HALT		0x02
 
 /**
- * struct qdio_initialize - qdio initalization data
+ * struct qdio_initialize - qdio initialization data
  * @cdev: associated ccw device
  * @q_format: queue format
  * @adapter_name: name for the adapter
@@ -378,6 +378,34 @@ struct qdio_initialize {
 	struct qdio_outbuf_state *output_sbal_state_array;
 };
 
+/**
+ * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc()
+ * @l3_ipv6_addr: entry contains IPv6 address
+ * @l3_ipv4_addr: entry contains IPv4 address
+ * @l2_addr_lnid: entry contains MAC address and VLAN ID
+ */
+enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid};
+
+/**
+ * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc()
+ * @nit:  Network interface token
+ * @addr: Address of one of the three types
+ *
+ * The struct is passed to the callback function by qdio_brinfo_desc()
+ */
+struct qdio_brinfo_entry_l3_ipv6 {
+	u64 nit;
+	struct { unsigned char _s6_addr[16]; } addr;
+} __packed;
+struct qdio_brinfo_entry_l3_ipv4 {
+	u64 nit;
+	struct { uint32_t _s_addr; } addr;
+} __packed;
+struct qdio_brinfo_entry_l2 {
+	u64 nit;
+	struct { u8 mac[6]; u16 lnid; } addr_lnid;
+} __packed;
+
 #define QDIO_STATE_INACTIVE		0x00000002 /* after qdio_cleanup */
 #define QDIO_STATE_ESTABLISHED		0x00000004 /* after qdio_establish */
 #define QDIO_STATE_ACTIVE		0x00000008 /* after qdio_activate */
@@ -399,5 +427,10 @@ extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *);
 extern int qdio_shutdown(struct ccw_device *, int);
 extern int qdio_free(struct ccw_device *);
 extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *);
+extern int qdio_pnso_brinfo(struct subchannel_id schid,
+		int cnc, u16 *response,
+		void (*cb)(void *priv, enum qdio_brinfo_entry_type type,
+				void *entry),
+		void *priv);
 
 #endif /* __QDIO_H__ */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 2f390956c7c1..abaca2275c7a 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -52,8 +52,9 @@ int sclp_chp_configure(struct chp_id chpid);
 int sclp_chp_deconfigure(struct chp_id chpid);
 int sclp_chp_read_info(struct sclp_chp_info *info);
 void sclp_get_ipl_info(struct sclp_ipl_info *info);
-bool sclp_has_linemode(void);
-bool sclp_has_vt220(void);
+bool __init sclp_has_linemode(void);
+bool __init sclp_has_vt220(void);
+bool sclp_has_sprp(void);
 int sclp_pci_configure(u32 fid);
 int sclp_pci_deconfigure(u32 fid);
 int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index 5a87d16d3e7c..d091aa1aaf11 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -5,6 +5,7 @@
 #define SIGP_SENSE		      1
 #define SIGP_EXTERNAL_CALL	      2
 #define SIGP_EMERGENCY_SIGNAL	      3
+#define SIGP_START		      4
 #define SIGP_STOP		      5
 #define SIGP_RESTART		      6
 #define SIGP_STOP_AND_STORE_STATUS    9
@@ -12,6 +13,7 @@
 #define SIGP_SET_PREFIX		     13
 #define SIGP_STORE_STATUS_AT_ADDRESS 14
 #define SIGP_SET_ARCHITECTURE	     18
+#define SIGP_COND_EMERGENCY_SIGNAL   19
 #define SIGP_SENSE_RUNNING	     21
 
 /* SIGP condition codes */
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index ac9bed8e103f..160779394096 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -31,6 +31,7 @@ extern void smp_yield(void);
 extern void smp_stop_cpu(void);
 extern void smp_cpu_set_polarization(int cpu, int val);
 extern int smp_cpu_get_polarization(int cpu);
+extern void smp_fill_possible_mask(void);
 
 #else /* CONFIG_SMP */
 
@@ -50,6 +51,7 @@ static inline int smp_vcpu_scheduled(int cpu) { return 1; }
 static inline void smp_yield_cpu(int cpu) { }
 static inline void smp_yield(void) { }
 static inline void smp_stop_cpu(void) { }
+static inline void smp_fill_possible_mask(void) { }
 
 #endif /* CONFIG_SMP */
 
diff --git a/arch/s390/include/uapi/asm/hypfs.h b/arch/s390/include/uapi/asm/hypfs.h
new file mode 100644
index 000000000000..37998b449531
--- /dev/null
+++ b/arch/s390/include/uapi/asm/hypfs.h
@@ -0,0 +1,25 @@
+/*
+ * IOCTL interface for hypfs
+ *
+ * Copyright IBM Corp. 2013
+ *
+ * Author: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_HYPFS_CTL_H
+#define _ASM_HYPFS_CTL_H
+
+#include <linux/types.h>
+
+struct hypfs_diag304 {
+	__u32	args[2];
+	__u64	data;
+	__u64	rc;
+} __attribute__((packed));
+
+#define HYPFS_IOCTL_MAGIC 0x10
+
+#define HYPFS_DIAG304 \
+	_IOWR(HYPFS_IOCTL_MAGIC, 0x20, struct hypfs_diag304)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index c286c2e868f0..e031332096d7 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -84,4 +84,6 @@
 
 #define SO_MAX_PACING_RATE	47
 
+#define SO_BPF_EXTENSIONS	48
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/statfs.h b/arch/s390/include/uapi/asm/statfs.h
index a61d538756f2..471eb09184d4 100644
--- a/arch/s390/include/uapi/asm/statfs.h
+++ b/arch/s390/include/uapi/asm/statfs.h
@@ -35,11 +35,11 @@ struct statfs {
 struct statfs64 {
 	unsigned int	f_type;
 	unsigned int	f_bsize;
-	unsigned long	f_blocks;
-	unsigned long	f_bfree;
-	unsigned long	f_bavail;
-	unsigned long	f_files;
-	unsigned long	f_ffree;
+	unsigned long long f_blocks;
+	unsigned long long f_bfree;
+	unsigned long long f_bavail;
+	unsigned long long f_files;
+	unsigned long long f_ffree;
 	__kernel_fsid_t f_fsid;
 	unsigned int	f_namelen;
 	unsigned int	f_frsize;
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 864f693c237f..5eb5c9ddb120 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -280,6 +280,8 @@
 #define __NR_s390_runtime_instr 342
 #define __NR_kcmp		343
 #define __NR_finit_module	344
+#define __NR_sched_setattr	345
+#define __NR_sched_getattr	346
 #define NR_syscalls 345
 
 /* 
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index e83fc116f5bf..f2b18eacaca8 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -154,6 +154,67 @@ struct ica_xcRB {
 	unsigned short	priority_window;
 	unsigned int	status;
 } __attribute__((packed));
+
+/**
+ * struct ep11_cprb - EP11 connectivity programming request block
+ * @cprb_len:		CPRB header length [0x0020]
+ * @cprb_ver_id:	CPRB version id.   [0x04]
+ * @pad_000:		Alignment pad bytes
+ * @flags:		Admin cmd [0x80] or functional cmd [0x00]
+ * @func_id:		Function id / subtype [0x5434]
+ * @source_id:		Source id [originator id]
+ * @target_id:		Target id [usage/ctrl domain id]
+ * @ret_code:		Return code
+ * @reserved1:		Reserved
+ * @reserved2:		Reserved
+ * @payload_len:	Payload length
+ */
+struct ep11_cprb {
+	uint16_t	cprb_len;
+	unsigned char	cprb_ver_id;
+	unsigned char	pad_000[2];
+	unsigned char	flags;
+	unsigned char	func_id[2];
+	uint32_t	source_id;
+	uint32_t	target_id;
+	uint32_t	ret_code;
+	uint32_t	reserved1;
+	uint32_t	reserved2;
+	uint32_t	payload_len;
+} __attribute__((packed));
+
+/**
+ * struct ep11_target_dev - EP11 target device list
+ * @ap_id:	AP device id
+ * @dom_id:	Usage domain id
+ */
+struct ep11_target_dev {
+	uint16_t ap_id;
+	uint16_t dom_id;
+};
+
+/**
+ * struct ep11_urb - EP11 user request block
+ * @targets_num:	Number of target adapters
+ * @targets:		Addr to target adapter list
+ * @weight:		Level of request priority
+ * @req_no:		Request id/number
+ * @req_len:		Request length
+ * @req:		Addr to request block
+ * @resp_len:		Response length
+ * @resp:		Addr to response block
+ */
+struct ep11_urb {
+	uint16_t		targets_num;
+	uint64_t		targets;
+	uint64_t		weight;
+	uint64_t		req_no;
+	uint64_t		req_len;
+	uint64_t		req;
+	uint64_t		resp_len;
+	uint64_t		resp;
+} __attribute__((packed));
+
 #define AUTOSELECT ((unsigned int)0xFFFFFFFF)
 
 #define ZCRYPT_IOCTL_MAGIC 'z'
@@ -183,6 +244,9 @@ struct ica_xcRB {
  *   ZSECSENDCPRB
  *     Send an arbitrary CPRB to a crypto card.
  *
+ *   ZSENDEP11CPRB
+ *     Send an arbitrary EP11 CPRB to an EP11 coprocessor crypto card.
+ *
  *   Z90STAT_STATUS_MASK
  *     Return an 64 element array of unsigned chars for the status of
  *     all devices.
@@ -256,6 +320,7 @@ struct ica_xcRB {
 #define ICARSAMODEXPO	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
 #define ICARSACRT	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
 #define ZSECSENDCPRB	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
+#define ZSENDEP11CPRB	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
 
 /* New status calls */
 #define Z90STAT_TOTALCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int)
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 2403303cfed7..1b3ac09c11b6 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -60,7 +60,8 @@ obj-$(CONFIG_FTRACE_SYSCALLS)  += ftrace.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 
 ifdef CONFIG_64BIT
-obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf.o
+obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf.o perf_cpum_sf.o \
+						perf_cpum_cf_events.o
 obj-y				+= runtime_instr.o cache.o
 endif
 
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index e030d2bdec1b..db02052bd137 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -286,8 +286,8 @@ asmlinkage long sys32_getegid16(void)
 }
 
 #ifdef CONFIG_SYSVIPC
-COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second,
-		unsigned long, third, compat_uptr_t, ptr)
+COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second,
+		compat_ulong_t, third, compat_uptr_t, ptr)
 {
 	if (call >> 16)		/* hack for backward compatibility */
 		return -EINVAL;
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 95e7ba0fbb7e..8b84bc373e94 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -412,8 +412,9 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
 		regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
 	} else {
 		regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE;
-		err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
-				  (u16 __force __user *)(frame->retcode));
+		if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
+			       (u16 __force __user *)(frame->retcode)))
+			goto give_sigsegv;
 	}
 
 	/* Set up backchain. */
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 9cb1b975b353..59c8efce1b99 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1412,3 +1412,14 @@ ENTRY(sys_finit_module_wrapper)
 	llgtr	%r3,%r3			# const char __user *
 	lgfr	%r4,%r4			# int
 	jg	sys_finit_module
+
+ENTRY(sys_sched_setattr_wrapper)
+	lgfr	%r2,%r2			# pid_t
+	llgtr	%r3,%r3			# struct sched_attr __user *
+	jg	sys_sched_setattr
+
+ENTRY(sys_sched_getattr_wrapper)
+	lgfr	%r2,%r2			# pid_t
+	llgtr	%r3,%r3			# const char __user *
+	llgfr	%r3,%r3			# unsigned int
+	jg	sys_sched_getattr
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index e5b43c97a834..384e609b4711 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -74,7 +74,7 @@ _TIF_TRACE    = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 	.endm
 
 	.macro LPP newpp
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
 	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
 	jz	.+8
 	.insn	s,0xb2800000,\newpp
@@ -82,7 +82,7 @@ _TIF_TRACE    = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 	.endm
 
 	.macro	HANDLE_SIE_INTERCEPT scratch,reason
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
 	tmhh	%r8,0x0001		# interrupting from user ?
 	jnz	.+62
 	lgr	\scratch,%r9
@@ -946,7 +946,7 @@ cleanup_idle_insn:
 	.quad	__critical_end - __critical_start
 
 
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
 /*
  * sie64a calling convention:
  * %r2 pointer to sie control block
@@ -975,7 +975,7 @@ sie_done:
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 # some program checks are suppressing. C code (e.g. do_protection_exception)
 # will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions beween sie64a and sie_done should not cause program
+# instructions between sie64a and sie_done should not cause program
 # interrupts. So lets use a nop (47 00 00 00) as a landing pad.
 # See also HANDLE_SIE_INTERCEPT
 rewind_pad:
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index b9e25ae2579c..d7c00507568a 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -59,7 +59,7 @@ ENTRY(startup_continue)
 	.quad	0			# cr12: tracing off
 	.quad	0			# cr13: home space segment table
 	.quad	0xc0000000		# cr14: machine check handling off
-	.quad	0			# cr15: linkage stack operations
+	.quad	.Llinkage_stack		# cr15: linkage stack operations
 .Lpcmsk:.quad	0x0000000180000000
 .L4malign:.quad 0xffffffffffc00000
 .Lscan2g:.quad	0x80000000 + 0x20000 - 8	# 2GB + 128K - 8
@@ -67,12 +67,15 @@ ENTRY(startup_continue)
 .Lparmaddr:
 	.quad	PARMAREA
 	.align	64
-.Lduct: .long	0,0,0,0,.Lduald,0,0,0
+.Lduct: .long	0,.Laste,.Laste,0,.Lduald,0,0,0
 	.long	0,0,0,0,0,0,0,0
+.Laste:	.quad	0,0xffffffffffffffff,0,0,0,0,0,0
 	.align	128
 .Lduald:.rept	8
 	.long	0x80000000,0,0,0	# invalid access-list entries
 	.endr
+.Llinkage_stack:
+	.long	0,0,0x89000000,0,0,0,0x8a000000,0
 
 ENTRY(_ehead)
 
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 1105502bf6e9..f51214c04858 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -680,6 +680,7 @@ static int __init cpumf_pmu_init(void)
 		goto out;
 	}
 
+	cpumf_pmu.attr_groups = cpumf_cf_event_group();
 	rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
 	if (rc) {
 		pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
new file mode 100644
index 000000000000..4554a4bae39e
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -0,0 +1,322 @@
+/*
+ * Perf PMU sysfs events attributes for available CPU-measurement counters
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+
+
+/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */
+
+CPUMF_EVENT_ATTR(cf, CPU_CYCLES, 0x0000);
+CPUMF_EVENT_ATTR(cf, INSTRUCTIONS, 0x0001);
+CPUMF_EVENT_ATTR(cf, L1I_DIR_WRITES, 0x0002);
+CPUMF_EVENT_ATTR(cf, L1I_PENALTY_CYCLES, 0x0003);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_CPU_CYCLES, 0x0020);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_INSTRUCTIONS, 0x0021);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025);
+CPUMF_EVENT_ATTR(cf, L1D_DIR_WRITES, 0x0004);
+CPUMF_EVENT_ATTR(cf, L1D_PENALTY_CYCLES, 0x0005);
+CPUMF_EVENT_ATTR(cf, PRNG_FUNCTIONS, 0x0040);
+CPUMF_EVENT_ATTR(cf, PRNG_CYCLES, 0x0041);
+CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_FUNCTIONS, 0x0042);
+CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_CYCLES, 0x0043);
+CPUMF_EVENT_ATTR(cf, SHA_FUNCTIONS, 0x0044);
+CPUMF_EVENT_ATTR(cf, SHA_CYCLES, 0x0045);
+CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_FUNCTIONS, 0x0046);
+CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_CYCLES, 0x0047);
+CPUMF_EVENT_ATTR(cf, DEA_FUNCTIONS, 0x0048);
+CPUMF_EVENT_ATTR(cf, DEA_CYCLES, 0x0049);
+CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_FUNCTIONS, 0x004a);
+CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_CYCLES, 0x004b);
+CPUMF_EVENT_ATTR(cf, AES_FUNCTIONS, 0x004c);
+CPUMF_EVENT_ATTR(cf, AES_CYCLES, 0x004d);
+CPUMF_EVENT_ATTR(cf, AES_BLOCKED_FUNCTIONS, 0x004e);
+CPUMF_EVENT_ATTR(cf, AES_BLOCKED_CYCLES, 0x004f);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3);
+
+static struct attribute *cpumcf_pmu_event_attr[] = {
+	CPUMF_EVENT_PTR(cf, CPU_CYCLES),
+	CPUMF_EVENT_PTR(cf, INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf, L1I_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_CPU_CYCLES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf, L1D_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf, L1D_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf, PRNG_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, PRNG_CYCLES),
+	CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf, SHA_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, SHA_CYCLES),
+	CPUMF_EVENT_PTR(cf, SHA_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, SHA_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf, DEA_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, DEA_CYCLES),
+	CPUMF_EVENT_PTR(cf, DEA_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, DEA_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf, AES_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, AES_CYCLES),
+	CPUMF_EVENT_PTR(cf, AES_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, AES_BLOCKED_CYCLES),
+	NULL,
+};
+
+static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES),
+	CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT),
+	NULL,
+};
+
+static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT),
+	CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES),
+	NULL,
+};
+
+static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT),
+	CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL),
+	CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL),
+	NULL,
+};
+
+/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
+
+static struct attribute_group cpumsf_pmu_events_group = {
+	.name = "events",
+	.attrs = cpumcf_pmu_event_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumsf_pmu_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group cpumsf_pmu_format_group = {
+	.name = "format",
+	.attrs = cpumsf_pmu_format_attr,
+};
+
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+	&cpumsf_pmu_events_group,
+	&cpumsf_pmu_format_group,
+	NULL,
+};
+
+
+static __init struct attribute **merge_attr(struct attribute **a,
+					    struct attribute **b)
+{
+	struct attribute **new;
+	int j, i;
+
+	for (j = 0; a[j]; j++)
+		;
+	for (i = 0; b[i]; i++)
+		j++;
+	j++;
+
+	new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+	if (!new)
+		return NULL;
+	j = 0;
+	for (i = 0; a[i]; i++)
+		new[j++] = a[i];
+	for (i = 0; b[i]; i++)
+		new[j++] = b[i];
+	new[j] = NULL;
+
+	return new;
+}
+
+__init const struct attribute_group **cpumf_cf_event_group(void)
+{
+	struct attribute **combined, **model;
+	struct cpuid cpu_id;
+
+	get_cpu_id(&cpu_id);
+	switch (cpu_id.machine) {
+	case 0x2097:
+	case 0x2098:
+		model = cpumcf_z10_pmu_event_attr;
+		break;
+	case 0x2817:
+	case 0x2818:
+		model = cpumcf_z196_pmu_event_attr;
+		break;
+	case 0x2827:
+	case 0x2828:
+		model = cpumcf_zec12_pmu_event_attr;
+		break;
+	default:
+		model = NULL;
+		break;
+	};
+
+	if (!model)
+		goto out;
+
+	combined = merge_attr(cpumcf_pmu_event_attr, model);
+	if (combined)
+		cpumsf_pmu_events_group.attrs = combined;
+out:
+	return cpumsf_pmu_attr_groups;
+}
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
new file mode 100644
index 000000000000..6c0d29827cb6
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -0,0 +1,1641 @@
+/*
+ * Performance event support for the System z CPU-measurement Sampling Facility
+ *
+ * Copyright IBM Corp. 2013
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#define KMSG_COMPONENT	"cpum_sf"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+#include <asm/debug.h>
+#include <asm/timex.h>
+
+/* Minimum number of sample-data-block-tables:
+ * At least one table is required for the sampling buffer structure.
+ * A single table contains up to 511 pointers to sample-data-blocks.
+ */
+#define CPUM_SF_MIN_SDBT	1
+
+/* Number of sample-data-blocks per sample-data-block-table (SDBT):
+ * A table contains SDB pointers (8 bytes) and one table-link entry
+ * that points to the origin of the next SDBT.
+ */
+#define CPUM_SF_SDB_PER_TABLE	((PAGE_SIZE - 8) / 8)
+
+/* Maximum page offset for an SDBT table-link entry:
+ * If this page offset is reached, a table-link entry to the next SDBT
+ * must be added.
+ */
+#define CPUM_SF_SDBT_TL_OFFSET	(CPUM_SF_SDB_PER_TABLE * 8)
+static inline int require_table_link(const void *sdbt)
+{
+	return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
+}
+
+/* Minimum and maximum sampling buffer sizes:
+ *
+ * This number represents the maximum size of the sampling buffer taking
+ * the number of sample-data-block-tables into account.  Note that these
+ * numbers apply to the basic-sampling function only.
+ * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if
+ * the diagnostic-sampling function is active.
+ *
+ * Sampling buffer size		Buffer characteristics
+ * ---------------------------------------------------
+ *	 64KB		    ==	  16 pages (4KB per page)
+ *				   1 page  for SDB-tables
+ *				  15 pages for SDBs
+ *
+ *  32MB		    ==	8192 pages (4KB per page)
+ *				  16 pages for SDB-tables
+ *				8176 pages for SDBs
+ */
+static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
+static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
+static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1;
+
+struct sf_buffer {
+	unsigned long	 *sdbt;	    /* Sample-data-block-table origin */
+	/* buffer characteristics (required for buffer increments) */
+	unsigned long  num_sdb;	    /* Number of sample-data-blocks */
+	unsigned long num_sdbt;	    /* Number of sample-data-block-tables */
+	unsigned long	 *tail;	    /* last sample-data-block-table */
+};
+
+struct cpu_hw_sf {
+	/* CPU-measurement sampling information block */
+	struct hws_qsi_info_block qsi;
+	/* CPU-measurement sampling control block */
+	struct hws_lsctl_request_block lsctl;
+	struct sf_buffer sfb;	    /* Sampling buffer */
+	unsigned int flags;	    /* Status flags */
+	struct perf_event *event;   /* Scheduled perf event */
+};
+static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
+
+/* Debug feature */
+static debug_info_t *sfdbg;
+
+/*
+ * sf_disable() - Switch off sampling facility
+ */
+static int sf_disable(void)
+{
+	struct hws_lsctl_request_block sreq;
+
+	memset(&sreq, 0, sizeof(sreq));
+	return lsctl(&sreq);
+}
+
+/*
+ * sf_buffer_available() - Check for an allocated sampling buffer
+ */
+static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
+{
+	return !!cpuhw->sfb.sdbt;
+}
+
+/*
+ * deallocate sampling facility buffer
+ */
+static void free_sampling_buffer(struct sf_buffer *sfb)
+{
+	unsigned long *sdbt, *curr;
+
+	if (!sfb->sdbt)
+		return;
+
+	sdbt = sfb->sdbt;
+	curr = sdbt;
+
+	/* Free the SDBT after all SDBs are processed... */
+	while (1) {
+		if (!*curr || !sdbt)
+			break;
+
+		/* Process table-link entries */
+		if (is_link_entry(curr)) {
+			curr = get_next_sdbt(curr);
+			if (sdbt)
+				free_page((unsigned long) sdbt);
+
+			/* If the origin is reached, sampling buffer is freed */
+			if (curr == sfb->sdbt)
+				break;
+			else
+				sdbt = curr;
+		} else {
+			/* Process SDB pointer */
+			if (*curr) {
+				free_page(*curr);
+				curr++;
+			}
+		}
+	}
+
+	debug_sprintf_event(sfdbg, 5,
+			    "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
+	memset(sfb, 0, sizeof(*sfb));
+}
+
+static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
+{
+	unsigned long sdb, *trailer;
+
+	/* Allocate and initialize sample-data-block */
+	sdb = get_zeroed_page(gfp_flags);
+	if (!sdb)
+		return -ENOMEM;
+	trailer = trailer_entry_ptr(sdb);
+	*trailer = SDB_TE_ALERT_REQ_MASK;
+
+	/* Link SDB into the sample-data-block-table */
+	*sdbt = sdb;
+
+	return 0;
+}
+
+/*
+ * realloc_sampling_buffer() - extend sampler memory
+ *
+ * Allocates new sample-data-blocks and adds them to the specified sampling
+ * buffer memory.
+ *
+ * Important: This modifies the sampling buffer and must be called when the
+ *	      sampling facility is disabled.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int realloc_sampling_buffer(struct sf_buffer *sfb,
+				   unsigned long num_sdb, gfp_t gfp_flags)
+{
+	int i, rc;
+	unsigned long *new, *tail;
+
+	if (!sfb->sdbt || !sfb->tail)
+		return -EINVAL;
+
+	if (!is_link_entry(sfb->tail))
+		return -EINVAL;
+
+	/* Append to the existing sampling buffer, overwriting the table-link
+	 * register.
+	 * The tail variables always points to the "tail" (last and table-link)
+	 * entry in an SDB-table.
+	 */
+	tail = sfb->tail;
+
+	/* Do a sanity check whether the table-link entry points to
+	 * the sampling buffer origin.
+	 */
+	if (sfb->sdbt != get_next_sdbt(tail)) {
+		debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
+				    "sampling buffer is not linked: origin=%p"
+				    "tail=%p\n",
+				    (void *) sfb->sdbt, (void *) tail);
+		return -EINVAL;
+	}
+
+	/* Allocate remaining SDBs */
+	rc = 0;
+	for (i = 0; i < num_sdb; i++) {
+		/* Allocate a new SDB-table if it is full. */
+		if (require_table_link(tail)) {
+			new = (unsigned long *) get_zeroed_page(gfp_flags);
+			if (!new) {
+				rc = -ENOMEM;
+				break;
+			}
+			sfb->num_sdbt++;
+			/* Link current page to tail of chain */
+			*tail = (unsigned long)(void *) new + 1;
+			tail = new;
+		}
+
+		/* Allocate a new sample-data-block.
+		 * If there is not enough memory, stop the realloc process
+		 * and simply use what was allocated.  If this is a temporary
+		 * issue, a new realloc call (if required) might succeed.
+		 */
+		rc = alloc_sample_data_block(tail, gfp_flags);
+		if (rc)
+			break;
+		sfb->num_sdb++;
+		tail++;
+	}
+
+	/* Link sampling buffer to its origin */
+	*tail = (unsigned long) sfb->sdbt + 1;
+	sfb->tail = tail;
+
+	debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
+			    " settings: sdbt=%lu sdb=%lu\n",
+			    sfb->num_sdbt, sfb->num_sdb);
+	return rc;
+}
+
+/*
+ * allocate_sampling_buffer() - allocate sampler memory
+ *
+ * Allocates and initializes a sampling buffer structure using the
+ * specified number of sample-data-blocks (SDB).  For each allocation,
+ * a 4K page is used.  The number of sample-data-block-tables (SDBT)
+ * are calculated from SDBs.
+ * Also set the ALERT_REQ mask in each SDBs trailer.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
+{
+	int rc;
+
+	if (sfb->sdbt)
+		return -EINVAL;
+
+	/* Allocate the sample-data-block-table origin */
+	sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+	if (!sfb->sdbt)
+		return -ENOMEM;
+	sfb->num_sdb = 0;
+	sfb->num_sdbt = 1;
+
+	/* Link the table origin to point to itself to prepare for
+	 * realloc_sampling_buffer() invocation.
+	 */
+	sfb->tail = sfb->sdbt;
+	*sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
+
+	/* Allocate requested number of sample-data-blocks */
+	rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
+	if (rc) {
+		free_sampling_buffer(sfb);
+		debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
+			"realloc_sampling_buffer failed with rc=%i\n", rc);
+	} else
+		debug_sprintf_event(sfdbg, 4,
+			"alloc_sampling_buffer: tear=%p dear=%p\n",
+			sfb->sdbt, (void *) *sfb->sdbt);
+	return rc;
+}
+
+static void sfb_set_limits(unsigned long min, unsigned long max)
+{
+	struct hws_qsi_info_block si;
+
+	CPUM_SF_MIN_SDB = min;
+	CPUM_SF_MAX_SDB = max;
+
+	memset(&si, 0, sizeof(si));
+	if (!qsi(&si))
+		CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+}
+
+static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
+{
+	return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR
+				    : CPUM_SF_MAX_SDB;
+}
+
+static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
+					struct hw_perf_event *hwc)
+{
+	if (!sfb->sdbt)
+		return SFB_ALLOC_REG(hwc);
+	if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
+		return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
+	return 0;
+}
+
+static int sfb_has_pending_allocs(struct sf_buffer *sfb,
+				   struct hw_perf_event *hwc)
+{
+	return sfb_pending_allocs(sfb, hwc) > 0;
+}
+
+static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+	/* Limit the number of SDBs to not exceed the maximum */
+	num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc));
+	if (num)
+		SFB_ALLOC_REG(hwc) += num;
+}
+
+static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+	SFB_ALLOC_REG(hwc) = 0;
+	sfb_account_allocs(num, hwc);
+}
+
+static size_t event_sample_size(struct hw_perf_event *hwc)
+{
+	struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
+	size_t sample_size;
+
+	/* The sample size depends on the sampling function: The basic-sampling
+	 * function must be always enabled, diagnostic-sampling function is
+	 * optional.
+	 */
+	sample_size = sfr->bsdes;
+	if (SAMPL_DIAG_MODE(hwc))
+		sample_size += sfr->dsdes;
+
+	return sample_size;
+}
+
+static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
+{
+	if (cpuhw->sfb.sdbt)
+		free_sampling_buffer(&cpuhw->sfb);
+}
+
+static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
+{
+	unsigned long n_sdb, freq, factor;
+	size_t sfr_size, sample_size;
+	struct sf_raw_sample *sfr;
+
+	/* Allocate raw sample buffer
+	 *
+	 *    The raw sample buffer is used to temporarily store sampling data
+	 *    entries for perf raw sample processing.  The buffer size mainly
+	 *    depends on the size of diagnostic-sampling data entries which is
+	 *    machine-specific.  The exact size calculation includes:
+	 *	1. The first 4 bytes of diagnostic-sampling data entries are
+	 *	   already reflected in the sf_raw_sample structure.  Subtract
+	 *	   these bytes.
+	 *	2. The perf raw sample data must be 8-byte aligned (u64) and
+	 *	   perf's internal data size must be considered too.  So add
+	 *	   an additional u32 for correct alignment and subtract before
+	 *	   allocating the buffer.
+	 *	3. Store the raw sample buffer pointer in the perf event
+	 *	   hardware structure.
+	 */
+	sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) +
+			 sizeof(u32), sizeof(u64));
+	sfr_size -= sizeof(u32);
+	sfr = kzalloc(sfr_size, GFP_KERNEL);
+	if (!sfr)
+		return -ENOMEM;
+	sfr->size = sfr_size;
+	sfr->bsdes = cpuhw->qsi.bsdes;
+	sfr->dsdes = cpuhw->qsi.dsdes;
+	RAWSAMPLE_REG(hwc) = (unsigned long) sfr;
+
+	/* Calculate sampling buffers using 4K pages
+	 *
+	 *    1. Determine the sample data size which depends on the used
+	 *	 sampling functions, for example, basic-sampling or
+	 *	 basic-sampling with diagnostic-sampling.
+	 *
+	 *    2. Use the sampling frequency as input.  The sampling buffer is
+	 *	 designed for almost one second.  This can be adjusted through
+	 *	 the "factor" variable.
+	 *	 In any case, alloc_sampling_buffer() sets the Alert Request
+	 *	 Control indicator to trigger a measurement-alert to harvest
+	 *	 sample-data-blocks (sdb).
+	 *
+	 *    3. Compute the number of sample-data-blocks and ensure a minimum
+	 *	 of CPUM_SF_MIN_SDB.  Also ensure the upper limit does not
+	 *	 exceed a "calculated" maximum.  The symbolic maximum is
+	 *	 designed for basic-sampling only and needs to be increased if
+	 *	 diagnostic-sampling is active.
+	 *	 See also the remarks for these symbolic constants.
+	 *
+	 *    4. Compute the number of sample-data-block-tables (SDBT) and
+	 *	 ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
+	 *	 to 511 SDBs).
+	 */
+	sample_size = event_sample_size(hwc);
+	freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
+	factor = 1;
+	n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
+	if (n_sdb < CPUM_SF_MIN_SDB)
+		n_sdb = CPUM_SF_MIN_SDB;
+
+	/* If there is already a sampling buffer allocated, it is very likely
+	 * that the sampling facility is enabled too.  If the event to be
+	 * initialized requires a greater sampling buffer, the allocation must
+	 * be postponed.  Changing the sampling buffer requires the sampling
+	 * facility to be in the disabled state.  So, account the number of
+	 * required SDBs and let cpumsf_pmu_enable() resize the buffer just
+	 * before the event is started.
+	 */
+	sfb_init_allocs(n_sdb, hwc);
+	if (sf_buffer_available(cpuhw))
+		return 0;
+
+	debug_sprintf_event(sfdbg, 3,
+			    "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu"
+			    " sample_size=%lu cpuhw=%p\n",
+			    SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
+			    sample_size, cpuhw);
+
+	return alloc_sampling_buffer(&cpuhw->sfb,
+				     sfb_pending_allocs(&cpuhw->sfb, hwc));
+}
+
+static unsigned long min_percent(unsigned int percent, unsigned long base,
+				 unsigned long min)
+{
+	return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
+}
+
+static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
+{
+	/* Use a percentage-based approach to extend the sampling facility
+	 * buffer.  Accept up to 5% sample data loss.
+	 * Vary the extents between 1% to 5% of the current number of
+	 * sample-data-blocks.
+	 */
+	if (ratio <= 5)
+		return 0;
+	if (ratio <= 25)
+		return min_percent(1, base, 1);
+	if (ratio <= 50)
+		return min_percent(1, base, 1);
+	if (ratio <= 75)
+		return min_percent(2, base, 2);
+	if (ratio <= 100)
+		return min_percent(3, base, 3);
+	if (ratio <= 250)
+		return min_percent(4, base, 4);
+
+	return min_percent(5, base, 8);
+}
+
+static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
+				  struct hw_perf_event *hwc)
+{
+	unsigned long ratio, num;
+
+	if (!OVERFLOW_REG(hwc))
+		return;
+
+	/* The sample_overflow contains the average number of sample data
+	 * that has been lost because sample-data-blocks were full.
+	 *
+	 * Calculate the total number of sample data entries that has been
+	 * discarded.  Then calculate the ratio of lost samples to total samples
+	 * per second in percent.
+	 */
+	ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
+			     sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));
+
+	/* Compute number of sample-data-blocks */
+	num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
+	if (num)
+		sfb_account_allocs(num, hwc);
+
+	debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
+			    " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
+	OVERFLOW_REG(hwc) = 0;
+}
+
+/* extend_sampling_buffer() - Extend sampling buffer
+ * @sfb:	Sampling buffer structure (for local CPU)
+ * @hwc:	Perf event hardware structure
+ *
+ * Use this function to extend the sampling buffer based on the overflow counter
+ * and postponed allocation extents stored in the specified Perf event hardware.
+ *
+ * Important: This function disables the sampling facility in order to safely
+ *	      change the sampling buffer structure.  Do not call this function
+ *	      when the PMU is active.
+ */
+static void extend_sampling_buffer(struct sf_buffer *sfb,
+				   struct hw_perf_event *hwc)
+{
+	unsigned long num, num_old;
+	int rc;
+
+	num = sfb_pending_allocs(sfb, hwc);
+	if (!num)
+		return;
+	num_old = sfb->num_sdb;
+
+	/* Disable the sampling facility to reset any states and also
+	 * clear pending measurement alerts.
+	 */
+	sf_disable();
+
+	/* Extend the sampling buffer.
+	 * This memory allocation typically happens in an atomic context when
+	 * called by perf.  Because this is a reallocation, it is fine if the
+	 * new SDB-request cannot be satisfied immediately.
+	 */
+	rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
+	if (rc)
+		debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
+				    "failed with rc=%i\n", rc);
+
+	if (sfb_has_pending_allocs(sfb, hwc))
+		debug_sprintf_event(sfdbg, 5, "sfb: extend: "
+				    "req=%lu alloc=%lu remaining=%lu\n",
+				    num, sfb->num_sdb - num_old,
+				    sfb_pending_allocs(sfb, hwc));
+}
+
+
+/* Number of perf events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+#define PMC_INIT      0
+#define PMC_RELEASE   1
+#define PMC_FAILURE   2
+static void setup_pmc_cpu(void *flags)
+{
+	int err;
+	struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf);
+
+	err = 0;
+	switch (*((int *) flags)) {
+	case PMC_INIT:
+		memset(cpusf, 0, sizeof(*cpusf));
+		err = qsi(&cpusf->qsi);
+		if (err)
+			break;
+		cpusf->flags |= PMU_F_RESERVED;
+		err = sf_disable();
+		if (err)
+			pr_err("Switching off the sampling facility failed "
+			       "with rc=%i\n", err);
+		debug_sprintf_event(sfdbg, 5,
+				    "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
+		break;
+	case PMC_RELEASE:
+		cpusf->flags &= ~PMU_F_RESERVED;
+		err = sf_disable();
+		if (err) {
+			pr_err("Switching off the sampling facility failed "
+			       "with rc=%i\n", err);
+		} else
+			deallocate_buffers(cpusf);
+		debug_sprintf_event(sfdbg, 5,
+				    "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
+		break;
+	}
+	if (err)
+		*((int *) flags) |= PMC_FAILURE;
+}
+
+static void release_pmc_hardware(void)
+{
+	int flags = PMC_RELEASE;
+
+	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+	on_each_cpu(setup_pmc_cpu, &flags, 1);
+	perf_release_sampling();
+}
+
+static int reserve_pmc_hardware(void)
+{
+	int flags = PMC_INIT;
+	int err;
+
+	err = perf_reserve_sampling();
+	if (err)
+		return err;
+	on_each_cpu(setup_pmc_cpu, &flags, 1);
+	if (flags & PMC_FAILURE) {
+		release_pmc_hardware();
+		return -ENODEV;
+	}
+	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+	return 0;
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	/* Free raw sample buffer */
+	if (RAWSAMPLE_REG(&event->hw))
+		kfree((void *) RAWSAMPLE_REG(&event->hw));
+
+	/* Release PMC if this is the last perf event */
+	if (!atomic_add_unless(&num_events, -1, 1)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_dec_return(&num_events) == 0)
+			release_pmc_hardware();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
+static void hw_init_period(struct hw_perf_event *hwc, u64 period)
+{
+	hwc->sample_period = period;
+	hwc->last_period = hwc->sample_period;
+	local64_set(&hwc->period_left, hwc->sample_period);
+}
+
+static void hw_reset_registers(struct hw_perf_event *hwc,
+			       unsigned long *sdbt_origin)
+{
+	struct sf_raw_sample *sfr;
+
+	/* (Re)set to first sample-data-block-table */
+	TEAR_REG(hwc) = (unsigned long) sdbt_origin;
+
+	/* (Re)set raw sampling buffer register */
+	sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
+	memset(&sfr->basic, 0, sizeof(sfr->basic));
+	memset(&sfr->diag, 0, sfr->dsdes);
+}
+
+static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
+				   unsigned long rate)
+{
+	return clamp_t(unsigned long, rate,
+		       si->min_sampl_rate, si->max_sampl_rate);
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	struct cpu_hw_sf *cpuhw;
+	struct hws_qsi_info_block si;
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned long rate;
+	int cpu, err;
+
+	/* Reserve CPU-measurement sampling facility */
+	err = 0;
+	if (!atomic_inc_not_zero(&num_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
+			err = -EBUSY;
+		else
+			atomic_inc(&num_events);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+	event->destroy = hw_perf_event_destroy;
+
+	if (err)
+		goto out;
+
+	/* Access per-CPU sampling information (query sampling info) */
+	/*
+	 * The event->cpu value can be -1 to count on every CPU, for example,
+	 * when attaching to a task.  If this is specified, use the query
+	 * sampling info from the current CPU, otherwise use event->cpu to
+	 * retrieve the per-CPU information.
+	 * Later, cpuhw indicates whether to allocate sampling buffers for a
+	 * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL).
+	 */
+	memset(&si, 0, sizeof(si));
+	cpuhw = NULL;
+	if (event->cpu == -1)
+		qsi(&si);
+	else {
+		/* Event is pinned to a particular CPU, retrieve the per-CPU
+		 * sampling structure for accessing the CPU-specific QSI.
+		 */
+		cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+		si = cpuhw->qsi;
+	}
+
+	/* Check sampling facility authorization and, if not authorized,
+	 * fall back to other PMUs.  It is safe to check any CPU because
+	 * the authorization is identical for all configured CPUs.
+	 */
+	if (!si.as) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	/* Always enable basic sampling */
+	SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;
+
+	/* Check if diagnostic sampling is requested.  Deny if the required
+	 * sampling authorization is missing.
+	 */
+	if (attr->config == PERF_EVENT_CPUM_SF_DIAG) {
+		if (!si.ad) {
+			err = -EPERM;
+			goto out;
+		}
+		SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
+	}
+
+	/* Check and set other sampling flags */
+	if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
+		SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
+
+	/* The sampling information (si) contains information about the
+	 * min/max sampling intervals and the CPU speed.  So calculate the
+	 * correct sampling interval and avoid the whole period adjust
+	 * feedback loop.
+	 */
+	rate = 0;
+	if (attr->freq) {
+		rate = freq_to_sample_rate(&si, attr->sample_freq);
+		rate = hw_limit_rate(&si, rate);
+		attr->freq = 0;
+		attr->sample_period = rate;
+	} else {
+		/* The min/max sampling rates specifies the valid range
+		 * of sample periods.  If the specified sample period is
+		 * out of range, limit the period to the range boundary.
+		 */
+		rate = hw_limit_rate(&si, hwc->sample_period);
+
+		/* The perf core maintains a maximum sample rate that is
+		 * configurable through the sysctl interface.  Ensure the
+		 * sampling rate does not exceed this value.  This also helps
+		 * to avoid throttling when pushing samples with
+		 * perf_event_overflow().
+		 */
+		if (sample_rate_to_freq(&si, rate) >
+		      sysctl_perf_event_sample_rate) {
+			err = -EINVAL;
+			debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
+			goto out;
+		}
+	}
+	SAMPL_RATE(hwc) = rate;
+	hw_init_period(hwc, SAMPL_RATE(hwc));
+
+	/* Initialize sample data overflow accounting */
+	hwc->extra_reg.reg = REG_OVERFLOW;
+	OVERFLOW_REG(hwc) = 0;
+
+	/* Allocate the per-CPU sampling buffer using the CPU information
+	 * from the event.  If the event is not pinned to a particular
+	 * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
+	 * buffers for each online CPU.
+	 */
+	if (cpuhw)
+		/* Event is pinned to a particular CPU */
+		err = allocate_buffers(cpuhw, hwc);
+	else {
+		/* Event is not pinned, allocate sampling buffer on
+		 * each online CPU
+		 */
+		for_each_online_cpu(cpu) {
+			cpuhw = &per_cpu(cpu_hw_sf, cpu);
+			err = allocate_buffers(cpuhw, hwc);
+			if (err)
+				break;
+		}
+	}
+out:
+	return err;
+}
+
+static int cpumsf_pmu_event_init(struct perf_event *event)
+{
+	int err;
+
+	/* No support for taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_RAW:
+		if ((event->attr.config != PERF_EVENT_CPUM_SF) &&
+		    (event->attr.config != PERF_EVENT_CPUM_SF_DIAG))
+			return -ENOENT;
+		break;
+	case PERF_TYPE_HARDWARE:
+		/* Support sampling of CPU cycles in addition to the
+		 * counter facility.  However, the counter facility
+		 * is more precise and, hence, restrict this PMU to
+		 * sampling events only.
+		 */
+		if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
+			return -ENOENT;
+		if (!is_sampling_event(event))
+			return -ENOENT;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	/* Check online status of the CPU to which the event is pinned */
+	if (event->cpu >= nr_cpumask_bits ||
+	    (event->cpu >= 0 && !cpu_online(event->cpu)))
+		return -ENODEV;
+
+	/* Force reset of idle/hv excludes regardless of what the
+	 * user requested.
+	 */
+	if (event->attr.exclude_hv)
+		event->attr.exclude_hv = 0;
+	if (event->attr.exclude_idle)
+		event->attr.exclude_idle = 0;
+
+	err = __hw_perf_event_init(event);
+	if (unlikely(err))
+		if (event->destroy)
+			event->destroy(event);
+	return err;
+}
+
+static void cpumsf_pmu_enable(struct pmu *pmu)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+	struct hw_perf_event *hwc;
+	int err;
+
+	if (cpuhw->flags & PMU_F_ENABLED)
+		return;
+
+	if (cpuhw->flags & PMU_F_ERR_MASK)
+		return;
+
+	/* Check whether to extent the sampling buffer.
+	 *
+	 * Two conditions trigger an increase of the sampling buffer for a
+	 * perf event:
+	 *    1. Postponed buffer allocations from the event initialization.
+	 *    2. Sampling overflows that contribute to pending allocations.
+	 *
+	 * Note that the extend_sampling_buffer() function disables the sampling
+	 * facility, but it can be fully re-enabled using sampling controls that
+	 * have been saved in cpumsf_pmu_disable().
+	 */
+	if (cpuhw->event) {
+		hwc = &cpuhw->event->hw;
+		/* Account number of overflow-designated buffer extents */
+		sfb_account_overflows(cpuhw, hwc);
+		if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
+			extend_sampling_buffer(&cpuhw->sfb, hwc);
+	}
+
+	/* (Re)enable the PMU and sampling facility */
+	cpuhw->flags |= PMU_F_ENABLED;
+	barrier();
+
+	err = lsctl(&cpuhw->lsctl);
+	if (err) {
+		cpuhw->flags &= ~PMU_F_ENABLED;
+		pr_err("Loading sampling controls failed: op=%i err=%i\n",
+			1, err);
+		return;
+	}
+
+	debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
+			    "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs,
+			    cpuhw->lsctl.ed, cpuhw->lsctl.cd,
+			    (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
+}
+
+static void cpumsf_pmu_disable(struct pmu *pmu)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+	struct hws_lsctl_request_block inactive;
+	struct hws_qsi_info_block si;
+	int err;
+
+	if (!(cpuhw->flags & PMU_F_ENABLED))
+		return;
+
+	if (cpuhw->flags & PMU_F_ERR_MASK)
+		return;
+
+	/* Switch off sampling activation control */
+	inactive = cpuhw->lsctl;
+	inactive.cs = 0;
+	inactive.cd = 0;
+
+	err = lsctl(&inactive);
+	if (err) {
+		pr_err("Loading sampling controls failed: op=%i err=%i\n",
+			2, err);
+		return;
+	}
+
+	/* Save state of TEAR and DEAR register contents */
+	if (!qsi(&si)) {
+		/* TEAR/DEAR values are valid only if the sampling facility is
+		 * enabled.  Note that cpumsf_pmu_disable() might be called even
+		 * for a disabled sampling facility because cpumsf_pmu_enable()
+		 * controls the enable/disable state.
+		 */
+		if (si.es) {
+			cpuhw->lsctl.tear = si.tear;
+			cpuhw->lsctl.dear = si.dear;
+		}
+	} else
+		debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
+				    "qsi() failed with err=%i\n", err);
+
+	cpuhw->flags &= ~PMU_F_ENABLED;
+}
+
+/* perf_exclude_event() - Filter event
+ * @event:	The perf event
+ * @regs:	pt_regs structure
+ * @sde_regs:	Sample-data-entry (sde) regs structure
+ *
+ * Filter perf events according to their exclude specification.
+ *
+ * Return non-zero if the event shall be excluded.
+ */
+static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
+			      struct perf_sf_sde_regs *sde_regs)
+{
+	if (event->attr.exclude_user && user_mode(regs))
+		return 1;
+	if (event->attr.exclude_kernel && !user_mode(regs))
+		return 1;
+	if (event->attr.exclude_guest && sde_regs->in_guest)
+		return 1;
+	if (event->attr.exclude_host && !sde_regs->in_guest)
+		return 1;
+	return 0;
+}
+
+/* perf_push_sample() - Push samples to perf
+ * @event:	The perf event
+ * @sample:	Hardware sample data
+ *
+ * Use the hardware sample data to create perf event sample.  The sample
+ * is the pushed to the event subsystem and the function checks for
+ * possible event overflows.  If an event overflow occurs, the PMU is
+ * stopped.
+ *
+ * Return non-zero if an event overflow occurred.
+ */
+static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
+{
+	int overflow;
+	struct pt_regs regs;
+	struct perf_sf_sde_regs *sde_regs;
+	struct perf_sample_data data;
+	struct perf_raw_record raw;
+
+	/* Setup perf sample */
+	perf_sample_data_init(&data, 0, event->hw.last_period);
+	raw.size = sfr->size;
+	raw.data = sfr;
+	data.raw = &raw;
+
+	/* Setup pt_regs to look like an CPU-measurement external interrupt
+	 * using the Program Request Alert code.  The regs.int_parm_long
+	 * field which is unused contains additional sample-data-entry related
+	 * indicators.
+	 */
+	memset(&regs, 0, sizeof(regs));
+	regs.int_code = 0x1407;
+	regs.int_parm = CPU_MF_INT_SF_PRA;
+	sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
+
+	regs.psw.addr = sfr->basic.ia;
+	if (sfr->basic.T)
+		regs.psw.mask |= PSW_MASK_DAT;
+	if (sfr->basic.W)
+		regs.psw.mask |= PSW_MASK_WAIT;
+	if (sfr->basic.P)
+		regs.psw.mask |= PSW_MASK_PSTATE;
+	switch (sfr->basic.AS) {
+	case 0x0:
+		regs.psw.mask |= PSW_ASC_PRIMARY;
+		break;
+	case 0x1:
+		regs.psw.mask |= PSW_ASC_ACCREG;
+		break;
+	case 0x2:
+		regs.psw.mask |= PSW_ASC_SECONDARY;
+		break;
+	case 0x3:
+		regs.psw.mask |= PSW_ASC_HOME;
+		break;
+	}
+
+	/* The host-program-parameter (hpp) contains the sie control
+	 * block that is set by sie64a() in entry64.S.	Check if hpp
+	 * refers to a valid control block and set sde_regs flags
+	 * accordingly.  This would allow to use hpp values for other
+	 * purposes too.
+	 * For now, simply use a non-zero value as guest indicator.
+	 */
+	if (sfr->basic.hpp)
+		sde_regs->in_guest = 1;
+
+	overflow = 0;
+	if (perf_exclude_event(event, &regs, sde_regs))
+		goto out;
+	if (perf_event_overflow(event, &data, &regs)) {
+		overflow = 1;
+		event->pmu->stop(event, 0);
+	}
+	perf_event_update_userpage(event);
+out:
+	return overflow;
+}
+
+static void perf_event_count_update(struct perf_event *event, u64 count)
+{
+	local64_add(count, &event->count);
+}
+
+static int sample_format_is_valid(struct hws_combined_entry *sample,
+				   unsigned int flags)
+{
+	if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
+		/* Only basic-sampling data entries with data-entry-format
+		 * version of 0x0001 can be processed.
+		 */
+		if (sample->basic.def != 0x0001)
+			return 0;
+	if (flags & PERF_CPUM_SF_DIAG_MODE)
+		/* The data-entry-format number of diagnostic-sampling data
+		 * entries can vary.  Because diagnostic data is just passed
+		 * through, do only a sanity check on the DEF.
+		 */
+		if (sample->diag.def < 0x8001)
+			return 0;
+	return 1;
+}
+
+static int sample_is_consistent(struct hws_combined_entry *sample,
+				unsigned long flags)
+{
+	/* This check applies only to basic-sampling data entries of potentially
+	 * combined-sampling data entries.  Invalid entries cannot be processed
+	 * by the PMU and, thus, do not deliver an associated
+	 * diagnostic-sampling data entry.
+	 */
+	if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE)))
+		return 0;
+	/*
+	 * Samples are skipped, if they are invalid or for which the
+	 * instruction address is not predictable, i.e., the wait-state bit is
+	 * set.
+	 */
+	if (sample->basic.I || sample->basic.W)
+		return 0;
+	return 1;
+}
+
+static void reset_sample_slot(struct hws_combined_entry *sample,
+			      unsigned long flags)
+{
+	if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
+		sample->basic.def = 0;
+	if (flags & PERF_CPUM_SF_DIAG_MODE)
+		sample->diag.def = 0;
+}
+
+static void sfr_store_sample(struct sf_raw_sample *sfr,
+			     struct hws_combined_entry *sample)
+{
+	if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE))
+		sfr->basic = sample->basic;
+	if (sfr->format & PERF_CPUM_SF_DIAG_MODE)
+		memcpy(&sfr->diag, &sample->diag, sfr->dsdes);
+}
+
+static void debug_sample_entry(struct hws_combined_entry *sample,
+			       struct hws_trailer_entry *te,
+			       unsigned long flags)
+{
+	debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
+			    "sampling data entry: te->f=%i basic.def=%04x (%p)"
+			    " diag.def=%04x (%p)\n", te->f,
+			    sample->basic.def, &sample->basic,
+			    (flags & PERF_CPUM_SF_DIAG_MODE)
+					? sample->diag.def : 0xFFFF,
+			    (flags & PERF_CPUM_SF_DIAG_MODE)
+					?  &sample->diag : NULL);
+}
+
+/* hw_collect_samples() - Walk through a sample-data-block and collect samples
+ * @event:	The perf event
+ * @sdbt:	Sample-data-block table
+ * @overflow:	Event overflow counter
+ *
+ * Walks through a sample-data-block and collects sampling data entries that are
+ * then pushed to the perf event subsystem.  Depending on the sampling function,
+ * there can be either basic-sampling or combined-sampling data entries.  A
+ * combined-sampling data entry consists of a basic- and a diagnostic-sampling
+ * data entry.	The sampling function is determined by the flags in the perf
+ * event hardware structure.  The function always works with a combined-sampling
+ * data entry but ignores the the diagnostic portion if it is not available.
+ *
+ * Note that the implementation focuses on basic-sampling data entries and, if
+ * such an entry is not valid, the entire combined-sampling data entry is
+ * ignored.
+ *
+ * The overflow variables counts the number of samples that has been discarded
+ * due to a perf event overflow.
+ */
+static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
+			       unsigned long long *overflow)
+{
+	unsigned long flags = SAMPL_FLAGS(&event->hw);
+	struct hws_combined_entry *sample;
+	struct hws_trailer_entry *te;
+	struct sf_raw_sample *sfr;
+	size_t sample_size;
+
+	/* Prepare and initialize raw sample data */
+	sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw);
+	sfr->format = flags & PERF_CPUM_SF_MODE_MASK;
+
+	sample_size = event_sample_size(&event->hw);
+	te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+	sample = (struct hws_combined_entry *) *sdbt;
+	while ((unsigned long *) sample < (unsigned long *) te) {
+		/* Check for an empty sample */
+		if (!sample->basic.def)
+			break;
+
+		/* Update perf event period */
+		perf_event_count_update(event, SAMPL_RATE(&event->hw));
+
+		/* Check sampling data entry */
+		if (sample_format_is_valid(sample, flags)) {
+			/* If an event overflow occurred, the PMU is stopped to
+			 * throttle event delivery.  Remaining sample data is
+			 * discarded.
+			 */
+			if (!*overflow) {
+				if (sample_is_consistent(sample, flags)) {
+					/* Deliver sample data to perf */
+					sfr_store_sample(sfr, sample);
+					*overflow = perf_push_sample(event, sfr);
+				}
+			} else
+				/* Count discarded samples */
+				*overflow += 1;
+		} else {
+			debug_sample_entry(sample, te, flags);
+			/* Sample slot is not yet written or other record.
+			 *
+			 * This condition can occur if the buffer was reused
+			 * from a combined basic- and diagnostic-sampling.
+			 * If only basic-sampling is then active, entries are
+			 * written into the larger diagnostic entries.
+			 * This is typically the case for sample-data-blocks
+			 * that are not full.  Stop processing if the first
+			 * invalid format was detected.
+			 */
+			if (!te->f)
+				break;
+		}
+
+		/* Reset sample slot and advance to next sample */
+		reset_sample_slot(sample, flags);
+		sample += sample_size;
+	}
+}
+
+/* hw_perf_event_update() - Process sampling buffer
+ * @event:	The perf event
+ * @flush_all:	Flag to also flush partially filled sample-data-blocks
+ *
+ * Processes the sampling buffer and create perf event samples.
+ * The sampling buffer position are retrieved and saved in the TEAR_REG
+ * register of the specified perf event.
+ *
+ * Only full sample-data-blocks are processed.	Specify the flash_all flag
+ * to also walk through partially filled sample-data-blocks.  It is ignored
+ * if PERF_CPUM_SF_FULL_BLOCKS is set.	The PERF_CPUM_SF_FULL_BLOCKS flag
+ * enforces the processing of full sample-data-blocks only (trailer entries
+ * with the block-full-indicator bit set).
+ */
+static void hw_perf_event_update(struct perf_event *event, int flush_all)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hws_trailer_entry *te;
+	unsigned long *sdbt;
+	unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
+	int done;
+
+	if (flush_all && SDB_FULL_BLOCKS(hwc))
+		flush_all = 0;
+
+	sdbt = (unsigned long *) TEAR_REG(hwc);
+	done = event_overflow = sampl_overflow = num_sdb = 0;
+	while (!done) {
+		/* Get the trailer entry of the sample-data-block */
+		te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+
+		/* Leave loop if no more work to do (block full indicator) */
+		if (!te->f) {
+			done = 1;
+			if (!flush_all)
+				break;
+		}
+
+		/* Check the sample overflow count */
+		if (te->overflow)
+			/* Account sample overflows and, if a particular limit
+			 * is reached, extend the sampling buffer.
+			 * For details, see sfb_account_overflows().
+			 */
+			sampl_overflow += te->overflow;
+
+		/* Timestamps are valid for full sample-data-blocks only */
+		debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
+				    "overflow=%llu timestamp=0x%llx\n",
+				    sdbt, te->overflow,
+				    (te->f) ? trailer_timestamp(te) : 0ULL);
+
+		/* Collect all samples from a single sample-data-block and
+		 * flag if an (perf) event overflow happened.  If so, the PMU
+		 * is stopped and remaining samples will be discarded.
+		 */
+		hw_collect_samples(event, sdbt, &event_overflow);
+		num_sdb++;
+
+		/* Reset trailer (using compare-double-and-swap) */
+		do {
+			te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
+			te_flags |= SDB_TE_ALERT_REQ_MASK;
+		} while (!cmpxchg_double(&te->flags, &te->overflow,
+					 te->flags, te->overflow,
+					 te_flags, 0ULL));
+
+		/* Advance to next sample-data-block */
+		sdbt++;
+		if (is_link_entry(sdbt))
+			sdbt = get_next_sdbt(sdbt);
+
+		/* Update event hardware registers */
+		TEAR_REG(hwc) = (unsigned long) sdbt;
+
+		/* Stop processing sample-data if all samples of the current
+		 * sample-data-block were flushed even if it was not full.
+		 */
+		if (flush_all && done)
+			break;
+
+		/* If an event overflow happened, discard samples by
+		 * processing any remaining sample-data-blocks.
+		 */
+		if (event_overflow)
+			flush_all = 1;
+	}
+
+	/* Account sample overflows in the event hardware structure */
+	if (sampl_overflow)
+		OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
+						 sampl_overflow, 1 + num_sdb);
+	if (sampl_overflow || event_overflow)
+		debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
+				    "overflow stats: sample=%llu event=%llu\n",
+				    sampl_overflow, event_overflow);
+}
+
+static void cpumsf_pmu_read(struct perf_event *event)
+{
+	/* Nothing to do ... updates are interrupt-driven */
+}
+
+/* Activate sampling control.
+ * Next call of pmu_enable() starts sampling.
+ */
+static void cpumsf_pmu_start(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+	perf_pmu_disable(event->pmu);
+	event->hw.state = 0;
+	cpuhw->lsctl.cs = 1;
+	if (SAMPL_DIAG_MODE(&event->hw))
+		cpuhw->lsctl.cd = 1;
+	perf_pmu_enable(event->pmu);
+}
+
+/* Deactivate sampling control.
+ * Next call of pmu_enable() stops sampling.
+ */
+static void cpumsf_pmu_stop(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	perf_pmu_disable(event->pmu);
+	cpuhw->lsctl.cs = 0;
+	cpuhw->lsctl.cd = 0;
+	event->hw.state |= PERF_HES_STOPPED;
+
+	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
+		hw_perf_event_update(event, 1);
+		event->hw.state |= PERF_HES_UPTODATE;
+	}
+	perf_pmu_enable(event->pmu);
+}
+
+static int cpumsf_pmu_add(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+	int err;
+
+	if (cpuhw->flags & PMU_F_IN_USE)
+		return -EAGAIN;
+
+	if (!cpuhw->sfb.sdbt)
+		return -EINVAL;
+
+	err = 0;
+	perf_pmu_disable(event->pmu);
+
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	/* Set up sampling controls.  Always program the sampling register
+	 * using the SDB-table start.  Reset TEAR_REG event hardware register
+	 * that is used by hw_perf_event_update() to store the sampling buffer
+	 * position after samples have been flushed.
+	 */
+	cpuhw->lsctl.s = 0;
+	cpuhw->lsctl.h = 1;
+	cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
+	cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
+	cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
+	hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
+
+	/* Ensure sampling functions are in the disabled state.  If disabled,
+	 * switch on sampling enable control. */
+	if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) {
+		err = -EAGAIN;
+		goto out;
+	}
+	cpuhw->lsctl.es = 1;
+	if (SAMPL_DIAG_MODE(&event->hw))
+		cpuhw->lsctl.ed = 1;
+
+	/* Set in_use flag and store event */
+	event->hw.idx = 0;	  /* only one sampling event per CPU supported */
+	cpuhw->event = event;
+	cpuhw->flags |= PMU_F_IN_USE;
+
+	if (flags & PERF_EF_START)
+		cpumsf_pmu_start(event, PERF_EF_RELOAD);
+out:
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+	return err;
+}
+
+static void cpumsf_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+	perf_pmu_disable(event->pmu);
+	cpumsf_pmu_stop(event, PERF_EF_UPDATE);
+
+	cpuhw->lsctl.es = 0;
+	cpuhw->lsctl.ed = 0;
+	cpuhw->flags &= ~PMU_F_IN_USE;
+	cpuhw->event = NULL;
+
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+}
+
+static int cpumsf_pmu_event_idx(struct perf_event *event)
+{
+	return event->hw.idx;
+}
+
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
+
+static struct attribute *cpumsf_pmu_events_attr[] = {
+	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
+	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG),
+	NULL,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumsf_pmu_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group cpumsf_pmu_events_group = {
+	.name = "events",
+	.attrs = cpumsf_pmu_events_attr,
+};
+static struct attribute_group cpumsf_pmu_format_group = {
+	.name = "format",
+	.attrs = cpumsf_pmu_format_attr,
+};
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+	&cpumsf_pmu_events_group,
+	&cpumsf_pmu_format_group,
+	NULL,
+};
+
+static struct pmu cpumf_sampling = {
+	.pmu_enable   = cpumsf_pmu_enable,
+	.pmu_disable  = cpumsf_pmu_disable,
+
+	.event_init   = cpumsf_pmu_event_init,
+	.add	      = cpumsf_pmu_add,
+	.del	      = cpumsf_pmu_del,
+
+	.start	      = cpumsf_pmu_start,
+	.stop	      = cpumsf_pmu_stop,
+	.read	      = cpumsf_pmu_read,
+
+	.event_idx    = cpumsf_pmu_event_idx,
+	.attr_groups  = cpumsf_pmu_attr_groups,
+};
+
+static void cpumf_measurement_alert(struct ext_code ext_code,
+				    unsigned int alert, unsigned long unused)
+{
+	struct cpu_hw_sf *cpuhw;
+
+	if (!(alert & CPU_MF_INT_SF_MASK))
+		return;
+	inc_irq_stat(IRQEXT_CMS);
+	cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+	/* Measurement alerts are shared and might happen when the PMU
+	 * is not reserved.  Ignore these alerts in this case. */
+	if (!(cpuhw->flags & PMU_F_RESERVED))
+		return;
+
+	/* The processing below must take care of multiple alert events that
+	 * might be indicated concurrently. */
+
+	/* Program alert request */
+	if (alert & CPU_MF_INT_SF_PRA) {
+		if (cpuhw->flags & PMU_F_IN_USE)
+			hw_perf_event_update(cpuhw->event, 0);
+		else
+			WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
+	}
+
+	/* Report measurement alerts only for non-PRA codes */
+	if (alert != CPU_MF_INT_SF_PRA)
+		debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);
+
+	/* Sampling authorization change request */
+	if (alert & CPU_MF_INT_SF_SACA)
+		qsi(&cpuhw->qsi);
+
+	/* Loss of sample data due to high-priority machine activities */
+	if (alert & CPU_MF_INT_SF_LSDA) {
+		pr_err("Sample data was lost\n");
+		cpuhw->flags |= PMU_F_ERR_LSDA;
+		sf_disable();
+	}
+
+	/* Invalid sampling buffer entry */
+	if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
+		pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
+		       alert);
+		cpuhw->flags |= PMU_F_ERR_IBE;
+		sf_disable();
+	}
+}
+
+static int cpumf_pmu_notifier(struct notifier_block *self,
+			      unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long) hcpu;
+	int flags;
+
+	/* Ignore the notification if no events are scheduled on the PMU.
+	 * This might be racy...
+	 */
+	if (!atomic_read(&num_events))
+		return NOTIFY_OK;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		flags = PMC_INIT;
+		smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+		break;
+	case CPU_DOWN_PREPARE:
+		flags = PMC_RELEASE;
+		smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
+{
+	if (!cpum_sf_avail())
+		return -ENODEV;
+	return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+}
+
+static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
+{
+	int rc;
+	unsigned long min, max;
+
+	if (!cpum_sf_avail())
+		return -ENODEV;
+	if (!val || !strlen(val))
+		return -EINVAL;
+
+	/* Valid parameter values: "min,max" or "max" */
+	min = CPUM_SF_MIN_SDB;
+	max = CPUM_SF_MAX_SDB;
+	if (strchr(val, ','))
+		rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
+	else
+		rc = kstrtoul(val, 10, &max);
+
+	if (min < 2 || min >= max || max > get_num_physpages())
+		rc = -EINVAL;
+	if (rc)
+		return rc;
+
+	sfb_set_limits(min, max);
+	pr_info("The sampling buffer limits have changed to: "
+		"min=%lu max=%lu (diag=x%lu)\n",
+		CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
+	return 0;
+}
+
+#define param_check_sfb_size(name, p) __param_check(name, p, void)
+static struct kernel_param_ops param_ops_sfb_size = {
+	.set = param_set_sfb_size,
+	.get = param_get_sfb_size,
+};
+
+#define RS_INIT_FAILURE_QSI	  0x0001
+#define RS_INIT_FAILURE_BSDES	  0x0002
+#define RS_INIT_FAILURE_ALRT	  0x0003
+#define RS_INIT_FAILURE_PERF	  0x0004
+static void __init pr_cpumsf_err(unsigned int reason)
+{
+	pr_err("Sampling facility support for perf is not available: "
+	       "reason=%04x\n", reason);
+}
+
+static int __init init_cpum_sampling_pmu(void)
+{
+	struct hws_qsi_info_block si;
+	int err;
+
+	if (!cpum_sf_avail())
+		return -ENODEV;
+
+	memset(&si, 0, sizeof(si));
+	if (qsi(&si)) {
+		pr_cpumsf_err(RS_INIT_FAILURE_QSI);
+		return -ENODEV;
+	}
+
+	if (si.bsdes != sizeof(struct hws_basic_entry)) {
+		pr_cpumsf_err(RS_INIT_FAILURE_BSDES);
+		return -EINVAL;
+	}
+
+	if (si.ad)
+		sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+
+	sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
+	if (!sfdbg)
+		pr_err("Registering for s390dbf failed\n");
+	debug_register_view(sfdbg, &debug_sprintf_view);
+
+	err = register_external_interrupt(0x1407, cpumf_measurement_alert);
+	if (err) {
+		pr_cpumsf_err(RS_INIT_FAILURE_ALRT);
+		goto out;
+	}
+
+	err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
+	if (err) {
+		pr_cpumsf_err(RS_INIT_FAILURE_PERF);
+		unregister_external_interrupt(0x1407, cpumf_measurement_alert);
+		goto out;
+	}
+	perf_cpu_notifier(cpumf_pmu_notifier);
+out:
+	return err;
+}
+arch_initcall(init_cpum_sampling_pmu);
+core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 2343c218b8f9..5d2dfa31c4ef 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -1,7 +1,7 @@
 /*
  * Performance event support for s390x
  *
- *  Copyright IBM Corp. 2012
+ *  Copyright IBM Corp. 2012, 2013
  *  Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -16,15 +16,19 @@
 #include <linux/kvm_host.h>
 #include <linux/percpu.h>
 #include <linux/export.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
 #include <asm/irq.h>
 #include <asm/cpu_mf.h>
 #include <asm/lowcore.h>
 #include <asm/processor.h>
+#include <asm/sysinfo.h>
 
 const char *perf_pmu_name(void)
 {
 	if (cpum_cf_avail() || cpum_sf_avail())
-		return "CPU-measurement facilities (CPUMF)";
+		return "CPU-Measurement Facilities (CPU-MF)";
 	return "pmu";
 }
 EXPORT_SYMBOL(perf_pmu_name);
@@ -35,6 +39,8 @@ int perf_num_counters(void)
 
 	if (cpum_cf_avail())
 		num += PERF_CPUM_CF_MAX_CTR;
+	if (cpum_sf_avail())
+		num += PERF_CPUM_SF_MAX_CTR;
 
 	return num;
 }
@@ -54,7 +60,7 @@ static bool is_in_guest(struct pt_regs *regs)
 {
 	if (user_mode(regs))
 		return false;
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
 	return instruction_pointer(regs) == (unsigned long) &sie_exit;
 #else
 	return false;
@@ -83,8 +89,31 @@ static unsigned long perf_misc_guest_flags(struct pt_regs *regs)
 					: PERF_RECORD_MISC_GUEST_KERNEL;
 }
 
+static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
+{
+	struct perf_sf_sde_regs *sde_regs;
+	unsigned long flags;
+
+	sde_regs = (struct perf_sf_sde_regs *) &regs->int_parm_long;
+	if (sde_regs->in_guest)
+		flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+					: PERF_RECORD_MISC_GUEST_KERNEL;
+	else
+		flags = user_mode(regs) ? PERF_RECORD_MISC_USER
+					: PERF_RECORD_MISC_KERNEL;
+	return flags;
+}
+
 unsigned long perf_misc_flags(struct pt_regs *regs)
 {
+	/* Check if the cpum_sf PMU has created the pt_regs structure.
+	 * In this case, perf misc flags can be easily extracted.  Otherwise,
+	 * do regular checks on the pt_regs content.
+	 */
+	if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA)
+		if (!regs->gprs[15])
+			return perf_misc_flags_sf(regs);
+
 	if (is_in_guest(regs))
 		return perf_misc_guest_flags(regs);
 
@@ -92,27 +121,107 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
 			       : PERF_RECORD_MISC_KERNEL;
 }
 
-void perf_event_print_debug(void)
+void print_debug_cf(void)
 {
 	struct cpumf_ctr_info cf_info;
-	unsigned long flags;
-	int cpu;
-
-	if (!cpum_cf_avail())
-		return;
-
-	local_irq_save(flags);
+	int cpu = smp_processor_id();
 
-	cpu = smp_processor_id();
 	memset(&cf_info, 0, sizeof(cf_info));
 	if (!qctri(&cf_info))
 		pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n",
 			cpu, cf_info.cfvn, cf_info.csvn,
 			cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl);
+}
+
+static void print_debug_sf(void)
+{
+	struct hws_qsi_info_block si;
+	int cpu = smp_processor_id();
 
+	memset(&si, 0, sizeof(si));
+	if (qsi(&si))
+		return;
+
+	pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n",
+		cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate,
+		si.cpu_speed);
+
+	if (si.as)
+		pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i"
+			" bsdes=%i tear=%016lx dear=%016lx\n", cpu,
+			si.as, si.es, si.cs, si.bsdes, si.tear, si.dear);
+	if (si.ad)
+		pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i"
+			" dsdes=%i tear=%016lx dear=%016lx\n", cpu,
+			si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear);
+}
+
+void perf_event_print_debug(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (cpum_cf_avail())
+		print_debug_cf();
+	if (cpum_sf_avail())
+		print_debug_sf();
 	local_irq_restore(flags);
 }
 
+/* Service level infrastructure */
+static void sl_print_counter(struct seq_file *m)
+{
+	struct cpumf_ctr_info ci;
+
+	memset(&ci, 0, sizeof(ci));
+	if (qctri(&ci))
+		return;
+
+	seq_printf(m, "CPU-MF: Counter facility: version=%u.%u "
+		   "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl);
+}
+
+static void sl_print_sampling(struct seq_file *m)
+{
+	struct hws_qsi_info_block si;
+
+	memset(&si, 0, sizeof(si));
+	if (qsi(&si))
+		return;
+
+	if (!si.as && !si.ad)
+		return;
+
+	seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu"
+		   " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate,
+		   si.cpu_speed);
+	if (si.as)
+		seq_printf(m, "CPU-MF: Sampling facility: mode=basic"
+			   " sample_size=%u\n", si.bsdes);
+	if (si.ad)
+		seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic"
+			   " sample_size=%u\n", si.dsdes);
+}
+
+static void service_level_perf_print(struct seq_file *m,
+				     struct service_level *sl)
+{
+	if (cpum_cf_avail())
+		sl_print_counter(m);
+	if (cpum_sf_avail())
+		sl_print_sampling(m);
+}
+
+static struct service_level service_level_perf = {
+	.seq_print = service_level_perf_print,
+};
+
+static int __init service_level_perf_register(void)
+{
+	return register_service_level(&service_level_perf);
+}
+arch_initcall(service_level_perf_register);
+
 /* See also arch/s390/kernel/traps.c */
 static unsigned long __store_trace(struct perf_callchain_entry *entry,
 				   unsigned long sp,
@@ -172,3 +281,44 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
 	__store_trace(entry, head, S390_lowcore.thread_info,
 		      S390_lowcore.thread_info + THREAD_SIZE);
 }
+
+/* Perf defintions for PMU event attributes in sysfs */
+ssize_t cpumf_events_sysfs_show(struct device *dev,
+				struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sprintf(page, "event=0x%04llx,name=%s\n",
+		       pmu_attr->id, attr->attr.name);
+}
+
+/* Reserve/release functions for sharing perf hardware */
+static DEFINE_SPINLOCK(perf_hw_owner_lock);
+static void *perf_sampling_owner;
+
+int perf_reserve_sampling(void)
+{
+	int err;
+
+	err = 0;
+	spin_lock(&perf_hw_owner_lock);
+	if (perf_sampling_owner) {
+		pr_warn("The sampling facility is already reserved by %p\n",
+			perf_sampling_owner);
+		err = -EBUSY;
+	} else
+		perf_sampling_owner = __builtin_return_address(0);
+	spin_unlock(&perf_hw_owner_lock);
+	return err;
+}
+EXPORT_SYMBOL(perf_reserve_sampling);
+
+void perf_release_sampling(void)
+{
+	spin_lock(&perf_hw_owner_lock);
+	WARN_ON(!perf_sampling_owner);
+	perf_sampling_owner = NULL;
+	spin_unlock(&perf_hw_owner_lock);
+}
+EXPORT_SYMBOL(perf_release_sampling);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 7ed0d4e2a435..dd145321d215 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -261,20 +261,18 @@ static inline unsigned long brk_rnd(void)
 
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
-	unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd());
+	unsigned long ret;
 
-	if (ret < mm->brk)
-		return mm->brk;
-	return ret;
+	ret = PAGE_ALIGN(mm->brk + brk_rnd());
+	return (ret > mm->brk) ? ret : mm->brk;
 }
 
 unsigned long randomize_et_dyn(unsigned long base)
 {
-	unsigned long ret = PAGE_ALIGN(base + brk_rnd());
+	unsigned long ret;
 
 	if (!(current->flags & PF_RANDOMIZE))
 		return base;
-	if (ret < base)
-		return base;
-	return ret;
+	ret = PAGE_ALIGN(base + brk_rnd());
+	return (ret > base) ? ret : base;
 }
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index e65c91c591e8..f6be6087a0e9 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -56,25 +56,26 @@ void update_cr_regs(struct task_struct *task)
 #ifdef CONFIG_64BIT
 	/* Take care of the enable/disable of transactional execution. */
 	if (MACHINE_HAS_TE) {
-		unsigned long cr[3], cr_new[3];
+		unsigned long cr, cr_new;
 
-		__ctl_store(cr, 0, 2);
-		cr_new[1] = cr[1];
+		__ctl_store(cr, 0, 0);
 		/* Set or clear transaction execution TXC bit 8. */
+		cr_new = cr | (1UL << 55);
 		if (task->thread.per_flags & PER_FLAG_NO_TE)
-			cr_new[0] = cr[0] & ~(1UL << 55);
-		else
-			cr_new[0] = cr[0] | (1UL << 55);
+			cr_new &= ~(1UL << 55);
+		if (cr_new != cr)
+			__ctl_load(cr, 0, 0);
 		/* Set or clear transaction execution TDC bits 62 and 63. */
-		cr_new[2] = cr[2] & ~3UL;
+		__ctl_store(cr, 2, 2);
+		cr_new = cr & ~3UL;
 		if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
 			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
-				cr_new[2] |= 1UL;
+				cr_new |= 1UL;
 			else
-				cr_new[2] |= 2UL;
+				cr_new |= 2UL;
 		}
-		if (memcmp(&cr_new, &cr, sizeof(cr)))
-			__ctl_load(cr_new, 0, 2);
+		if (cr_new != cr)
+			__ctl_load(cr_new, 2, 2);
 	}
 #endif
 	/* Copy user specified PER registers */
@@ -107,15 +108,11 @@ void update_cr_regs(struct task_struct *task)
 void user_enable_single_step(struct task_struct *task)
 {
 	set_tsk_thread_flag(task, TIF_SINGLE_STEP);
-	if (task == current)
-		update_cr_regs(task);
 }
 
 void user_disable_single_step(struct task_struct *task)
 {
 	clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
-	if (task == current)
-		update_cr_regs(task);
 }
 
 /*
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 3bac589844a7..9f60467938d1 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -5,7 +5,7 @@
 #ifdef CONFIG_FUNCTION_TRACER
 EXPORT_SYMBOL(_mcount);
 #endif
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
 EXPORT_SYMBOL(sie64a);
 EXPORT_SYMBOL(sie_exit);
 #endif
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 4444875266ee..09e2f468f48b 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -373,7 +373,7 @@ static void __init setup_lowcore(void)
 
 	/*
 	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
-	 * restart data to the absolute zero lowcore. This is necesary if
+	 * restart data to the absolute zero lowcore. This is necessary if
 	 * PSW restart is done on an offline CPU that has lowcore zero.
 	 */
 	lc->restart_stack = (unsigned long) restart_stack;
@@ -1023,6 +1023,7 @@ void __init setup_arch(char **cmdline_p)
 	setup_vmcoreinfo();
 	setup_lowcore();
 
+	smp_fill_possible_mask();
         cpu_init();
 	s390_init_cpu_topology();
 
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index dc4a53465060..a7125b62a9a6 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -59,7 +59,7 @@ enum {
 };
 
 struct pcpu {
-	struct cpu cpu;
+	struct cpu *cpu;
 	struct _lowcore *lowcore;	/* lowcore page(s) for the cpu */
 	unsigned long async_stack;	/* async stack for the cpu */
 	unsigned long panic_stack;	/* panic stack for the cpu */
@@ -159,9 +159,9 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
 {
 	int order;
 
-	set_bit(ec_bit, &pcpu->ec_mask);
-	order = pcpu_running(pcpu) ?
-		SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
+	if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
+		return;
+	order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
 	pcpu_sigp_retry(pcpu, order, 0);
 }
 
@@ -721,18 +721,14 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	return 0;
 }
 
-static int __init setup_possible_cpus(char *s)
-{
-	int max, cpu;
+static unsigned int setup_possible_cpus __initdata;
 
-	if (kstrtoint(s, 0, &max) < 0)
-		return 0;
-	init_cpu_possible(cpumask_of(0));
-	for (cpu = 1; cpu < max && cpu < nr_cpu_ids; cpu++)
-		set_cpu_possible(cpu, true);
+static int __init _setup_possible_cpus(char *s)
+{
+	get_option(&s, &setup_possible_cpus);
 	return 0;
 }
-early_param("possible_cpus", setup_possible_cpus);
+early_param("possible_cpus", _setup_possible_cpus);
 
 #ifdef CONFIG_HOTPLUG_CPU
 
@@ -775,6 +771,17 @@ void __noreturn cpu_die(void)
 
 #endif /* CONFIG_HOTPLUG_CPU */
 
+void __init smp_fill_possible_mask(void)
+{
+	unsigned int possible, cpu;
+
+	possible = setup_possible_cpus;
+	if (!possible)
+		possible = MACHINE_IS_VM ? 64 : nr_cpu_ids;
+	for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
+		set_cpu_possible(cpu, true);
+}
+
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	/* request the 0x1201 emergency signal external interrupt */
@@ -958,7 +965,7 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
 			  void *hcpu)
 {
 	unsigned int cpu = (unsigned int)(long)hcpu;
-	struct cpu *c = &pcpu_devices[cpu].cpu;
+	struct cpu *c = pcpu_devices[cpu].cpu;
 	struct device *s = &c->dev;
 	int err = 0;
 
@@ -975,10 +982,15 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
 
 static int smp_add_present_cpu(int cpu)
 {
-	struct cpu *c = &pcpu_devices[cpu].cpu;
-	struct device *s = &c->dev;
+	struct device *s;
+	struct cpu *c;
 	int rc;
 
+	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	if (!c)
+		return -ENOMEM;
+	pcpu_devices[cpu].cpu = c;
+	s = &c->dev;
 	c->hotpluggable = 1;
 	rc = register_cpu(c, cpu);
 	if (rc)
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 913410bd74a3..143992152ec9 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -353,3 +353,5 @@ SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev
 SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,sys_s390_runtime_instr_wrapper)
 SYSCALL(sys_kcmp,sys_kcmp,sys_kcmp_wrapper)
 SYSCALL(sys_finit_module,sys_finit_module,sys_finit_module_wrapper)
+SYSCALL(sys_sched_setattr,sys_sched_setattr,sys_sched_setattr_wrapper) /* 345 */
+SYSCALL(sys_sched_getattr,sys_sched_getattr,sys_sched_getattr_wrapper)
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 78d967f180f4..8216c0e0b2e2 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -121,7 +121,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 	 * - gpr 4 contains the index on the bus (optionally)
 	 */
 	ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
-				      vcpu->run->s.regs.gprs[2],
+				      vcpu->run->s.regs.gprs[2] & 0xffffffff,
 				      8, &vcpu->run->s.regs.gprs[3],
 				      vcpu->run->s.regs.gprs[4]);
 
@@ -137,7 +137,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 {
-	int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
+	int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 5ddbbde6f65c..eeb1ac7d8fa4 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -112,6 +112,17 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
 static int handle_prog(struct kvm_vcpu *vcpu)
 {
 	vcpu->stat.exit_program_interruption++;
+
+	/* Restore ITDB to Program-Interruption TDB in guest memory */
+	if (IS_TE_ENABLED(vcpu) &&
+	    !(current->thread.per_flags & PER_FLAG_NO_TE) &&
+	    IS_ITDB_VALID(vcpu)) {
+		copy_to_guest(vcpu, TDB_ADDR, vcpu->arch.sie_block->itdba,
+			      sizeof(struct kvm_s390_itdb));
+		memset((void *) vcpu->arch.sie_block->itdba, 0,
+		       sizeof(struct kvm_s390_itdb));
+	}
+
 	trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
 	return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
 }
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 569494e01ec6..e0676f390d57 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -395,6 +395,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 						    CPUSTAT_STOPPED |
 						    CPUSTAT_GED);
 	vcpu->arch.sie_block->ecb   = 6;
+	if (test_vfacility(50) && test_vfacility(73))
+		vcpu->arch.sie_block->ecb |= 0x10;
+
 	vcpu->arch.sie_block->ecb2  = 8;
 	vcpu->arch.sie_block->eca   = 0xC1002001U;
 	vcpu->arch.sie_block->fac   = (int) (long) vfacilities;
@@ -411,6 +414,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 				      unsigned int id)
 {
 	struct kvm_vcpu *vcpu;
+	struct sie_page *sie_page;
 	int rc = -EINVAL;
 
 	if (id >= KVM_MAX_VCPUS)
@@ -422,12 +426,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	if (!vcpu)
 		goto out;
 
-	vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
-					get_zeroed_page(GFP_KERNEL);
-
-	if (!vcpu->arch.sie_block)
+	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
+	if (!sie_page)
 		goto out_free_cpu;
 
+	vcpu->arch.sie_block = &sie_page->sie_block;
+	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+
 	vcpu->arch.sie_block->icpua = id;
 	if (!kvm_is_ucontrol(kvm)) {
 		if (!kvm->arch.sca) {
@@ -732,14 +737,16 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 
 	if (exit_reason >= 0) {
 		rc = 0;
+	} else if (kvm_is_ucontrol(vcpu->kvm)) {
+		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+		vcpu->run->s390_ucontrol.trans_exc_code =
+						current->thread.gmap_addr;
+		vcpu->run->s390_ucontrol.pgm_code = 0x10;
+		rc = -EREMOTE;
 	} else {
-		if (kvm_is_ucontrol(vcpu->kvm)) {
-			rc = SIE_INTERCEPT_UCONTROL;
-		} else {
-			VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
-			trace_kvm_s390_sie_fault(vcpu);
-			rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		}
+		VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+		trace_kvm_s390_sie_fault(vcpu);
+		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 	}
 
 	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
@@ -833,16 +840,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		rc = -EINTR;
 	}
 
-#ifdef CONFIG_KVM_S390_UCONTROL
-	if (rc == SIE_INTERCEPT_UCONTROL) {
-		kvm_run->exit_reason = KVM_EXIT_S390_UCONTROL;
-		kvm_run->s390_ucontrol.trans_exc_code =
-			current->thread.gmap_addr;
-		kvm_run->s390_ucontrol.pgm_code = 0x10;
-		rc = 0;
-	}
-#endif
-
 	if (rc == -EOPNOTSUPP) {
 		/* intercept cannot be handled in-kernel, prepare kvm-run */
 		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
@@ -885,10 +882,11 @@ static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
  */
-int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
 {
 	unsigned char archmode = 1;
 	int prefix;
+	u64 clkcomp;
 
 	if (addr == KVM_S390_STORE_STATUS_NOADDR) {
 		if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
@@ -903,15 +901,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	} else
 		prefix = 0;
 
-	/*
-	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
-	 * copying in vcpu load/put. Lets update our copies before we save
-	 * it into the save area
-	 */
-	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
-	save_access_regs(vcpu->run->s.regs.acrs);
-
 	if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
 			vcpu->arch.guest_fpregs.fprs, 128, prefix))
 		return -EFAULT;
@@ -941,8 +930,9 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 			&vcpu->arch.sie_block->cputm, 8, prefix))
 		return -EFAULT;
 
+	clkcomp = vcpu->arch.sie_block->ckc >> 8;
 	if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
-			&vcpu->arch.sie_block->ckc, 8, prefix))
+			&clkcomp, 8, prefix))
 		return -EFAULT;
 
 	if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
@@ -956,6 +946,20 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	return 0;
 }
 
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	/*
+	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
+	 * copying in vcpu load/put. Lets update our copies before we save
+	 * it into the save area
+	 */
+	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	save_access_regs(vcpu->run->s.regs.acrs);
+
+	return kvm_s390_store_status_unloaded(vcpu, addr);
+}
+
 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 				     struct kvm_enable_cap *cap)
 {
@@ -1183,8 +1187,8 @@ static int __init kvm_s390_init(void)
 		return -ENOMEM;
 	}
 	memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
-	vfacilities[0] &= 0xff82fff3f47c0000UL;
-	vfacilities[1] &= 0x001c000000000000UL;
+	vfacilities[0] &= 0xff82fff3f4fc2000UL;
+	vfacilities[1] &= 0x005c000000000000UL;
 	return 0;
 }
 
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index b44912a32949..f9559b0bd620 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -19,18 +19,19 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 
-/* The current code can have up to 256 pages for virtio */
-#define VIRTIODESCSPACE (256ul * 4096ul)
-
 typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 
 /* declare vfacilities extern */
 extern unsigned long *vfacilities;
 
-/* negativ values are error codes, positive values for internal conditions */
-#define SIE_INTERCEPT_UCONTROL		(1<<0)
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
 
+/* Transactional Memory Execution related macros */
+#define IS_TE_ENABLED(vcpu)	((vcpu->arch.sie_block->ecb & 0x10))
+#define TDB_ADDR		0x1800UL
+#define TDB_FORMAT1		1
+#define IS_ITDB_VALID(vcpu)	((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
+
 #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
 do { \
 	debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
@@ -133,7 +134,6 @@ int __must_check kvm_s390_inject_vm(struct kvm *kvm,
 int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 				      struct kvm_s390_interrupt *s390int);
 int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
-int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 cr6, u64 schid);
 
@@ -150,8 +150,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
 
 /* implemented in kvm-s390.c */
-int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
-				 unsigned long addr);
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
 void s390_vcpu_block(struct kvm_vcpu *vcpu);
 void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
 void exit_sie(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 2440602e6df1..75beea632a10 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -197,7 +197,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
 	if (addr & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	cc = 0;
-	inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0);
+	inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
 	if (!inti)
 		goto no_interrupt;
 	cc = 1;
@@ -275,7 +275,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
 		return -EOPNOTSUPP;
 	} else {
 		/*
-		 * Set condition code 3 to stop the guest from issueing channel
+		 * Set condition code 3 to stop the guest from issuing channel
 		 * I/O instructions.
 		 */
 		kvm_s390_set_psw_cc(vcpu, 3);
@@ -638,7 +638,6 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 
 static const intercept_handler_t b9_handlers[256] = {
 	[0x8d] = handle_epsw,
-	[0x9c] = handle_io_inst,
 	[0xaf] = handle_pfmf,
 };
 
@@ -731,7 +730,6 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 
 static const intercept_handler_t eb_handlers[256] = {
 	[0x2f] = handle_lctlg,
-	[0x8a] = handle_io_inst,
 };
 
 int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index bec398c57acf..87c2b3a3bd3e 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -1,7 +1,7 @@
 /*
  * handling interprocessor communication
  *
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2013
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -89,6 +89,37 @@ unlock:
 	return rc;
 }
 
+static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
+					u16 asn, u64 *reg)
+{
+	struct kvm_vcpu *dst_vcpu = NULL;
+	const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
+	u16 p_asn, s_asn;
+	psw_t *psw;
+	u32 flags;
+
+	if (cpu_addr < KVM_MAX_VCPUS)
+		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+	if (!dst_vcpu)
+		return SIGP_CC_NOT_OPERATIONAL;
+	flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags);
+	psw = &dst_vcpu->arch.sie_block->gpsw;
+	p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff;  /* Primary ASN */
+	s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff;  /* Secondary ASN */
+
+	/* Deliver the emergency signal? */
+	if (!(flags & CPUSTAT_STOPPED)
+	    || (psw->mask & psw_int_mask) != psw_int_mask
+	    || ((flags & CPUSTAT_WAIT) && psw->addr != 0)
+	    || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) {
+		return __sigp_emergency(vcpu, cpu_addr);
+	} else {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INCORRECT_STATE;
+		return SIGP_CC_STATUS_STORED;
+	}
+}
+
 static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
 {
 	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
@@ -130,6 +161,7 @@ unlock:
 static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
 {
 	struct kvm_s390_interrupt_info *inti;
+	int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 
 	inti = kzalloc(sizeof(*inti), GFP_ATOMIC);
 	if (!inti)
@@ -139,6 +171,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
 	spin_lock_bh(&li->lock);
 	if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
 		kfree(inti);
+		if ((action & ACTION_STORE_ON_STOP) != 0)
+			rc = -ESHUTDOWN;
 		goto out;
 	}
 	list_add_tail(&inti->list, &li->list);
@@ -150,7 +184,7 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
 out:
 	spin_unlock_bh(&li->lock);
 
-	return SIGP_CC_ORDER_CODE_ACCEPTED;
+	return rc;
 }
 
 static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
@@ -174,13 +208,17 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
 unlock:
 	spin_unlock(&fi->lock);
 	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
-	return rc;
-}
 
-int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action)
-{
-	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	return __inject_sigp_stop(li, action);
+	if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) {
+		/* If the CPU has already been stopped, we still have
+		 * to save the status when doing stop-and-store. This
+		 * has to be done after unlocking all spinlocks. */
+		struct kvm_vcpu *dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+		rc = kvm_s390_store_status_unloaded(dst_vcpu,
+						KVM_S390_STORE_STATUS_NOADDR);
+	}
+
+	return rc;
 }
 
 static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
@@ -262,6 +300,37 @@ out_fi:
 	return rc;
 }
 
+static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
+					u32 addr, u64 *reg)
+{
+	struct kvm_vcpu *dst_vcpu = NULL;
+	int flags;
+	int rc;
+
+	if (cpu_id < KVM_MAX_VCPUS)
+		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id);
+	if (!dst_vcpu)
+		return SIGP_CC_NOT_OPERATIONAL;
+
+	spin_lock_bh(&dst_vcpu->arch.local_int.lock);
+	flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
+	spin_unlock_bh(&dst_vcpu->arch.local_int.lock);
+	if (!(flags & CPUSTAT_STOPPED)) {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INCORRECT_STATE;
+		return SIGP_CC_STATUS_STORED;
+	}
+
+	addr &= 0x7ffffe00;
+	rc = kvm_s390_store_status_unloaded(dst_vcpu, addr);
+	if (rc == -EFAULT) {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INVALID_PARAMETER;
+		rc = SIGP_CC_STATUS_STORED;
+	}
+	return rc;
+}
+
 static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
 				u64 *reg)
 {
@@ -294,7 +363,8 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
 	return rc;
 }
 
-static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr)
+/* Test whether the destination CPU is available and not busy */
+static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
 {
 	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
 	struct kvm_s390_local_interrupt *li;
@@ -313,9 +383,6 @@ static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr)
 	spin_lock_bh(&li->lock);
 	if (li->action_bits & ACTION_STOP_ON_STOP)
 		rc = SIGP_CC_BUSY;
-	else
-		VCPU_EVENT(vcpu, 4, "sigp restart %x to handle userspace",
-			cpu_addr);
 	spin_unlock_bh(&li->lock);
 out:
 	spin_unlock(&fi->lock);
@@ -366,6 +433,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 		rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP |
 						 ACTION_STOP_ON_STOP);
 		break;
+	case SIGP_STORE_STATUS_AT_ADDRESS:
+		rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter,
+						 &vcpu->run->s.regs.gprs[r1]);
+		break;
 	case SIGP_SET_ARCHITECTURE:
 		vcpu->stat.instruction_sigp_arch++;
 		rc = __sigp_set_arch(vcpu, parameter);
@@ -375,17 +446,31 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 		rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
 				       &vcpu->run->s.regs.gprs[r1]);
 		break;
+	case SIGP_COND_EMERGENCY_SIGNAL:
+		rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter,
+						  &vcpu->run->s.regs.gprs[r1]);
+		break;
 	case SIGP_SENSE_RUNNING:
 		vcpu->stat.instruction_sigp_sense_running++;
 		rc = __sigp_sense_running(vcpu, cpu_addr,
 					  &vcpu->run->s.regs.gprs[r1]);
 		break;
+	case SIGP_START:
+		rc = sigp_check_callable(vcpu, cpu_addr);
+		if (rc == SIGP_CC_ORDER_CODE_ACCEPTED)
+			rc = -EOPNOTSUPP;    /* Handle START in user space */
+		break;
 	case SIGP_RESTART:
 		vcpu->stat.instruction_sigp_restart++;
-		rc = __sigp_restart(vcpu, cpu_addr);
-		if (rc == SIGP_CC_BUSY)
-			break;
-		/* user space must know about restart */
+		rc = sigp_check_callable(vcpu, cpu_addr);
+		if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) {
+			VCPU_EVENT(vcpu, 4,
+				   "sigp restart %x to handle userspace",
+				   cpu_addr);
+			/* user space must know about restart */
+			rc = -EOPNOTSUPP;
+		}
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -393,7 +478,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 	if (rc < 0)
 		return rc;
 
-	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
-	vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44;
+	kvm_s390_set_psw_cc(vcpu, rc);
 	return 0;
 }
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index 0c991c6748ab..3db76b2daed7 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -175,6 +175,7 @@ TRACE_EVENT(kvm_s390_intercept_validity,
 	{SIGP_STOP_AND_STORE_STATUS, "stop and store status"},	\
 	{SIGP_SET_ARCHITECTURE, "set architecture"},		\
 	{SIGP_SET_PREFIX, "set prefix"},			\
+	{SIGP_STORE_STATUS_AT_ADDRESS, "store status at addr"},	\
 	{SIGP_SENSE_RUNNING, "sense running"},			\
 	{SIGP_RESTART, "restart"}
 
diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h
index 315dbe09983e..b1a22173d027 100644
--- a/arch/s390/lib/uaccess.h
+++ b/arch/s390/lib/uaccess.h
@@ -6,15 +6,6 @@
 #ifndef __ARCH_S390_LIB_UACCESS_H
 #define __ARCH_S390_LIB_UACCESS_H
 
-extern size_t copy_from_user_std(size_t, const void __user *, void *);
-extern size_t copy_to_user_std(size_t, void __user *, const void *);
-extern size_t strnlen_user_std(size_t, const char __user *);
-extern size_t strncpy_from_user_std(size_t, const char __user *, char *);
-extern int futex_atomic_cmpxchg_std(u32 *, u32 __user *, u32, u32);
-extern int futex_atomic_op_std(int, u32 __user *, int, int *);
-
-extern size_t copy_from_user_pt(size_t, const void __user *, void *);
-extern size_t copy_to_user_pt(size_t, void __user *, const void *);
 extern int futex_atomic_op_pt(int, u32 __user *, int, int *);
 extern int futex_atomic_cmpxchg_pt(u32 *, u32 __user *, u32, u32);
 
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index dbdab3e7a1a6..61ebcc9ccb34 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -74,8 +74,8 @@ static size_t copy_in_kernel(size_t count, void __user *to,
 
 /*
  * Returns kernel address for user virtual address. If the returned address is
- * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occured and the address
- * contains the (negative) exception code.
+ * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occurred and the
+ * address contains the (negative) exception code.
  */
 #ifdef CONFIG_64BIT
 
@@ -153,6 +153,8 @@ static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr,
 	unsigned long offset, done, size, kaddr;
 	void *from, *to;
 
+	if (!mm)
+		return n;
 	done = 0;
 retry:
 	spin_lock(&mm->page_table_lock);
@@ -209,7 +211,7 @@ fault:
 	return 0;
 }
 
-size_t copy_from_user_pt(size_t n, const void __user *from, void *to)
+static size_t copy_from_user_pt(size_t n, const void __user *from, void *to)
 {
 	size_t rc;
 
@@ -221,7 +223,7 @@ size_t copy_from_user_pt(size_t n, const void __user *from, void *to)
 	return rc;
 }
 
-size_t copy_to_user_pt(size_t n, void __user *to, const void *from)
+static size_t copy_to_user_pt(size_t n, void __user *to, const void *from)
 {
 	if (segment_eq(get_fs(), KERNEL_DS))
 		return copy_in_kernel(n, to, (void __user *) from);
@@ -262,6 +264,8 @@ static size_t strnlen_user_pt(size_t count, const char __user *src)
 		return 0;
 	if (segment_eq(get_fs(), KERNEL_DS))
 		return strnlen_kernel(count, src);
+	if (!mm)
+		return 0;
 	done = 0;
 retry:
 	spin_lock(&mm->page_table_lock);
@@ -323,6 +327,8 @@ static size_t copy_in_user_pt(size_t n, void __user *to,
 
 	if (segment_eq(get_fs(), KERNEL_DS))
 		return copy_in_kernel(n, to, from);
+	if (!mm)
+		return n;
 	done = 0;
 retry:
 	spin_lock(&mm->page_table_lock);
@@ -411,6 +417,8 @@ int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
 
 	if (segment_eq(get_fs(), KERNEL_DS))
 		return __futex_atomic_op_pt(op, uaddr, oparg, old);
+	if (unlikely(!current->mm))
+		return -EFAULT;
 	spin_lock(&current->mm->page_table_lock);
 	uaddr = (u32 __force __user *)
 		__dat_user_addr((__force unsigned long) uaddr, 1);
@@ -448,6 +456,8 @@ int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
 
 	if (segment_eq(get_fs(), KERNEL_DS))
 		return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
+	if (unlikely(!current->mm))
+		return -EFAULT;
 	spin_lock(&current->mm->page_table_lock);
 	uaddr = (u32 __force __user *)
 		__dat_user_addr((__force unsigned long) uaddr, 1);
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index a90d45e9dfb0..27c50f4d90cb 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -12,6 +12,8 @@
 #include <linux/mm.h>
 #include <linux/gfp.h>
 #include <linux/init.h>
+#include <asm/setup.h>
+#include <asm/ipl.h>
 
 #define ESSA_SET_STABLE		1
 #define ESSA_SET_UNUSED		2
@@ -41,6 +43,14 @@ void __init cmma_init(void)
 
 	if (!cmma_flag)
 		return;
+	/*
+	 * Disable CMM for dump, otherwise  the tprot based memory
+	 * detection can fail because of unstable pages.
+	 */
+	if (OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP) {
+		cmma_flag = 0;
+		return;
+	}
 	asm volatile(
 		"       .insn rrf,0xb9ab0000,%1,%1,0,0\n"
 		"0:     la      %0,0\n"
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index e794c88f699a..3584ed9b20a1 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -293,7 +293,7 @@ static int gmap_alloc_table(struct gmap *gmap,
  * @addr: address in the guest address space
  * @len: length of the memory area to unmap
  *
- * Returns 0 if the unmap succeded, -EINVAL if not.
+ * Returns 0 if the unmap succeeded, -EINVAL if not.
  */
 int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
 {
@@ -344,7 +344,7 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment);
  * @from: source address in the parent address space
  * @to: target address in the guest address space
  *
- * Returns 0 if the mmap succeded, -EINVAL or -ENOMEM if not.
+ * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
  */
 int gmap_map_segment(struct gmap *gmap, unsigned long from,
 		     unsigned long to, unsigned long len)
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 16871da37371..708d60e40066 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -368,14 +368,16 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 		EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg));
 		/* lhi %r4,0 */
 		EMIT4(0xa7480000);
-		/* dr %r4,%r12 */
-		EMIT2(0x1d4c);
+		/* dlr %r4,%r12 */
+		EMIT4(0xb997004c);
 		break;
-	case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K) */
-		/* m %r4,<d(K)>(%r13) */
-		EMIT4_DISP(0x5c40d000, EMIT_CONST(K));
-		/* lr %r5,%r4 */
-		EMIT2(0x1854);
+	case BPF_S_ALU_DIV_K: /* A /= K */
+		if (K == 1)
+			break;
+		/* lhi %r4,0 */
+		EMIT4(0xa7480000);
+		/* dl %r4,<d(K)>(%r13) */
+		EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
 		break;
 	case BPF_S_ALU_MOD_X: /* A %= X */
 		jit->seen |= SEEN_XREG | SEEN_RET0;
@@ -385,16 +387,21 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 		EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg));
 		/* lhi %r4,0 */
 		EMIT4(0xa7480000);
-		/* dr %r4,%r12 */
-		EMIT2(0x1d4c);
+		/* dlr %r4,%r12 */
+		EMIT4(0xb997004c);
 		/* lr %r5,%r4 */
 		EMIT2(0x1854);
 		break;
 	case BPF_S_ALU_MOD_K: /* A %= K */
+		if (K == 1) {
+			/* lhi %r5,0 */
+			EMIT4(0xa7580000);
+			break;
+		}
 		/* lhi %r4,0 */
 		EMIT4(0xa7480000);
-		/* d %r4,<d(K)>(%r13) */
-		EMIT4_DISP(0x5d40d000, EMIT_CONST(K));
+		/* dl %r4,<d(K)>(%r13) */
+		EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
 		/* lr %r5,%r4 */
 		EMIT2(0x1854);
 		break;
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index 231cecafc2f1..a32c96761eab 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -26,9 +26,6 @@
 #define MAX_NUM_SDB 511
 #define MIN_NUM_SDB 1
 
-#define ALERT_REQ_MASK   0x4000000000000000ul
-#define BUFFER_FULL_MASK 0x8000000000000000ul
-
 DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer);
 
 struct hws_execute_parms {
@@ -44,6 +41,7 @@ static DEFINE_MUTEX(hws_sem_oom);
 
 static unsigned char hws_flush_all;
 static unsigned int hws_oom;
+static unsigned int hws_alert;
 static struct workqueue_struct *hws_wq;
 
 static unsigned int hws_state;
@@ -65,43 +63,6 @@ static unsigned long interval;
 static unsigned long min_sampler_rate;
 static unsigned long max_sampler_rate;
 
-static int ssctl(void *buffer)
-{
-	int cc;
-
-	/* set in order to detect a program check */
-	cc = 1;
-
-	asm volatile(
-		"0: .insn s,0xB2870000,0(%1)\n"
-		"1: ipm %0\n"
-		"   srl %0,28\n"
-		"2:\n"
-		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
-		: "+d" (cc), "+a" (buffer)
-		: "m" (*((struct hws_ssctl_request_block *)buffer))
-		: "cc", "memory");
-
-	return cc ? -EINVAL : 0 ;
-}
-
-static int qsi(void *buffer)
-{
-	int cc;
-	cc = 1;
-
-	asm volatile(
-		"0: .insn s,0xB2860000,0(%1)\n"
-		"1: lhi %0,0\n"
-		"2:\n"
-		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
-		: "=d" (cc), "+a" (buffer)
-		: "m" (*((struct hws_qsi_info_block *)buffer))
-		: "cc", "memory");
-
-	return cc ? -EINVAL : 0;
-}
-
 static void execute_qsi(void *parms)
 {
 	struct hws_execute_parms *ep = parms;
@@ -113,7 +74,7 @@ static void execute_ssctl(void *parms)
 {
 	struct hws_execute_parms *ep = parms;
 
-	ep->rc = ssctl(ep->buffer);
+	ep->rc = lsctl(ep->buffer);
 }
 
 static int smp_ctl_ssctl_stop(int cpu)
@@ -214,17 +175,6 @@ static int smp_ctl_qsi(int cpu)
 	return ep.rc;
 }
 
-static inline unsigned long *trailer_entry_ptr(unsigned long v)
-{
-	void *ret;
-
-	ret = (void *)v;
-	ret += PAGE_SIZE;
-	ret -= sizeof(struct hws_trailer_entry);
-
-	return (unsigned long *) ret;
-}
-
 static void hws_ext_handler(struct ext_code ext_code,
 			    unsigned int param32, unsigned long param64)
 {
@@ -233,6 +183,9 @@ static void hws_ext_handler(struct ext_code ext_code,
 	if (!(param32 & CPU_MF_INT_SF_MASK))
 		return;
 
+	if (!hws_alert)
+		return;
+
 	inc_irq_stat(IRQEXT_CMS);
 	atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32);
 
@@ -256,16 +209,6 @@ static void init_all_cpu_buffers(void)
 	}
 }
 
-static int is_link_entry(unsigned long *s)
-{
-	return *s & 0x1ul ? 1 : 0;
-}
-
-static unsigned long *get_next_sdbt(unsigned long *s)
-{
-	return (unsigned long *) (*s & ~0x1ul);
-}
-
 static int prepare_cpu_buffers(void)
 {
 	int cpu;
@@ -353,7 +296,7 @@ static int allocate_sdbt(int cpu)
 			}
 			*sdbt = sdb;
 			trailer = trailer_entry_ptr(*sdbt);
-			*trailer = ALERT_REQ_MASK;
+			*trailer = SDB_TE_ALERT_REQ_MASK;
 			sdbt++;
 			mutex_unlock(&hws_sem_oom);
 		}
@@ -829,7 +772,7 @@ static void worker_on_interrupt(unsigned int cpu)
 
 		trailer = trailer_entry_ptr(*sdbt);
 		/* leave loop if no more work to do */
-		if (!(*trailer & BUFFER_FULL_MASK)) {
+		if (!(*trailer & SDB_TE_BUFFER_FULL_MASK)) {
 			done = 1;
 			if (!hws_flush_all)
 				continue;
@@ -856,7 +799,7 @@ static void worker_on_interrupt(unsigned int cpu)
 static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
 		unsigned long *dear)
 {
-	struct hws_data_entry *sample_data_ptr;
+	struct hws_basic_entry *sample_data_ptr;
 	unsigned long *trailer;
 
 	trailer = trailer_entry_ptr(*sdbt);
@@ -866,7 +809,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
 		trailer = dear;
 	}
 
-	sample_data_ptr = (struct hws_data_entry *)(*sdbt);
+	sample_data_ptr = (struct hws_basic_entry *)(*sdbt);
 
 	while ((unsigned long *)sample_data_ptr < trailer) {
 		struct pt_regs *regs = NULL;
@@ -1002,6 +945,7 @@ int hwsampler_deallocate(void)
 		goto deallocate_exit;
 
 	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+	hws_alert = 0;
 	deallocate_sdbt();
 
 	hws_state = HWS_DEALLOCATED;
@@ -1116,6 +1060,7 @@ int hwsampler_shutdown(void)
 
 		if (hws_state == HWS_STOPPED) {
 			irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+			hws_alert = 0;
 			deallocate_sdbt();
 		}
 		if (hws_wq) {
@@ -1190,6 +1135,7 @@ start_all_exit:
 	hws_oom = 1;
 	hws_flush_all = 0;
 	/* now let them in, 1407 CPUMF external interrupts */
+	hws_alert = 1;
 	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
 
 	return 0;
diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h
index 0022e1ebfbde..a483d06f2fa7 100644
--- a/arch/s390/oprofile/hwsampler.h
+++ b/arch/s390/oprofile/hwsampler.h
@@ -9,27 +9,7 @@
 #define HWSAMPLER_H_
 
 #include <linux/workqueue.h>
-
-struct hws_qsi_info_block          /* QUERY SAMPLING information block  */
-{ /* Bit(s) */
-	unsigned int b0_13:14;      /* 0-13: zeros                       */
-	unsigned int as:1;          /* 14: sampling authorisation control*/
-	unsigned int b15_21:7;      /* 15-21: zeros                      */
-	unsigned int es:1;          /* 22: sampling enable control       */
-	unsigned int b23_29:7;      /* 23-29: zeros                      */
-	unsigned int cs:1;          /* 30: sampling activation control   */
-	unsigned int:1;             /* 31: reserved                      */
-	unsigned int bsdes:16;      /* 4-5: size of sampling entry       */
-	unsigned int:16;            /* 6-7: reserved                     */
-	unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
-	unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
-	unsigned long tear;         /* 24-31: TEAR contents              */
-	unsigned long dear;         /* 32-39: DEAR contents              */
-	unsigned int rsvrd0;        /* 40-43: reserved                   */
-	unsigned int cpu_speed;     /* 44-47: CPU speed                  */
-	unsigned long long rsvrd1;  /* 48-55: reserved                   */
-	unsigned long long rsvrd2;  /* 56-63: reserved                   */
-};
+#include <asm/cpu_mf.h>
 
 struct hws_ssctl_request_block     /* SET SAMPLING CONTROLS req block   */
 { /* bytes 0 - 7  Bit(s) */
@@ -68,36 +48,6 @@ struct hws_cpu_buffer {
 	unsigned int stop_mode:1;
 };
 
-struct hws_data_entry {
-	unsigned int def:16;        /* 0-15  Data Entry Format           */
-	unsigned int R:4;           /* 16-19 reserved                    */
-	unsigned int U:4;           /* 20-23 Number of unique instruct.  */
-	unsigned int z:2;           /* zeros                             */
-	unsigned int T:1;           /* 26 PSW DAT mode                   */
-	unsigned int W:1;           /* 27 PSW wait state                 */
-	unsigned int P:1;           /* 28 PSW Problem state              */
-	unsigned int AS:2;          /* 29-30 PSW address-space control   */
-	unsigned int I:1;           /* 31 entry valid or invalid         */
-	unsigned int:16;
-	unsigned int prim_asn:16;   /* primary ASN                       */
-	unsigned long long ia;      /* Instruction Address               */
-	unsigned long long gpp;     /* Guest Program Parameter		 */
-	unsigned long long hpp;     /* Host Program Parameter		 */
-};
-
-struct hws_trailer_entry {
-	unsigned int f:1;           /* 0 - Block Full Indicator          */
-	unsigned int a:1;           /* 1 - Alert request control         */
-	unsigned long:62;           /* 2 - 63: Reserved                  */
-	unsigned long overflow;     /* 64 - sample Overflow count        */
-	unsigned long timestamp;    /* 16 - time-stamp                   */
-	unsigned long timestamp1;   /*                                   */
-	unsigned long reserved1;    /* 32 -Reserved                      */
-	unsigned long reserved2;    /*                                   */
-	unsigned long progusage1;   /* 48 - reserved for programming use */
-	unsigned long progusage2;   /*                                   */
-};
-
 int hwsampler_setup(void);
 int hwsampler_shutdown(void);
 int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 04e1b6a85362..9ffe645d5989 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/oprofile.h>
+#include <linux/perf_event.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
@@ -67,6 +68,21 @@ module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
 MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
 		           "(report cpu_type \"timer\"");
 
+static int __oprofile_hwsampler_start(void)
+{
+	int retval;
+
+	retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+	if (retval)
+		return retval;
+
+	retval = hwsampler_start_all(oprofile_hw_interval);
+	if (retval)
+		hwsampler_deallocate();
+
+	return retval;
+}
+
 static int oprofile_hwsampler_start(void)
 {
 	int retval;
@@ -76,13 +92,13 @@ static int oprofile_hwsampler_start(void)
 	if (!hwsampler_running)
 		return timer_ops.start();
 
-	retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+	retval = perf_reserve_sampling();
 	if (retval)
 		return retval;
 
-	retval = hwsampler_start_all(oprofile_hw_interval);
+	retval = __oprofile_hwsampler_start();
 	if (retval)
-		hwsampler_deallocate();
+		perf_release_sampling();
 
 	return retval;
 }
@@ -96,6 +112,7 @@ static void oprofile_hwsampler_stop(void)
 
 	hwsampler_stop_all();
 	hwsampler_deallocate();
+	perf_release_sampling();
 	return;
 }
 
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index bf7c73d71eef..66670ff262a0 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -407,8 +407,8 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 	struct msi_msg msg;
 	int rc;
 
-	if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
-		return -EINVAL;
+	if (type == PCI_CAP_ID_MSI && nvec > 1)
+		return 1;
 	msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX);
 	msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI);
 
@@ -919,17 +919,23 @@ static void zpci_mem_exit(void)
 	kmem_cache_destroy(zdev_fmb_cache);
 }
 
-static unsigned int s390_pci_probe;
+static unsigned int s390_pci_probe = 1;
+static unsigned int s390_pci_initialized;
 
 char * __init pcibios_setup(char *str)
 {
-	if (!strcmp(str, "on")) {
-		s390_pci_probe = 1;
+	if (!strcmp(str, "off")) {
+		s390_pci_probe = 0;
 		return NULL;
 	}
 	return str;
 }
 
+bool zpci_is_enabled(void)
+{
+	return s390_pci_initialized;
+}
+
 static int __init pci_base_init(void)
 {
 	int rc;
@@ -961,6 +967,7 @@ static int __init pci_base_init(void)
 	if (rc)
 		goto out_find;
 
+	s390_pci_initialized = 1;
 	return 0;
 
 out_find:
@@ -978,5 +985,6 @@ subsys_initcall_sync(pci_base_init);
 
 void zpci_rescan(void)
 {
-	clp_rescan_pci_devices_simple();
+	if (zpci_is_enabled())
+		clp_rescan_pci_devices_simple();
 }
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 9b83d080902d..60c11a629d96 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -285,7 +285,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 		flags |= ZPCI_TABLE_PROTECTED;
 
 	if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) {
-		atomic64_add(nr_pages, (atomic64_t *) &zdev->fmb->mapped_pages);
+		atomic64_add(nr_pages, &zdev->fmb->mapped_pages);
 		return dma_addr + (offset & ~PAGE_MASK);
 	}
 
@@ -313,7 +313,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
 		zpci_err_hex(&dma_addr, sizeof(dma_addr));
 	}
 
-	atomic64_add(npages, (atomic64_t *) &zdev->fmb->unmapped_pages);
+	atomic64_add(npages, &zdev->fmb->unmapped_pages);
 	iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
 	dma_free_iommu(zdev, iommu_page_index, npages);
 }
@@ -332,7 +332,6 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
 	if (!page)
 		return NULL;
 
-	atomic64_add(size / PAGE_SIZE, (atomic64_t *) &zdev->fmb->allocated_pages);
 	pa = page_to_phys(page);
 	memset((void *) pa, 0, size);
 
@@ -343,6 +342,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
 		return NULL;
 	}
 
+	atomic64_add(size / PAGE_SIZE, &zdev->fmb->allocated_pages);
 	if (dma_handle)
 		*dma_handle = map;
 	return (void *) pa;
@@ -352,8 +352,11 @@ static void s390_dma_free(struct device *dev, size_t size,
 			  void *pa, dma_addr_t dma_handle,
 			  struct dma_attrs *attrs)
 {
-	s390_dma_unmap_pages(dev, dma_handle, PAGE_ALIGN(size),
-			     DMA_BIDIRECTIONAL, NULL);
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+
+	size = PAGE_ALIGN(size);
+	atomic64_sub(size / PAGE_SIZE, &zdev->fmb->allocated_pages);
+	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
 	free_pages((unsigned long) pa, get_order(size));
 }
 
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 800f064b0da7..01e251b1da0c 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -43,9 +43,8 @@ struct zpci_ccdf_avail {
 	u16 pec;			/* PCI event code */
 } __packed;
 
-void zpci_event_error(void *data)
+static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 {
-	struct zpci_ccdf_err *ccdf = data;
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
 
 	zpci_err("error CCDF:\n");
@@ -58,9 +57,14 @@ void zpci_event_error(void *data)
 	       pci_name(zdev->pdev), ccdf->pec, ccdf->fid);
 }
 
-void zpci_event_availability(void *data)
+void zpci_event_error(void *data)
+{
+	if (zpci_is_enabled())
+		__zpci_event_error(data);
+}
+
+static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 {
-	struct zpci_ccdf_avail *ccdf = data;
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
 	struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
 	int ret;
@@ -75,6 +79,7 @@ void zpci_event_availability(void *data)
 		if (!zdev || zdev->state == ZPCI_FN_STATE_CONFIGURED)
 			break;
 		zdev->state = ZPCI_FN_STATE_CONFIGURED;
+		zdev->fh = ccdf->fh;
 		ret = zpci_enable_device(zdev);
 		if (ret)
 			break;
@@ -98,9 +103,14 @@ void zpci_event_availability(void *data)
 
 		break;
 	case 0x0304: /* Configured -> Standby */
-		if (pdev)
+		if (pdev) {
+			/* Give the driver a hint that the function is
+			 * already unusable. */
+			pdev->error_state = pci_channel_io_perm_failure;
 			pci_stop_and_remove_bus_device(pdev);
+		}
 
+		zdev->fh = ccdf->fh;
 		zpci_disable_device(zdev);
 		zdev->state = ZPCI_FN_STATE_STANDBY;
 		break;
@@ -108,6 +118,8 @@ void zpci_event_availability(void *data)
 		clp_rescan_pci_devices();
 		break;
 	case 0x0308: /* Standby -> Reserved */
+		if (!zdev)
+			break;
 		pci_stop_root_bus(zdev->bus);
 		pci_remove_root_bus(zdev->bus);
 		break;
@@ -115,3 +127,9 @@ void zpci_event_availability(void *data)
 		break;
 	}
 }
+
+void zpci_event_availability(void *data)
+{
+	if (zpci_is_enabled())
+		__zpci_event_availability(data);
+}
author	Jiri Kosina	2014-02-20 14:54:28 +0100
committer	Jiri Kosina	2014-02-20 14:54:28 +0100
commit	d4263348f796f29546f90802177865dd4379dd0a (patch)
tree	adcbdaebae584eee2f32fab95e826e8e49eef385 /arch/s390
parent	be873ac782f5ff5ee6675f83929f4fe6737eead2 (diff)
parent	6d0abeca3242a88cab8232e4acd7e2bf088f3bc2 (diff)