From dea24190fbfb340dea5224e32161955514bef31b Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Tue, 3 Dec 2013 10:06:29 +0100
Subject: s390/smp: only send external call ipi if needed

If the per cpu ec_mask bit of the receiving cpu is already set there is
no need to send an ipi, since a different cpu has already sent an ipi
and the receiving cpu has not yet executed the external call ipi handler.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/smp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index dc4a53465060..86b291323c62 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -159,9 +159,9 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
 {
 	int order;
 
-	set_bit(ec_bit, &pcpu->ec_mask);
-	order = pcpu_running(pcpu) ?
-		SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
+	if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
+		return;
+	order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
 	pcpu_sigp_retry(pcpu, order, 0);
 }
 
-- 
cgit v1.2.3


From 1c182a628075af7431edb8155601740867b5ae51 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky
Date: Tue, 3 Dec 2013 11:09:10 +0100
Subject: s390/ptrace: simplify enable/disable single step

The user_enable_single_step() and user_disable_single_step() functions
are always called on the inferior, never for the currently active
process. Remove the unnecessary check for the current process and
the update_cr_regs() call from the enable/disable functions.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/ptrace.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index e65c91c591e8..c369a26d1d56 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -107,15 +107,11 @@ void update_cr_regs(struct task_struct *task)
 void user_enable_single_step(struct task_struct *task)
 {
 	set_tsk_thread_flag(task, TIF_SINGLE_STEP);
-	if (task == current)
-		update_cr_regs(task);
 }
 
 void user_disable_single_step(struct task_struct *task)
 {
 	clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
-	if (task == current)
-		update_cr_regs(task);
 }
 
 /*
-- 
cgit v1.2.3


From c63badebfebacdba827ab1cc1d420fc81bd8d818 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky
Date: Tue, 3 Dec 2013 14:57:18 +0100
Subject: s390: optimize control register update

It is less expensive to update control registers 0 and 2 with two
individual stctg/lctlg instructions than with a single one that spans
control registers 0, 1 and 2.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/ptrace.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index c369a26d1d56..f6be6087a0e9 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -56,25 +56,26 @@ void update_cr_regs(struct task_struct *task)
 #ifdef CONFIG_64BIT
 	/* Take care of the enable/disable of transactional execution. */
 	if (MACHINE_HAS_TE) {
-		unsigned long cr[3], cr_new[3];
+		unsigned long cr, cr_new;
 
-		__ctl_store(cr, 0, 2);
-		cr_new[1] = cr[1];
+		__ctl_store(cr, 0, 0);
 		/* Set or clear transaction execution TXC bit 8. */
+		cr_new = cr | (1UL << 55);
 		if (task->thread.per_flags & PER_FLAG_NO_TE)
-			cr_new[0] = cr[0] & ~(1UL << 55);
-		else
-			cr_new[0] = cr[0] | (1UL << 55);
+			cr_new &= ~(1UL << 55);
+		if (cr_new != cr)
+			__ctl_load(cr_new, 0, 0);
 		/* Set or clear transaction execution TDC bits 62 and 63. */
-		cr_new[2] = cr[2] & ~3UL;
+		__ctl_store(cr, 2, 2);
+		cr_new = cr & ~3UL;
 		if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
 			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
-				cr_new[2] |= 1UL;
+				cr_new |= 1UL;
 			else
-				cr_new[2] |= 2UL;
+				cr_new |= 2UL;
 		}
-		if (memcmp(&cr_new, &cr, sizeof(cr)))
-			__ctl_load(cr_new, 0, 2);
+		if (cr_new != cr)
+			__ctl_load(cr_new, 2, 2);
 	}
 #endif
 	/* Copy user specified PER registers */
-- 
cgit v1.2.3


From 96619fc1b3d06703113ab4c5cd8c15f35a42dc99 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Thu, 5 Dec 2013 12:42:09 +0100
Subject: s390/smp: reduce memory consumption of pcpu_devices array

Remove the embedded struct cpu from struct pcpu and replace it with a
pointer instead. The struct cpu now gets allocated when a new cpu gets
detected.

The size of the pcpu_devices array (NR_CPUS * sizeof(struct pcpu)) gets
reduced by nearly 120KB.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/smp.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 86b291323c62..8ceefc949e8f 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -59,7 +59,7 @@ enum {
 };
 
 struct pcpu {
-	struct cpu cpu;
+	struct cpu *cpu;
 	struct _lowcore *lowcore;	/* lowcore page(s) for the cpu */
 	unsigned long async_stack;	/* async stack for the cpu */
 	unsigned long panic_stack;	/* panic stack for the cpu */
@@ -958,7 +958,7 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
 			  void *hcpu)
 {
 	unsigned int cpu = (unsigned int)(long)hcpu;
-	struct cpu *c = &pcpu_devices[cpu].cpu;
+	struct cpu *c = pcpu_devices[cpu].cpu;
 	struct device *s = &c->dev;
 	int err = 0;
 
@@ -975,10 +975,15 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
 
 static int smp_add_present_cpu(int cpu)
 {
-	struct cpu *c = &pcpu_devices[cpu].cpu;
-	struct device *s = &c->dev;
+	struct device *s;
+	struct cpu *c;
 	int rc;
 
+	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	if (!c)
+		return -ENOMEM;
+	pcpu_devices[cpu].cpu = c;
+	s = &c->dev;
 	c->hotpluggable = 1;
 	rc = register_cpu(c, cpu);
 	if (rc)
-- 
cgit v1.2.3


From 41932bc1c86e527f866acfcd26506da3bd20509b Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky
Date: Tue, 10 Dec 2013 16:18:07 +0100
Subject: s390/compat: correct check for EFAULT in rt-signal frame creation

The return code of the __put_user call that stores the rt_sigreturn
system call to the user stack is not properly checked; the err
variable is only checked before the __put_user call. Use an if
statement instead.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/compat_signal.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 95e7ba0fbb7e..8b84bc373e94 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -412,8 +412,9 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
 		regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
 	} else {
 		regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE;
-		err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
-				  (u16 __force __user *)(frame->retcode));
+		if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
+			       (u16 __force __user *)(frame->retcode)))
+			goto give_sigsegv;
 	}
 
 	/* Set up backchain. */
-- 
cgit v1.2.3


From 52733e0152dad719ed6374b56fd1c33e784e44b3 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Thu, 5 Dec 2013 19:28:39 +0100
Subject: s390/sclp_early: Add function to detect sclp console capabilities

Add SCLP console detect functions to encapsulate detection of SCLP console
capabilities, for example, VT220 support.  Reuse the sclp_send/receive masks
that were stored by the most recent sclp_set_event_mask() call to prevent
unnecessary SCLP calls.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/sclp.h   |  4 ++--
 drivers/s390/char/sclp_early.c | 46 +++++++++++++++++++++++++++---------------
 2 files changed, 32 insertions(+), 18 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 2f390956c7c1..220e171413f8 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -52,8 +52,8 @@ int sclp_chp_configure(struct chp_id chpid);
 int sclp_chp_deconfigure(struct chp_id chpid);
 int sclp_chp_read_info(struct sclp_chp_info *info);
 void sclp_get_ipl_info(struct sclp_ipl_info *info);
-bool sclp_has_linemode(void);
-bool sclp_has_vt220(void);
+bool __init sclp_has_linemode(void);
+bool __init sclp_has_vt220(void);
 int sclp_pci_configure(u32 fid);
 int sclp_pci_deconfigure(u32 fid);
 int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index 1af3555c096d..82f2c389b4d1 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -36,6 +36,8 @@ struct read_info_sccb {
 } __packed __aligned(PAGE_SIZE);
 
 static char sccb_early[PAGE_SIZE] __aligned(PAGE_SIZE) __initdata;
+static unsigned int sclp_con_has_vt220 __initdata;
+static unsigned int sclp_con_has_linemode __initdata;
 static unsigned long sclp_hsa_size;
 static struct sclp_ipl_info sclp_ipl_info;
 
@@ -109,26 +111,12 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
 
 bool __init sclp_has_linemode(void)
 {
-	struct init_sccb *sccb = (void *) &sccb_early;
-
-	if (sccb->header.response_code != 0x20)
-		return 0;
-	if (!(sccb->sclp_send_mask & (EVTYP_OPCMD_MASK | EVTYP_PMSGCMD_MASK)))
-		return 0;
-	if (!(sccb->sclp_receive_mask & (EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK)))
-		return 0;
-	return 1;
+	return !!sclp_con_has_linemode;
 }
 
 bool __init sclp_has_vt220(void)
 {
-	struct init_sccb *sccb = (void *) &sccb_early;
-
-	if (sccb->header.response_code != 0x20)
-		return 0;
-	if (sccb->sclp_send_mask & EVTYP_VT220MSG_MASK)
-		return 1;
-	return 0;
+	return !!sclp_con_has_vt220;
 }
 
 unsigned long long sclp_get_rnmax(void)
@@ -240,11 +228,37 @@ out:
 	sclp_hsa_size = size;
 }
 
+static unsigned int __init sclp_con_check_linemode(struct init_sccb *sccb)
+{
+	if (!(sccb->sclp_send_mask & (EVTYP_OPCMD_MASK | EVTYP_PMSGCMD_MASK)))
+		return 0;
+	if (!(sccb->sclp_receive_mask & (EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK)))
+		return 0;
+	return 1;
+}
+
+static void __init sclp_console_detect(struct init_sccb *sccb)
+{
+	if (sccb->header.response_code != 0x20)
+		return;
+
+	if (sccb->sclp_send_mask & EVTYP_VT220MSG_MASK)
+		sclp_con_has_vt220 = 1;
+
+	if (sclp_con_check_linemode(sccb))
+		sclp_con_has_linemode = 1;
+}
+
 void __init sclp_early_detect(void)
 {
 	void *sccb = &sccb_early;
 
 	sclp_facilities_detect(sccb);
 	sclp_hsa_size_detect(sccb);
+
+	/* Turn off SCLP event notifications.  Also save remote masks in the
+	 * sccb.  These are sufficient to detect sclp console capabilities.
+	 */
 	sclp_set_event_mask(sccb, 0, 0);
+	sclp_console_detect(sccb);
 }
-- 
cgit v1.2.3


From cf48ad83278aad39d4a0158cd085b6038b2941e6 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Wed, 11 Dec 2013 12:15:52 +0100
Subject: s390/oprofile: move hwsampler interfaces to cpu_mf.h

Extract and move the oprofile hwsampler data structures and interfaces to
the cpu_mf.h header file which contains common interface definitions
for the various CPU-measurement facilities.   This change is necessary for
a new perf PMU.

A few interface names have been revised to match the latest CPU-measurement
facilities documentation.  Also declare the data structures as __packed and
correct checkpatch findings.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cpu_mf.h | 138 +++++++++++++++++++++++++++++++++++++++++
 arch/s390/oprofile/hwsampler.c |  67 +-------------------
 arch/s390/oprofile/hwsampler.h |  52 +---------------
 3 files changed, 142 insertions(+), 115 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index c879fad404c8..f6dddeaad965 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -56,6 +56,78 @@ struct cpumf_ctr_info {
 	u32   reserved2[12];
 } __packed;
 
+/* QUERY SAMPLING INFORMATION block */
+struct hws_qsi_info_block {	    /* Bit(s) */
+	unsigned int b0_13:14;	    /* 0-13: zeros			 */
+	unsigned int as:1;	    /* 14: sampling authorisation control*/
+	unsigned int b15_21:7;	    /* 15-21: zeros			 */
+	unsigned int es:1;	    /* 22: sampling enable control	 */
+	unsigned int b23_29:7;	    /* 23-29: zeros			 */
+	unsigned int cs:1;	    /* 30: sampling activation control	 */
+	unsigned int:1; 	    /* 31: reserved			 */
+	unsigned int bsdes:16;	    /* 4-5: size of basic sampling entry      */
+	unsigned int dsdes:16;	    /* 6-7: size of diagnostic sampling entry */
+	unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
+	unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
+	unsigned long tear;	    /* 24-31: TEAR contents		 */
+	unsigned long dear;	    /* 32-39: DEAR contents		 */
+	unsigned int rsvrd0;	    /* 40-43: reserved			 */
+	unsigned int cpu_speed;     /* 44-47: CPU speed 		 */
+	unsigned long long rsvrd1;  /* 48-55: reserved			 */
+	unsigned long long rsvrd2;  /* 56-63: reserved			 */
+} __packed;
+
+/* SET SAMPLING CONTROLS request block */
+struct hws_lsctl_request_block {
+	unsigned int s:1;	    /* 0: maximum buffer indicator	 */
+	unsigned int h:1;	    /* 1: part. level reserved for VM use*/
+	unsigned long long b2_53:52;/* 2-53: zeros			 */
+	unsigned int es:1;	    /* 54: sampling enable control	 */
+	unsigned int b55_61:7;	    /* 55-61: - zeros			 */
+	unsigned int cs:1;	    /* 62: sampling activation control	 */
+	unsigned int b63:1;	    /* 63: zero 			 */
+	unsigned long interval;     /* 8-15: sampling interval		 */
+	unsigned long tear;	    /* 16-23: TEAR contents		 */
+	unsigned long dear;	    /* 24-31: DEAR contents		 */
+	/* 32-63:							 */
+	unsigned long rsvrd1;	    /* reserved 			 */
+	unsigned long rsvrd2;	    /* reserved 			 */
+	unsigned long rsvrd3;	    /* reserved 			 */
+	unsigned long rsvrd4;	    /* reserved 			 */
+} __packed;
+
+
+struct hws_data_entry {
+	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
+	unsigned int R:4;	    /* 16-19 reserved			 */
+	unsigned int U:4;	    /* 20-23 Number of unique instruct.  */
+	unsigned int z:2;	    /* zeros				 */
+	unsigned int T:1;	    /* 26 PSW DAT mode			 */
+	unsigned int W:1;	    /* 27 PSW wait state		 */
+	unsigned int P:1;	    /* 28 PSW Problem state		 */
+	unsigned int AS:2;	    /* 29-30 PSW address-space control	 */
+	unsigned int I:1;	    /* 31 entry valid or invalid	 */
+	unsigned int:16;
+	unsigned int prim_asn:16;   /* primary ASN			 */
+	unsigned long long ia;	    /* Instruction Address		 */
+	unsigned long long gpp;     /* Guest Program Parameter		 */
+	unsigned long long hpp;     /* Host Program Parameter		 */
+} __packed;
+
+struct hws_trailer_entry {
+	unsigned int f:1;	    /* 0 - Block Full Indicator 	 */
+	unsigned int a:1;	    /* 1 - Alert request control	 */
+	unsigned int t:1;	    /* 2 - Timestamp format		 */
+	unsigned long long:61;	    /* 3 - 63: Reserved 		 */
+	unsigned long long overflow;	 /* 64 - sample Overflow count	      */
+	unsigned long long timestamp;	 /* 16 - time-stamp		      */
+	unsigned long long timestamp1;	 /*				      */
+	unsigned long long reserved1;	 /* 32 -Reserved		      */
+	unsigned long long reserved2;	 /*				      */
+	unsigned long long progusage1;	 /* 48 - reserved for programming use */
+	unsigned long long progusage2;	 /*				      */
+} __packed;
+
 /* Query counter information */
 static inline int qctri(struct cpumf_ctr_info *info)
 {
@@ -99,4 +171,70 @@ static inline int ecctr(u64 ctr, u64 *val)
 	return cc;
 }
 
+/* Query sampling information; returns 0 on success, -EINVAL on failure */
+static inline int qsi(struct hws_qsi_info_block *info)
+{
+	/* Preset to non-zero: the fixup path skips the lhi that clears cc. */
+	int cc = 1;
+
+	asm volatile(
+		"0:	.insn	s,0xb2860000,0(%1)\n"
+		"1:	lhi	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "+d" (cc), "+a" (info)
+		: "m" (*info)
+		: "cc", "memory");
+
+	return cc ? -EINVAL : 0;
+}
+
+/* Load sampling controls */
+static inline int lsctl(struct hws_lsctl_request_block *req)
+{
+	int cc;
+
+	cc = 1;
+	asm volatile(
+		"0:	.insn	s,0xb2870000,0(%1)\n"
+		"1:	ipm	%0\n"
+		"	srl	%0,28\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "+d" (cc), "+a" (req)
+		: "m" (*req)
+		: "cc", "memory");
+
+	return cc ? -EINVAL : 0;
+}
+
+/* Sampling control helper functions */
+
+#define SDB_TE_ALERT_REQ_MASK	0x4000000000000000UL
+#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
+
+/* Return pointer to trailer entry of a sample data block */
+static inline unsigned long *trailer_entry_ptr(unsigned long v)
+{
+	void *ret;
+
+	ret = (void *) v;
+	ret += PAGE_SIZE;
+	ret -= sizeof(struct hws_trailer_entry);
+
+	return (unsigned long *) ret;
+}
+
+/* Return if the entry in the sample data block table (sdbt)
+ * is a link to the next sdbt */
+static inline int is_link_entry(unsigned long *s)
+{
+	return *s & 0x1ul ? 1 : 0;
+}
+
+/* Return pointer to the linked sdbt */
+static inline unsigned long *get_next_sdbt(unsigned long *s)
+{
+	return (unsigned long *) (*s & ~0x1ul);
+}
 #endif /* _ASM_S390_CPU_MF_H */
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index 231cecafc2f1..bbca76ad6e1b 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -26,9 +26,6 @@
 #define MAX_NUM_SDB 511
 #define MIN_NUM_SDB 1
 
-#define ALERT_REQ_MASK   0x4000000000000000ul
-#define BUFFER_FULL_MASK 0x8000000000000000ul
-
 DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer);
 
 struct hws_execute_parms {
@@ -65,43 +62,6 @@ static unsigned long interval;
 static unsigned long min_sampler_rate;
 static unsigned long max_sampler_rate;
 
-static int ssctl(void *buffer)
-{
-	int cc;
-
-	/* set in order to detect a program check */
-	cc = 1;
-
-	asm volatile(
-		"0: .insn s,0xB2870000,0(%1)\n"
-		"1: ipm %0\n"
-		"   srl %0,28\n"
-		"2:\n"
-		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
-		: "+d" (cc), "+a" (buffer)
-		: "m" (*((struct hws_ssctl_request_block *)buffer))
-		: "cc", "memory");
-
-	return cc ? -EINVAL : 0 ;
-}
-
-static int qsi(void *buffer)
-{
-	int cc;
-	cc = 1;
-
-	asm volatile(
-		"0: .insn s,0xB2860000,0(%1)\n"
-		"1: lhi %0,0\n"
-		"2:\n"
-		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
-		: "=d" (cc), "+a" (buffer)
-		: "m" (*((struct hws_qsi_info_block *)buffer))
-		: "cc", "memory");
-
-	return cc ? -EINVAL : 0;
-}
-
 static void execute_qsi(void *parms)
 {
 	struct hws_execute_parms *ep = parms;
@@ -113,7 +73,7 @@ static void execute_ssctl(void *parms)
 {
 	struct hws_execute_parms *ep = parms;
 
-	ep->rc = ssctl(ep->buffer);
+	ep->rc = lsctl(ep->buffer);
 }
 
 static int smp_ctl_ssctl_stop(int cpu)
@@ -214,17 +174,6 @@ static int smp_ctl_qsi(int cpu)
 	return ep.rc;
 }
 
-static inline unsigned long *trailer_entry_ptr(unsigned long v)
-{
-	void *ret;
-
-	ret = (void *)v;
-	ret += PAGE_SIZE;
-	ret -= sizeof(struct hws_trailer_entry);
-
-	return (unsigned long *) ret;
-}
-
 static void hws_ext_handler(struct ext_code ext_code,
 			    unsigned int param32, unsigned long param64)
 {
@@ -256,16 +205,6 @@ static void init_all_cpu_buffers(void)
 	}
 }
 
-static int is_link_entry(unsigned long *s)
-{
-	return *s & 0x1ul ? 1 : 0;
-}
-
-static unsigned long *get_next_sdbt(unsigned long *s)
-{
-	return (unsigned long *) (*s & ~0x1ul);
-}
-
 static int prepare_cpu_buffers(void)
 {
 	int cpu;
@@ -353,7 +292,7 @@ static int allocate_sdbt(int cpu)
 			}
 			*sdbt = sdb;
 			trailer = trailer_entry_ptr(*sdbt);
-			*trailer = ALERT_REQ_MASK;
+			*trailer = SDB_TE_ALERT_REQ_MASK;
 			sdbt++;
 			mutex_unlock(&hws_sem_oom);
 		}
@@ -829,7 +768,7 @@ static void worker_on_interrupt(unsigned int cpu)
 
 		trailer = trailer_entry_ptr(*sdbt);
 		/* leave loop if no more work to do */
-		if (!(*trailer & BUFFER_FULL_MASK)) {
+		if (!(*trailer & SDB_TE_BUFFER_FULL_MASK)) {
 			done = 1;
 			if (!hws_flush_all)
 				continue;
diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h
index 0022e1ebfbde..a483d06f2fa7 100644
--- a/arch/s390/oprofile/hwsampler.h
+++ b/arch/s390/oprofile/hwsampler.h
@@ -9,27 +9,7 @@
 #define HWSAMPLER_H_
 
 #include <linux/workqueue.h>
-
-struct hws_qsi_info_block          /* QUERY SAMPLING information block  */
-{ /* Bit(s) */
-	unsigned int b0_13:14;      /* 0-13: zeros                       */
-	unsigned int as:1;          /* 14: sampling authorisation control*/
-	unsigned int b15_21:7;      /* 15-21: zeros                      */
-	unsigned int es:1;          /* 22: sampling enable control       */
-	unsigned int b23_29:7;      /* 23-29: zeros                      */
-	unsigned int cs:1;          /* 30: sampling activation control   */
-	unsigned int:1;             /* 31: reserved                      */
-	unsigned int bsdes:16;      /* 4-5: size of sampling entry       */
-	unsigned int:16;            /* 6-7: reserved                     */
-	unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
-	unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
-	unsigned long tear;         /* 24-31: TEAR contents              */
-	unsigned long dear;         /* 32-39: DEAR contents              */
-	unsigned int rsvrd0;        /* 40-43: reserved                   */
-	unsigned int cpu_speed;     /* 44-47: CPU speed                  */
-	unsigned long long rsvrd1;  /* 48-55: reserved                   */
-	unsigned long long rsvrd2;  /* 56-63: reserved                   */
-};
+#include <asm/cpu_mf.h>
 
 struct hws_ssctl_request_block     /* SET SAMPLING CONTROLS req block   */
 { /* bytes 0 - 7  Bit(s) */
@@ -68,36 +48,6 @@ struct hws_cpu_buffer {
 	unsigned int stop_mode:1;
 };
 
-struct hws_data_entry {
-	unsigned int def:16;        /* 0-15  Data Entry Format           */
-	unsigned int R:4;           /* 16-19 reserved                    */
-	unsigned int U:4;           /* 20-23 Number of unique instruct.  */
-	unsigned int z:2;           /* zeros                             */
-	unsigned int T:1;           /* 26 PSW DAT mode                   */
-	unsigned int W:1;           /* 27 PSW wait state                 */
-	unsigned int P:1;           /* 28 PSW Problem state              */
-	unsigned int AS:2;          /* 29-30 PSW address-space control   */
-	unsigned int I:1;           /* 31 entry valid or invalid         */
-	unsigned int:16;
-	unsigned int prim_asn:16;   /* primary ASN                       */
-	unsigned long long ia;      /* Instruction Address               */
-	unsigned long long gpp;     /* Guest Program Parameter		 */
-	unsigned long long hpp;     /* Host Program Parameter		 */
-};
-
-struct hws_trailer_entry {
-	unsigned int f:1;           /* 0 - Block Full Indicator          */
-	unsigned int a:1;           /* 1 - Alert request control         */
-	unsigned long:62;           /* 2 - 63: Reserved                  */
-	unsigned long overflow;     /* 64 - sample Overflow count        */
-	unsigned long timestamp;    /* 16 - time-stamp                   */
-	unsigned long timestamp1;   /*                                   */
-	unsigned long reserved1;    /* 32 -Reserved                      */
-	unsigned long reserved2;    /*                                   */
-	unsigned long progusage1;   /* 48 - reserved for programming use */
-	unsigned long progusage2;   /*                                   */
-};
-
 int hwsampler_setup(void);
 int hwsampler_shutdown(void);
 int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
-- 
cgit v1.2.3


From c716832513f30430179b60ac5ffd203c53f7eb40 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Wed, 11 Dec 2013 12:44:40 +0100
Subject: s390/cpum_cf: Export event names in sysfs

Provide PMU event attributes for supported counters and export their symbolic
names to the sysfs "events" directory.

See the /sys/devices/cpum_cf/events/ directory for a list of available counters.
Note that you might require counter set authorizations for the LPAR to use them.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/perf_event.h     |  23 ++-
 arch/s390/kernel/Makefile              |   3 +-
 arch/s390/kernel/perf_cpum_cf.c        |   1 +
 arch/s390/kernel/perf_cpum_cf_events.c | 322 +++++++++++++++++++++++++++++++++
 arch/s390/kernel/perf_event.c          |  12 ++
 5 files changed, 358 insertions(+), 3 deletions(-)
 create mode 100644 arch/s390/kernel/perf_cpum_cf_events.c

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 1141fb3e7b21..34185020ae0a 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -1,11 +1,18 @@
 /*
  * Performance event support - s390 specific definitions.
  *
- * Copyright IBM Corp. 2009, 2012
+ * Copyright IBM Corp. 2009, 2013
  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  *	      Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  */
 
+#ifndef _ASM_S390_PERF_EVENT_H
+#define _ASM_S390_PERF_EVENT_H
+
+#ifdef CONFIG_64BIT
+
+#include <linux/perf_event.h>
+#include <linux/device.h>
 #include <asm/cpu_mf.h>
 
 /* CPU-measurement counter facility */
@@ -15,7 +22,18 @@
 #define PMU_F_RESERVED			0x1000
 #define PMU_F_ENABLED			0x2000
 
-#ifdef CONFIG_64BIT
+/* Perf definitions for PMU event attributes in sysfs */
+extern __init const struct attribute_group **cpumf_cf_event_group(void);
+extern ssize_t cpumf_events_sysfs_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *page);
+#define EVENT_VAR(_cat, _name)		event_attr_##_cat##_##_name
+#define EVENT_PTR(_cat, _name)		(&EVENT_VAR(_cat, _name).attr.attr)
+
+#define CPUMF_EVENT_ATTR(cat, name, id)			\
+	PMU_EVENT_ATTR(name, EVENT_VAR(cat, name), id, cpumf_events_sysfs_show)
+#define CPUMF_EVENT_PTR(cat, name)	EVENT_PTR(cat, name)
+
 
 /* Perf callbacks */
 struct pt_regs;
@@ -24,3 +42,4 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs) perf_misc_flags(regs)
 
 #endif /* CONFIG_64BIT */
+#endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 2403303cfed7..9f1e2adbd77e 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -60,7 +60,8 @@ obj-$(CONFIG_FTRACE_SYSCALLS)  += ftrace.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 
 ifdef CONFIG_64BIT
-obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf.o
+obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf.o \
+						perf_cpum_cf_events.o
 obj-y				+= runtime_instr.o cache.o
 endif
 
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 1105502bf6e9..f51214c04858 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -680,6 +680,7 @@ static int __init cpumf_pmu_init(void)
 		goto out;
 	}
 
+	cpumf_pmu.attr_groups = cpumf_cf_event_group();
 	rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
 	if (rc) {
 		pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
new file mode 100644
index 000000000000..4554a4bae39e
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -0,0 +1,322 @@
+/*
+ * Perf PMU sysfs events attributes for available CPU-measurement counters
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+
+
+/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */
+
+CPUMF_EVENT_ATTR(cf, CPU_CYCLES, 0x0000);
+CPUMF_EVENT_ATTR(cf, INSTRUCTIONS, 0x0001);
+CPUMF_EVENT_ATTR(cf, L1I_DIR_WRITES, 0x0002);
+CPUMF_EVENT_ATTR(cf, L1I_PENALTY_CYCLES, 0x0003);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_CPU_CYCLES, 0x0020);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_INSTRUCTIONS, 0x0021);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025);
+CPUMF_EVENT_ATTR(cf, L1D_DIR_WRITES, 0x0004);
+CPUMF_EVENT_ATTR(cf, L1D_PENALTY_CYCLES, 0x0005);
+CPUMF_EVENT_ATTR(cf, PRNG_FUNCTIONS, 0x0040);
+CPUMF_EVENT_ATTR(cf, PRNG_CYCLES, 0x0041);
+CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_FUNCTIONS, 0x0042);
+CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_CYCLES, 0x0043);
+CPUMF_EVENT_ATTR(cf, SHA_FUNCTIONS, 0x0044);
+CPUMF_EVENT_ATTR(cf, SHA_CYCLES, 0x0045);
+CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_FUNCTIONS, 0x0046);
+CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_CYCLES, 0x0047);
+CPUMF_EVENT_ATTR(cf, DEA_FUNCTIONS, 0x0048);
+CPUMF_EVENT_ATTR(cf, DEA_CYCLES, 0x0049);
+CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_FUNCTIONS, 0x004a);
+CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_CYCLES, 0x004b);
+CPUMF_EVENT_ATTR(cf, AES_FUNCTIONS, 0x004c);
+CPUMF_EVENT_ATTR(cf, AES_CYCLES, 0x004d);
+CPUMF_EVENT_ATTR(cf, AES_BLOCKED_FUNCTIONS, 0x004e);
+CPUMF_EVENT_ATTR(cf, AES_BLOCKED_CYCLES, 0x004f);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3);
+
+static struct attribute *cpumcf_pmu_event_attr[] = {
+	CPUMF_EVENT_PTR(cf, CPU_CYCLES),
+	CPUMF_EVENT_PTR(cf, INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf, L1I_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_CPU_CYCLES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf, L1D_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf, L1D_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf, PRNG_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, PRNG_CYCLES),
+	CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf, SHA_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, SHA_CYCLES),
+	CPUMF_EVENT_PTR(cf, SHA_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, SHA_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf, DEA_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, DEA_CYCLES),
+	CPUMF_EVENT_PTR(cf, DEA_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, DEA_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf, AES_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, AES_CYCLES),
+	CPUMF_EVENT_PTR(cf, AES_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, AES_BLOCKED_CYCLES),
+	NULL,
+};
+
+static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES),
+	CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT),
+	NULL,
+};
+
+static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT),
+	CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES),
+	NULL,
+};
+
+static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT),
+	CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL),
+	CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL),
+	NULL,
+};
+
+/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
+
+static struct attribute_group cpumsf_pmu_events_group = {
+	.name = "events",
+	.attrs = cpumcf_pmu_event_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumsf_pmu_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group cpumsf_pmu_format_group = {
+	.name = "format",
+	.attrs = cpumsf_pmu_format_attr,
+};
+
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+	&cpumsf_pmu_events_group,
+	&cpumsf_pmu_format_group,
+	NULL,
+};
+
+
+static __init struct attribute **merge_attr(struct attribute **a,
+					    struct attribute **b)
+{
+	struct attribute **new;
+	int j, i;
+
+	for (j = 0; a[j]; j++)
+		;
+	for (i = 0; b[i]; i++)
+		j++;
+	j++;
+
+	new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+	if (!new)
+		return NULL;
+	j = 0;
+	for (i = 0; a[i]; i++)
+		new[j++] = a[i];
+	for (i = 0; b[i]; i++)
+		new[j++] = b[i];
+	new[j] = NULL;
+
+	return new;
+}
+
+__init const struct attribute_group **cpumf_cf_event_group(void)
+{
+	struct attribute **combined, **model;
+	struct cpuid cpu_id;
+
+	get_cpu_id(&cpu_id);
+	switch (cpu_id.machine) {
+	case 0x2097:
+	case 0x2098:
+		model = cpumcf_z10_pmu_event_attr;
+		break;
+	case 0x2817:
+	case 0x2818:
+		model = cpumcf_z196_pmu_event_attr;
+		break;
+	case 0x2827:
+	case 0x2828:
+		model = cpumcf_zec12_pmu_event_attr;
+		break;
+	default:
+		model = NULL;
+		break;
+	};
+
+	if (!model)
+		goto out;
+
+	combined = merge_attr(cpumcf_pmu_event_attr, model);
+	if (combined)
+		cpumsf_pmu_events_group.attrs = combined;
+out:
+	return cpumsf_pmu_attr_groups;
+}
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 2343c218b8f9..4c1d336ce941 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -16,6 +16,7 @@
 #include <linux/kvm_host.h>
 #include <linux/percpu.h>
 #include <linux/export.h>
+#include <linux/sysfs.h>
 #include <asm/irq.h>
 #include <asm/cpu_mf.h>
 #include <asm/lowcore.h>
@@ -172,3 +173,14 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
 	__store_trace(entry, head, S390_lowcore.thread_info,
 		      S390_lowcore.thread_info + THREAD_SIZE);
 }
+
+/* Perf defintions for PMU event attributes in sysfs */
+ssize_t cpumf_events_sysfs_show(struct device *dev,
+				struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sprintf(page, "event=0x%04llx,name=%s\n",
+		       pmu_attr->id, attr->attr.name);
+}
-- 
cgit v1.2.3


From 8c069ff4bd6063a3f15e606c882e03f75c7e7711 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Thu, 12 Dec 2013 16:32:47 +0100
Subject: s390/perf: add support for the CPU-Measurement Sampling Facility

Introduce a perf PMU, "cpum_sf", to support the CPU-Measurement
Sampling Facility.  You can control the sampling facility through
the interfaces of this perf PMU.  Perf sampling events are created
for hardware samples.

For details about the CPU-Measurement Sampling Facility, see
"The Load-Program-Parameter and the CPU-Measurement Facilities" (SA23-2260).

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cpu_mf.h     |   14 +
 arch/s390/include/asm/perf_event.h |   17 +-
 arch/s390/kernel/Makefile          |    2 +-
 arch/s390/kernel/perf_cpum_sf.c    | 1024 ++++++++++++++++++++++++++++++++++++
 arch/s390/kernel/perf_event.c      |   42 +-
 5 files changed, 1086 insertions(+), 13 deletions(-)
 create mode 100644 arch/s390/kernel/perf_cpum_sf.c

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index f6dddeaad965..d707abc26157 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -210,6 +210,20 @@ static inline int lsctl(struct hws_lsctl_request_block *req)
 
 /* Sampling control helper functions */
 
+#include <linux/time.h>
+
+static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
+						unsigned long freq)
+{
+	return (USEC_PER_SEC / freq) * qsi->cpu_speed;
+}
+
+static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
+						unsigned long rate)
+{
+	return USEC_PER_SEC * qsi->cpu_speed / rate;
+}
+
 #define SDB_TE_ALERT_REQ_MASK	0x4000000000000000UL
 #define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
 
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 34185020ae0a..b4eea25f379e 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -15,12 +15,13 @@
 #include <linux/device.h>
 #include <asm/cpu_mf.h>
 
-/* CPU-measurement counter facility */
-#define PERF_CPUM_CF_MAX_CTR		256
-
 /* Per-CPU flags for PMU states */
 #define PMU_F_RESERVED			0x1000
 #define PMU_F_ENABLED			0x2000
+#define PMU_F_IN_USE			0x4000
+#define PMU_F_ERR_IBE			0x0100
+#define PMU_F_ERR_LSDA			0x0200
+#define PMU_F_ERR_MASK			(PMU_F_ERR_IBE|PMU_F_ERR_LSDA)
 
 /* Perf defintions for PMU event attributes in sysfs */
 extern __init const struct attribute_group **cpumf_cf_event_group(void);
@@ -41,5 +42,15 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs) perf_misc_flags(regs)
 
+/* Perf PMU definitions for the counter facility */
+#define PERF_CPUM_CF_MAX_CTR		256
+
+/* Perf PMU definitions for the sampling facility */
+#define PERF_CPUM_SF_MAX_CTR		1
+#define PERF_EVENT_CPUM_SF		0xB0000UL	/* Raw event ID */
+
+#define TEAR_REG(hwc)		((hwc)->last_tag)
+#define SAMPL_RATE(hwc)		((hwc)->event_base)
+
 #endif /* CONFIG_64BIT */
 #endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 9f1e2adbd77e..1b3ac09c11b6 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -60,7 +60,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS)  += ftrace.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 
 ifdef CONFIG_64BIT
-obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf.o \
+obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf.o perf_cpum_sf.o \
 						perf_cpum_cf_events.o
 obj-y				+= runtime_instr.o cache.o
 endif
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
new file mode 100644
index 000000000000..141eca0917f4
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -0,0 +1,1024 @@
+/*
+ * Performance event support for the System z CPU-measurement Sampling Facility
+ *
+ * Copyright IBM Corp. 2013
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#define KMSG_COMPONENT	"cpum_sf"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+#include <asm/debug.h>
+#include <asm/timex.h>
+
+/* Minimum number of sample-data-block-tables:
+ * At least one table is required for the sampling buffer structure.
+ * A single table contains up to 511 pointers to sample-data-blocks.
+ */
+#define CPUM_SF_MIN_SDBT    1
+
+/* Minimum number of sample-data-blocks:
+ * The minimum designates a single page for sample-data-block, i.e.,
+ * up to 126 sample-data-blocks with a size of 32 bytes (bsdes).
+ */
+#define CPUM_SF_MIN_SDB	    126
+
+/* Maximum number of sample-data-blocks:
+ * The maximum number designates approx. 256K per CPU including
+ * the given number of sample-data-blocks and taking the number
+ * of sample-data-block tables into account.
+ *
+ * Later, this number can be increased for extending the sampling
+ * buffer, for example, by factor 2 (512K) or 4 (1M).
+ */
+#define CPUM_SF_MAX_SDB	    6471
+
+struct sf_buffer {
+	unsigned long	 sdbt;	    /* Sample-data-block-table origin */
+	/* buffer characteristics (required for buffer increments) */
+	unsigned long num_sdb;	    /* Number of sample-data-blocks */
+	unsigned long	 tail;	    /* last sample-data-block-table */
+};
+
+struct cpu_hw_sf {
+	/* CPU-measurement sampling information block */
+	struct hws_qsi_info_block qsi;
+	struct hws_lsctl_request_block lsctl;
+	struct sf_buffer sfb;	    /* Sampling buffer */
+	unsigned int flags;	    /* Status flags */
+	struct perf_event *event;   /* Scheduled perf event */
+};
+static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
+
+/* Debug feature */
+static debug_info_t *sfdbg;
+
+/*
+ * sf_buffer_available() - Check for an allocated sampling buffer
+ */
+static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
+{
+	return (cpuhw->sfb.sdbt) ? 1 : 0;
+}
+
+/*
+ * free_sampling_buffer() - free all SDB and SDBT pages and reset @sfb
+ */
+static void free_sampling_buffer(struct sf_buffer *sfb)
+{
+	unsigned long sdbt, *curr;
+
+	if (!sfb->sdbt)
+		return;
+
+	sdbt = sfb->sdbt;
+	curr = (unsigned long *) sdbt;
+
+	/* Each SDBT page is freed after all of its SDBs are processed */
+	while (1) {
+		if (!*curr) {
+			/* Empty entry: the chain was never closed (failed
+			 * allocation), so no link entry frees this table.
+			 */
+			free_page(sdbt);
+			break;
+		}
+
+		if (is_link_entry(curr)) {
+			/* Link entry: step to the next table, free this one */
+			curr = get_next_sdbt(curr);
+			free_page(sdbt);
+			/* we are done if we reach the origin */
+			if ((unsigned long) curr == sfb->sdbt)
+				break;
+			sdbt = (unsigned long) curr;
+		} else {
+			/* SDB pointer: free the sample-data-block page */
+			free_page(*curr);
+			curr++;
+		}
+	}
+
+	debug_sprintf_event(sfdbg, 5,
+			    "free_sampling_buffer: freed sdbt=%0lx\n", sfb->sdbt);
+	memset(sfb, 0, sizeof(*sfb));
+}
+
+/*
+ * alloc_sampling_buffer() - allocate sampler memory
+ *
+ * Allocates and initializes a sampling buffer structure using the
+ * specified number of sample-data-blocks (SDB).  For each allocation,
+ * a 4K page is used.  The number of sample-data-block-tables (SDBT)
+ * is calculated from the number of SDBs, rounded up to whole tables.
+ * Also set the ALERT_REQ mask in each SDBs trailer.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
+{
+	int j, k, rc;
+	unsigned long *sdbt, *tail, *trailer;
+	unsigned long sdb;
+	unsigned long num_sdbt, sdb_per_table;
+
+	if (sfb->sdbt)
+		return -EINVAL;
+	sfb->num_sdb = 0;
+
+	/* Number of SDBTs, rounded up so that all num_sdb SDBs get a slot */
+	sdb_per_table = (PAGE_SIZE - 8) / 8;
+	num_sdbt = DIV_ROUND_UP(num_sdb, sdb_per_table);
+	if (num_sdbt < CPUM_SF_MIN_SDBT)
+		num_sdbt = CPUM_SF_MIN_SDBT;
+
+	debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: num_sdbt=%lu "
+			    "num_sdb=%lu sdb_per_table=%lu\n",
+			    num_sdbt, num_sdb, sdb_per_table);
+	sdbt = NULL;
+	tail = sdbt;
+
+	for (j = 0; j < num_sdbt; j++) {
+		sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+		if (!sdbt) {
+			rc = -ENOMEM;
+			goto allocate_sdbt_error;
+		}
+
+		/* save origin of sample-data-block-table */
+		if (!sfb->sdbt)
+			sfb->sdbt = (unsigned long) sdbt;
+
+		/* link current page to tail of chain */
+		if (tail)
+			*tail = (unsigned long)(void *) sdbt + 1;
+
+		for (k = 0; k < num_sdb && k < sdb_per_table; k++) {
+			/* get and set SDB page */
+			sdb = get_zeroed_page(GFP_KERNEL);
+			if (!sdb) {
+				rc = -ENOMEM;
+				goto allocate_sdbt_error;
+			}
+			*sdbt = sdb;
+			trailer = trailer_entry_ptr(*sdbt);
+			*trailer = SDB_TE_ALERT_REQ_MASK;
+			sdbt++;
+		}
+		num_sdb -= k;
+		sfb->num_sdb += k;	/* count allocated sdb's */
+		tail = sdbt;
+	}
+
+	rc = 0;
+	if (tail)
+		*tail = sfb->sdbt + 1;
+	sfb->tail = (unsigned long) (void *)tail;
+
+allocate_sdbt_error:
+	if (rc)
+		free_sampling_buffer(sfb);
+	else
+		debug_sprintf_event(sfdbg, 4,
+			"alloc_sampling_buffer: tear=%0lx dear=%0lx\n",
+			sfb->sdbt, *(unsigned long *) sfb->sdbt);
+	return rc;
+}
+
+static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hwc)
+{
+	unsigned long n_sdb, freq;
+	unsigned long factor;
+
+	/* Calculate sampling buffers using 4K pages
+	 *
+	 *    1. Use frequency as input.  The sampling buffer is designed for
+	 *	 a complete second.  This can be adjusted through the "factor"
+	 *	 variable.
+	 *	 In any case, alloc_sampling_buffer() sets the Alert Request
+	 *	 Control indicator to trigger measurement-alert to harvest
+	 *	 sample-data-blocks (sdb).
+	 *
+	 *    2. Compute the number of sample-data-blocks and ensure a minimum
+	 *	 of CPUM_SF_MIN_SDB.  Also ensure the upper limit does not
+	 *	 exceed CPUM_SF_MAX_SDB.  See also the remarks for these
+	 *	 symbolic constants.
+	 *
+	 *    3. Compute number of pages used for the sample-data-block-table
+	 *	 and ensure a minimum of CPUM_SF_MIN_SDBT (at minimum one table
+	 *	 to manage up to 511 sample-data-blocks).
+	 */
+	freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
+	factor = 1;
+	n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / cpuhw->qsi.bsdes));
+	if (n_sdb < CPUM_SF_MIN_SDB)
+		n_sdb = CPUM_SF_MIN_SDB;
+
+	/* Return if there is already a sampling buffer allocated.
+	 * XXX Remove this later and check number of available and
+	 * required sdb's and, if necessary, increase the sampling buffer.
+	 */
+	if (sf_buffer_available(cpuhw))
+		return 0;
+
+	debug_sprintf_event(sfdbg, 3,
+			    "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%i cpuhw=%p\n",
+			    SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw);
+
+	return alloc_sampling_buffer(&cpuhw->sfb,
+			       min_t(unsigned long, n_sdb, CPUM_SF_MAX_SDB));
+}
+
+
+/* Number of perf events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * sf_disable() - Switch off sampling facility
+ */
+static int sf_disable(void)
+{
+	struct hws_lsctl_request_block sreq;
+
+	memset(&sreq, 0, sizeof(sreq));
+	return lsctl(&sreq);
+}
+
+
+#define PMC_INIT      0
+#define PMC_RELEASE   1
+static void setup_pmc_cpu(void *flags)
+{
+	int err;
+	struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf);
+
+	/* XXX Improve error handling and pass a flag in the *flags
+	 *     variable to indicate failures.  Alternatively, ignore
+	 *     (print) errors here and let the PMU functions fail if
+	 *     the per-cpu PMU_F_RESERVED flag is not set.
+	 */
+	err = 0;
+	switch (*((int *) flags)) {
+	case PMC_INIT:
+		memset(cpusf, 0, sizeof(*cpusf));
+		err = qsi(&cpusf->qsi);
+		if (err)
+			break;
+		cpusf->flags |= PMU_F_RESERVED;
+		err = sf_disable();
+		if (err)
+			pr_err("Switching off the sampling facility failed "
+			       "with rc=%i\n", err);
+		debug_sprintf_event(sfdbg, 5,
+				    "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
+		break;
+	case PMC_RELEASE:
+		cpusf->flags &= ~PMU_F_RESERVED;
+		err = sf_disable();
+		if (err) {
+			pr_err("Switching off the sampling facility failed "
+			       "with rc=%i\n", err);
+		} else {
+			if (cpusf->sfb.sdbt)
+				free_sampling_buffer(&cpusf->sfb);
+		}
+		debug_sprintf_event(sfdbg, 5,
+				    "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
+		break;
+	}
+}
+
+static void release_pmc_hardware(void)
+{
+	int flags = PMC_RELEASE;
+
+	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+	on_each_cpu(setup_pmc_cpu, &flags, 1);
+}
+
+static int reserve_pmc_hardware(void)
+{
+	int flags = PMC_INIT;
+
+	on_each_cpu(setup_pmc_cpu, &flags, 1);
+	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+	return 0;
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	/* Release PMC if this is the last perf event */
+	if (!atomic_add_unless(&num_events, -1, 1)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_dec_return(&num_events) == 0)
+			release_pmc_hardware();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
+static void hw_init_period(struct hw_perf_event *hwc, u64 period)
+{
+	hwc->sample_period = period;
+	hwc->last_period = hwc->sample_period;
+	local64_set(&hwc->period_left, hwc->sample_period);
+}
+
+static void hw_reset_registers(struct hw_perf_event *hwc,
+			       unsigned long sdbt_origin)
+{
+	TEAR_REG(hwc) = sdbt_origin;	      /* (re)set to first sdb table */
+}
+
+static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
+				   unsigned long rate)
+{
+	if (rate < si->min_sampl_rate)
+		return si->min_sampl_rate;
+	if (rate > si->max_sampl_rate)
+		return si->max_sampl_rate;
+	return rate;
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	struct cpu_hw_sf *cpuhw;
+	struct hws_qsi_info_block si;
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned long rate;
+	int cpu, err;
+
+	/* Reserve CPU-measurement sampling facility */
+	err = 0;
+	if (!atomic_inc_not_zero(&num_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
+			err = -EBUSY;
+		else
+			atomic_inc(&num_events);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+	if (err)
+		goto out;
+
+	/* The reference is taken: from now on destroy may drop it again */
+	event->destroy = hw_perf_event_destroy;
+
+	/* Access per-CPU sampling information (query sampling info)
+	 * The event->cpu value can be -1 to count on every CPU, for example,
+	 * when attaching to a task.  If this is specified, use the query
+	 * sampling info from the current CPU, otherwise use event->cpu to
+	 * retrieve the per-CPU information.
+	 * Later, cpuhw indicates whether to allocate sampling buffers for a
+	 * particular CPU (cpuhw!=NULL) or each online CPU (cpuhw==NULL).
+	 */
+	memset(&si, 0, sizeof(si));
+	cpuhw = NULL;
+	if (event->cpu == -1)
+		qsi(&si);
+	else {
+		/* Event is pinned to a particular CPU, retrieve the per-CPU
+		 * sampling structure for accessing the CPU-specific QSI.
+		 */
+		cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+		si = cpuhw->qsi;
+	}
+
+	/* Check sampling facility authorization and, if not authorized,
+	 * fall back to other PMUs.  It is safe to check any CPU because
+	 * the authorization is identical for all configured CPUs.
+	 */
+	if (!si.as) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	/* The sampling information (si) contains the min/max sampling
+	 * intervals and the CPU speed.  So calculate the correct sampling
+	 * interval and avoid the whole period adjust feedback loop.  A zero
+	 * frequency is rejected because the conversion divides by it.
+	 */
+	if (attr->freq) {
+		if (!attr->sample_freq) {
+			err = -EINVAL;
+			goto out;
+		}
+		rate = freq_to_sample_rate(&si, attr->sample_freq);
+		rate = hw_limit_rate(&si, rate);
+		attr->freq = 0;
+		attr->sample_period = rate;
+	} else {
+		/* The min/max sampling rates specify the valid range of
+		 * sample periods; limit an out-of-range period to it.
+		 */
+		rate = hw_limit_rate(&si, hwc->sample_period);
+
+		/* The perf core maintains a maximum sample rate that is
+		 * configurable through the sysctl interface.  Ensure the
+		 * sampling rate does not exceed this value, which also helps
+		 * to avoid throttling in perf_event_overflow().
+		 */
+		if (sample_rate_to_freq(&si, rate) >
+		      sysctl_perf_event_sample_rate) {
+			err = -EINVAL;
+			debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
+			goto out;
+		}
+	}
+	SAMPL_RATE(hwc) = rate;
+	hw_init_period(hwc, SAMPL_RATE(hwc));
+
+	/* Allocate the per-CPU sampling buffer using the CPU information
+	 * from the event.  If the event is not pinned to a particular CPU
+	 * (cpuhw == NULL), allocate sampling buffers for each online CPU.
+	 */
+	if (cpuhw)
+		/* Event is pinned to a particular CPU */
+		err = allocate_sdbt(cpuhw, hwc);
+	else {
+		/* Event is not pinned, allocate sampling buffer on
+		 * each online CPU
+		 */
+		for_each_online_cpu(cpu) {
+			cpuhw = &per_cpu(cpu_hw_sf, cpu);
+			err = allocate_sdbt(cpuhw, hwc);
+			if (err)
+				break;
+		}
+	}
+out:
+	return err;
+}
+
+static int cpumsf_pmu_event_init(struct perf_event *event)
+{
+	int err;
+
+	if (event->attr.type != PERF_TYPE_RAW)
+		return -ENOENT;
+
+	if (event->attr.config != PERF_EVENT_CPUM_SF)
+		return -ENOENT;
+
+	if (event->cpu >= nr_cpumask_bits ||
+	    (event->cpu >= 0 && !cpu_online(event->cpu)))
+		return -ENODEV;
+
+	err = __hw_perf_event_init(event);
+	if (unlikely(err))
+		if (event->destroy)
+			event->destroy(event);
+	return err;
+}
+
+static void cpumsf_pmu_enable(struct pmu *pmu)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+	int err;
+
+	if (cpuhw->flags & PMU_F_ENABLED)
+		return;
+
+	if (cpuhw->flags & PMU_F_ERR_MASK)
+		return;
+
+	cpuhw->flags |= PMU_F_ENABLED;
+	barrier();
+
+	err = lsctl(&cpuhw->lsctl);
+	if (err) {
+		cpuhw->flags &= ~PMU_F_ENABLED;
+		pr_err("Loading sampling controls failed: op=%i err=%i\n",
+			1, err);
+		return;
+	}
+
+	debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i tear=%p dear=%p\n",
+			    cpuhw->lsctl.es, cpuhw->lsctl.cs,
+			    (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
+}
+
+static void cpumsf_pmu_disable(struct pmu *pmu)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+	struct hws_lsctl_request_block inactive;
+	struct hws_qsi_info_block si;
+	int err;
+
+	if (!(cpuhw->flags & PMU_F_ENABLED))
+		return;
+
+	if (cpuhw->flags & PMU_F_ERR_MASK)
+		return;
+
+	/* Switch off sampling activation control */
+	inactive = cpuhw->lsctl;
+	inactive.cs = 0;
+
+	err = lsctl(&inactive);
+	if (err) {
+		pr_err("Loading sampling controls failed: op=%i err=%i\n",
+			2, err);
+		return;
+	}
+
+	/* Save TEAR/DEAR; keep the qsi() result so a failure is logged */
+	err = qsi(&si);
+	if (!err) {
+		/* TEAR/DEAR values are valid only if the sampling facility is
+		 * enabled.  cpumsf_pmu_disable() might be called even for a
+		 * disabled sampling facility because cpumsf_pmu_enable()
+		 * controls the enable/disable state. */
+		if (si.es) {
+			cpuhw->lsctl.tear = si.tear;
+			cpuhw->lsctl.dear = si.dear;
+		}
+	} else
+		debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
+				    "qsi() failed with err=%i\n", err);
+
+	cpuhw->flags &= ~PMU_F_ENABLED;
+}
+
+/* perf_push_sample() - Push samples to perf
+ * @event:	The perf event
+ * @sample:	Hardware sample data
+ *
+ * Use the hardware sample data to create a perf event sample.  The sample
+ * is then pushed to the event subsystem and the function checks for
+ * possible event overflows.  If an event overflow occurs, the PMU is
+ * stopped.
+ *
+ * Return non-zero if an event overflow occurred.
+ */
+static int perf_push_sample(struct perf_event *event,
+			    struct hws_data_entry *sample)
+{
+	int overflow;
+	struct pt_regs regs;
+	struct perf_sample_data data;
+
+	/* Skip samples that are invalid or for which the instruction address
+	 * is not predictable.	For the latter, the wait-state bit is set.
+	 */
+	if (sample->I || sample->W)
+		return 0;
+
+	perf_sample_data_init(&data, 0, event->hw.last_period);
+
+	memset(&regs, 0, sizeof(regs));
+	regs.psw.addr = sample->ia;
+	if (sample->T)
+		regs.psw.mask |= PSW_MASK_DAT;
+	if (sample->W)
+		regs.psw.mask |= PSW_MASK_WAIT;
+	if (sample->P)
+		regs.psw.mask |= PSW_MASK_PSTATE;
+	switch (sample->AS) {
+	case 0x0:
+		regs.psw.mask |= PSW_ASC_PRIMARY;
+		break;
+	case 0x1:
+		regs.psw.mask |= PSW_ASC_ACCREG;
+		break;
+	case 0x2:
+		regs.psw.mask |= PSW_ASC_SECONDARY;
+		break;
+	case 0x3:
+		regs.psw.mask |= PSW_ASC_HOME;
+		break;
+	}
+
+	overflow = 0;
+	if (perf_event_overflow(event, &data, &regs)) {
+		overflow = 1;
+		event->pmu->stop(event, 0);
+		debug_sprintf_event(sfdbg, 4, "perf_push_sample: PMU stopped"
+				    " because of an event overflow\n");
+	}
+	perf_event_update_userpage(event);
+
+	return overflow;
+}
+
+static void perf_event_count_update(struct perf_event *event, u64 count)
+{
+	local64_add(count, &event->count);
+}
+
+/* hw_collect_samples() - Walk through a sample-data-block and collect samples
+ * @event:	The perf event
+ * @sdbt:	Sample-data-block table
+ * @overflow:	Event overflow counter
+ *
+ * Walks through a sample-data-block and collects hardware sample-data that is
+ * pushed to the perf event subsystem.  The overflow counter reports the
+ * number of samples that have been discarded due to an event overflow.
+ */
+static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
+			       unsigned long long *overflow)
+{
+	struct hws_data_entry *sample;
+	unsigned long *trailer;
+
+	trailer = trailer_entry_ptr(*sdbt);
+	sample = (struct hws_data_entry *) *sdbt;
+	while ((unsigned long *) sample < trailer) {
+		/* Check for an empty sample */
+		if (!sample->def)
+			break;
+
+		/* Update perf event period */
+		perf_event_count_update(event, SAMPL_RATE(&event->hw));
+
+		/* Check for basic sampling mode */
+		if (sample->def == 0x0001) {
+			/* If an event overflow occurred, the PMU is stopped to
+			 * throttle event delivery.  Remaining sample data is
+			 * discarded.
+			 */
+			if (!*overflow)
+				*overflow = perf_push_sample(event, sample);
+			else
+				/* Count discarded samples */
+				*overflow += 1;
+		} else
+			/* Sample slot is not yet written or other record */
+			debug_sprintf_event(sfdbg, 5, "hw_collect_samples: "
+					    "Unknown sample data entry format:"
+					    " %i\n", sample->def);
+
+		/* Reset sample slot and advance to next sample */
+		sample->def = 0;
+		sample++;
+	}
+}
+
+/* hw_perf_event_update() - Process sampling buffer
+ * @event:	The perf event
+ * @flush_all:	Flag to also flush partially filled sample-data-blocks
+ *
+ * Processes the sampling buffer and creates perf event samples.
+ * The sampling buffer position is retrieved from and saved in the TEAR_REG
+ * register of the specified perf event.
+ *
+ * Only full sample-data-blocks are processed.  Specify the flush_all flag
+ * to also walk through partially filled sample-data-blocks.
+ *
+ */
+static void hw_perf_event_update(struct perf_event *event, int flush_all)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hws_trailer_entry *te;
+	unsigned long *sdbt;
+	unsigned long long event_overflow, sampl_overflow;
+	int done;
+
+	sdbt = (unsigned long *) TEAR_REG(hwc);
+	done = event_overflow = sampl_overflow = 0;
+	while (!done) {
+		/* Get the trailer entry of the sample-data-block */
+		te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+
+		/* Leave loop if no more work to do (block full indicator) */
+		if (!te->f) {
+			done = 1;
+			if (!flush_all)
+				break;
+		}
+
+		/* Check sample overflow count */
+		if (te->overflow) {
+			/* Increment sample overflow counter */
+			sampl_overflow += te->overflow;
+
+			/* XXX: If an sample overflow occurs, increase the
+			 *	sampling buffer.  Set a "realloc" flag because
+			 *	the sampler must be re-enabled for changing
+			 *	the sample-data-block-table content.
+			 */
+		}
+
+		/* Timestamps are valid for full sample-data-blocks only */
+		debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
+				    "overflow=%llu timestamp=0x%llx\n",
+				    sdbt, te->overflow,
+				    (te->f) ? te->timestamp : 0ULL);
+
+		/* Collect all samples from a single sample-data-block and
+		 * flag if an (perf) event overflow happened.  If so, the PMU
+		 * is stopped and remaining samples will be discarded.
+		 */
+		hw_collect_samples(event, sdbt, &event_overflow);
+
+		/* Reset trailer */
+		xchg(&te->overflow, 0);
+		xchg((unsigned char *) te, 0x40);
+
+		/* Advance to next sample-data-block */
+		sdbt++;
+		if (is_link_entry(sdbt))
+			sdbt = get_next_sdbt(sdbt);
+
+		/* Update event hardware registers */
+		TEAR_REG(hwc) = (unsigned long) sdbt;
+
+		/* Stop processing sample-data if all samples of the current
+		 * sample-data-block were flushed even if it was not full.
+		 */
+		if (flush_all && done)
+			break;
+
+		/* If an event overflow happened, discard samples by
+		 * processing any remaining sample-data-blocks.
+		 */
+		if (event_overflow)
+			flush_all = 1;
+	}
+
+	if (sampl_overflow || event_overflow)
+		debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
+				    "overflow stats: sample=%llu event=%llu\n",
+				    sampl_overflow, event_overflow);
+}
+
+static void cpumsf_pmu_read(struct perf_event *event)
+{
+	/* Nothing to do ... updates are interrupt-driven */
+}
+
+/* Activate sampling control.
+ * Next call of pmu_enable() starts sampling.
+ */
+static void cpumsf_pmu_start(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+	perf_pmu_disable(event->pmu);
+	event->hw.state = 0;
+	cpuhw->lsctl.cs = 1;
+	perf_pmu_enable(event->pmu);
+}
+
+/* Deactivate sampling control.
+ * Next call of pmu_enable() stops sampling.
+ */
+static void cpumsf_pmu_stop(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	perf_pmu_disable(event->pmu);
+	cpuhw->lsctl.cs = 0;
+	event->hw.state |= PERF_HES_STOPPED;
+
+	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
+		hw_perf_event_update(event, 1);
+		event->hw.state |= PERF_HES_UPTODATE;
+	}
+	perf_pmu_enable(event->pmu);
+}
+
+static int cpumsf_pmu_add(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+	int err;
+
+	if (cpuhw->flags & PMU_F_IN_USE)
+		return -EAGAIN;
+
+	if (!cpuhw->sfb.sdbt)
+		return -EINVAL;
+
+	err = 0;
+	perf_pmu_disable(event->pmu);
+
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	/* Set up sampling controls.  Always program the sampling register
+	 * using the SDB-table start.  Reset TEAR_REG event hardware register
+	 * that is used by hw_perf_event_update() to store the sampling buffer
+	 * position after samples have been flushed.
+	 */
+	cpuhw->lsctl.s = 0;
+	cpuhw->lsctl.h = 1;
+	cpuhw->lsctl.tear = cpuhw->sfb.sdbt;
+	cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
+	cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
+	hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
+
+	/* Ensure sampling functions are in the disabled state.  If disabled,
+	 * switch on sampling enable control. */
+	if (WARN_ON_ONCE(cpuhw->lsctl.es == 1)) {
+		err = -EAGAIN;
+		goto out;
+	}
+	cpuhw->lsctl.es = 1;
+
+	/* Set in_use flag and store event */
+	event->hw.idx = 0;	  /* only one sampling event per CPU supported */
+	cpuhw->event = event;
+	cpuhw->flags |= PMU_F_IN_USE;
+
+	if (flags & PERF_EF_START)
+		cpumsf_pmu_start(event, PERF_EF_RELOAD);
+out:
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+	return err;
+}
+
+static void cpumsf_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+	perf_pmu_disable(event->pmu);
+	cpumsf_pmu_stop(event, PERF_EF_UPDATE);
+
+	cpuhw->lsctl.es = 0;
+	cpuhw->flags &= ~PMU_F_IN_USE;
+	cpuhw->event = NULL;
+
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+}
+
+static int cpumsf_pmu_event_idx(struct perf_event *event)
+{
+	return event->hw.idx;
+}
+
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
+
+static struct attribute *cpumsf_pmu_events_attr[] = {
+	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
+	NULL,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumsf_pmu_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group cpumsf_pmu_events_group = {
+	.name = "events",
+	.attrs = cpumsf_pmu_events_attr,
+};
+static struct attribute_group cpumsf_pmu_format_group = {
+	.name = "format",
+	.attrs = cpumsf_pmu_format_attr,
+};
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+	&cpumsf_pmu_events_group,
+	&cpumsf_pmu_format_group,
+	NULL,
+};
+
+static struct pmu cpumf_sampling = {
+	.pmu_enable   = cpumsf_pmu_enable,
+	.pmu_disable  = cpumsf_pmu_disable,
+
+	.event_init   = cpumsf_pmu_event_init,
+	.add	      = cpumsf_pmu_add,
+	.del	      = cpumsf_pmu_del,
+
+	.start	      = cpumsf_pmu_start,
+	.stop	      = cpumsf_pmu_stop,
+	.read	      = cpumsf_pmu_read,
+
+	.event_idx    = cpumsf_pmu_event_idx,
+	.attr_groups  = cpumsf_pmu_attr_groups,
+};
+
+static void cpumf_measurement_alert(struct ext_code ext_code,
+				    unsigned int alert, unsigned long unused)
+{
+	struct cpu_hw_sf *cpuhw;
+
+	if (!(alert & CPU_MF_INT_SF_MASK))
+		return;
+	inc_irq_stat(IRQEXT_CMS);
+	cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+	/* Measurement alerts are shared and might happen when the PMU
+	 * is not reserved.  Ignore these alerts in this case. */
+	if (!(cpuhw->flags & PMU_F_RESERVED))
+		return;
+
+	/* The processing below must take care of multiple alert events that
+	 * might be indicated concurrently. */
+
+	/* Program alert request */
+	if (alert & CPU_MF_INT_SF_PRA) {
+		if (cpuhw->flags & PMU_F_IN_USE)
+			hw_perf_event_update(cpuhw->event, 0);
+		else
+			WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
+	}
+
+	/* Report measurement alerts only for non-PRA codes */
+	if (alert != CPU_MF_INT_SF_PRA)
+		debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);
+
+	/* Sampling authorization change request */
+	if (alert & CPU_MF_INT_SF_SACA)
+		qsi(&cpuhw->qsi);
+
+	/* Loss of sample data due to high-priority machine activities */
+	if (alert & CPU_MF_INT_SF_LSDA) {
+		pr_err("Sample data was lost\n");
+		cpuhw->flags |= PMU_F_ERR_LSDA;
+		sf_disable();
+	}
+
+	/* Invalid sampling buffer entry */
+	if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
+		pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
+		       alert);
+		cpuhw->flags |= PMU_F_ERR_IBE;
+		sf_disable();
+	}
+}
+
+static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
+					unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long) hcpu;
+	int flags;
+
+	/* Ignore the notification if no events are scheduled on the PMU.
+	 * This might be racy...
+	 */
+	if (!atomic_read(&num_events))
+		return NOTIFY_OK;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		flags = PMC_INIT;
+		smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+		break;
+	case CPU_DOWN_PREPARE:
+		flags = PMC_RELEASE;
+		smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int __init init_cpum_sampling_pmu(void)
+{
+	int err;
+
+	if (!cpum_sf_avail())
+		return -ENODEV;
+
+	sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
+	if (!sfdbg)
+		pr_err("Registering for s390dbf failed\n");
+	debug_register_view(sfdbg, &debug_sprintf_view);
+
+	err = register_external_interrupt(0x1407, cpumf_measurement_alert);
+	if (err) {
+		pr_err("Failed to register for CPU-measurement alerts\n");
+		goto out;
+	}
+
+	err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
+	if (err) {
+		pr_err("Failed to register cpum_sf pmu\n");
+		unregister_external_interrupt(0x1407, cpumf_measurement_alert);
+		goto out;
+	}
+	perf_cpu_notifier(cpumf_pmu_notifier);
+out:
+	return err;
+}
+arch_initcall(init_cpum_sampling_pmu);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 4c1d336ce941..b9843ba9829f 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -16,6 +16,7 @@
 #include <linux/kvm_host.h>
 #include <linux/percpu.h>
 #include <linux/export.h>
+#include <linux/spinlock.h>
 #include <linux/sysfs.h>
 #include <asm/irq.h>
 #include <asm/cpu_mf.h>
@@ -36,6 +37,8 @@ int perf_num_counters(void)
 
 	if (cpum_cf_avail())
 		num += PERF_CPUM_CF_MAX_CTR;
+	if (cpum_sf_avail())
+		num += PERF_CPUM_SF_MAX_CTR;
 
 	return num;
 }
@@ -93,24 +96,45 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
 			       : PERF_RECORD_MISC_KERNEL;
 }
 
-void perf_event_print_debug(void)
+void print_debug_cf(void)
 {
 	struct cpumf_ctr_info cf_info;
-	unsigned long flags;
-	int cpu;
-
-	if (!cpum_cf_avail())
-		return;
-
-	local_irq_save(flags);
+	int cpu = smp_processor_id();
 
-	cpu = smp_processor_id();
 	memset(&cf_info, 0, sizeof(cf_info));
 	if (!qctri(&cf_info))
 		pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n",
 			cpu, cf_info.cfvn, cf_info.csvn,
 			cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl);
+}
+
+static void print_debug_sf(void)
+{
+	struct hws_qsi_info_block si;
+	int cpu = smp_processor_id();
+
+	memset(&si, 0, sizeof(si));
+	if (qsi(&si)) {
+		pr_err("CPU[%i]: CPM_SF: qsi failed\n", cpu);
+		return;
+	}
+
+	pr_info("CPU[%i]: CPM_SF: as=%i es=%i cs=%i bsdes=%i dsdes=%i"
+		" min=%lu max=%lu cpu_speed=%u tear=%p dear=%p\n",
+		cpu, si.as, si.es, si.cs, si.bsdes, si.dsdes,
+		si.min_sampl_rate, si.max_sampl_rate, si.cpu_speed,
+		(void *) si.tear, (void *) si.dear);
+}
+
+void perf_event_print_debug(void)
+{
+	unsigned long flags;
 
+	local_irq_save(flags);
+	if (cpum_cf_avail())
+		print_debug_cf();
+	if (cpum_sf_avail())
+		print_debug_sf();
 	local_irq_restore(flags);
 }
 
-- 
cgit v1.2.3


From 55baa2f831ae4a41da9617ab9e7cef5ebc991ec9 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Thu, 12 Dec 2013 16:47:00 +0100
Subject: s390/perf: Improve PMU selection for PERF_COUNT_HW_CPU_CYCLES events

The cpum_cf (counter facility) PMU does not support sampling events.
With cpum_sf (sampling facility), a PMU for sampling CPU cycles is
available.

Make cpum_sf the "default" PMU for PERF_COUNT_HW_CPU_CYCLES sampling
events but use the more precise cpum_cf PMU for non-sampling events.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 141eca0917f4..52bf36ee91aa 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -468,11 +468,29 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
 {
 	int err;
 
-	if (event->attr.type != PERF_TYPE_RAW)
-		return -ENOENT;
-
-	if (event->attr.config != PERF_EVENT_CPUM_SF)
+	/* No support for taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_RAW:
+		if (event->attr.config != PERF_EVENT_CPUM_SF)
+			return -ENOENT;
+		break;
+	case PERF_TYPE_HARDWARE:
+		/* Support sampling of CPU cycles in addition to the
+		 * counter facility.  However, the counter facility
+		 * is more precise and, hence, restrict this PMU to
+		 * sampling events only.
+		 */
+		if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
+			return -ENOENT;
+		if (!is_sampling_event(event))
+			return -ENOENT;
+		break;
+	default:
 		return -ENOENT;
+	}
 
 	if (event->cpu >= nr_cpumask_bits ||
 	    (event->cpu >= 0 && !cpu_online(event->cpu)))
-- 
cgit v1.2.3


From e28bb79d9935293a8eea5f3c771fde89db645ba7 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Thu, 12 Dec 2013 16:52:48 +0100
Subject: s390/perf,oprofile: Share sampling facility

Introduce reserve/release functions to share the sampling facility
between perf and oprofile.
Also improve error handling for the sampling facility support in perf.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/perf_event.h |  4 ++++
 arch/s390/kernel/perf_cpum_sf.c    | 17 ++++++++++++-----
 arch/s390/kernel/perf_event.c      | 30 ++++++++++++++++++++++++++++++
 arch/s390/oprofile/hwsampler.c     |  7 +++++++
 arch/s390/oprofile/init.c          | 23 ++++++++++++++++++++---
 5 files changed, 73 insertions(+), 8 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index b4eea25f379e..23d2dfa8201d 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -52,5 +52,9 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define TEAR_REG(hwc)		((hwc)->last_tag)
 #define SAMPL_RATE(hwc)		((hwc)->event_base)
 
+/* Perf hardware reserve and release functions */
+int perf_reserve_sampling(void);
+void perf_release_sampling(void);
+
 #endif /* CONFIG_64BIT */
 #endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 52bf36ee91aa..ae5e0192160d 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -260,16 +260,12 @@ static int sf_disable(void)
 
 #define PMC_INIT      0
 #define PMC_RELEASE   1
+#define PMC_FAILURE   2
 static void setup_pmc_cpu(void *flags)
 {
 	int err;
 	struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf);
 
-	/* XXX Improve error handling and pass a flag in the *flags
-	 *     variable to indicate failures.  Alternatively, ignore
-	 *     (print) errors here and let the PMU functions fail if
-	 *     the per-cpu PMU_F_RESERVED flag is not.
-	 */
 	err = 0;
 	switch (*((int *) flags)) {
 	case PMC_INIT:
@@ -299,6 +295,8 @@ static void setup_pmc_cpu(void *flags)
 				    "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
 		break;
 	}
+	if (err)
+		*((int *) flags) |= PMC_FAILURE;
 }
 
 static void release_pmc_hardware(void)
@@ -307,13 +305,22 @@ static void release_pmc_hardware(void)
 
 	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
 	on_each_cpu(setup_pmc_cpu, &flags, 1);
+	perf_release_sampling();
 }
 
 static int reserve_pmc_hardware(void)
 {
 	int flags = PMC_INIT;
+	int err;
 
+	err = perf_reserve_sampling();
+	if (err)
+		return err;
 	on_each_cpu(setup_pmc_cpu, &flags, 1);
+	if (flags & PMC_FAILURE) {
+		release_pmc_hardware();
+		return -ENODEV;
+	}
 	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
 
 	return 0;
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index b9843ba9829f..4edcdfa4894e 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -208,3 +208,33 @@ ssize_t cpumf_events_sysfs_show(struct device *dev,
 	return sprintf(page, "event=0x%04llx,name=%s\n",
 		       pmu_attr->id, attr->attr.name);
 }
+
+/* Reserve/release functions for sharing perf hardware */
+static DEFINE_SPINLOCK(perf_hw_owner_lock);
+static void *perf_sampling_owner;
+
+int perf_reserve_sampling(void)
+{
+	int err;
+
+	err = 0;
+	spin_lock(&perf_hw_owner_lock);
+	if (perf_sampling_owner) {
+		pr_warn("The sampling facility is already reserved by %p\n",
+			perf_sampling_owner);
+		err = -EBUSY;
+	} else
+		perf_sampling_owner = __builtin_return_address(0);
+	spin_unlock(&perf_hw_owner_lock);
+	return err;
+}
+EXPORT_SYMBOL(perf_reserve_sampling);
+
+void perf_release_sampling(void)
+{
+	spin_lock(&perf_hw_owner_lock);
+	WARN_ON(!perf_sampling_owner);
+	perf_sampling_owner = NULL;
+	spin_unlock(&perf_hw_owner_lock);
+}
+EXPORT_SYMBOL(perf_release_sampling);
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index bbca76ad6e1b..eb095874540d 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -41,6 +41,7 @@ static DEFINE_MUTEX(hws_sem_oom);
 
 static unsigned char hws_flush_all;
 static unsigned int hws_oom;
+static unsigned int hws_alert;
 static struct workqueue_struct *hws_wq;
 
 static unsigned int hws_state;
@@ -182,6 +183,9 @@ static void hws_ext_handler(struct ext_code ext_code,
 	if (!(param32 & CPU_MF_INT_SF_MASK))
 		return;
 
+	if (!hws_alert)
+		return;
+
 	inc_irq_stat(IRQEXT_CMS);
 	atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32);
 
@@ -941,6 +945,7 @@ int hwsampler_deallocate(void)
 		goto deallocate_exit;
 
 	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+	hws_alert = 0;
 	deallocate_sdbt();
 
 	hws_state = HWS_DEALLOCATED;
@@ -1055,6 +1060,7 @@ int hwsampler_shutdown(void)
 
 		if (hws_state == HWS_STOPPED) {
 			irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+			hws_alert = 0;
 			deallocate_sdbt();
 		}
 		if (hws_wq) {
@@ -1129,6 +1135,7 @@ start_all_exit:
 	hws_oom = 1;
 	hws_flush_all = 0;
 	/* now let them in, 1407 CPUMF external interrupts */
+	hws_alert = 1;
 	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
 
 	return 0;
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 04e1b6a85362..9ffe645d5989 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/oprofile.h>
+#include <linux/perf_event.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
@@ -67,6 +68,21 @@ module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
 MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
 		           "(report cpu_type \"timer\"");
 
+static int __oprofile_hwsampler_start(void)
+{
+	int retval;
+
+	retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+	if (retval)
+		return retval;
+
+	retval = hwsampler_start_all(oprofile_hw_interval);
+	if (retval)
+		hwsampler_deallocate();
+
+	return retval;
+}
+
 static int oprofile_hwsampler_start(void)
 {
 	int retval;
@@ -76,13 +92,13 @@ static int oprofile_hwsampler_start(void)
 	if (!hwsampler_running)
 		return timer_ops.start();
 
-	retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+	retval = perf_reserve_sampling();
 	if (retval)
 		return retval;
 
-	retval = hwsampler_start_all(oprofile_hw_interval);
+	retval = __oprofile_hwsampler_start();
 	if (retval)
-		hwsampler_deallocate();
+		perf_release_sampling();
 
 	return retval;
 }
@@ -96,6 +112,7 @@ static void oprofile_hwsampler_stop(void)
 
 	hwsampler_stop_all();
 	hwsampler_deallocate();
+	perf_release_sampling();
 	return;
 }
 
-- 
cgit v1.2.3


From 99c64b6679c41d8238b154c1a462724d7101765c Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Fri, 13 Dec 2013 12:38:39 +0100
Subject: s390/perf: Add service level information for CPU-Measurement
 Facilities

Register a service level handler to report information about available
CPU-Measurement facilities.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_event.c | 58 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 4edcdfa4894e..3bd2bf030ad4 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -16,17 +16,19 @@
 #include <linux/kvm_host.h>
 #include <linux/percpu.h>
 #include <linux/export.h>
+#include <linux/seq_file.h>
 #include <linux/spinlock.h>
 #include <linux/sysfs.h>
 #include <asm/irq.h>
 #include <asm/cpu_mf.h>
 #include <asm/lowcore.h>
 #include <asm/processor.h>
+#include <asm/sysinfo.h>
 
 const char *perf_pmu_name(void)
 {
 	if (cpum_cf_avail() || cpum_sf_avail())
-		return "CPU-measurement facilities (CPUMF)";
+		return "CPU-Measurement Facilities (CPU-MF)";
 	return "pmu";
 }
 EXPORT_SYMBOL(perf_pmu_name);
@@ -138,6 +140,60 @@ void perf_event_print_debug(void)
 	local_irq_restore(flags);
 }
 
+/* Service level infrastructure */
+static void sl_print_counter(struct seq_file *m)
+{
+	struct cpumf_ctr_info ci;
+
+	memset(&ci, 0, sizeof(ci));
+	if (qctri(&ci))
+		return;
+
+	seq_printf(m, "CPU-MF: Counter facility: version=%u.%u "
+		   "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl);
+}
+
+static void sl_print_sampling(struct seq_file *m)
+{
+	struct hws_qsi_info_block si;
+
+	memset(&si, 0, sizeof(si));
+	if (qsi(&si))
+		return;
+
+	if (!si.as && !si.ad)
+		return;
+
+	seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu"
+		   " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate,
+		   si.cpu_speed);
+	if (si.as)
+		seq_printf(m, "CPU-MF: Sampling facility: mode=basic"
+			   " sample_size=%u\n", si.bsdes);
+	if (si.ad)
+		seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic"
+			   " sample_size=%u\n", si.dsdes);
+}
+
+static void service_level_perf_print(struct seq_file *m,
+				     struct service_level *sl)
+{
+	if (cpum_cf_avail())
+		sl_print_counter(m);
+	if (cpum_sf_avail())
+		sl_print_sampling(m);
+}
+
+static struct service_level service_level_perf = {
+	.seq_print = service_level_perf_print,
+};
+
+static int __init service_level_perf_register(void)
+{
+	return register_service_level(&service_level_perf);
+}
+arch_initcall(service_level_perf_register);
+
 /* See also arch/s390/kernel/traps.c */
 static unsigned long __store_trace(struct perf_callchain_entry *entry,
 				   unsigned long sp,
-- 
cgit v1.2.3


From aa3b7c296732b4351dfdbfe70be6b38a0882be14 Mon Sep 17 00:00:00 2001
From: Sebastian Ott
Date: Thu, 12 Dec 2013 17:48:32 +0100
Subject: s390/pci: prevent inadvertently triggered bus scans

Initialization and scanning of the pci bus is omitted on older
machines without pci support or if pci=off was specified. Remember
the fact that we ran without pci support and prevent further bus
scans during resume from hibernate or after receiving hotplug
notifications.

Reported-by: Stefan Haberland <stefan.haberland@de.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pci.h |  1 +
 arch/s390/pci/pci.c         | 10 +++++++++-
 arch/s390/pci/pci_event.c   | 18 ++++++++++++++----
 3 files changed, 24 insertions(+), 5 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index c129ab2ac731..2583466f576b 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -144,6 +144,7 @@ int clp_disable_fh(struct zpci_dev *);
 void zpci_event_error(void *);
 void zpci_event_availability(void *);
 void zpci_rescan(void);
+bool zpci_is_enabled(void);
 #else /* CONFIG_PCI */
 static inline void zpci_event_error(void *e) {}
 static inline void zpci_event_availability(void *e) {}
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index bf7c73d71eef..e3265b50f3ae 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -920,6 +920,7 @@ static void zpci_mem_exit(void)
 }
 
 static unsigned int s390_pci_probe;
+static unsigned int s390_pci_initialized;
 
 char * __init pcibios_setup(char *str)
 {
@@ -930,6 +931,11 @@ char * __init pcibios_setup(char *str)
 	return str;
 }
 
+bool zpci_is_enabled(void)
+{
+	return s390_pci_initialized;
+}
+
 static int __init pci_base_init(void)
 {
 	int rc;
@@ -961,6 +967,7 @@ static int __init pci_base_init(void)
 	if (rc)
 		goto out_find;
 
+	s390_pci_initialized = 1;
 	return 0;
 
 out_find:
@@ -978,5 +985,6 @@ subsys_initcall_sync(pci_base_init);
 
 void zpci_rescan(void)
 {
-	clp_rescan_pci_devices_simple();
+	if (zpci_is_enabled())
+		clp_rescan_pci_devices_simple();
 }
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 800f064b0da7..228787a3630a 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -43,9 +43,8 @@ struct zpci_ccdf_avail {
 	u16 pec;			/* PCI event code */
 } __packed;
 
-void zpci_event_error(void *data)
+static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 {
-	struct zpci_ccdf_err *ccdf = data;
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
 
 	zpci_err("error CCDF:\n");
@@ -58,9 +57,14 @@ void zpci_event_error(void *data)
 	       pci_name(zdev->pdev), ccdf->pec, ccdf->fid);
 }
 
-void zpci_event_availability(void *data)
+void zpci_event_error(void *data)
+{
+	if (zpci_is_enabled())
+		__zpci_event_error(data);
+}
+
+static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 {
-	struct zpci_ccdf_avail *ccdf = data;
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
 	struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
 	int ret;
@@ -115,3 +119,9 @@ void zpci_event_availability(void *data)
 		break;
 	}
 }
+
+void zpci_event_availability(void *data)
+{
+	if (zpci_is_enabled())
+		__zpci_event_availability(data);
+}
-- 
cgit v1.2.3


From 704268925d32a0457202371a61580af76b94c53a Mon Sep 17 00:00:00 2001
From: Sebastian Ott
Date: Thu, 12 Dec 2013 17:50:53 +0100
Subject: s390/pci: fix removal of nonexistent pci bus

If we remove a pci bus after receiving a hotplug notification we need
to check if the bus is actually present (creation of the pci bus
during an earlier notification may have failed).

Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci_event.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/s390')

diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 228787a3630a..65ea105f68a1 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -112,6 +112,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 		clp_rescan_pci_devices();
 		break;
 	case 0x0308: /* Standby -> Reserved */
+		if (!zdev)
+			break;
 		pci_stop_root_bus(zdev->bus);
 		pci_remove_root_bus(zdev->bus);
 		break;
-- 
cgit v1.2.3


From 0c0c2776926018e7560b99e921467aea1115d03b Mon Sep 17 00:00:00 2001
From: Sebastian Ott
Date: Thu, 12 Dec 2013 17:53:11 +0100
Subject: s390/pci: set error state for unavailable functions

If we receive a notification that a pci function became unavailable we clean
up by removing the pci device. This can confuse the driver since the function
is already inaccessible. Improve this situation by setting an appropriate
error_state.

Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci_event.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch/s390')

diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 65ea105f68a1..7fc4c2c5708c 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -102,8 +102,12 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 
 		break;
 	case 0x0304: /* Configured -> Standby */
-		if (pdev)
+		if (pdev) {
+			/* Give the driver a hint that the function is
+			 * already unusable. */
+			pdev->error_state = pci_channel_io_perm_failure;
 			pci_stop_and_remove_bus_device(pdev);
+		}
 
 		zpci_disable_device(zdev);
 		zdev->state = ZPCI_FN_STATE_STANDBY;
-- 
cgit v1.2.3


From f7038b7c3f4924b18390c51c1ec1e49287cc87db Mon Sep 17 00:00:00 2001
From: Sebastian Ott
Date: Thu, 12 Dec 2013 17:53:59 +0100
Subject: s390/pci/dma: fix accounting of allocated_pages

allocated_pages is sometimes increased even if s390_dma_alloc fails.
Also, this value is never decreased even when s390_dma_free is called.
This patch fixes these bugs.
Also remove the atomic64_t casts (the members are already of this type).

Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci_dma.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 9b83d080902d..60c11a629d96 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -285,7 +285,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 		flags |= ZPCI_TABLE_PROTECTED;
 
 	if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) {
-		atomic64_add(nr_pages, (atomic64_t *) &zdev->fmb->mapped_pages);
+		atomic64_add(nr_pages, &zdev->fmb->mapped_pages);
 		return dma_addr + (offset & ~PAGE_MASK);
 	}
 
@@ -313,7 +313,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
 		zpci_err_hex(&dma_addr, sizeof(dma_addr));
 	}
 
-	atomic64_add(npages, (atomic64_t *) &zdev->fmb->unmapped_pages);
+	atomic64_add(npages, &zdev->fmb->unmapped_pages);
 	iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
 	dma_free_iommu(zdev, iommu_page_index, npages);
 }
@@ -332,7 +332,6 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
 	if (!page)
 		return NULL;
 
-	atomic64_add(size / PAGE_SIZE, (atomic64_t *) &zdev->fmb->allocated_pages);
 	pa = page_to_phys(page);
 	memset((void *) pa, 0, size);
 
@@ -343,6 +342,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
 		return NULL;
 	}
 
+	atomic64_add(size / PAGE_SIZE, &zdev->fmb->allocated_pages);
 	if (dma_handle)
 		*dma_handle = map;
 	return (void *) pa;
@@ -352,8 +352,11 @@ static void s390_dma_free(struct device *dev, size_t size,
 			  void *pa, dma_addr_t dma_handle,
 			  struct dma_attrs *attrs)
 {
-	s390_dma_unmap_pages(dev, dma_handle, PAGE_ALIGN(size),
-			     DMA_BIDIRECTIONAL, NULL);
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+
+	size = PAGE_ALIGN(size);
+	atomic64_sub(size / PAGE_SIZE, &zdev->fmb->allocated_pages);
+	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
 	free_pages((unsigned long) pa, get_order(size));
 }
 
-- 
cgit v1.2.3


From 257608fb4112b4cabefd9e33a4fc2df6b64dca6a Mon Sep 17 00:00:00 2001
From: Sebastian Ott
Date: Thu, 12 Dec 2013 17:55:22 +0100
Subject: s390/pci: reenable per default

HW, FW and Linux support is in better shape now - let's reenable
pci bus probing by default.

Acked-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index e3265b50f3ae..0820362c7b0f 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -919,13 +919,13 @@ static void zpci_mem_exit(void)
 	kmem_cache_destroy(zdev_fmb_cache);
 }
 
-static unsigned int s390_pci_probe;
+static unsigned int s390_pci_probe = 1;
 static unsigned int s390_pci_initialized;
 
 char * __init pcibios_setup(char *str)
 {
-	if (!strcmp(str, "on")) {
-		s390_pci_probe = 1;
+	if (!strcmp(str, "off")) {
+		s390_pci_probe = 0;
 		return NULL;
 	}
 	return str;
-- 
cgit v1.2.3


From 69f239ed335a4b03265cae3ca930f3f166e42e35 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Thu, 12 Dec 2013 17:03:48 +0100
Subject: s390/cpum_sf: Dynamically extend the sampling buffer if overflows
 occur

Improve the sampling buffer allocation and add a function to reallocate and
increase the sampling buffer structure.  The number of allocated buffer elements
(sample-data-blocks) is accounted.  You can control the minimum and maximum
number of these sample-data-blocks through the cpum_sfb_size kernel parameter.

The number of hardware sample overflows (if any) is also accounted and stored
per perf event.  During the PMU disable/enable calls, the accumulated overflow
counter is analyzed and, if necessary, the sampling buffer is dynamically
increased.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/perf_event.h |   4 +
 arch/s390/kernel/perf_cpum_sf.c    | 527 ++++++++++++++++++++++++++++---------
 2 files changed, 411 insertions(+), 120 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 23d2dfa8201d..99d7f4e333c2 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -49,6 +49,10 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define PERF_CPUM_SF_MAX_CTR		1
 #define PERF_EVENT_CPUM_SF		0xB0000UL	/* Raw event ID */
 
+#define REG_NONE		0
+#define REG_OVERFLOW		1
+#define OVERFLOW_REG(hwc)	((hwc)->extra_reg.config)
+#define SFB_ALLOC_REG(hwc)	((hwc)->extra_reg.alloc)
 #define TEAR_REG(hwc)		((hwc)->last_tag)
 #define SAMPL_RATE(hwc)		((hwc)->event_base)
 
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index ae5e0192160d..ea1656073dac 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -17,6 +17,8 @@
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
 #include <asm/cpu_mf.h>
 #include <asm/irq.h>
 #include <asm/debug.h>
@@ -26,34 +28,54 @@
  * At least one table is required for the sampling buffer structure.
  * A single table contains up to 511 pointers to sample-data-blocks.
  */
-#define CPUM_SF_MIN_SDBT    1
+#define CPUM_SF_MIN_SDBT	1
 
-/* Minimum number of sample-data-blocks:
- * The minimum designates a single page for sample-data-block, i.e.,
- * up to 126 sample-data-blocks with a size of 32 bytes (bsdes).
+/* Number of sample-data-blocks per sample-data-block-table (SDBT):
+ * The table contains SDB origin (8 bytes) and one SDBT origin that
+ * points to the next table.
  */
-#define CPUM_SF_MIN_SDB	    126
+#define CPUM_SF_SDB_PER_TABLE	((PAGE_SIZE - 8) / 8)
 
-/* Maximum number of sample-data-blocks:
- * The maximum number designates approx. 256K per CPU including
- * the given number of sample-data-blocks and taking the number
- * of sample-data-block tables into account.
+/* Maximum page offset for an SDBT table-link entry:
+ * If this page offset is reached, a table-link entry to the next SDBT
+ * must be added.
+ */
+#define CPUM_SF_SDBT_TL_OFFSET	(CPUM_SF_SDB_PER_TABLE * 8)
+static inline int require_table_link(const void *sdbt)
+{
+	return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
+}
+
+/* Minimum and maximum sampling buffer sizes:
+ *
+ * This number represents the maximum size of the sampling buffer
+ * taking the number of sample-data-block-tables into account.
  *
- * Later, this number can be increased for extending the sampling
- * buffer, for example, by factor 2 (512K) or 4 (1M).
+ * Sampling buffer size		Buffer characteristics
+ * ---------------------------------------------------
+ *	 64KB		    ==	  16 pages (4KB per page)
+ *				   1 page  for SDB-tables
+ *				  15 pages for SDBs
+ *
+ *  32MB		    ==	8192 pages (4KB per page)
+ *				  16 pages for SDB-tables
+ *				8176 pages for SDBs
  */
-#define CPUM_SF_MAX_SDB	    6471
+static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
+static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
 
 struct sf_buffer {
-	unsigned long	 sdbt;	    /* Sample-data-block-table origin */
+	unsigned long	 *sdbt;	    /* Sample-data-block-table origin */
 	/* buffer characteristics (required for buffer increments) */
-	unsigned long num_sdb;	    /* Number of sample-data-blocks */
-	unsigned long	 tail;	    /* last sample-data-block-table */
+	unsigned long  num_sdb;	    /* Number of sample-data-blocks */
+	unsigned long num_sdbt;	    /* Number of sample-data-block-tables */
+	unsigned long	 *tail;	    /* last sample-data-block-table */
 };
 
 struct cpu_hw_sf {
 	/* CPU-measurement sampling information block */
 	struct hws_qsi_info_block qsi;
+	/* CPU-measurement sampling control block */
 	struct hws_lsctl_request_block lsctl;
 	struct sf_buffer sfb;	    /* Sampling buffer */
 	unsigned int flags;	    /* Status flags */
@@ -64,12 +86,23 @@ static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
 /* Debug feature */
 static debug_info_t *sfdbg;
 
+/*
+ * sf_disable() - Switch off sampling facility
+ */
+static int sf_disable(void)
+{
+	struct hws_lsctl_request_block sreq;
+
+	memset(&sreq, 0, sizeof(sreq));
+	return lsctl(&sreq);
+}
+
 /*
  * sf_buffer_available() - Check for an allocated sampling buffer
  */
 static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
 {
-	return (cpuhw->sfb.sdbt) ? 1 : 0;
+	return !!cpuhw->sfb.sdbt;
 }
 
 /*
@@ -77,32 +110,32 @@ static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
  */
 static void free_sampling_buffer(struct sf_buffer *sfb)
 {
-	unsigned long sdbt, *curr;
+	unsigned long *sdbt, *curr;
 
 	if (!sfb->sdbt)
 		return;
 
 	sdbt = sfb->sdbt;
-	curr = (unsigned long *) sdbt;
+	curr = sdbt;
 
-	/* we'll free the SDBT after all SDBs are processed... */
+	/* Free the SDBT after all SDBs are processed... */
 	while (1) {
 		if (!*curr || !sdbt)
 			break;
 
-		/* watch for link entry reset if found */
+		/* Process table-link entries */
 		if (is_link_entry(curr)) {
 			curr = get_next_sdbt(curr);
 			if (sdbt)
-				free_page(sdbt);
+				free_page((unsigned long) sdbt);
 
-			/* we are done if we reach the origin */
-			if ((unsigned long) curr == sfb->sdbt)
+			/* If the origin is reached, sampling buffer is freed */
+			if (curr == sfb->sdbt)
 				break;
 			else
-				sdbt = (unsigned long) curr;
+				sdbt = curr;
 		} else {
-			/* process SDB pointer */
+			/* Process SDB pointer */
 			if (*curr) {
 				free_page(*curr);
 				curr++;
@@ -111,10 +144,106 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
 	}
 
 	debug_sprintf_event(sfdbg, 5,
-			    "free_sampling_buffer: freed sdbt=%0lx\n", sfb->sdbt);
+			    "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
 	memset(sfb, 0, sizeof(*sfb));
 }
 
+static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
+{
+	unsigned long sdb, *trailer;
+
+	/* Allocate and initialize sample-data-block */
+	sdb = get_zeroed_page(gfp_flags);
+	if (!sdb)
+		return -ENOMEM;
+	trailer = trailer_entry_ptr(sdb);
+	*trailer = SDB_TE_ALERT_REQ_MASK;
+
+	/* Link SDB into the sample-data-block-table */
+	*sdbt = sdb;
+
+	return 0;
+}
+
+/*
+ * realloc_sampling_buffer() - extend sampler memory
+ *
+ * Allocates new sample-data-blocks and adds them to the specified sampling
+ * buffer memory.
+ *
+ * Important: This modifies the sampling buffer and must be called when the
+ *	      sampling facility is disabled.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int realloc_sampling_buffer(struct sf_buffer *sfb,
+				   unsigned long num_sdb, gfp_t gfp_flags)
+{
+	int i, rc;
+	unsigned long *new, *tail;
+
+	if (!sfb->sdbt || !sfb->tail)
+		return -EINVAL;
+
+	if (!is_link_entry(sfb->tail))
+		return -EINVAL;
+
+	/* Append to the existing sampling buffer, overwriting the table-link
+	 * register.
+	 * The tail variables always points to the "tail" (last and table-link)
+	 * entry in an SDB-table.
+	 */
+	tail = sfb->tail;
+
+	/* Do a sanity check whether the table-link entry points to
+	 * the sampling buffer origin.
+	 */
+	if (sfb->sdbt != get_next_sdbt(tail)) {
+		debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
+				    "sampling buffer is not linked: origin=%p"
+				    "tail=%p\n",
+				    (void *) sfb->sdbt, (void *) tail);
+		return -EINVAL;
+	}
+
+	/* Allocate remaining SDBs */
+	rc = 0;
+	for (i = 0; i < num_sdb; i++) {
+		/* Allocate a new SDB-table if it is full. */
+		if (require_table_link(tail)) {
+			new = (unsigned long *) get_zeroed_page(gfp_flags);
+			if (!new) {
+				rc = -ENOMEM;
+				break;
+			}
+			sfb->num_sdbt++;
+			/* Link current page to tail of chain */
+			*tail = (unsigned long)(void *) new + 1;
+			tail = new;
+		}
+
+		/* Allocate a new sample-data-block.
+		 * If there is not enough memory, stop the realloc process
+		 * and simply use what was allocated.  If this is a temporary
+		 * issue, a new realloc call (if required) might succeed.
+		 */
+		rc = alloc_sample_data_block(tail, gfp_flags);
+		if (rc)
+			break;
+		sfb->num_sdb++;
+		tail++;
+	}
+
+	/* Link sampling buffer to its origin */
+	*tail = (unsigned long) sfb->sdbt + 1;
+	sfb->tail = tail;
+
+	debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
+			    " settings: sdbt=%lu sdb=%lu\n",
+			    sfb->num_sdbt, sfb->num_sdb);
+	return rc;
+}
+
 /*
  * allocate_sampling_buffer() - allocate sampler memory
  *
@@ -128,75 +257,74 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
  */
 static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
 {
-	int j, k, rc;
-	unsigned long *sdbt, *tail, *trailer;
-	unsigned long sdb;
-	unsigned long num_sdbt, sdb_per_table;
+	int rc;
 
 	if (sfb->sdbt)
 		return -EINVAL;
+
+	/* Allocate the sample-data-block-table origin */
+	sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+	if (!sfb->sdbt)
+		return -ENOMEM;
 	sfb->num_sdb = 0;
+	sfb->num_sdbt = 1;
 
-	/* Compute the number of required sample-data-block-tables (SDBT) */
-	num_sdbt = num_sdb / ((PAGE_SIZE - 8) / 8);
-	if (num_sdbt < CPUM_SF_MIN_SDBT)
-		num_sdbt = CPUM_SF_MIN_SDBT;
-	sdb_per_table = (PAGE_SIZE - 8) / 8;
-
-	debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: num_sdbt=%lu "
-			    "num_sdb=%lu sdb_per_table=%lu\n",
-			    num_sdbt, num_sdb, sdb_per_table);
-	sdbt = NULL;
-	tail = sdbt;
-
-	for (j = 0; j < num_sdbt; j++) {
-		sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
-		if (!sdbt) {
-			rc = -ENOMEM;
-			goto allocate_sdbt_error;
-		}
+	/* Link the table origin to point to itself to prepare for
+	 * realloc_sampling_buffer() invocation.
+	 */
+	sfb->tail = sfb->sdbt;
+	*sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
 
-		/* save origin of sample-data-block-table */
-		if (!sfb->sdbt)
-			sfb->sdbt = (unsigned long) sdbt;
+	/* Allocate requested number of sample-data-blocks */
+	rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
+	if (rc) {
+		free_sampling_buffer(sfb);
+		debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
+			"realloc_sampling_buffer failed with rc=%i\n", rc);
+	} else
+		debug_sprintf_event(sfdbg, 4,
+			"alloc_sampling_buffer: tear=%p dear=%p\n",
+			sfb->sdbt, (void *) *sfb->sdbt);
+	return rc;
+}
 
-		/* link current page to tail of chain */
-		if (tail)
-			*tail = (unsigned long)(void *) sdbt + 1;
+static void sfb_set_limits(unsigned long min, unsigned long max)
+{
+	CPUM_SF_MIN_SDB = min;
+	CPUM_SF_MAX_SDB = max;
+}
 
-		for (k = 0; k < num_sdb && k < sdb_per_table; k++) {
-			/* get and set SDB page */
-			sdb = get_zeroed_page(GFP_KERNEL);
-			if (!sdb) {
-				rc = -ENOMEM;
-				goto allocate_sdbt_error;
-			}
-			*sdbt = sdb;
-			trailer = trailer_entry_ptr(*sdbt);
-			*trailer = SDB_TE_ALERT_REQ_MASK;
-			sdbt++;
-		}
-		num_sdb -= k;
-		sfb->num_sdb += k;	/* count allocated sdb's */
-		tail = sdbt;
-	}
+static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
+					struct hw_perf_event *hwc)
+{
+	if (!sfb->sdbt)
+		return SFB_ALLOC_REG(hwc);
+	if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
+		return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
+	return 0;
+}
 
-	rc = 0;
-	if (tail)
-		*tail = sfb->sdbt + 1;
-	sfb->tail = (unsigned long) (void *)tail;
+static int sfb_has_pending_allocs(struct sf_buffer *sfb,
+				   struct hw_perf_event *hwc)
+{
+	return sfb_pending_allocs(sfb, hwc) > 0;
+}
 
-allocate_sdbt_error:
-	if (rc)
-		free_sampling_buffer(sfb);
-	else
-		debug_sprintf_event(sfdbg, 4,
-			"alloc_sampling_buffer: tear=%0lx dear=%0lx\n",
-			sfb->sdbt, *(unsigned long *) sfb->sdbt);
-	return rc;
+static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+	/* Limit the number SDBs to not exceed the maximum */
+	num = min_t(unsigned long, num, CPUM_SF_MAX_SDB - SFB_ALLOC_REG(hwc));
+	if (num)
+		SFB_ALLOC_REG(hwc) += num;
 }
 
-static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hwc)
+static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+	SFB_ALLOC_REG(hwc) = 0;
+	sfb_account_allocs(num, hwc);
+}
+
+static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 {
 	unsigned long n_sdb, freq;
 	unsigned long factor;
@@ -225,39 +353,133 @@ static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hw
 	if (n_sdb < CPUM_SF_MIN_SDB)
 		n_sdb = CPUM_SF_MIN_SDB;
 
-	/* Return if there is already a sampling buffer allocated.
-	 * XXX Remove this later and check number of available and
-	 * required sdb's and, if necessary, increase the sampling buffer.
+	/* If there is already a sampling buffer allocated, it is very likely
+	 * that the sampling facility is enabled too.  If the event to be
+	 * initialized requires a greater sampling buffer, the allocation must
+	 * be postponed.  Changing the sampling buffer requires the sampling
+	 * facility to be in the disabled state.  So, account the number of
+	 * required SDBs and let cpumsf_pmu_enable() resize the buffer just
+	 * before the event is started.
 	 */
+	sfb_init_allocs(n_sdb, hwc);
 	if (sf_buffer_available(cpuhw))
 		return 0;
 
 	debug_sprintf_event(sfdbg, 3,
-			    "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%i cpuhw=%p\n",
+			    "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%lu cpuhw=%p\n",
 			    SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw);
 
 	return alloc_sampling_buffer(&cpuhw->sfb,
-			       min_t(unsigned long, n_sdb, CPUM_SF_MAX_SDB));
+				     sfb_pending_allocs(&cpuhw->sfb, hwc));
 }
 
+static unsigned long min_percent(unsigned int percent, unsigned long base,
+				 unsigned long min)
+{
+	return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
+}
 
-/* Number of perf events counting hardware events */
-static atomic_t num_events;
-/* Used to avoid races in calling reserve/release_cpumf_hardware */
-static DEFINE_MUTEX(pmc_reserve_mutex);
+static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
+{
+	/* Use a percentage-based approach to extend the sampling facility
+	 * buffer.  Accept up to 5% sample data loss.
+	 * Vary the extents between 1% to 5% of the current number of
+	 * sample-data-blocks.
+	 */
+	if (ratio <= 5)
+		return 0;
+	if (ratio <= 25)
+		return min_percent(1, base, 1);
+	if (ratio <= 50)
+		return min_percent(1, base, 1);
+	if (ratio <= 75)
+		return min_percent(2, base, 2);
+	if (ratio <= 100)
+		return min_percent(3, base, 3);
+	if (ratio <= 250)
+		return min_percent(4, base, 4);
+
+	return min_percent(5, base, 8);
+}
 
-/*
- * sf_disable() - Switch off sampling facility
+static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
+				  struct hw_perf_event *hwc)
+{
+	unsigned long ratio, num;
+
+	if (!OVERFLOW_REG(hwc))
+		return;
+
+	/* The sample_overflow contains the average number of sample data
+	 * that has been lost because sample-data-blocks were full.
+	 *
+	 * Calculate the total number of sample data entries that has been
+	 * discarded.  Then calculate the ratio of lost samples to total samples
+	 * per second in percent.
+	 */
+	ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
+			     sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));
+
+	/* Compute number of sample-data-blocks */
+	num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
+	if (num)
+		sfb_account_allocs(num, hwc);
+
+	debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
+			    " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
+	OVERFLOW_REG(hwc) = 0;
+}
+
+/* extend_sampling_buffer() - Extend sampling buffer
+ * @sfb:	Sampling buffer structure (for local CPU)
+ * @hwc:	Perf event hardware structure
+ *
+ * Use this function to extend the sampling buffer based on the overflow counter
+ * and postponed allocation extents stored in the specified Perf event hardware.
+ *
+ * Important: This function disables the sampling facility in order to safely
+ *	      change the sampling buffer structure.  Do not call this function
+ *	      when the PMU is active.
  */
-static int sf_disable(void)
+static void extend_sampling_buffer(struct sf_buffer *sfb,
+				   struct hw_perf_event *hwc)
 {
-	struct hws_lsctl_request_block sreq;
+	unsigned long num, num_old;
+	int rc;
 
-	memset(&sreq, 0, sizeof(sreq));
-	return lsctl(&sreq);
+	num = sfb_pending_allocs(sfb, hwc);
+	if (!num)
+		return;
+	num_old = sfb->num_sdb;
+
+	/* Disable the sampling facility to reset any states and also
+	 * clear pending measurement alerts.
+	 */
+	sf_disable();
+
+	/* Extend the sampling buffer.
+	 * This memory allocation typically happens in an atomic context when
+	 * called by perf.  Because this is a reallocation, it is fine if the
+	 * new SDB-request cannot be satisfied immediately.
+	 */
+	rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
+	if (rc)
+		debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
+				    "failed with rc=%i\n", rc);
+
+	if (sfb_has_pending_allocs(sfb, hwc))
+		debug_sprintf_event(sfdbg, 5, "sfb: extend: "
+				    "req=%lu alloc=%lu remaining=%lu\n",
+				    num, sfb->num_sdb - num_old,
+				    sfb_pending_allocs(sfb, hwc));
 }
 
 
+/* Number of perf events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
 #define PMC_INIT      0
 #define PMC_RELEASE   1
 #define PMC_FAILURE   2
@@ -345,19 +567,17 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period)
 }
 
 static void hw_reset_registers(struct hw_perf_event *hwc,
-			       unsigned long sdbt_origin)
+			       unsigned long *sdbt_origin)
 {
-	TEAR_REG(hwc) = sdbt_origin;	      /* (re)set to first sdb table */
+	/* (Re)set to first sample-data-block-table */
+	TEAR_REG(hwc) = (unsigned long) sdbt_origin;
 }
 
 static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
 				   unsigned long rate)
 {
-	if (rate < si->min_sampl_rate)
-		return si->min_sampl_rate;
-	if (rate > si->max_sampl_rate)
-		return si->max_sampl_rate;
-	return rate;
+	return clamp_t(unsigned long, rate,
+		       si->min_sampl_rate, si->max_sampl_rate);
 }
 
 static int __hw_perf_event_init(struct perf_event *event)
@@ -448,6 +668,10 @@ static int __hw_perf_event_init(struct perf_event *event)
 	SAMPL_RATE(hwc) = rate;
 	hw_init_period(hwc, SAMPL_RATE(hwc));
 
+	/* Initialize sample data overflow accounting */
+	hwc->extra_reg.reg = REG_OVERFLOW;
+	OVERFLOW_REG(hwc) = 0;
+
 	/* Allocate the per-CPU sampling buffer using the CPU information
 	 * from the event.  If the event is not pinned to a particular
 	 * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
@@ -513,6 +737,7 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
 static void cpumsf_pmu_enable(struct pmu *pmu)
 {
 	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+	struct hw_perf_event *hwc;
 	int err;
 
 	if (cpuhw->flags & PMU_F_ENABLED)
@@ -521,6 +746,26 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
 	if (cpuhw->flags & PMU_F_ERR_MASK)
 		return;
 
+	/* Check whether to extent the sampling buffer.
+	 *
+	 * Two conditions trigger an increase of the sampling buffer for a
+	 * perf event:
+	 *    1. Postponed buffer allocations from the event initialization.
+	 *    2. Sampling overflows that contribute to pending allocations.
+	 *
+	 * Note that the extend_sampling_buffer() function disables the sampling
+	 * facility, but it can be fully re-enabled using sampling controls that
+	 * have been saved in cpumsf_pmu_disable().
+	 */
+	if (cpuhw->event) {
+		hwc = &cpuhw->event->hw;
+		/* Account number of overflow-designated buffer extents */
+		sfb_account_overflows(cpuhw, hwc);
+		if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
+			extend_sampling_buffer(&cpuhw->sfb, hwc);
+	}
+
+	/* (Re)enable the PMU and sampling facility */
 	cpuhw->flags |= PMU_F_ENABLED;
 	barrier();
 
@@ -632,8 +877,6 @@ static int perf_push_sample(struct perf_event *event,
 	if (perf_event_overflow(event, &data, &regs)) {
 		overflow = 1;
 		event->pmu->stop(event, 0);
-		debug_sprintf_event(sfdbg, 4, "perf_push_sample: PMU stopped"
-				    " because of an event overflow\n");
 	}
 	perf_event_update_userpage(event);
 
@@ -710,11 +953,11 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 	struct hw_perf_event *hwc = &event->hw;
 	struct hws_trailer_entry *te;
 	unsigned long *sdbt;
-	unsigned long long event_overflow, sampl_overflow;
+	unsigned long long event_overflow, sampl_overflow, num_sdb;
 	int done;
 
 	sdbt = (unsigned long *) TEAR_REG(hwc);
-	done = event_overflow = sampl_overflow = 0;
+	done = event_overflow = sampl_overflow = num_sdb = 0;
 	while (!done) {
 		/* Get the trailer entry of the sample-data-block */
 		te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
@@ -726,17 +969,13 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 				break;
 		}
 
-		/* Check sample overflow count */
-		if (te->overflow) {
-			/* Increment sample overflow counter */
-			sampl_overflow += te->overflow;
-
-			/* XXX: If an sample overflow occurs, increase the
-			 *	sampling buffer.  Set a "realloc" flag because
-			 *	the sampler must be re-enabled for changing
-			 *	the sample-data-block-table content.
+		/* Check the sample overflow count */
+		if (te->overflow)
+			/* Account sample overflows and, if a particular limit
+			 * is reached, extend the sampling buffer.
+			 * For details, see sfb_account_overflows().
 			 */
-		}
+			sampl_overflow += te->overflow;
 
 		/* Timestamps are valid for full sample-data-blocks only */
 		debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
@@ -749,6 +988,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 		 * is stopped and remaining samples will be discarded.
 		 */
 		hw_collect_samples(event, sdbt, &event_overflow);
+		num_sdb++;
 
 		/* Reset trailer */
 		xchg(&te->overflow, 0);
@@ -775,6 +1015,10 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 			flush_all = 1;
 	}
 
+	/* Account sample overflows in the event hardware structure */
+	if (sampl_overflow)
+		OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
+						 sampl_overflow, 1 + num_sdb);
 	if (sampl_overflow || event_overflow)
 		debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
 				    "overflow stats: sample=%llu event=%llu\n",
@@ -849,7 +1093,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
 	 */
 	cpuhw->lsctl.s = 0;
 	cpuhw->lsctl.h = 1;
-	cpuhw->lsctl.tear = cpuhw->sfb.sdbt;
+	cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
 	cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
 	cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
 	hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
@@ -1018,6 +1262,48 @@ static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
+static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
+{
+	if (!cpum_sf_avail())
+		return -ENODEV;
+	return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+}
+
+static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
+{
+	int rc;
+	unsigned long min, max;
+
+	if (!cpum_sf_avail())
+		return -ENODEV;
+	if (!val || !strlen(val))
+		return -EINVAL;
+
+	/* Valid parameter values: "min,max" or "max" */
+	min = CPUM_SF_MIN_SDB;
+	max = CPUM_SF_MAX_SDB;
+	if (strchr(val, ','))
+		rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
+	else
+		rc = kstrtoul(val, 10, &max);
+
+	if (min < 2 || min >= max || max > get_num_physpages())
+		rc = -EINVAL;
+	if (rc)
+		return rc;
+
+	sfb_set_limits(min, max);
+	pr_info("Changed sampling buffer settings: min=%lu max=%lu\n",
+		CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+	return 0;
+}
+
+#define param_check_sfb_size(name, p) __param_check(name, p, void)
+static struct kernel_param_ops param_ops_sfb_size = {
+	.set = param_set_sfb_size,
+	.get = param_get_sfb_size,
+};
+
 static int __init init_cpum_sampling_pmu(void)
 {
 	int err;
@@ -1047,3 +1333,4 @@ out:
 	return err;
 }
 arch_initcall(init_cpum_sampling_pmu);
+core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);
-- 
cgit v1.2.3


From fcc77f507333776eaa336ab4ff49c23422f53703 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Thu, 12 Dec 2013 17:26:51 +0100
Subject: s390/cpum_sf: Atomically reset trailer entry fields of
 sample-data-blocks

Make sure to reset the sample-data-block full indicator and the overflow counter
at the same time.  This must be done atomically because the sampling hardware
is still active while a full sample-data-block is processed.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cpu_mf.h  | 13 +++++++++----
 arch/s390/kernel/perf_cpum_sf.c | 12 ++++++++----
 2 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index d707abc26157..b0b3059b8d64 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -115,10 +115,15 @@ struct hws_data_entry {
 } __packed;
 
 struct hws_trailer_entry {
-	unsigned int f:1;	    /* 0 - Block Full Indicator 	 */
-	unsigned int a:1;	    /* 1 - Alert request control	 */
-	unsigned int t:1;	    /* 2 - Timestamp format		 */
-	unsigned long long:61;	    /* 3 - 63: Reserved 		 */
+	union {
+		struct {
+			unsigned int f:1;	/* 0 - Block Full Indicator   */
+			unsigned int a:1;	/* 1 - Alert request control  */
+			unsigned int t:1;	/* 2 - Timestamp format	      */
+			unsigned long long:61;	/* 3 - 63: Reserved	      */
+		};
+		unsigned long long flags;	/* 0 - 63: All indicators     */
+	};
 	unsigned long long overflow;	 /* 64 - sample Overflow count	      */
 	unsigned long long timestamp;	 /* 16 - time-stamp		      */
 	unsigned long long timestamp1;	 /*				      */
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index ea1656073dac..9202f2858894 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -953,7 +953,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 	struct hw_perf_event *hwc = &event->hw;
 	struct hws_trailer_entry *te;
 	unsigned long *sdbt;
-	unsigned long long event_overflow, sampl_overflow, num_sdb;
+	unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
 	int done;
 
 	sdbt = (unsigned long *) TEAR_REG(hwc);
@@ -990,9 +990,13 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 		hw_collect_samples(event, sdbt, &event_overflow);
 		num_sdb++;
 
-		/* Reset trailer */
-		xchg(&te->overflow, 0);
-		xchg((unsigned char *) te, 0x40);
+		/* Reset trailer (using compare-double-and-swap) */
+		do {
+			te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
+			te_flags |= SDB_TE_ALERT_REQ_MASK;
+		} while (!cmpxchg_double(&te->flags, &te->overflow,
+					 te->flags, te->overflow,
+					 te_flags, 0ULL));
 
 		/* Advance to next sample-data-block */
 		sdbt++;
-- 
cgit v1.2.3


From 443d4beb823d4dccaaf964b59df9dd38b4d6aae7 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Thu, 12 Dec 2013 17:38:50 +0100
Subject: s390/cpum_sf: Add helper to read TOD from trailer entries

The trailer entry contains a timestamp of the time when the sample-data-block
became full.  The timestamp specifies a TOD (time-of-day) value in either the
STCK or STCKE format.

Provide a helper function to return the TOD value depending on the setting of
the time format indicator.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cpu_mf.h  | 14 ++++++++++++--
 arch/s390/kernel/perf_cpum_sf.c |  2 +-
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index b0b3059b8d64..09dc5facc0bc 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -125,8 +125,7 @@ struct hws_trailer_entry {
 		unsigned long long flags;	/* 0 - 63: All indicators     */
 	};
 	unsigned long long overflow;	 /* 64 - sample Overflow count	      */
-	unsigned long long timestamp;	 /* 16 - time-stamp		      */
-	unsigned long long timestamp1;	 /*				      */
+	unsigned char timestamp[16];	 /* 16 - 31 timestamp		      */
 	unsigned long long reserved1;	 /* 32 -Reserved		      */
 	unsigned long long reserved2;	 /*				      */
 	unsigned long long progusage1;	 /* 48 - reserved for programming use */
@@ -232,6 +231,17 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
 #define SDB_TE_ALERT_REQ_MASK	0x4000000000000000UL
 #define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
 
+/* Return TOD timestamp contained in an trailer entry */
+static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
+{
+	/* TOD in STCKE format */
+	if (te->t)
+		return *((unsigned long long *) &te->timestamp[1]);
+
+	/* TOD in STCK format */
+	return *((unsigned long long *) &te->timestamp[0]);
+}
+
 /* Return pointer to trailer entry of an sample data block */
 static inline unsigned long *trailer_entry_ptr(unsigned long v)
 {
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 9202f2858894..3ab7e67ee2e4 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -981,7 +981,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 		debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
 				    "overflow=%llu timestamp=0x%llx\n",
 				    sdbt, te->overflow,
-				    (te->f) ? te->timestamp : 0ULL);
+				    (te->f) ? trailer_timestamp(te) : 0ULL);
 
 		/* Collect all samples from a single sample-data-block and
 		 * flag if an (perf) event overflow happened.  If so, the PMU
-- 
cgit v1.2.3


From 443e802bab16916f9a51a34f2213f4dee6e8762c Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Thu, 12 Dec 2013 17:54:57 +0100
Subject: s390/cpum_sf: Detect KVM guest samples

The host-program-parameter (hpp) value of basic sample-data-entries designates
a SIE control block that is set by the LPP instruction in sie64a().
Non-zero values indicate guest samples, a value of zero indicates a host sample.

For perf samples, host and guest samples are distinguished using particular
PERF_MISC_* flags.  The perf layer calls perf_misc_flags() to set the flags
based on the pt_regs content.  For each sample-data-entry, the cpum_sf PMU
creates a pt_regs structure with the sample-data information.  An additional
flag structure is added to easily distinguish between host and guest samples.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/perf_event.h |  6 ++++++
 arch/s390/kernel/perf_cpum_sf.c    | 20 ++++++++++++++++++++
 arch/s390/kernel/perf_event.c      | 25 ++++++++++++++++++++++++-
 3 files changed, 50 insertions(+), 1 deletion(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 99d7f4e333c2..7667bde37dcb 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -42,6 +42,12 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs) perf_misc_flags(regs)
 
+/* Perf pt_regs extension for sample-data-entry indicators */
+struct perf_sf_sde_regs {
+	unsigned char in_guest:1;	  /* guest sample */
+	unsigned long reserved:63;	  /* reserved */
+};
+
 /* Perf PMU definitions for the counter facility */
 #define PERF_CPUM_CF_MAX_CTR		256
 
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 3ab7e67ee2e4..d611facae599 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -840,6 +840,7 @@ static int perf_push_sample(struct perf_event *event,
 {
 	int overflow;
 	struct pt_regs regs;
+	struct perf_sf_sde_regs *sde_regs;
 	struct perf_sample_data data;
 
 	/* Skip samples that are invalid or for which the instruction address
@@ -850,7 +851,16 @@ static int perf_push_sample(struct perf_event *event,
 
 	perf_sample_data_init(&data, 0, event->hw.last_period);
 
+	/* Setup pt_regs to look like an CPU-measurement external interrupt
+	 * using the Program Request Alert code.  The regs.int_parm_long
+	 * field which is unused contains additional sample-data-entry related
+	 * indicators.
+	 */
 	memset(&regs, 0, sizeof(regs));
+	regs.int_code = 0x1407;
+	regs.int_parm = CPU_MF_INT_SF_PRA;
+	sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
+
 	regs.psw.addr = sample->ia;
 	if (sample->T)
 		regs.psw.mask |= PSW_MASK_DAT;
@@ -873,6 +883,16 @@ static int perf_push_sample(struct perf_event *event,
 		break;
 	}
 
+	/* The host-program-parameter (hpp) contains the sie control
+	 * block that is set by sie64a() in entry64.S.	Check if hpp
+	 * refers to a valid control block and set sde_regs flags
+	 * accordingly.  This would allow to use hpp values for other
+	 * purposes too.
+	 * For now, simply use a non-zero value as guest indicator.
+	 */
+	if (sample->hpp)
+		sde_regs->in_guest = 1;
+
 	overflow = 0;
 	if (perf_event_overflow(event, &data, &regs)) {
 		overflow = 1;
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 3bd2bf030ad4..60a68261d091 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -1,7 +1,7 @@
 /*
  * Performance event support for s390x
  *
- *  Copyright IBM Corp. 2012
+ *  Copyright IBM Corp. 2012, 2013
  *  Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -89,8 +89,31 @@ static unsigned long perf_misc_guest_flags(struct pt_regs *regs)
 					: PERF_RECORD_MISC_GUEST_KERNEL;
 }
 
+static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
+{
+	struct perf_sf_sde_regs *sde_regs;
+	unsigned long flags;
+
+	sde_regs = (struct perf_sf_sde_regs *) &regs->int_parm_long;
+	if (sde_regs->in_guest)
+		flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+					: PERF_RECORD_MISC_GUEST_KERNEL;
+	else
+		flags = user_mode(regs) ? PERF_RECORD_MISC_USER
+					: PERF_RECORD_MISC_KERNEL;
+	return flags;
+}
+
 unsigned long perf_misc_flags(struct pt_regs *regs)
 {
+	/* Check if the cpum_sf PMU has created the pt_regs structure.
+	 * In this case, perf misc flags can be easily extracted.  Otherwise,
+	 * do regular checks on the pt_regs content.
+	 */
+	if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA)
+		if (!regs->gprs[15])
+			return perf_misc_flags_sf(regs);
+
 	if (is_in_guest(regs))
 		return perf_misc_guest_flags(regs);
 
-- 
cgit v1.2.3


From dd127b3b977b81eab58d1d7ee037195cf0bbeba7 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Thu, 12 Dec 2013 18:05:20 +0100
Subject: s390/cpum_sf: Filter perf events based event->attr.exclude_* settings

Introduce the perf_exclude_event() function to filter perf samples
according to event->attr.exclude_* settings.   During event initialization,
reset event exclude settings that are not supported.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index d611facae599..28fa2f235158 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -723,10 +723,19 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
 		return -ENOENT;
 	}
 
+	/* Check online status of the CPU to which the event is pinned */
 	if (event->cpu >= nr_cpumask_bits ||
 	    (event->cpu >= 0 && !cpu_online(event->cpu)))
 		return -ENODEV;
 
+	/* Force reset of idle/hv excludes regardless of what the
+	 * user requested.
+	 */
+	if (event->attr.exclude_hv)
+		event->attr.exclude_hv = 0;
+	if (event->attr.exclude_idle)
+		event->attr.exclude_idle = 0;
+
 	err = __hw_perf_event_init(event);
 	if (unlikely(err))
 		if (event->destroy)
@@ -824,6 +833,29 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
 	cpuhw->flags &= ~PMU_F_ENABLED;
 }
 
+/* perf_exclude_event() - Filter event
+ * @event:	The perf event
+ * @regs:	pt_regs structure
+ * @sde_regs:	Sample-data-entry (sde) regs structure
+ *
+ * Filter perf events according to their exclude specification.
+ *
+ * Return non-zero if the event shall be excluded.
+ */
+static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
+			      struct perf_sf_sde_regs *sde_regs)
+{
+	if (event->attr.exclude_user && user_mode(regs))
+		return 1;
+	if (event->attr.exclude_kernel && !user_mode(regs))
+		return 1;
+	if (event->attr.exclude_guest && sde_regs->in_guest)
+		return 1;
+	if (event->attr.exclude_host && !sde_regs->in_guest)
+		return 1;
+	return 0;
+}
+
 /* perf_push_sample() - Push samples to perf
  * @event:	The perf event
  * @sample:	Hardware sample data
@@ -894,12 +926,14 @@ static int perf_push_sample(struct perf_event *event,
 		sde_regs->in_guest = 1;
 
 	overflow = 0;
+	if (perf_exclude_event(event, &regs, sde_regs))
+		goto out;
 	if (perf_event_overflow(event, &data, &regs)) {
 		overflow = 1;
 		event->pmu->stop(event, 0);
 	}
 	perf_event_update_userpage(event);
-
+out:
 	return overflow;
 }
 
-- 
cgit v1.2.3


From 7e75fc3ff4cffd90684816d69838f8730ac3e072 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Fri, 13 Dec 2013 11:42:44 +0100
Subject: s390/cpum_sf: Add raw data sampling to support the
 diagnostic-sampling function

Also support the diagnostic-sampling function in addition to the basic-sampling
function.  Diagnostic-sampling data entries contain hardware model specific
sampling data and additional programs are required to analyze the data.

To deliver diagnostic-sampling as well as basic-sampling data entries to user
space, introduce support for sampling "raw data".  If this particular perf
sampling type (PERF_SAMPLE_RAW) is used, sampling data entries are copied
to user space.  External programs can then analyze these data.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cpu_mf.h     |  40 ++--
 arch/s390/include/asm/perf_event.h |  28 ++-
 arch/s390/kernel/perf_cpum_sf.c    | 380 ++++++++++++++++++++++++++++++-------
 arch/s390/kernel/perf_event.c      |  21 +-
 arch/s390/oprofile/hwsampler.c     |   4 +-
 5 files changed, 377 insertions(+), 96 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 09dc5facc0bc..cb700d54bd83 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -59,13 +59,15 @@ struct cpumf_ctr_info {
 /* QUERY SAMPLING INFORMATION block */
 struct hws_qsi_info_block {	    /* Bit(s) */
 	unsigned int b0_13:14;	    /* 0-13: zeros			 */
-	unsigned int as:1;	    /* 14: sampling authorisation control*/
-	unsigned int b15_21:7;	    /* 15-21: zeros			 */
-	unsigned int es:1;	    /* 22: sampling enable control	 */
-	unsigned int b23_29:7;	    /* 23-29: zeros			 */
-	unsigned int cs:1;	    /* 30: sampling activation control	 */
-	unsigned int:1; 	    /* 31: reserved			 */
-	unsigned int bsdes:16;	    /* 4-5: size of basic sampling entry      */
+	unsigned int as:1;	    /* 14: basic-sampling authorization	 */
+	unsigned int ad:1;	    /* 15: diag-sampling authorization	 */
+	unsigned int b16_21:6;	    /* 16-21: zeros			 */
+	unsigned int es:1;	    /* 22: basic-sampling enable control */
+	unsigned int ed:1;	    /* 23: diag-sampling enable control	 */
+	unsigned int b24_29:6;	    /* 24-29: zeros			 */
+	unsigned int cs:1;	    /* 30: basic-sampling activation control */
+	unsigned int cd:1;	    /* 31: diag-sampling activation control */
+	unsigned int bsdes:16;	    /* 4-5: size of basic sampling entry */
 	unsigned int dsdes:16;	    /* 6-7: size of diagnostic sampling entry */
 	unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
 	unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
@@ -82,10 +84,11 @@ struct hws_lsctl_request_block {
 	unsigned int s:1;	    /* 0: maximum buffer indicator	 */
 	unsigned int h:1;	    /* 1: part. level reserved for VM use*/
 	unsigned long long b2_53:52;/* 2-53: zeros			 */
-	unsigned int es:1;	    /* 54: sampling enable control	 */
-	unsigned int b55_61:7;	    /* 55-61: - zeros			 */
-	unsigned int cs:1;	    /* 62: sampling activation control	 */
-	unsigned int b63:1;	    /* 63: zero 			 */
+	unsigned int es:1;	    /* 54: basic-sampling enable control */
+	unsigned int ed:1;	    /* 55: diag-sampling enable control	 */
+	unsigned int b56_61:6;	    /* 56-61: - zeros			 */
+	unsigned int cs:1;	    /* 62: basic-sampling activation control */
+	unsigned int cd:1;	    /* 63: diag-sampling activation control  */
 	unsigned long interval;     /* 8-15: sampling interval		 */
 	unsigned long tear;	    /* 16-23: TEAR contents		 */
 	unsigned long dear;	    /* 24-31: DEAR contents		 */
@@ -96,8 +99,7 @@ struct hws_lsctl_request_block {
 	unsigned long rsvrd4;	    /* reserved 			 */
 } __packed;
 
-
-struct hws_data_entry {
+struct hws_basic_entry {
 	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
 	unsigned int R:4;	    /* 16-19 reserved			 */
 	unsigned int U:4;	    /* 20-23 Number of unique instruct.  */
@@ -114,6 +116,18 @@ struct hws_data_entry {
 	unsigned long long hpp;     /* Host Program Parameter		 */
 } __packed;
 
+struct hws_diag_entry {
+	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
+	unsigned int R:14;	    /* 16-19 and 20-30 reserved		 */
+	unsigned int I:1;	    /* 31 entry valid or invalid	 */
+	u8	     data[];	    /* Machine-dependent sample data	 */
+} __packed;
+
+struct hws_combined_entry {
+	struct hws_basic_entry	basic;	/* Basic-sampling data entry */
+	struct hws_diag_entry	diag;	/* Diagnostic-sampling data entry */
+} __packed;
+
 struct hws_trailer_entry {
 	union {
 		struct {
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 7667bde37dcb..bd4573f1d65c 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -52,15 +52,39 @@ struct perf_sf_sde_regs {
 #define PERF_CPUM_CF_MAX_CTR		256
 
 /* Perf PMU definitions for the sampling facility */
-#define PERF_CPUM_SF_MAX_CTR		1
-#define PERF_EVENT_CPUM_SF		0xB0000UL	/* Raw event ID */
+#define PERF_CPUM_SF_MAX_CTR		2
+#define PERF_EVENT_CPUM_SF		0xB0000UL /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG		0xBD000UL /* Event: Combined-sampling */
+#define PERF_CPUM_SF_BASIC_MODE		0x0001	  /* Basic-sampling flag */
+#define PERF_CPUM_SF_DIAG_MODE		0x0002	  /* Diagnostic-sampling flag */
+#define PERF_CPUM_SF_MODE_MASK		(PERF_CPUM_SF_BASIC_MODE| \
+					 PERF_CPUM_SF_DIAG_MODE)
 
 #define REG_NONE		0
 #define REG_OVERFLOW		1
 #define OVERFLOW_REG(hwc)	((hwc)->extra_reg.config)
 #define SFB_ALLOC_REG(hwc)	((hwc)->extra_reg.alloc)
+#define RAWSAMPLE_REG(hwc)	((hwc)->config)
 #define TEAR_REG(hwc)		((hwc)->last_tag)
 #define SAMPL_RATE(hwc)		((hwc)->event_base)
+#define SAMPL_FLAGS(hwc)	((hwc)->config_base)
+#define SAMPL_DIAG_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+
+/* Structure for sampling data entries to be passed as perf raw sample data
+ * to user space.  Note that raw sample data must be aligned and, thus, might
+ * be padded with zeros.
+ */
+struct sf_raw_sample {
+#define SF_RAW_SAMPLE_BASIC	PERF_CPUM_SF_BASIC_MODE
+#define SF_RAW_SAMPLE_DIAG	PERF_CPUM_SF_DIAG_MODE
+	u64			format;
+	u32			 size;	  /* Size of sf_raw_sample */
+	u16			bsdes;	  /* Basic-sampling data entry size */
+	u16			dsdes;	  /* Diagnostic-sampling data entry size */
+	struct hws_basic_entry	basic;	  /* Basic-sampling data entry */
+	struct hws_diag_entry	 diag;	  /* Diagnostic-sampling data entry */
+	u8		    padding[];	  /* Padding to next multiple of 8 */
+} __packed;
 
 /* Perf hardware reserve and release functions */
 int perf_reserve_sampling(void);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 28fa2f235158..b4ec058c4f10 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -17,6 +17,7 @@
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/export.h>
+#include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/moduleparam.h>
 #include <asm/cpu_mf.h>
@@ -31,8 +32,8 @@
 #define CPUM_SF_MIN_SDBT	1
 
 /* Number of sample-data-blocks per sample-data-block-table (SDBT):
- * The table contains SDB origin (8 bytes) and one SDBT origin that
- * points to the next table.
+ * A table contains SDB pointers (8 bytes) and one table-link entry
+ * that points to the origin of the next SDBT.
  */
 #define CPUM_SF_SDB_PER_TABLE	((PAGE_SIZE - 8) / 8)
 
@@ -48,8 +49,11 @@ static inline int require_table_link(const void *sdbt)
 
 /* Minimum and maximum sampling buffer sizes:
  *
- * This number represents the maximum size of the sampling buffer
- * taking the number of sample-data-block-tables into account.
+ * This number represents the maximum size of the sampling buffer taking
+ * the number of sample-data-block-tables into account.  Note that these
+ * numbers apply to the basic-sampling function only.
+ * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if
+ * the diagnostic-sampling function is active.
  *
  * Sampling buffer size		Buffer characteristics
  * ---------------------------------------------------
@@ -63,6 +67,7 @@ static inline int require_table_link(const void *sdbt)
  */
 static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
 static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
+static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1;
 
 struct sf_buffer {
 	unsigned long	 *sdbt;	    /* Sample-data-block-table origin */
@@ -290,8 +295,20 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
 
 static void sfb_set_limits(unsigned long min, unsigned long max)
 {
+	struct hws_qsi_info_block si;
+
 	CPUM_SF_MIN_SDB = min;
 	CPUM_SF_MAX_SDB = max;
+
+	memset(&si, 0, sizeof(si));
+	if (!qsi(&si))
+		CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+}
+
+static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
+{
+	return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR
+				    : CPUM_SF_MAX_SDB;
 }
 
 static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
@@ -312,8 +329,8 @@ static int sfb_has_pending_allocs(struct sf_buffer *sfb,
 
 static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
 {
-	/* Limit the number SDBs to not exceed the maximum */
-	num = min_t(unsigned long, num, CPUM_SF_MAX_SDB - SFB_ALLOC_REG(hwc));
+	/* Limit the number of SDBs to not exceed the maximum */
+	num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc));
 	if (num)
 		SFB_ALLOC_REG(hwc) += num;
 }
@@ -324,32 +341,89 @@ static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
 	sfb_account_allocs(num, hwc);
 }
 
-static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
+static size_t event_sample_size(struct hw_perf_event *hwc)
+{
+	struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
+	size_t sample_size;
+
+	/* The sample size depends on the sampling function: The basic-sampling
+	 * function must be always enabled, diagnostic-sampling function is
+	 * optional.
+	 */
+	sample_size = sfr->bsdes;
+	if (SAMPL_DIAG_MODE(hwc))
+		sample_size += sfr->dsdes;
+
+	return sample_size;
+}
+
+static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
+{
+	if (cpuhw->sfb.sdbt)
+		free_sampling_buffer(&cpuhw->sfb);
+}
+
+static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 {
-	unsigned long n_sdb, freq;
-	unsigned long factor;
+	unsigned long n_sdb, freq, factor;
+	size_t sfr_size, sample_size;
+	struct sf_raw_sample *sfr;
+
+	/* Allocate raw sample buffer
+	 *
+	 *    The raw sample buffer is used to temporarily store sampling data
+	 *    entries for perf raw sample processing.  The buffer size mainly
+	 *    depends on the size of diagnostic-sampling data entries which is
+	 *    machine-specific.  The exact size calculation includes:
+	 *	1. The first 4 bytes of diagnostic-sampling data entries are
+	 *	   already reflected in the sf_raw_sample structure.  Subtract
+	 *	   these bytes.
+	 *	2. The perf raw sample data must be 8-byte aligned (u64) and
+	 *	   perf's internal data size must be considered too.  So add
+	 *	   an additional u32 for correct alignment and subtract before
+	 *	   allocating the buffer.
+	 *	3. Store the raw sample buffer pointer in the perf event
+	 *	   hardware structure.
+	 */
+	sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) +
+			 sizeof(u32), sizeof(u64));
+	sfr_size -= sizeof(u32);
+	sfr = kzalloc(sfr_size, GFP_KERNEL);
+	if (!sfr)
+		return -ENOMEM;
+	sfr->size = sfr_size;
+	sfr->bsdes = cpuhw->qsi.bsdes;
+	sfr->dsdes = cpuhw->qsi.dsdes;
+	RAWSAMPLE_REG(hwc) = (unsigned long) sfr;
 
 	/* Calculate sampling buffers using 4K pages
 	 *
-	 *    1. Use frequency as input.  The samping buffer is designed for
-	 *	 a complete second.  This can be adjusted through the "factor"
-	 *	 variable.
+	 *    1. Determine the sample data size which depends on the used
+	 *	 sampling functions, for example, basic-sampling or
+	 *	 basic-sampling with diagnostic-sampling.
+	 *
+	 *    2. Use the sampling frequency as input.  The sampling buffer is
+	 *	 designed for almost one second.  This can be adjusted through
+	 *	 the "factor" variable.
 	 *	 In any case, alloc_sampling_buffer() sets the Alert Request
-	 *	 Control indicator to trigger measurement-alert to harvest
+	 *	 Control indicator to trigger a measurement-alert to harvest
 	 *	 sample-data-blocks (sdb).
 	 *
-	 *    2. Compute the number of sample-data-blocks and ensure a minimum
+	 *    3. Compute the number of sample-data-blocks and ensure a minimum
 	 *	 of CPUM_SF_MIN_SDB.  Also ensure the upper limit does not
-	 *	 exceed CPUM_SF_MAX_SDB.  See also the remarks for these
-	 *	 symbolic constants.
+	 *	 exceed a "calculated" maximum.  The symbolic maximum is
+	 *	 designed for basic-sampling only and needs to be increased if
+	 *	 diagnostic-sampling is active.
+	 *	 See also the remarks for these symbolic constants.
 	 *
-	 *    3. Compute number of pages used for the sample-data-block-table
-	 *	 and ensure a minimum of CPUM_SF_MIN_SDBT (at minimum one table
-	 *	 to manage up to 511 sample-data-blocks).
+	 *    4. Compute the number of sample-data-block-tables (SDBT) and
+	 *	 ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
+	 *	 to 511 SDBs).
 	 */
+	sample_size = event_sample_size(hwc);
 	freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
 	factor = 1;
-	n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / cpuhw->qsi.bsdes));
+	n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
 	if (n_sdb < CPUM_SF_MIN_SDB)
 		n_sdb = CPUM_SF_MIN_SDB;
 
@@ -366,8 +440,10 @@ static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 		return 0;
 
 	debug_sprintf_event(sfdbg, 3,
-			    "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%lu cpuhw=%p\n",
-			    SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw);
+			    "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu"
+			    " sample_size=%lu cpuhw=%p\n",
+			    SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
+			    sample_size, cpuhw);
 
 	return alloc_sampling_buffer(&cpuhw->sfb,
 				     sfb_pending_allocs(&cpuhw->sfb, hwc));
@@ -509,10 +585,8 @@ static void setup_pmc_cpu(void *flags)
 		if (err) {
 			pr_err("Switching off the sampling facility failed "
 			       "with rc=%i\n", err);
-		} else {
-			if (cpusf->sfb.sdbt)
-				free_sampling_buffer(&cpusf->sfb);
-		}
+		} else
+			deallocate_buffers(cpusf);
 		debug_sprintf_event(sfdbg, 5,
 				    "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
 		break;
@@ -550,6 +624,10 @@ static int reserve_pmc_hardware(void)
 
 static void hw_perf_event_destroy(struct perf_event *event)
 {
+	/* Free raw sample buffer */
+	if (RAWSAMPLE_REG(&event->hw))
+		kfree((void *) RAWSAMPLE_REG(&event->hw));
+
 	/* Release PMC if this is the last perf event */
 	if (!atomic_add_unless(&num_events, -1, 1)) {
 		mutex_lock(&pmc_reserve_mutex);
@@ -569,8 +647,15 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period)
 static void hw_reset_registers(struct hw_perf_event *hwc,
 			       unsigned long *sdbt_origin)
 {
+	struct sf_raw_sample *sfr;
+
 	/* (Re)set to first sample-data-block-table */
 	TEAR_REG(hwc) = (unsigned long) sdbt_origin;
+
+	/* (Re)set raw sampling buffer register */
+	sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
+	memset(&sfr->basic, 0, sizeof(sfr->basic));
+	memset(&sfr->diag, 0, sfr->dsdes);
 }
 
 static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
@@ -634,6 +719,20 @@ static int __hw_perf_event_init(struct perf_event *event)
 		goto out;
 	}
 
+	/* Always enable basic sampling */
+	SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;
+
+	/* Check if diagnostic sampling is requested.  Deny if the required
+	 * sampling authorization is missing.
+	 */
+	if (attr->config == PERF_EVENT_CPUM_SF_DIAG) {
+		if (!si.ad) {
+			err = -EPERM;
+			goto out;
+		}
+		SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
+	}
+
 	/* The sampling information (si) contains information about the
 	 * min/max sampling intervals and the CPU speed.  So calculate the
 	 * correct sampling interval and avoid the whole period adjust
@@ -679,14 +778,14 @@ static int __hw_perf_event_init(struct perf_event *event)
 	 */
 	if (cpuhw)
 		/* Event is pinned to a particular CPU */
-		err = allocate_sdbt(cpuhw, hwc);
+		err = allocate_buffers(cpuhw, hwc);
 	else {
 		/* Event is not pinned, allocate sampling buffer on
 		 * each online CPU
 		 */
 		for_each_online_cpu(cpu) {
 			cpuhw = &per_cpu(cpu_hw_sf, cpu);
-			err = allocate_sdbt(cpuhw, hwc);
+			err = allocate_buffers(cpuhw, hwc);
 			if (err)
 				break;
 		}
@@ -705,7 +804,8 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
 
 	switch (event->attr.type) {
 	case PERF_TYPE_RAW:
-		if (event->attr.config != PERF_EVENT_CPUM_SF)
+		if ((event->attr.config != PERF_EVENT_CPUM_SF) &&
+		    (event->attr.config != PERF_EVENT_CPUM_SF_DIAG))
 			return -ENOENT;
 		break;
 	case PERF_TYPE_HARDWARE:
@@ -786,8 +886,9 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
 		return;
 	}
 
-	debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i tear=%p dear=%p\n",
-			    cpuhw->lsctl.es, cpuhw->lsctl.cs,
+	debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
+			    "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs,
+			    cpuhw->lsctl.ed, cpuhw->lsctl.cd,
 			    (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
 }
 
@@ -807,6 +908,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
 	/* Switch off sampling activation control */
 	inactive = cpuhw->lsctl;
 	inactive.cs = 0;
+	inactive.cd = 0;
 
 	err = lsctl(&inactive);
 	if (err) {
@@ -867,21 +969,19 @@ static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
  *
  * Return non-zero if an event overflow occurred.
  */
-static int perf_push_sample(struct perf_event *event,
-			    struct hws_data_entry *sample)
+static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
 {
 	int overflow;
 	struct pt_regs regs;
 	struct perf_sf_sde_regs *sde_regs;
 	struct perf_sample_data data;
+	struct perf_raw_record raw;
 
-	/* Skip samples that are invalid or for which the instruction address
-	 * is not predictable.	For the latter, the wait-state bit is set.
-	 */
-	if (sample->I || sample->W)
-		return 0;
-
+	/* Setup perf sample */
 	perf_sample_data_init(&data, 0, event->hw.last_period);
+	raw.size = sfr->size;
+	raw.data = sfr;
+	data.raw = &raw;
 
 	/* Setup pt_regs to look like an CPU-measurement external interrupt
 	 * using the Program Request Alert code.  The regs.int_parm_long
@@ -893,14 +993,14 @@ static int perf_push_sample(struct perf_event *event,
 	regs.int_parm = CPU_MF_INT_SF_PRA;
 	sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
 
-	regs.psw.addr = sample->ia;
-	if (sample->T)
+	regs.psw.addr = sfr->basic.ia;
+	if (sfr->basic.T)
 		regs.psw.mask |= PSW_MASK_DAT;
-	if (sample->W)
+	if (sfr->basic.W)
 		regs.psw.mask |= PSW_MASK_WAIT;
-	if (sample->P)
+	if (sfr->basic.P)
 		regs.psw.mask |= PSW_MASK_PSTATE;
-	switch (sample->AS) {
+	switch (sfr->basic.AS) {
 	case 0x0:
 		regs.psw.mask |= PSW_ASC_PRIMARY;
 		break;
@@ -922,7 +1022,7 @@ static int perf_push_sample(struct perf_event *event,
 	 * purposes too.
 	 * For now, simply use a non-zero value as guest indicator.
 	 */
-	if (sample->hpp)
+	if (sfr->basic.hpp)
 		sde_regs->in_guest = 1;
 
 	overflow = 0;
@@ -942,51 +1042,155 @@ static void perf_event_count_update(struct perf_event *event, u64 count)
 	local64_add(count, &event->count);
 }
 
+static int sample_format_is_valid(struct hws_combined_entry *sample,
+				   unsigned int flags)
+{
+	if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
+		/* Only basic-sampling data entries with data-entry-format
+		 * version of 0x0001 can be processed.
+		 */
+		if (sample->basic.def != 0x0001)
+			return 0;
+	if (flags & PERF_CPUM_SF_DIAG_MODE)
+		/* The data-entry-format number of diagnostic-sampling data
+		 * entries can vary.  Because diagnostic data is just passed
+		 * through, do only a sanity check on the DEF.
+		 */
+		if (sample->diag.def < 0x8001)
+			return 0;
+	return 1;
+}
+
+static int sample_is_consistent(struct hws_combined_entry *sample,
+				unsigned long flags)
+{
+	/* This check applies only to basic-sampling data entries of potentially
+	 * combined-sampling data entries.  Invalid entries cannot be processed
+	 * by the PMU and, thus, do not deliver an associated
+	 * diagnostic-sampling data entry.
+	 */
+	if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE)))
+		return 0;
+	/*
+	 * Samples are skipped if they are invalid or if their instruction
+	 * address is not predictable, i.e., if the wait-state bit is
+	 * set.
+	 */
+	if (sample->basic.I || sample->basic.W)
+		return 0;
+	return 1;
+}
+
+static void reset_sample_slot(struct hws_combined_entry *sample,
+			      unsigned long flags)
+{
+	if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
+		sample->basic.def = 0;
+	if (flags & PERF_CPUM_SF_DIAG_MODE)
+		sample->diag.def = 0;
+}
+
+static void sfr_store_sample(struct sf_raw_sample *sfr,
+			     struct hws_combined_entry *sample)
+{
+	if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE))
+		sfr->basic = sample->basic;
+	if (sfr->format & PERF_CPUM_SF_DIAG_MODE)
+		memcpy(&sfr->diag, &sample->diag, sfr->dsdes);
+}
+
+static void debug_sample_entry(struct hws_combined_entry *sample,
+			       struct hws_trailer_entry *te,
+			       unsigned long flags)
+{
+	debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
+			    "sampling data entry: te->f=%i basic.def=%04x (%p)"
+			    " diag.def=%04x (%p)\n", te->f,
+			    sample->basic.def, &sample->basic,
+			    (flags & PERF_CPUM_SF_DIAG_MODE)
+					? sample->diag.def : 0xFFFF,
+			    (flags & PERF_CPUM_SF_DIAG_MODE)
+					?  &sample->diag : NULL);
+}
+
 /* hw_collect_samples() - Walk through a sample-data-block and collect samples
  * @event:	The perf event
  * @sdbt:	Sample-data-block table
  * @overflow:	Event overflow counter
  *
- * Walks through a sample-data-block and collects hardware sample-data that is
- * pushed to the perf event subsystem.	The overflow reports the number of
- * samples that has been discarded due to an event overflow.
+ * Walks through a sample-data-block and collects sampling data entries that are
+ * then pushed to the perf event subsystem.  Depending on the sampling function,
+ * there can be either basic-sampling or combined-sampling data entries.  A
+ * combined-sampling data entry consists of a basic- and a diagnostic-sampling
+ * data entry.	The sampling function is determined by the flags in the perf
+ * event hardware structure.  The function always works with a combined-sampling
+ * data entry but ignores the diagnostic portion if it is not available.
+ *
+ * Note that the implementation focuses on basic-sampling data entries and, if
+ * such an entry is not valid, the entire combined-sampling data entry is
+ * ignored.
+ *
+ * The overflow variable counts the number of samples that have been discarded
+ * due to a perf event overflow.
  */
 static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
 			       unsigned long long *overflow)
 {
-	struct hws_data_entry *sample;
-	unsigned long *trailer;
+	unsigned long flags = SAMPL_FLAGS(&event->hw);
+	struct hws_combined_entry *sample;
+	struct hws_trailer_entry *te;
+	struct sf_raw_sample *sfr;
+	size_t sample_size;
+
+	/* Prepare and initialize raw sample data */
+	sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw);
+	sfr->format = flags & PERF_CPUM_SF_MODE_MASK;
 
-	trailer = trailer_entry_ptr(*sdbt);
-	sample = (struct hws_data_entry *) *sdbt;
-	while ((unsigned long *) sample < trailer) {
+	sample_size = event_sample_size(&event->hw);
+	te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+	sample = (struct hws_combined_entry *) *sdbt;
+	while ((unsigned long *) sample < (unsigned long *) te) {
 		/* Check for an empty sample */
-		if (!sample->def)
+		if (!sample->basic.def)
 			break;
 
 		/* Update perf event period */
 		perf_event_count_update(event, SAMPL_RATE(&event->hw));
 
-		/* Check for basic sampling mode */
-		if (sample->def == 0x0001) {
+		/* Check sampling data entry */
+		if (sample_format_is_valid(sample, flags)) {
 			/* If an event overflow occurred, the PMU is stopped to
 			 * throttle event delivery.  Remaining sample data is
 			 * discarded.
 			 */
-			if (!*overflow)
-				*overflow = perf_push_sample(event, sample);
-			else
+			if (!*overflow) {
+				if (sample_is_consistent(sample, flags)) {
+					/* Deliver sample data to perf */
+					sfr_store_sample(sfr, sample);
+					*overflow = perf_push_sample(event, sfr);
+				}
+			} else
 				/* Count discarded samples */
 				*overflow += 1;
-		} else
-			/* Sample slot is not yet written or other record */
-			debug_sprintf_event(sfdbg, 5, "hw_collect_samples: "
-					    "Unknown sample data entry format:"
-					    " %i\n", sample->def);
+		} else {
+			debug_sample_entry(sample, te, flags);
+			/* Sample slot is not yet written or other record.
+			 *
+			 * This condition can occur if the buffer was reused
+			 * from a combined basic- and diagnostic-sampling.
+			 * If only basic-sampling is then active, entries are
+			 * written into the larger diagnostic entries.
+			 * This is typically the case for sample-data-blocks
+			 * that are not full.  Stop processing if the first
+			 * invalid format was detected.
+			 */
+			if (!te->f)
+				break;
+		}
 
 		/* Reset sample slot and advance to next sample */
-		sample->def = 0;
-		sample++;
+		reset_sample_slot(sample, flags);
+		sample += sample_size;
 	}
 }
 
@@ -1104,6 +1308,8 @@ static void cpumsf_pmu_start(struct perf_event *event, int flags)
 	perf_pmu_disable(event->pmu);
 	event->hw.state = 0;
 	cpuhw->lsctl.cs = 1;
+	if (SAMPL_DIAG_MODE(&event->hw))
+		cpuhw->lsctl.cd = 1;
 	perf_pmu_enable(event->pmu);
 }
 
@@ -1119,6 +1325,7 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags)
 
 	perf_pmu_disable(event->pmu);
 	cpuhw->lsctl.cs = 0;
+	cpuhw->lsctl.cd = 0;
 	event->hw.state |= PERF_HES_STOPPED;
 
 	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
@@ -1158,11 +1365,13 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
 
 	/* Ensure sampling functions are in the disabled state.  If disabled,
 	 * switch on sampling enable control. */
-	if (WARN_ON_ONCE(cpuhw->lsctl.es == 1)) {
+	if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) {
 		err = -EAGAIN;
 		goto out;
 	}
 	cpuhw->lsctl.es = 1;
+	if (SAMPL_DIAG_MODE(&event->hw))
+		cpuhw->lsctl.ed = 1;
 
 	/* Set in_use flag and store event */
 	event->hw.idx = 0;	  /* only one sampling event per CPU supported */
@@ -1185,6 +1394,7 @@ static void cpumsf_pmu_del(struct perf_event *event, int flags)
 	cpumsf_pmu_stop(event, PERF_EF_UPDATE);
 
 	cpuhw->lsctl.es = 0;
+	cpuhw->lsctl.ed = 0;
 	cpuhw->flags &= ~PMU_F_IN_USE;
 	cpuhw->event = NULL;
 
@@ -1198,9 +1408,11 @@ static int cpumsf_pmu_event_idx(struct perf_event *event)
 }
 
 CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
 
 static struct attribute *cpumsf_pmu_events_attr[] = {
 	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
+	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG),
 	NULL,
 };
 
@@ -1351,8 +1563,9 @@ static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
 		return rc;
 
 	sfb_set_limits(min, max);
-	pr_info("Changed sampling buffer settings: min=%lu max=%lu\n",
-		CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+	pr_info("The sampling buffer limits have changed to: "
+		"min=%lu max=%lu (diag=x%lu)\n",
+		CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
 	return 0;
 }
 
@@ -1362,13 +1575,38 @@ static struct kernel_param_ops param_ops_sfb_size = {
 	.get = param_get_sfb_size,
 };
 
+#define RS_INIT_FAILURE_QSI	  0x0001
+#define RS_INIT_FAILURE_BSDES	  0x0002
+#define RS_INIT_FAILURE_ALRT	  0x0003
+#define RS_INIT_FAILURE_PERF	  0x0004
+static void __init pr_cpumsf_err(unsigned int reason)
+{
+	pr_err("Sampling facility support for perf is not available: "
+	       "reason=%04x\n", reason);
+}
+
 static int __init init_cpum_sampling_pmu(void)
 {
+	struct hws_qsi_info_block si;
 	int err;
 
 	if (!cpum_sf_avail())
 		return -ENODEV;
 
+	memset(&si, 0, sizeof(si));
+	if (qsi(&si)) {
+		pr_cpumsf_err(RS_INIT_FAILURE_QSI);
+		return -ENODEV;
+	}
+
+	if (si.bsdes != sizeof(struct hws_basic_entry)) {
+		pr_cpumsf_err(RS_INIT_FAILURE_BSDES);
+		return -EINVAL;
+	}
+
+	if (si.ad)
+		sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+
 	sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
 	if (!sfdbg)
 		pr_err("Registering for s390dbf failed\n");
@@ -1376,13 +1614,13 @@ static int __init init_cpum_sampling_pmu(void)
 
 	err = register_external_interrupt(0x1407, cpumf_measurement_alert);
 	if (err) {
-		pr_err("Failed to register for CPU-measurement alerts\n");
+		pr_cpumsf_err(RS_INIT_FAILURE_ALRT);
 		goto out;
 	}
 
 	err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
 	if (err) {
-		pr_err("Failed to register cpum_sf pmu\n");
+		pr_cpumsf_err(RS_INIT_FAILURE_PERF);
 		unregister_external_interrupt(0x1407, cpumf_measurement_alert);
 		goto out;
 	}
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 60a68261d091..91aa215f947f 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -139,16 +139,21 @@ static void print_debug_sf(void)
 	int cpu = smp_processor_id();
 
 	memset(&si, 0, sizeof(si));
-	if (qsi(&si)) {
-		pr_err("CPU[%i]: CPM_SF: qsi failed\n");
+	if (qsi(&si))
 		return;
-	}
 
-	pr_info("CPU[%i]: CPM_SF: as=%i es=%i cs=%i bsdes=%i dsdes=%i"
-		" min=%i max=%i cpu_speed=%i tear=%p dear=%p\n",
-		cpu, si.as, si.es, si.cs, si.bsdes, si.dsdes,
-		si.min_sampl_rate, si.max_sampl_rate, si.cpu_speed,
-		si.tear, si.dear);
+	pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%i max=%i cpu_speed=%i\n",
+		cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate,
+		si.cpu_speed);
+
+	if (si.as)
+		pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i"
+			" bsdes=%i tear=%p dear=%p\n", cpu,
+			si.as, si.es, si.cs, si.bsdes, si.tear, si.dear);
+	if (si.ad)
+		pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i"
+			" dsdes=%i tear=%p dear=%p\n", cpu,
+			si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear);
 }
 
 void perf_event_print_debug(void)
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index eb095874540d..a32c96761eab 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -799,7 +799,7 @@ static void worker_on_interrupt(unsigned int cpu)
 static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
 		unsigned long *dear)
 {
-	struct hws_data_entry *sample_data_ptr;
+	struct hws_basic_entry *sample_data_ptr;
 	unsigned long *trailer;
 
 	trailer = trailer_entry_ptr(*sdbt);
@@ -809,7 +809,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
 		trailer = dear;
 	}
 
-	sample_data_ptr = (struct hws_data_entry *)(*sdbt);
+	sample_data_ptr = (struct hws_basic_entry *)(*sdbt);
 
 	while ((unsigned long *)sample_data_ptr < trailer) {
 		struct pt_regs *regs = NULL;
-- 
cgit v1.2.3


From d7528862cf035994972c2c6f42c927db78f2f3a2 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Fri, 13 Dec 2013 12:45:01 +0100
Subject: s390/cpum_sf: Add flag to process full SDBs only

Add the PERF_CPUM_SF_FULL_BLOCKS flag to process only sample-data-blocks that
have the block-full-indicator bit set.  Sample-data-blocks that are partially
filled are discarded.  Use this flag if the sampling buffer is likely to be
shared among perf events that use different sampling modes.  In such
environments, flushing sample-data-blocks that are not completely filled might
cause invalid data formats.

Setting PERF_CPUM_SF_FULL_BLOCKS prevents potentially invalid sampling data
from being processed but, as a trade-off, also discards valid samples in
partially filled sample-data-blocks.  Note that sample-data-blocks might not
become full for small sampling frequencies or for workloads that are scheduled
for tiny intervals.

To sample with the PERF_CPUM_SF_FULL_BLOCKS flag, set the perf->attr.config1
to 0x0004.  For example:

	perf record -e cpum_sf/config=0xB000,config1=0x0004/

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/perf_event.h |  2 ++
 arch/s390/kernel/perf_cpum_sf.c    | 13 +++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index bd4573f1d65c..159a8ec6da9a 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -59,6 +59,7 @@ struct perf_sf_sde_regs {
 #define PERF_CPUM_SF_DIAG_MODE		0x0002	  /* Diagnostic-sampling flag */
 #define PERF_CPUM_SF_MODE_MASK		(PERF_CPUM_SF_BASIC_MODE| \
 					 PERF_CPUM_SF_DIAG_MODE)
+#define PERF_CPUM_SF_FULL_BLOCKS	0x0004	  /* Process full SDBs only */
 
 #define REG_NONE		0
 #define REG_OVERFLOW		1
@@ -69,6 +70,7 @@ struct perf_sf_sde_regs {
 #define SAMPL_RATE(hwc)		((hwc)->event_base)
 #define SAMPL_FLAGS(hwc)	((hwc)->config_base)
 #define SAMPL_DIAG_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+#define SDB_FULL_BLOCKS(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
 
 /* Structure for sampling data entries to be passed as perf raw sample data
  * to user space.  Note that raw sample data must be aligned and, thus, might
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index b4ec058c4f10..3c3bc8d7b220 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -733,6 +733,10 @@ static int __hw_perf_event_init(struct perf_event *event)
 		SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
 	}
 
+	/* Check and set other sampling flags */
+	if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
+		SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
+
 	/* The sampling information (si) contains information about the
 	 * min/max sampling intervals and the CPU speed.  So calculate the
 	 * correct sampling interval and avoid the whole period adjust
@@ -1203,8 +1207,10 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
  * register of the specified perf event.
  *
  * Only full sample-data-blocks are processed.	Specify the flash_all flag
- * to also walk through partially filled sample-data-blocks.
- *
+ * to also walk through partially filled sample-data-blocks.  It is ignored
+ * if PERF_CPUM_SF_FULL_BLOCKS is set.	The PERF_CPUM_SF_FULL_BLOCKS flag
+ * enforces the processing of full sample-data-blocks only (trailer entries
+ * with the block-full-indicator bit set).
  */
 static void hw_perf_event_update(struct perf_event *event, int flush_all)
 {
@@ -1214,6 +1220,9 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 	unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
 	int done;
 
+	if (flush_all && SDB_FULL_BLOCKS(hwc))
+		flush_all = 0;
+
 	sdbt = (unsigned long *) TEAR_REG(hwc);
 	done = event_overflow = sampl_overflow = num_sdb = 0;
 	while (!done) {
-- 
cgit v1.2.3


From 61aa4884b70cdf3b2d373e18ebbbada43789eade Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Fri, 1 Nov 2013 10:08:20 +0100
Subject: s390: use IS_ENABLED to check if a CONFIG is set to y or m

This is shorter and should be used instead of the longer form
which checks for both possible config options.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/entry64.S    | 6 +++---
 arch/s390/kernel/perf_event.c | 2 +-
 arch/s390/kernel/s390_ksyms.c | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index e5b43c97a834..9532fe23be47 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -74,7 +74,7 @@ _TIF_TRACE    = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 	.endm
 
 	.macro LPP newpp
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
 	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
 	jz	.+8
 	.insn	s,0xb2800000,\newpp
@@ -82,7 +82,7 @@ _TIF_TRACE    = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 	.endm
 
 	.macro	HANDLE_SIE_INTERCEPT scratch,reason
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
 	tmhh	%r8,0x0001		# interrupting from user ?
 	jnz	.+62
 	lgr	\scratch,%r9
@@ -946,7 +946,7 @@ cleanup_idle_insn:
 	.quad	__critical_end - __critical_start
 
 
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
 /*
  * sie64a calling convention:
  * %r2 pointer to sie control block
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 91aa215f947f..a76d602f5928 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -60,7 +60,7 @@ static bool is_in_guest(struct pt_regs *regs)
 {
 	if (user_mode(regs))
 		return false;
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
 	return instruction_pointer(regs) == (unsigned long) &sie_exit;
 #else
 	return false;
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 3bac589844a7..9f60467938d1 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -5,7 +5,7 @@
 #ifdef CONFIG_FUNCTION_TRACER
 EXPORT_SYMBOL(_mcount);
 #endif
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+#if IS_ENABLED(CONFIG_KVM)
 EXPORT_SYMBOL(sie64a);
 EXPORT_SYMBOL(sie_exit);
 #endif
-- 
cgit v1.2.3


From 9efe4f2992025c3a4027c60bf36ae9d710ca3781 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky
Date: Tue, 17 Dec 2013 13:41:31 +0100
Subject: s390/mm: optimize randomize_et_dyn for !PF_RANDOMIZE

Skip the call to brk_rnd() if the PF_RANDOMIZE flag is not set for
the process. This avoids the costly get_random_int() call. Modify
arch_randomize_brk() as well to make it look like randomize_et_dyn().

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/process.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 7ed0d4e2a435..dd145321d215 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -261,20 +261,18 @@ static inline unsigned long brk_rnd(void)
 
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
-	unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd());
+	unsigned long ret;
 
-	if (ret < mm->brk)
-		return mm->brk;
-	return ret;
+	ret = PAGE_ALIGN(mm->brk + brk_rnd());
+	return (ret > mm->brk) ? ret : mm->brk;
 }
 
 unsigned long randomize_et_dyn(unsigned long base)
 {
-	unsigned long ret = PAGE_ALIGN(base + brk_rnd());
+	unsigned long ret;
 
 	if (!(current->flags & PF_RANDOMIZE))
 		return base;
-	if (ret < base)
-		return base;
-	return ret;
+	ret = PAGE_ALIGN(base + brk_rnd());
+	return (ret > base) ? ret : base;
 }
-- 
cgit v1.2.3


From 91f3e3eaba4413e76ce8e12e3ef10525a889142f Mon Sep 17 00:00:00 2001
From: Ingo Tuchscherer
Date: Wed, 20 Nov 2013 10:47:13 +0100
Subject: s390/zcrypt: add support for EP11 coprocessor cards

This feature extends the generic cryptographic device driver (zcrypt)
with a new capability to service EP11 requests for the Crypto Express4S
card in EP11 (Enterprise PKCS#11 mode) coprocessor mode.

Signed-off-by: Ingo Tuchscherer <ingo.tuchscherer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 Documentation/kmsg/s390/zcrypt         |  20 +++
 arch/s390/include/uapi/asm/zcrypt.h    |  65 +++++++++
 drivers/s390/crypto/ap_bus.c           |  31 +++-
 drivers/s390/crypto/ap_bus.h           |   4 +-
 drivers/s390/crypto/zcrypt_api.c       | 109 +++++++++++++-
 drivers/s390/crypto/zcrypt_api.h       |   2 +
 drivers/s390/crypto/zcrypt_cex4.c      |  20 ++-
 drivers/s390/crypto/zcrypt_error.h     |  18 ++-
 drivers/s390/crypto/zcrypt_msgtype50.c |  12 ++
 drivers/s390/crypto/zcrypt_msgtype6.c  | 260 +++++++++++++++++++++++++++++++++
 drivers/s390/crypto/zcrypt_msgtype6.h  |   2 +
 drivers/s390/crypto/zcrypt_pcica.c     |  11 ++
 drivers/s390/crypto/zcrypt_pcicc.c     |  12 ++
 13 files changed, 552 insertions(+), 14 deletions(-)
 create mode 100644 Documentation/kmsg/s390/zcrypt

(limited to 'arch/s390')

diff --git a/Documentation/kmsg/s390/zcrypt b/Documentation/kmsg/s390/zcrypt
new file mode 100644
index 000000000000..7fb2087409d6
--- /dev/null
+++ b/Documentation/kmsg/s390/zcrypt
@@ -0,0 +1,20 @@
+/*?
+ * Text: "Cryptographic device %x failed and was set offline\n"
+ * Severity: Error
+ * Parameter:
+ *   @1: device index
+ * Description:
+ * A cryptographic device failed to process a cryptographic request.
+ * The cryptographic device driver could not correct the error and
+ * set the device offline. The application that issued the
+ * request received an indication that the request has failed.
+ * User action:
+ * Use the lszcrypt command to confirm that the cryptographic
+ * hardware is still configured to your LPAR or z/VM guest virtual
+ * machine. If the device is available to your Linux instance the
+ * command output contains a line that begins with 'card<device index>',
+ * where <device index> is the two-digit decimal number in the message text.
+ * After ensuring that the device is available, use the chzcrypt command to
+ * set it online again.
+ * If the error persists, contact your support organization.
+ */
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index e83fc116f5bf..f2b18eacaca8 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -154,6 +154,67 @@ struct ica_xcRB {
 	unsigned short	priority_window;
 	unsigned int	status;
 } __attribute__((packed));
+
+/**
+ * struct ep11_cprb - EP11 connectivity programming request block
+ * @cprb_len:		CPRB header length [0x0020]
+ * @cprb_ver_id:	CPRB version id.   [0x04]
+ * @pad_000:		Alignment pad bytes
+ * @flags:		Admin cmd [0x80] or functional cmd [0x00]
+ * @func_id:		Function id / subtype [0x5434]
+ * @source_id:		Source id [originator id]
+ * @target_id:		Target id [usage/ctrl domain id]
+ * @ret_code:		Return code
+ * @reserved1:		Reserved
+ * @reserved2:		Reserved
+ * @payload_len:	Payload length
+ */
+struct ep11_cprb {
+	uint16_t	cprb_len;
+	unsigned char	cprb_ver_id;
+	unsigned char	pad_000[2];
+	unsigned char	flags;
+	unsigned char	func_id[2];
+	uint32_t	source_id;
+	uint32_t	target_id;
+	uint32_t	ret_code;
+	uint32_t	reserved1;
+	uint32_t	reserved2;
+	uint32_t	payload_len;
+} __attribute__((packed));
+
+/**
+ * struct ep11_target_dev - EP11 target device list
+ * @ap_id:	AP device id
+ * @dom_id:	Usage domain id
+ */
+struct ep11_target_dev {
+	uint16_t ap_id;
+	uint16_t dom_id;
+};
+
+/**
+ * struct ep11_urb - EP11 user request block
+ * @targets_num:	Number of target adapters
+ * @targets:		Addr to target adapter list
+ * @weight:		Level of request priority
+ * @req_no:		Request id/number
+ * @req_len:		Request length
+ * @req:		Addr to request block
+ * @resp_len:		Response length
+ * @resp:		Addr to response block
+ */
+struct ep11_urb {
+	uint16_t		targets_num;
+	uint64_t		targets;
+	uint64_t		weight;
+	uint64_t		req_no;
+	uint64_t		req_len;
+	uint64_t		req;
+	uint64_t		resp_len;
+	uint64_t		resp;
+} __attribute__((packed));
+
 #define AUTOSELECT ((unsigned int)0xFFFFFFFF)
 
 #define ZCRYPT_IOCTL_MAGIC 'z'
@@ -183,6 +244,9 @@ struct ica_xcRB {
  *   ZSECSENDCPRB
  *     Send an arbitrary CPRB to a crypto card.
  *
+ *   ZSENDEP11CPRB
+ *     Send an arbitrary EP11 CPRB to an EP11 coprocessor crypto card.
+ *
  *   Z90STAT_STATUS_MASK
  *     Return an 64 element array of unsigned chars for the status of
  *     all devices.
@@ -256,6 +320,7 @@ struct ica_xcRB {
 #define ICARSAMODEXPO	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
 #define ICARSACRT	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
 #define ZSECSENDCPRB	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
+#define ZSENDEP11CPRB	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
 
 /* New status calls */
 #define Z90STAT_TOTALCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int)
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 02300dcfac91..ab3baa7f9508 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -591,7 +591,13 @@ static int ap_init_queue(ap_qid_t qid)
 		if (rc != -ENODEV && rc != -EBUSY)
 			break;
 		if (i < AP_MAX_RESET - 1) {
-			udelay(5);
+			/* Time we are waiting until we give up (0.7sec * 90).
+			 * Since the actual request (in progress) will not be
+			 * interrupted immediately by the reset command,
+			 * we have to be patient. In worst case we have to
+			 * wait 60sec + reset time (some msec).
+			 */
+			schedule_timeout(AP_RESET_TIMEOUT);
 			status = ap_test_queue(qid, &dummy, &dummy);
 		}
 	}
@@ -992,6 +998,28 @@ static ssize_t ap_domain_show(struct bus_type *bus, char *buf)
 
 static BUS_ATTR(ap_domain, 0444, ap_domain_show, NULL);
 
+static ssize_t ap_control_domain_mask_show(struct bus_type *bus, char *buf)
+{
+	if (ap_configuration != NULL) { /* QCI not supported */
+		if (test_facility(76)) { /* format 1 - 256 bit domain field */
+			return snprintf(buf, PAGE_SIZE,
+				"0x%08x%08x%08x%08x%08x%08x%08x%08x\n",
+			ap_configuration->adm[0], ap_configuration->adm[1],
+			ap_configuration->adm[2], ap_configuration->adm[3],
+			ap_configuration->adm[4], ap_configuration->adm[5],
+			ap_configuration->adm[6], ap_configuration->adm[7]);
+		} else { /* format 0 - 16 bit domain field */
+			return snprintf(buf, PAGE_SIZE, "%08x%08x\n",
+			ap_configuration->adm[0], ap_configuration->adm[1]);
+		  }
+	} else {
+		return snprintf(buf, PAGE_SIZE, "not supported\n");
+	  }
+}
+
+static BUS_ATTR(ap_control_domain_mask, 0444,
+		ap_control_domain_mask_show, NULL);
+
 static ssize_t ap_config_time_show(struct bus_type *bus, char *buf)
 {
 	return snprintf(buf, PAGE_SIZE, "%d\n", ap_config_time);
@@ -1077,6 +1105,7 @@ static BUS_ATTR(poll_timeout, 0644, poll_timeout_show, poll_timeout_store);
 
 static struct bus_attribute *const ap_bus_attrs[] = {
 	&bus_attr_ap_domain,
+	&bus_attr_ap_control_domain_mask,
 	&bus_attr_config_time,
 	&bus_attr_poll_thread,
 	&bus_attr_ap_interrupts,
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index 685f6cc022f9..6405ae24a7a6 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -33,7 +33,7 @@
 #define AP_DEVICES 64		/* Number of AP devices. */
 #define AP_DOMAINS 16		/* Number of AP domains. */
 #define AP_MAX_RESET 90		/* Maximum number of resets. */
-#define AP_RESET_TIMEOUT (HZ/2)	/* Time in ticks for reset timeouts. */
+#define AP_RESET_TIMEOUT (HZ*0.7)	/* Time in ticks for reset timeouts. */
 #define AP_CONFIG_TIME 30	/* Time in seconds between AP bus rescans. */
 #define AP_POLL_TIME 1		/* Time in ticks between receive polls. */
 
@@ -125,6 +125,8 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr)
 #define AP_FUNC_CRT4K 2
 #define AP_FUNC_COPRO 3
 #define AP_FUNC_ACCEL 4
+#define AP_FUNC_EP11  5
+#define AP_FUNC_APXA  6
 
 /*
  * AP reset flag states
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index 31cfaa556072..4b824b15194f 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -44,6 +44,8 @@
 #include "zcrypt_debug.h"
 #include "zcrypt_api.h"
 
+#include "zcrypt_msgtype6.h"
+
 /*
  * Module description.
  */
@@ -554,9 +556,9 @@ static long zcrypt_send_cprb(struct ica_xcRB *xcRB)
 	spin_lock_bh(&zcrypt_device_lock);
 	list_for_each_entry(zdev, &zcrypt_device_list, list) {
 		if (!zdev->online || !zdev->ops->send_cprb ||
-		    (xcRB->user_defined != AUTOSELECT &&
-			AP_QID_DEVICE(zdev->ap_dev->qid) != xcRB->user_defined)
-		    )
+		   (zdev->ops->variant == MSGTYPE06_VARIANT_EP11) ||
+		   (xcRB->user_defined != AUTOSELECT &&
+		    AP_QID_DEVICE(zdev->ap_dev->qid) != xcRB->user_defined))
 			continue;
 		zcrypt_device_get(zdev);
 		get_device(&zdev->ap_dev->device);
@@ -581,6 +583,90 @@ static long zcrypt_send_cprb(struct ica_xcRB *xcRB)
 	return -ENODEV;
 }
 
+struct ep11_target_dev_list {
+	unsigned short		targets_num;
+	struct ep11_target_dev	*targets;
+};
+
+static bool is_desired_ep11dev(unsigned int dev_qid,
+			       struct ep11_target_dev_list dev_list)
+{
+	int n;
+
+	for (n = 0; n < dev_list.targets_num; n++, dev_list.targets++) {
+		if ((AP_QID_DEVICE(dev_qid) == dev_list.targets->ap_id) &&
+		    (AP_QID_QUEUE(dev_qid) == dev_list.targets->dom_id)) {
+			return true;
+		}
+	}
+	return false;
+}
+
+static long zcrypt_send_ep11_cprb(struct ep11_urb *xcrb)
+{
+	struct zcrypt_device *zdev;
+	bool autoselect = false;
+	int rc;
+	struct ep11_target_dev_list ep11_dev_list = {
+		.targets_num	=  0x00,
+		.targets	=  NULL,
+	};
+
+	ep11_dev_list.targets_num = (unsigned short) xcrb->targets_num;
+
+	/* empty list indicates autoselect (all available targets) */
+	if (ep11_dev_list.targets_num == 0)
+		autoselect = true;
+	else {
+		ep11_dev_list.targets = kcalloc((unsigned short)
+						xcrb->targets_num,
+						sizeof(struct ep11_target_dev),
+						GFP_KERNEL);
+		if (!ep11_dev_list.targets)
+			return -ENOMEM;
+
+		if (copy_from_user(ep11_dev_list.targets,
+				   (struct ep11_target_dev *)xcrb->targets,
+				   xcrb->targets_num *
+				   sizeof(struct ep11_target_dev)))
+			return -EFAULT;
+	}
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		/* check if device is eligible */
+		if (!zdev->online ||
+		    zdev->ops->variant != MSGTYPE06_VARIANT_EP11)
+			continue;
+
+		/* check if device is selected as valid target */
+		if (!is_desired_ep11dev(zdev->ap_dev->qid, ep11_dev_list) &&
+		    !autoselect)
+			continue;
+
+		zcrypt_device_get(zdev);
+		get_device(&zdev->ap_dev->device);
+		zdev->request_count++;
+		__zcrypt_decrease_preference(zdev);
+		if (try_module_get(zdev->ap_dev->drv->driver.owner)) {
+			spin_unlock_bh(&zcrypt_device_lock);
+			rc = zdev->ops->send_ep11_cprb(zdev, xcrb);
+			spin_lock_bh(&zcrypt_device_lock);
+			module_put(zdev->ap_dev->drv->driver.owner);
+		} else {
+			rc = -EAGAIN;
+		  }
+		zdev->request_count--;
+		__zcrypt_increase_preference(zdev);
+		put_device(&zdev->ap_dev->device);
+		zcrypt_device_put(zdev);
+		spin_unlock_bh(&zcrypt_device_lock);
+		return rc;
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+	return -ENODEV;
+}
+
 static long zcrypt_rng(char *buffer)
 {
 	struct zcrypt_device *zdev;
@@ -784,6 +870,23 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
 			return -EFAULT;
 		return rc;
 	}
+	case ZSENDEP11CPRB: {
+		struct ep11_urb __user *uxcrb = (void __user *)arg;
+		struct ep11_urb xcrb;
+		if (copy_from_user(&xcrb, uxcrb, sizeof(xcrb)))
+			return -EFAULT;
+		do {
+			rc = zcrypt_send_ep11_cprb(&xcrb);
+		} while (rc == -EAGAIN);
+		/* on failure: retry once again after a requested rescan */
+		if ((rc == -ENODEV) && (zcrypt_process_rescan()))
+			do {
+				rc = zcrypt_send_ep11_cprb(&xcrb);
+			} while (rc == -EAGAIN);
+		if (copy_to_user(uxcrb, &xcrb, sizeof(xcrb)))
+			return -EFAULT;
+		return rc;
+	}
 	case Z90STAT_STATUS_MASK: {
 		char status[AP_DEVICES];
 		zcrypt_status_mask(status);
diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h
index 89632919c993..b3d496bfaa7e 100644
--- a/drivers/s390/crypto/zcrypt_api.h
+++ b/drivers/s390/crypto/zcrypt_api.h
@@ -74,6 +74,7 @@ struct ica_z90_status {
 #define ZCRYPT_CEX2A		6
 #define ZCRYPT_CEX3C		7
 #define ZCRYPT_CEX3A		8
+#define ZCRYPT_CEX4	       10
 
 /**
  * Large random numbers are pulled in 4096 byte chunks from the crypto cards
@@ -89,6 +90,7 @@ struct zcrypt_ops {
 	long (*rsa_modexpo_crt)(struct zcrypt_device *,
 				struct ica_rsa_modexpo_crt *);
 	long (*send_cprb)(struct zcrypt_device *, struct ica_xcRB *);
+	long (*send_ep11_cprb)(struct zcrypt_device *, struct ep11_urb *);
 	long (*rng)(struct zcrypt_device *, char *);
 	struct list_head list;		/* zcrypt ops list. */
 	struct module *owner;
diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c
index ce1226398ac9..569f8b1d86c0 100644
--- a/drivers/s390/crypto/zcrypt_cex4.c
+++ b/drivers/s390/crypto/zcrypt_cex4.c
@@ -30,7 +30,12 @@
 #define CEX4A_MAX_MESSAGE_SIZE	MSGTYPE50_CRB3_MAX_MSG_SIZE
 #define CEX4C_MAX_MESSAGE_SIZE	MSGTYPE06_MAX_MSG_SIZE
 
-#define CEX4_CLEANUP_TIME	(15*HZ)
+/* Waiting time for requests to be processed.
+ * Currently there are some types of request which are not deterministic.
+ * But the maximum time limit managed by the stomper code is set to 60sec.
+ * Hence we have to wait at least that time period.
+ */
+#define CEX4_CLEANUP_TIME	(61*HZ)
 
 static struct ap_device_id zcrypt_cex4_ids[] = {
 	{ AP_DEVICE(AP_DEVICE_TYPE_CEX4)  },
@@ -101,6 +106,19 @@ static int zcrypt_cex4_probe(struct ap_device *ap_dev)
 			zdev->speed_rating = CEX4C_SPEED_RATING;
 			zdev->ops = zcrypt_msgtype_request(MSGTYPE06_NAME,
 							   MSGTYPE06_VARIANT_DEFAULT);
+		} else if (ap_test_bit(&ap_dev->functions, AP_FUNC_EP11)) {
+			zdev = zcrypt_device_alloc(CEX4C_MAX_MESSAGE_SIZE);
+			if (!zdev)
+				return -ENOMEM;
+			zdev->type_string = "CEX4P";
+			zdev->user_space_type = ZCRYPT_CEX4;
+			zdev->min_mod_size = CEX4C_MIN_MOD_SIZE;
+			zdev->max_mod_size = CEX4C_MAX_MOD_SIZE;
+			zdev->max_exp_bit_length = CEX4C_MAX_MOD_SIZE;
+			zdev->short_crt = 0;
+			zdev->speed_rating = CEX4C_SPEED_RATING;
+			zdev->ops = zcrypt_msgtype_request(MSGTYPE06_NAME,
+							MSGTYPE06_VARIANT_EP11);
 		}
 		break;
 	}
diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h
index 0079b6617211..7b23f43c7b08 100644
--- a/drivers/s390/crypto/zcrypt_error.h
+++ b/drivers/s390/crypto/zcrypt_error.h
@@ -106,15 +106,15 @@ static inline int convert_error(struct zcrypt_device *zdev,
 	//   REP88_ERROR_MESSAGE_TYPE		// '20' CEX2A
 		/*
 		 * To sent a message of the wrong type is a bug in the
-		 * device driver. Warn about it, disable the device
+		 * device driver. Send error msg, disable the device
 		 * and then repeat the request.
 		 */
-		WARN_ON(1);
 		atomic_set(&zcrypt_rescan_req, 1);
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			       zdev->ap_dev->qid,
-			       zdev->online, ehdr->reply_code);
+			zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
 		return -EAGAIN;
 	case REP82_ERROR_TRANSPORT_FAIL:
 	case REP82_ERROR_MACHINE_FAILURE:
@@ -122,15 +122,17 @@ static inline int convert_error(struct zcrypt_device *zdev,
 		/* If a card fails disable it and repeat the request. */
 		atomic_set(&zcrypt_rescan_req, 1);
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			       zdev->ap_dev->qid,
-			       zdev->online, ehdr->reply_code);
+			zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
 		return -EAGAIN;
 	default:
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			       zdev->ap_dev->qid,
-			       zdev->online, ehdr->reply_code);
+			zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c
index 7c522f338bda..334e282f255b 100644
--- a/drivers/s390/crypto/zcrypt_msgtype50.c
+++ b/drivers/s390/crypto/zcrypt_msgtype50.c
@@ -25,6 +25,9 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#define KMSG_COMPONENT "zcrypt"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/init.h>
@@ -332,6 +335,11 @@ static int convert_type80(struct zcrypt_device *zdev,
 	if (t80h->len < sizeof(*t80h) + outputdatalength) {
 		/* The result is too short, the CEX2A card may not do that.. */
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
+			       zdev->ap_dev->qid, zdev->online, t80h->code);
+
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 	if (zdev->user_space_type == ZCRYPT_CEX2A)
@@ -359,6 +367,10 @@ static int convert_response(struct zcrypt_device *zdev,
 				      outputdata, outputdatalength);
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
+			       zdev->ap_dev->qid, zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 7d97fa5a26d0..57bfda1bd71a 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -25,6 +25,9 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#define KMSG_COMPONENT "zcrypt"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/err.h>
@@ -50,6 +53,7 @@ struct response_type {
 };
 #define PCIXCC_RESPONSE_TYPE_ICA  0
 #define PCIXCC_RESPONSE_TYPE_XCRB 1
+#define PCIXCC_RESPONSE_TYPE_EP11 2
 
 MODULE_AUTHOR("IBM Corporation");
 MODULE_DESCRIPTION("Cryptographic Coprocessor (message type 6), " \
@@ -358,6 +362,91 @@ static int XCRB_msg_to_type6CPRB_msgX(struct zcrypt_device *zdev,
 	return 0;
 }
 
+/*
+ * Build a type 6 EP11 request in ap_msg->message from the user's ep11_urb.
+ * Returns 0, -EINVAL (bad length or payload length format) or -EFAULT.
+ */
+static int xcrb_msg_to_type6_ep11cprb_msgx(struct zcrypt_device *zdev,
+				       struct ap_message *ap_msg,
+				       struct ep11_urb *xcRB)
+{
+	unsigned int lfmt;
+	static struct type6_hdr static_type6_ep11_hdr = {
+		.type		=  0x06,
+		.rqid		= {0x00, 0x01},
+		.function_code	= {0x00, 0x00},
+		.agent_id[0]	=  0x58,	/* {'X'} */
+		.agent_id[1]	=  0x43,	/* {'C'} */
+		.offset1	=  0x00000058,
+	};
+
+	struct {
+		struct type6_hdr hdr;
+		struct ep11_cprb cprbx;
+		unsigned char	pld_tag;	/* fixed value 0x30 */
+		unsigned char	pld_lenfmt;	/* payload length format */
+	} __packed * msg = ap_msg->message;
+
+	struct pld_hdr {
+		unsigned char	func_tag;	/* fixed value 0x4 */
+		unsigned char	func_len;	/* fixed value 0x4 */
+		unsigned int	func_val;	/* function ID	   */
+		unsigned char	dom_tag;	/* fixed value 0x4 */
+		unsigned char	dom_len;	/* fixed value 0x4 */
+		unsigned int	dom_val;	/* domain id	   */
+	} __packed * payload_hdr;
+
+	/*
+	 * Length checks. CEIL4 presumably rounds up to a multiple of 4; if
+	 * that wraps, the result drops below the input and must be rejected.
+	 */
+	ap_msg->length = sizeof(struct type6_hdr) + xcRB->req_len;
+	if (CEIL4(xcRB->req_len) < xcRB->req_len ||
+	    CEIL4(xcRB->req_len) > MSGTYPE06_MAX_MSG_SIZE -
+				   (sizeof(struct type6_hdr)))
+		return -EINVAL;
+	if (CEIL4(xcRB->resp_len) < xcRB->resp_len ||
+	    CEIL4(xcRB->resp_len) > MSGTYPE06_MAX_MSG_SIZE -
+				    (sizeof(struct type86_fmt2_msg)))
+		return -EINVAL;
+
+	/* prepare type6 header */
+	msg->hdr = static_type6_ep11_hdr;
+	msg->hdr.ToCardLen1   = xcRB->req_len;
+	msg->hdr.FromCardLen1 = xcRB->resp_len;
+
+	/* Import CPRB data from the ioctl input parameter */
+	if (copy_from_user(&(msg->cprbx.cprb_len),
+			   (char *)xcRB->req, xcRB->req_len))
+		return -EFAULT;
+	/*
+	 * For non-management commands (first bit of 'flags' clear) the target
+	 * domain in the cprb and its payload is replaced by the usage domain.
+	 */
+	if (!(msg->cprbx.flags & 0x80)) {
+		msg->cprbx.target_id = (unsigned int)
+					AP_QID_QUEUE(zdev->ap_dev->qid);
+		if (msg->pld_lenfmt & 0x80) { /* ext. length format 2 or 3 */
+			switch (msg->pld_lenfmt & 0x03) {
+			case 1:
+				lfmt = 2;
+				break;
+			case 2:
+				lfmt = 3;
+				break;
+			default:
+				return -EINVAL;
+			}
+		} else {
+			lfmt = 1; /* length format #1 */
+		}
+		payload_hdr = (struct pld_hdr *)((&(msg->pld_lenfmt))+lfmt);
+		payload_hdr->dom_val = (unsigned int)
+					AP_QID_QUEUE(zdev->ap_dev->qid);
+	}
+	return 0;
+}
+
 /**
  * Copy results from a type 86 ICA reply message back to user space.
  *
@@ -377,6 +466,12 @@ struct type86x_reply {
 	char text[0];
 } __packed;
 
+struct type86_ep11_reply {
+	struct type86_hdr hdr;
+	struct type86_fmt2_ext fmt2;
+	struct ep11_cprb cprbx;
+} __packed;
+
 static int convert_type86_ica(struct zcrypt_device *zdev,
 			  struct ap_message *reply,
 			  char __user *outputdata,
@@ -440,6 +535,11 @@ static int convert_type86_ica(struct zcrypt_device *zdev,
 		if (service_rc == 8 && service_rs == 72)
 			return -EINVAL;
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
+			       zdev->ap_dev->qid, zdev->online,
+			       msg->hdr.reply_code);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 	data = msg->text;
@@ -503,6 +603,33 @@ static int convert_type86_xcrb(struct zcrypt_device *zdev,
 	return 0;
 }
 
+/**
+ * Hand the response CPRB of a type 86 EP11 reply back to user space.
+ *
+ * @zdev: crypto device pointer (not used here)
+ * @reply: reply AP message.
+ * @xcRB: pointer to EP11 user request block; resp_len is updated on success
+ *
+ * Returns 0 on success, -EINVAL if the user buffer is too small, or -EFAULT.
+ */
+static int convert_type86_ep11_xcrb(struct zcrypt_device *zdev,
+				    struct ap_message *reply,
+				    struct ep11_urb *xcRB)
+{
+	struct type86_fmt2_msg *t86 = reply->message;
+	const char *cprb = (char *)reply->message + t86->fmt2.offset1;
+
+	/* the caller's buffer must hold the complete response */
+	if (xcRB->resp_len < t86->fmt2.count1)
+		return -EINVAL;
+	if (copy_to_user((char *)xcRB->resp, cprb, t86->fmt2.count1))
+		return -EFAULT;
+
+	/* report the number of bytes actually delivered */
+	xcRB->resp_len = t86->fmt2.count1;
+	return 0;
+}
+
 static int convert_type86_rng(struct zcrypt_device *zdev,
 			  struct ap_message *reply,
 			  char *buffer)
@@ -551,6 +678,10 @@ static int convert_response_ica(struct zcrypt_device *zdev,
 		 * response */
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
+			       zdev->ap_dev->qid, zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
@@ -579,10 +710,40 @@ static int convert_response_xcrb(struct zcrypt_device *zdev,
 	default: /* Unknown response type, this should NEVER EVER happen */
 		xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
+			       zdev->ap_dev->qid, zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
 
+static int convert_response_ep11_xcrb(struct zcrypt_device *zdev,
+	struct ap_message *reply, struct ep11_urb *xcRB)
+{
+	struct type86_ep11_reply *t86r = reply->message;
+	/* Response type byte is the second byte in the response. */
+	unsigned char rtype = ((unsigned char *)reply->message)[1];
+
+	if (rtype == TYPE82_RSP_CODE || rtype == TYPE87_RSP_CODE)
+		return convert_error(zdev, reply);
+	if (rtype == TYPE86_RSP_CODE) {
+		if (t86r->hdr.reply_code)
+			return convert_error(zdev, reply);
+		if (t86r->cprbx.cprb_ver_id == 0x04)
+			return convert_type86_ep11_xcrb(zdev, reply, xcRB);
+		/* an incorrect cprb version counts as unknown response */
+	}
+
+	/* Unknown response type, this should NEVER EVER happen */
+	zdev->online = 0;
+	pr_err("Cryptographic device %x failed and was set offline\n",
+	       zdev->ap_dev->qid);
+	ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
+		       zdev->ap_dev->qid, zdev->online);
+	return -EAGAIN; /* repeat the request on a different device. */
+}
+
 static int convert_response_rng(struct zcrypt_device *zdev,
 				 struct ap_message *reply,
 				 char *data)
@@ -602,6 +763,10 @@ static int convert_response_rng(struct zcrypt_device *zdev,
 		 * response */
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
+			       zdev->ap_dev->qid, zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
@@ -657,6 +822,51 @@ out:
 	complete(&(resp_type->work));
 }
 
+/**
+ * Receive callback for EP11 requests: the AP bus code calls it once the
+ * crypto request "msg" has finished with the reply message "reply".
+ * It is called from tasklet context.
+ * @ap_dev: pointer to the AP device
+ * @msg: pointer to the AP message
+ * @reply: pointer to the AP reply message
+ *
+ * The reply (or a machine-failure error header if the reply is an error
+ * pointer or the response type is unexpected) is copied into msg->message,
+ * then the waiter on resp_type->work is woken up.
+ */
+static void zcrypt_msgtype6_receive_ep11(struct ap_device *ap_dev,
+					 struct ap_message *msg,
+					 struct ap_message *reply)
+{
+	static struct error_hdr error_reply = {
+		.type = TYPE82_RSP_CODE,
+		.reply_code = REP82_ERROR_MACHINE_FAILURE,
+	};
+	struct response_type *resp_type =
+		(struct response_type *)msg->private;
+	struct type86_ep11_reply *t86r;
+	const void *src = &error_reply;
+	int length = sizeof(error_reply);
+
+	if (!IS_ERR(reply)) {
+		t86r = reply->message;
+		if (t86r->hdr.type != TYPE86_RSP_CODE ||
+		    t86r->cprbx.cprb_ver_id != 0x04) {
+			/* not an EP11 type 86 reply: keep only its header */
+			src = reply->message;
+		} else if (resp_type->type == PCIXCC_RESPONSE_TYPE_EP11) {
+			/* full reply, capped at the message buffer size */
+			length = t86r->fmt2.offset1 + t86r->fmt2.count1;
+			length = min(MSGTYPE06_MAX_MSG_SIZE, length);
+			src = reply->message;
+		}
+	}
+
+	/* Copy the reply (or a synthetic error) to the request buffer. */
+	memcpy(msg->message, src, length);
+	complete(&(resp_type->work));
+}
+
 static atomic_t zcrypt_step = ATOMIC_INIT(0);
 
 /**
@@ -781,6 +991,46 @@ out_free:
 	return rc;
 }
 
+/**
+ * Handler for send_ep11_cprb requests; the request distributor calls it
+ * after picking a CEX4P device.
+ * @zdev: pointer to zcrypt_device structure that identifies the
+ *	  CEX4P device to the request distributor
+ * @xcrb: pointer to the ep11 user request block
+ */
+static long zcrypt_msgtype6_send_ep11_cprb(struct zcrypt_device *zdev,
+						struct ep11_urb *xcrb)
+{
+	struct response_type resp_type = {
+		.type = PCIXCC_RESPONSE_TYPE_EP11,
+	};
+	struct ap_message ap_msg;
+	int rc;
+
+	ap_init_message(&ap_msg);
+	ap_msg.message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.receive = zcrypt_msgtype6_receive_ep11;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	ap_msg.private = &resp_type;
+
+	rc = xcrb_msg_to_type6_ep11cprb_msgx(zdev, &ap_msg, xcrb);
+	if (rc == 0) {
+		init_completion(&resp_type.work);
+		ap_queue_message(zdev->ap_dev, &ap_msg);
+		rc = wait_for_completion_interruptible(&resp_type.work);
+		if (rc != 0)	/* Signal pending. */
+			ap_cancel_message(zdev->ap_dev, &ap_msg);
+		else
+			rc = convert_response_ep11_xcrb(zdev, &ap_msg, xcrb);
+	}
+	/* release the message buffer on every path */
+	kzfree(ap_msg.message);
+	return rc;
+}
+
 /**
  * The request distributor calls this function if it picked the PCIXCC/CEX2C
  * device to generate random data.
@@ -839,10 +1089,19 @@ static struct zcrypt_ops zcrypt_msgtype6_ops = {
 	.rng = zcrypt_msgtype6_rng,
 };
 
+static struct zcrypt_ops zcrypt_msgtype6_ep11_ops = {
+	.owner = THIS_MODULE,
+	.variant = MSGTYPE06_VARIANT_EP11,
+	.rsa_modexpo = NULL,
+	.rsa_modexpo_crt = NULL,
+	.send_ep11_cprb = zcrypt_msgtype6_send_ep11_cprb,
+};
+
 int __init zcrypt_msgtype6_init(void)
 {
 	zcrypt_msgtype_register(&zcrypt_msgtype6_norng_ops);
 	zcrypt_msgtype_register(&zcrypt_msgtype6_ops);
+	zcrypt_msgtype_register(&zcrypt_msgtype6_ep11_ops);
 	return 0;
 }
 
@@ -850,6 +1109,7 @@ void __exit zcrypt_msgtype6_exit(void)
 {
 	zcrypt_msgtype_unregister(&zcrypt_msgtype6_norng_ops);
 	zcrypt_msgtype_unregister(&zcrypt_msgtype6_ops);
+	zcrypt_msgtype_unregister(&zcrypt_msgtype6_ep11_ops);
 }
 
 module_init(zcrypt_msgtype6_init);
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h
index 1e500d3c0735..207247570623 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.h
+++ b/drivers/s390/crypto/zcrypt_msgtype6.h
@@ -32,6 +32,7 @@
 #define MSGTYPE06_NAME			"zcrypt_msgtype6"
 #define MSGTYPE06_VARIANT_DEFAULT	0
 #define MSGTYPE06_VARIANT_NORNG		1
+#define MSGTYPE06_VARIANT_EP11		2
 
 #define MSGTYPE06_MAX_MSG_SIZE		(12*1024)
 
@@ -99,6 +100,7 @@ struct type86_hdr {
 } __packed;
 
 #define TYPE86_RSP_CODE 0x86
+#define TYPE87_RSP_CODE 0x87
 #define TYPE86_FMT2	0x02
 
 struct type86_fmt2_ext {
diff --git a/drivers/s390/crypto/zcrypt_pcica.c b/drivers/s390/crypto/zcrypt_pcica.c
index f2b71d8df01f..7a743f4c646c 100644
--- a/drivers/s390/crypto/zcrypt_pcica.c
+++ b/drivers/s390/crypto/zcrypt_pcica.c
@@ -24,6 +24,9 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#define KMSG_COMPONENT "zcrypt"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/init.h>
@@ -199,6 +202,10 @@ static int convert_type84(struct zcrypt_device *zdev,
 	if (t84h->len < sizeof(*t84h) + outputdatalength) {
 		/* The result is too short, the PCICA card may not do that.. */
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
+			       zdev->ap_dev->qid, zdev->online, t84h->code);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 	BUG_ON(t84h->len > PCICA_MAX_RESPONSE_SIZE);
@@ -223,6 +230,10 @@ static int convert_response(struct zcrypt_device *zdev,
 				      outputdata, outputdatalength);
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
+			       zdev->ap_dev->qid, zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
diff --git a/drivers/s390/crypto/zcrypt_pcicc.c b/drivers/s390/crypto/zcrypt_pcicc.c
index 0d90a4334055..4d14c04b746e 100644
--- a/drivers/s390/crypto/zcrypt_pcicc.c
+++ b/drivers/s390/crypto/zcrypt_pcicc.c
@@ -24,6 +24,9 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#define KMSG_COMPONENT "zcrypt"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/gfp.h>
@@ -372,6 +375,11 @@ static int convert_type86(struct zcrypt_device *zdev,
 		if (service_rc == 8 && service_rs == 72)
 			return -EINVAL;
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
+			       zdev->ap_dev->qid, zdev->online,
+			       msg->hdr.reply_code);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 	data = msg->text;
@@ -425,6 +433,10 @@ static int convert_response(struct zcrypt_device *zdev,
 		/* no break, incorrect cprb version is an unknown response */
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
+		pr_err("Cryptographic device %x failed and was set offline\n",
+		       zdev->ap_dev->qid);
+		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
+			       zdev->ap_dev->qid, zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
-- 
cgit v1.2.3


From 075dfd82102d2048e43e1cbf48d558d915c50072 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 13 Jan 2014 13:35:16 +0100
Subject: s390/compat: fix PSW32_USER_BITS definition

PSW32_USER_BITS should define the primary address space for user space
instead of the home address space.
Symptom of this bug is that gdb doesn't work in compat mode.

The bug was introduced with e258d719ff28 "s390/uaccess: always run the kernel
in home space" and f26946d7ecad "s390/compat: make psw32_user_bits a constant
value again".

Cc: stable@vger.kernel.org # v3.13+
Reported-by: Andreas Arnez <arnez@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/compat.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 4bf9da03591e..5d7e8cf83bd6 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -38,7 +38,8 @@
 
 #define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \
 			 PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \
-			 PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | PSW32_ASC_HOME)
+			 PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \
+			 PSW32_ASC_PRIMARY)
 
 #define COMPAT_USER_HZ		100
 #define COMPAT_UTS_MACHINE	"s390\0\0\0\0"
-- 
cgit v1.2.3


From 28aa39b853fc889e672b73f69ce591d15e6306b0 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker
Date: Mon, 13 Jan 2014 16:02:28 +0100
Subject: s390: delete new instances of __cpuinit usage

The patch "s390/perf: add support for the CPU-Measurement Sampling
Facility" added a new instance of the __cpuinit macro usage.

We removed this a couple versions ago; we now want to remove
the compat no-op stubs.  Introducing new users is not what
we want to see at this point in time, as it will break once
the stubs are gone.

Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 3c3bc8d7b220..6c0d29827cb6 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1512,8 +1512,8 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
 	}
 }
 
-static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
-					unsigned long action, void *hcpu)
+static int cpumf_pmu_notifier(struct notifier_block *self,
+			      unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (long) hcpu;
 	int flags;
-- 
cgit v1.2.3


From 1c59a861d6982edf3f9905ad2098575336ae904d Mon Sep 17 00:00:00 2001
From: Eugene Crosser
Date: Wed, 24 Apr 2013 12:00:23 +0200
Subject: s390/qdio: bridgeport support - CHSC part

Introduce function for the "Perform network-subchannel operation"
CHSC command with operation code "bridgeport information",
and bit definitions for "characteristics" pertaining to this command.

Signed-off-by: Eugene Crosser <eugene.crosser@ru.ibm.com>
Reviewed-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/css_chars.h |  2 +
 arch/s390/include/asm/qdio.h      | 33 ++++++++++++++
 drivers/s390/cio/chsc.c           | 33 ++++++++++++++
 drivers/s390/cio/chsc.h           | 51 +++++++++++++++++++++-
 drivers/s390/cio/qdio_main.c      | 91 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 209 insertions(+), 1 deletion(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index 7e1c917bbba2..09d1dd46bd57 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -29,6 +29,8 @@ struct css_general_char {
 	u32 fcx : 1;	 /* bit 88 */
 	u32 : 19;
 	u32 alt_ssi : 1; /* bit 108 */
+	u32:1;
+	u32 narf:1;	 /* bit 110 */
 } __packed;
 
 extern struct css_general_char css_general_characteristics;
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 57d0d7e794b1..0a1abf1e69af 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -378,6 +378,34 @@ struct qdio_initialize {
 	struct qdio_outbuf_state *output_sbal_state_array;
 };
 
+/**
+ * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc()
+ * @l3_ipv6_addr: entry contains IPv6 address
+ * @l3_ipv4_addr: entry contains IPv4 address
+ * @l2_addr_lnid: entry contains MAC address and VLAN ID
+ */
+enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid};
+
+/**
+ * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc()
+ * @nit:  Network interface token
+ * @addr: Address of one of the three types
+ *
+ * The struct is passed to the callback function by qdio_brinfo_desc()
+ */
+struct qdio_brinfo_entry_l3_ipv6 {
+	u64 nit;
+	struct { unsigned char _s6_addr[16]; } addr;
+} __packed;
+struct qdio_brinfo_entry_l3_ipv4 {
+	u64 nit;
+	struct { uint32_t _s_addr; } addr;
+} __packed;
+struct qdio_brinfo_entry_l2 {
+	u64 nit;
+	struct { u8 mac[6]; u16 lnid; } addr_lnid;
+} __packed;
+
 #define QDIO_STATE_INACTIVE		0x00000002 /* after qdio_cleanup */
 #define QDIO_STATE_ESTABLISHED		0x00000004 /* after qdio_establish */
 #define QDIO_STATE_ACTIVE		0x00000008 /* after qdio_activate */
@@ -399,5 +427,10 @@ extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *);
 extern int qdio_shutdown(struct ccw_device *, int);
 extern int qdio_free(struct ccw_device *);
 extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *);
+extern int qdio_pnso_brinfo(struct subchannel_id schid,
+		int cnc, u16 *response,
+		void (*cb)(void *priv, enum qdio_brinfo_entry_type type,
+				void *entry),
+		void *priv);
 
 #endif /* __QDIO_H__ */
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index eee70cb8730b..f6b9188c5af5 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -55,6 +55,7 @@ int chsc_error_from_response(int response)
 	case 0x0004:
 		return -EOPNOTSUPP;
 	case 0x000b:
+	case 0x0107:		/* "Channel busy" for the op 0x003d */
 		return -EBUSY;
 	case 0x0100:
 	case 0x0102:
@@ -1202,3 +1203,35 @@ out:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(chsc_scm_info);
+
+/**
+ * chsc_pnso_brinfo() - Perform Network-Subchannel Operation, Bridge Info.
+ * @schid:		id of the subchannel on which PNSO is performed
+ * @brinfo_area:	request and response block for the operation
+ * @resume_token:	resume token for multiblock response
+ * @cnc:		Boolean change-notification control
+ *
+ * brinfo_area must be allocated by the caller with get_zeroed_page(GFP_KERNEL)
+ * Returns 0 on success, -EIO if chsc() fails, otherwise the error code that
+ * chsc_error_from_response() derives from the response code.
+ */
+int chsc_pnso_brinfo(struct subchannel_id schid,
+		struct chsc_pnso_area *brinfo_area,
+		struct chsc_brinfo_resume_token resume_token,
+		int cnc)
+{
+	memset(brinfo_area, 0, sizeof(*brinfo_area));
+	brinfo_area->request.length = 0x0030;
+	brinfo_area->request.code = 0x003d; /* network-subchannel operation */
+	brinfo_area->m	   = schid.m;
+	brinfo_area->ssid  = schid.ssid;
+	brinfo_area->sch   = schid.sch_no;
+	brinfo_area->cssid = schid.cssid;
+	brinfo_area->oc    = 0; /* Store-network-bridging-information list */
+	brinfo_area->resume_token = resume_token;
+	brinfo_area->n	   = (cnc != 0);	/* any non-zero cnc becomes 1 */
+	if (chsc(brinfo_area))
+		return -EIO;
+	return chsc_error_from_response(brinfo_area->response.code);
+}
+EXPORT_SYMBOL_GPL(chsc_pnso_brinfo);
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h
index 23d072e70eb2..7e53a9c8b0b9 100644
--- a/drivers/s390/cio/chsc.h
+++ b/drivers/s390/cio/chsc.h
@@ -61,7 +61,9 @@ struct css_chsc_char {
 	u32 : 20;
 	u32 scssc : 1;  /* bit 107 */
 	u32 scsscf : 1; /* bit 108 */
-	u32 : 19;
+	u32:7;
+	u32 pnso:1; /* bit 116 */
+	u32:11;
 }__attribute__((packed));
 
 extern struct css_chsc_char css_chsc_characteristics;
@@ -188,6 +190,53 @@ struct chsc_scm_info {
 
 int chsc_scm_info(struct chsc_scm_info *scm_area, u64 token);
 
+struct chsc_brinfo_resume_token {
+	u64 t1;
+	u64 t2;
+} __packed;
+
+struct chsc_brinfo_naihdr {
+	struct chsc_brinfo_resume_token resume_token;
+	u32:32;
+	u32 instance;
+	u32:24;
+	u8 naids;
+	u32 reserved[3];
+} __packed;
+
+struct chsc_pnso_area {	/* request/response block used by chsc_pnso_brinfo() */
+	struct chsc_header request;	/* length 0x0030, code 0x003d */
+	u8:2;
+	u8 m:1;		/* copied from schid.m */
+	u8:5;
+	u8:2;
+	u8 ssid:2;	/* copied from schid.ssid */
+	u8 fmt:4;
+	u16 sch;	/* subchannel number (schid.sch_no) */
+	u8:8;
+	u8 cssid;	/* copied from schid.cssid */
+	u16:16;
+	u8 oc;		/* 0: Store-network-bridging-information list */
+	u32:24;
+	struct chsc_brinfo_resume_token resume_token; /* multiblock resume */
+	u32 n:1;	/* change-notification control */
+	u32:31;
+	u32 reserved[3];
+	struct chsc_header response;
+	u32:32;
+	struct chsc_brinfo_naihdr naihdr;
+	union {		/* address entries; qdio_pnso_brinfo takes size from naids */
+		struct qdio_brinfo_entry_l3_ipv6 l3_ipv6[0];
+		struct qdio_brinfo_entry_l3_ipv4 l3_ipv4[0];
+		struct qdio_brinfo_entry_l2	 l2[0];
+	} entries;
+} __packed;
+
+int chsc_pnso_brinfo(struct subchannel_id schid,
+		struct chsc_pnso_area *brinfo_area,
+		struct chsc_brinfo_resume_token resume_token,
+		int cnc);
+
 #ifdef CONFIG_SCM_BUS
 int scm_update_information(void);
 int scm_process_availability_information(void);
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 3e602e8affa7..c883a085c059 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -1752,6 +1752,97 @@ int qdio_stop_irq(struct ccw_device *cdev, int nr)
 }
 EXPORT_SYMBOL(qdio_stop_irq);
 
+/**
+ * qdio_pnso_brinfo() - perform network subchannel op #0 - bridge info.
+ * @schid:		Subchannel ID.
+ * @cnc:		Boolean Change-Notification Control
+ * @response:		Response code will be stored at this address
+ * @cb: 		Callback function will be executed for each element
+ *			of the address list
+ * @priv:		Pointer passed from the caller to qdio_pnso_brinfo()
+ * @type:		Type of the address entry passed to the callback
+ * @entry:		Entry containing the address of the specified type
+ * @priv:		Pointer to pass to the callback function.
+ *
+ * Performs "Store-network-bridging-information list" operation and calls
+ * the callback function for every entry in the list. If "change-
+ * notification-control" is set, further changes in the address list
+ * will be reported via the IPA command.
+ */
+int qdio_pnso_brinfo(struct subchannel_id schid,
+		int cnc, u16 *response,
+		void (*cb)(void *priv, enum qdio_brinfo_entry_type type,
+				void *entry),
+		void *priv)
+{
+	struct chsc_pnso_area *rr;
+	int rc;
+	u32 prev_instance = 0;
+	int isfirstblock = 1;
+	int i, size, elems;
+
+	rr = (struct chsc_pnso_area *)get_zeroed_page(GFP_KERNEL);
+	if (rr == NULL)
+		return -ENOMEM;
+	do {
+		/* on the first iteration, naihdr.resume_token will be zero */
+		rc = chsc_pnso_brinfo(schid, rr, rr->naihdr.resume_token, cnc);
+		if (rc != 0 && rc != -EBUSY)
+			goto out;
+		if (rr->response.code != 1) {
+			rc = -EIO;
+			continue;
+		} else
+			rc = 0;
+
+		if (cb == NULL)
+			continue;
+
+		size = rr->naihdr.naids;
+		elems = (rr->response.length -
+				sizeof(struct chsc_header) -
+				sizeof(struct chsc_brinfo_naihdr)) /
+				size;
+
+		if (!isfirstblock && (rr->naihdr.instance != prev_instance)) {
+			/* Inform the caller that they need to scrap */
+			/* the data that was already reported via cb */
+				rc = -EAGAIN;
+				break;
+		}
+		isfirstblock = 0;
+		prev_instance = rr->naihdr.instance;
+		for (i = 0; i < elems; i++)
+			switch (size) {
+			case sizeof(struct qdio_brinfo_entry_l3_ipv6):
+				(*cb)(priv, l3_ipv6_addr,
+						&rr->entries.l3_ipv6[i]);
+				break;
+			case sizeof(struct qdio_brinfo_entry_l3_ipv4):
+				(*cb)(priv, l3_ipv4_addr,
+						&rr->entries.l3_ipv4[i]);
+				break;
+			case sizeof(struct qdio_brinfo_entry_l2):
+				(*cb)(priv, l2_addr_lnid,
+						&rr->entries.l2[i]);
+				break;
+			default:
+				WARN_ON_ONCE(1);
+				rc = -EIO;
+				goto out;
+			}
+	} while (rr->response.code == 0x0107 ||  /* channel busy */
+		  (rr->response.code == 1 && /* list stored */
+		   /* resume token is non-zero => list incomplete */
+		   (rr->naihdr.resume_token.t1 || rr->naihdr.resume_token.t2)));
+	(*response) = rr->response.code;
+
+out:
+	free_page((unsigned long)rr);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(qdio_pnso_brinfo);
+
 static int __init init_QDIO(void)
 {
 	int rc;
-- 
cgit v1.2.3


From b4a960159e6f5254ac3c95dd183789f402431977 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Fri, 13 Dec 2013 12:53:42 +0100
Subject: s390: Fix misspellings using 'codespell' tool

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/qdio.h          | 2 +-
 arch/s390/kernel/entry64.S            | 2 +-
 arch/s390/kernel/setup.c              | 2 +-
 arch/s390/kvm/priv.c                  | 2 +-
 arch/s390/lib/uaccess_pt.c            | 4 ++--
 arch/s390/mm/pgtable.c                | 4 ++--
 drivers/s390/block/dasd.c             | 2 +-
 drivers/s390/crypto/zcrypt_msgtype6.c | 2 +-
 8 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 0a1abf1e69af..d786c634e052 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -336,7 +336,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
 #define QDIO_FLAG_CLEANUP_USING_HALT		0x02
 
 /**
- * struct qdio_initialize - qdio initalization data
+ * struct qdio_initialize - qdio initialization data
  * @cdev: associated ccw device
  * @q_format: queue format
  * @adapter_name: name for the adapter
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 9532fe23be47..384e609b4711 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -975,7 +975,7 @@ sie_done:
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 # some program checks are suppressing. C code (e.g. do_protection_exception)
 # will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions beween sie64a and sie_done should not cause program
+# instructions between sie64a and sie_done should not cause program
 # interrupts. So lets use a nop (47 00 00 00) as a landing pad.
 # See also HANDLE_SIE_INTERCEPT
 rewind_pad:
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 4444875266ee..36e81d775031 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -373,7 +373,7 @@ static void __init setup_lowcore(void)
 
 	/*
 	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
-	 * restart data to the absolute zero lowcore. This is necesary if
+	 * restart data to the absolute zero lowcore. This is necessary if
 	 * PSW restart is done on an offline CPU that has lowcore zero.
 	 */
 	lc->restart_stack = (unsigned long) restart_stack;
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 2440602e6df1..d101dae62771 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -275,7 +275,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
 		return -EOPNOTSUPP;
 	} else {
 		/*
-		 * Set condition code 3 to stop the guest from issueing channel
+		 * Set condition code 3 to stop the guest from issuing channel
 		 * I/O instructions.
 		 */
 		kvm_s390_set_psw_cc(vcpu, 3);
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index dbdab3e7a1a6..0632dc50da78 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -74,8 +74,8 @@ static size_t copy_in_kernel(size_t count, void __user *to,
 
 /*
  * Returns kernel address for user virtual address. If the returned address is
- * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occured and the address
- * contains the (negative) exception code.
+ * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occurred and the
+ * address contains the (negative) exception code.
  */
 #ifdef CONFIG_64BIT
 
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index e794c88f699a..3584ed9b20a1 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -293,7 +293,7 @@ static int gmap_alloc_table(struct gmap *gmap,
  * @addr: address in the guest address space
  * @len: length of the memory area to unmap
  *
- * Returns 0 if the unmap succeded, -EINVAL if not.
+ * Returns 0 if the unmap succeeded, -EINVAL if not.
  */
 int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
 {
@@ -344,7 +344,7 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment);
  * @from: source address in the parent address space
  * @to: target address in the guest address space
  *
- * Returns 0 if the mmap succeded, -EINVAL or -ENOMEM if not.
+ * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
  */
 int gmap_map_segment(struct gmap *gmap, unsigned long from,
 		     unsigned long to, unsigned long len)
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index f302efa937ef..1eef0f586950 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -3386,7 +3386,7 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
 
 	if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
 		/*
-		 * safe offline allready running
+		 * safe offline already running
 		 * could only be called by normal offline so safe_offline flag
 		 * needs to be removed to run normal offline and kill all I/O
 		 */
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 57bfda1bd71a..dc542e0a3055 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -419,7 +419,7 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct zcrypt_device *zdev,
 	 replaced by the usage domain for non-management commands only.
 	 Therefore we check the first bit of the 'flags' parameter for
 	 management command indication.
-	   0 - non managment command
+	   0 - non management command
 	   1 - management command
 	*/
 	if (!((msg->cprbx.flags & 0x80) == 0x80)) {
-- 
cgit v1.2.3


From f85168e4d96b31b09ecf09a679820b031224e69e Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner
Date: Wed, 8 Jan 2014 16:45:39 +0100
Subject: s390/cpum_sf: fix printk format warnings

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_event.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index a76d602f5928..5d2dfa31c4ef 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -142,17 +142,17 @@ static void print_debug_sf(void)
 	if (qsi(&si))
 		return;
 
-	pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%i max=%i cpu_speed=%i\n",
+	pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n",
 		cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate,
 		si.cpu_speed);
 
 	if (si.as)
 		pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i"
-			" bsdes=%i tear=%p dear=%p\n", cpu,
+			" bsdes=%i tear=%016lx dear=%016lx\n", cpu,
 			si.as, si.es, si.cs, si.bsdes, si.tear, si.dear);
 	if (si.ad)
 		pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i"
-			" dsdes=%i tear=%p dear=%p\n", cpu,
+			" dsdes=%i tear=%016lx dear=%016lx\n", cpu,
 			si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear);
 }
 
-- 
cgit v1.2.3