From e545a6140b698b2494daf0b32107bdcc5e901390 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sun, 7 Sep 2008 16:57:22 +0200 Subject: kernel/cpu.c: create a CPU_STARTING cpu_chain notifier Right now, there is no notifier that is called on a new cpu, before the new cpu begins processing interrupts/softirqs. Various kernel functions need that notification, e.g. kvm works around its absence by calling smp_call_function_single(), and rcu polls cpu_online_map. The patch adds a CPU_STARTING notification. It also adds a helper function that sends the message to all cpu_chain handlers. Tested on x86-64. All other archs are untested. Especially on sparc, I'm not sure if I got it right. Signed-off-by: Manfred Spraul Signed-off-by: Ingo Molnar --- arch/s390/kernel/smp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 00b9b4dec5eb..9e8b1f9b8f4d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -585,6 +585,8 @@ int __cpuinit start_secondary(void *cpuvoid) /* Enable pfault pseudo page faults on this cpu. */ pfault_init(); + /* call cpu notifiers */ + notify_cpu_starting(smp_processor_id()); /* Mark this cpu as online */ spin_lock(&call_lock); cpu_set(smp_processor_id(), cpu_online_map); -- cgit v1.2.3 From 3d6e48f43340343d97839eadb1ab7b6a3ea98797 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Tue, 9 Sep 2008 12:38:56 +0200 Subject: [S390] CVE-2008-1514: prevent ptrace padding area read/write in 31-bit mode When running a 31-bit ptrace, on either an s390 or s390x kernel, reads and writes into a padding area in struct user_regs_struct32 will result in a kernel panic. This is also known as CVE-2008-1514. 
Test case available here: http://sources.redhat.com/cgi-bin/cvsweb.cgi/~checkout~/tests/ptrace-tests/tests/user-area-padding.c?cvsroot=systemtap Steps to reproduce: 1) wget the above 2) gcc -o user-area-padding-31bit user-area-padding.c -Wall -ggdb2 -D_GNU_SOURCE -m31 3) ./user-area-padding-31bit Test status ----------- Without patch, both s390 and s390x kernels panic. With patch, the test case, as well as the gdb testsuite, pass without incident, padding area reads returning zero, writes ignored. Nb: original version returned -EINVAL on write attempts, which broke the gdb test and made the test case slightly unhappy, Jan Kratochvil suggested the change to return 0 on write attempts. Signed-off-by: Jarod Wilson Tested-by: Jan Kratochvil Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/compat_ptrace.h | 1 + arch/s390/kernel/ptrace.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h index cde81fa64f89..a2be3a978d5c 100644 --- a/arch/s390/kernel/compat_ptrace.h +++ b/arch/s390/kernel/compat_ptrace.h @@ -42,6 +42,7 @@ struct user_regs_struct32 u32 gprs[NUM_GPRS]; u32 acrs[NUM_ACRS]; u32 orig_gpr2; + /* nb: there's a 4-byte hole here */ s390_fp_regs fp_regs; /* * These per registers are in here so that gdb can modify them diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 2815bfe348a6..c8b08289eb87 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -170,6 +170,13 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) */ tmp = (addr_t) task_pt_regs(child)->orig_gpr2; + } else if (addr < (addr_t) &dummy->regs.fp_regs) { + /* + * prevent reads of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + tmp = 0; + } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { /* * floating point regs. 
are stored in the thread structure @@ -270,6 +277,13 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) */ task_pt_regs(child)->orig_gpr2 = data; + } else if (addr < (addr_t) &dummy->regs.fp_regs) { + /* + * prevent writes of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + return 0; + } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { /* * floating point regs. are stored in the thread structure @@ -428,6 +442,13 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) */ tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4); + } else if (addr < (addr_t) &dummy32->regs.fp_regs) { + /* + * prevent reads of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + tmp = 0; + } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { /* * floating point regs. are stored in the thread structure @@ -514,6 +535,13 @@ static int __poke_user_compat(struct task_struct *child, */ *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp; + } else if (addr < (addr_t) &dummy32->regs.fp_regs) { + /* + * prevent writes of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + return 0; + } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { /* * floating point regs. are stored in the thread structure -- cgit v1.2.3 From d3d238c7744d08c36a114a59cb537d4c0c6c9a86 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 3 Oct 2008 21:54:59 +0200 Subject: [S390] nohz: Fix __udelay. This fixes a regression that came with 934b2857cc576ae53c92a66e63fce7ddcfa74691 ("[S390] nohz/sclp: disable timer on synchronous waits."). If udelay() gets called from a disabled context it sets the clock comparator to a value where it expects the next interrupt. When the interrupt happens the clock comparator is not reset and therefore the interrupt condition doesn't get cleared. The result is an endless timer interrupt loop. 
In addition this patch fixes also the following: rcutorture reveals that our __udelay implementation is still buggy, since it might schedule tasklets, but prevents their execution: NOHZ: local_softirq_pending 42 NOHZ: local_softirq_pending 02 NOHZ: local_softirq_pending 142 NOHZ: local_softirq_pending 02 To fix this we make sure that only the clock comparator interrupt is enabled when the enabled wait psw is loaded. Also no code gets called anymore which might schedule tasklets. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/time.c | 2 ++ arch/s390/lib/delay.c | 88 ++++++++++++++++++++++++++++++------------------- 2 files changed, 56 insertions(+), 34 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index ca114fe46ffb..06acb1a18bbc 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -169,6 +169,8 @@ void init_cpu_timer(void) static void clock_comparator_interrupt(__u16 code) { + if (S390_lowcore.clock_comparator == -1ULL) + set_clock_comparator(S390_lowcore.clock_comparator); } static void etr_timing_alert(struct etr_irq_parm *); diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index fc6ab6094df8..0953cee05efc 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -1,14 +1,9 @@ /* - * arch/s390/lib/delay.c * Precise Delay Loops for S390 * - * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * - * Derived from "arch/i386/lib/delay.c" - * Copyright (C) 1993 Linus Torvalds - * Copyright (C) 1997 Martin Mares + * Copyright IBM Corp. 1999,2008 + * Author(s): Martin Schwidefsky , + * Heiko Carstens , */ #include @@ -29,30 +24,31 @@ void __delay(unsigned long loops) asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1)); } -/* - * Waits for 'usecs' microseconds using the TOD clock comparator. 
- */ -void __udelay(unsigned long usecs) +static void __udelay_disabled(unsigned long usecs) { - u64 end, time, old_cc = 0; - unsigned long flags, cr0, mask, dummy; - int irq_context; + unsigned long mask, cr0, cr0_saved; + u64 clock_saved; - irq_context = in_interrupt(); - if (!irq_context) - local_bh_disable(); - local_irq_save(flags); - if (raw_irqs_disabled_flags(flags)) { - old_cc = local_tick_disable(); - S390_lowcore.clock_comparator = -1ULL; - __ctl_store(cr0, 0, 0); - dummy = (cr0 & 0xffff00e0) | 0x00000800; - __ctl_load(dummy , 0, 0); - mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; - } else - mask = psw_kernel_bits | PSW_MASK_WAIT | - PSW_MASK_EXT | PSW_MASK_IO; + clock_saved = local_tick_disable(); + set_clock_comparator(get_clock() + ((u64) usecs << 12)); + __ctl_store(cr0_saved, 0, 0); + cr0 = (cr0_saved & 0xffff00e0) | 0x00000800; + __ctl_load(cr0 , 0, 0); + mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; + trace_hardirqs_on(); + __load_psw_mask(mask); + local_irq_disable(); + __ctl_load(cr0_saved, 0, 0); + local_tick_enable(clock_saved); + set_clock_comparator(S390_lowcore.clock_comparator); +} +static void __udelay_enabled(unsigned long usecs) +{ + unsigned long mask; + u64 end, time; + + mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT | PSW_MASK_IO; end = get_clock() + ((u64) usecs << 12); do { time = end < S390_lowcore.clock_comparator ? @@ -62,13 +58,37 @@ void __udelay(unsigned long usecs) __load_psw_mask(mask); local_irq_disable(); } while (get_clock() < end); + set_clock_comparator(S390_lowcore.clock_comparator); +} - if (raw_irqs_disabled_flags(flags)) { - __ctl_load(cr0, 0, 0); - local_tick_enable(old_cc); +/* + * Waits for 'usecs' microseconds using the TOD clock comparator. 
+ */ +void __udelay(unsigned long usecs) +{ + unsigned long flags; + + preempt_disable(); + local_irq_save(flags); + if (in_irq()) { + __udelay_disabled(usecs); + goto out; + } + if (in_softirq()) { + if (raw_irqs_disabled_flags(flags)) + __udelay_disabled(usecs); + else + __udelay_enabled(usecs); + goto out; } - if (!irq_context) + if (raw_irqs_disabled_flags(flags)) { + local_bh_disable(); + __udelay_disabled(usecs); _local_bh_enable(); - set_clock_comparator(S390_lowcore.clock_comparator); + goto out; + } + __udelay_enabled(usecs); +out: local_irq_restore(flags); + preempt_enable(); } -- cgit v1.2.3 From 7a0f475513fa573bc8e072021960313da32f0ee3 Mon Sep 17 00:00:00 2001 From: Klaus-Dieter Wacker Date: Fri, 10 Oct 2008 21:33:18 +0200 Subject: [S390] qdio enhanced SIGA (iqdio) support. Add support for z10 HiperSockets multiwrite SBALs on output queues. This is used on LPAR with EDDP enabled devices. Signed-off-by: Klaus-Dieter Wacker Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/qdio.h | 8 +++++++- drivers/s390/cio/qdio.h | 3 +++ drivers/s390/cio/qdio_main.c | 24 +++++++++++++++++++----- 3 files changed, 29 insertions(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 6813772171f2..4734c3f05354 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -299,7 +299,13 @@ struct qdio_ssqd_desc { u8 mbccnt; u16 qdioac2; u64 sch_token; - u64:64; + u8 mro; + u8 mri; + u8:8; + u8 sbalic; + u16:16; + u8:8; + u8 mmwc; } __attribute__ ((packed)); /* params are: ccw_device, qdio_error, queue_number, diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index af867731a5f4..e3ea1d5f2810 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -203,6 +203,9 @@ struct qdio_output_q { /* PCIs are enabled for the queue */ int pci_out_enabled; + /* IQDIO: output multiple buffers (enhanced SIGA) */ + int use_enh_siga; + /* timer to check for more outbound 
work */ struct timer_list timer; }; diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 719066ec0c01..a50682d2a0fa 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -316,6 +316,9 @@ static inline int qdio_do_siga_output(struct qdio_q *q, unsigned int *busy_bit) unsigned int fc = 0; unsigned long schid; + if (q->u.out.use_enh_siga) { + fc = 3; + } if (!is_qebsm(q)) schid = *((u32 *)&q->irq_ptr->schid); else { @@ -1449,6 +1452,8 @@ int qdio_establish(struct qdio_initialize *init_data) } qdio_setup_ssqd_info(irq_ptr); + sprintf(dbf_text, "qDmmwc%2x", irq_ptr->ssqd_desc.mmwc); + QDIO_DBF_TEXT2(0, setup, dbf_text); sprintf(dbf_text, "qib ac%2x", irq_ptr->qib.ac); QDIO_DBF_TEXT2(0, setup, dbf_text); @@ -1621,12 +1626,21 @@ static void handle_outbound(struct qdio_q *q, unsigned int callflags, if (multicast_outbound(q)) qdio_kick_outbound_q(q); else - /* - * One siga-w per buffer required for unicast - * HiperSockets. - */ - while (count--) + if ((q->irq_ptr->ssqd_desc.mmwc > 1) && + (count > 1) && + (count <= q->irq_ptr->ssqd_desc.mmwc)) { + /* exploit enhanced SIGA */ + q->u.out.use_enh_siga = 1; qdio_kick_outbound_q(q); + } else { + /* + * One siga-w per buffer required for unicast + * HiperSockets. + */ + q->u.out.use_enh_siga = 0; + while (count--) + qdio_kick_outbound_q(q); + } goto out; } -- cgit v1.2.3 From d86730bb9597b02bff59a3a5a01c0094d71a265f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 10 Oct 2008 21:33:19 +0200 Subject: [S390] s390: use sys_pause for 31bit pause entry point sys32_pause is a useless copy of the generic sys_pause. (and it's certainly not there for old sparc32 binaries..) 
Signed-off-by: Christoph Hellwig Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/compat_linux.c | 8 -------- arch/s390/kernel/compat_linux.h | 1 - arch/s390/kernel/compat_wrapper.S | 2 -- arch/s390/kernel/syscalls.S | 2 +- 4 files changed, 1 insertion(+), 12 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index d7f22226fc4e..98e246dc0233 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -608,14 +608,6 @@ asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, struct time return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); } -/* These are here just in case some old sparc32 binary calls it. */ -asmlinkage long sys32_pause(void) -{ - current->state = TASK_INTERRUPTIBLE; - schedule(); - return -ERESTARTNOHAND; -} - asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count, u32 poshi, u32 poslo) { diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 20723a062017..05f8516366ab 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -206,7 +206,6 @@ long sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz); long sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz); -long sys32_pause(void); long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count, u32 poshi, u32 poslo); long sys32_pwrite64(unsigned int fd, const char __user *ubuf, diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 328a20e880b5..ee51ca9e23b5 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -128,8 +128,6 @@ sys32_alarm_wrapper: llgfr %r2,%r2 # unsigned int jg sys_alarm # branch to system call -#sys32_pause_wrapper # void - .globl compat_sys_utime_wrapper compat_sys_utime_wrapper: llgtr %r2,%r2 # char * diff --git a/arch/s390/kernel/syscalls.S 
b/arch/s390/kernel/syscalls.S index c66d35e55142..3ae303914b42 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -37,7 +37,7 @@ SYSCALL(sys_stime,sys_ni_syscall,sys32_stime_wrapper) /* 25 old stime syscall * SYSCALL(sys_ptrace,sys_ptrace,sys32_ptrace_wrapper) SYSCALL(sys_alarm,sys_alarm,sys32_alarm_wrapper) NI_SYSCALL /* old fstat syscall */ -SYSCALL(sys_pause,sys_pause,sys32_pause) +SYSCALL(sys_pause,sys_pause,sys_pause) SYSCALL(sys_utime,sys_utime,compat_sys_utime_wrapper) /* 30 */ NI_SYSCALL /* old stty syscall */ NI_SYSCALL /* old gtty syscall */ -- cgit v1.2.3 From 753c4dd6a2fa2af81f5d809d610d29f2d9dd9bc1 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 10 Oct 2008 21:33:20 +0200 Subject: [S390] ptrace changes * System call parameter and result access functions * Add tracehook calls * Split syscall_trace into two functions do_syscall_trace_enter and do_syscall_trace_exit Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + arch/s390/include/asm/ptrace.h | 1 + arch/s390/include/asm/syscall.h | 80 +++++++++++++++++++++++++++++++++++++ arch/s390/include/asm/thread_info.h | 2 + arch/s390/kernel/entry.S | 50 ++++++++++++++++++----- arch/s390/kernel/entry64.S | 42 ++++++++++++++----- arch/s390/kernel/ptrace.c | 61 +++++++++++++++------------- arch/s390/kernel/signal.c | 13 ++++++ 8 files changed, 202 insertions(+), 48 deletions(-) create mode 100644 arch/s390/include/asm/syscall.h (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8d41908e2513..4c03049e7db9 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -74,6 +74,7 @@ config S390 select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_KVM if 64BIT + select HAVE_ARCH_TRACEHOOK source "init/Kconfig" diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index af2c9ac28a07..a7226f8143fb 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -490,6 +490,7 @@ extern void 
user_disable_single_step(struct task_struct *); #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0) #define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN) +#define user_stack_pointer(regs)((regs)->gprs[15]) #define regs_return_value(regs)((regs)->gprs[2]) #define profile_pc(regs) instruction_pointer(regs) extern void show_regs(struct pt_regs * regs); diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h new file mode 100644 index 000000000000..6e623971fbb9 --- /dev/null +++ b/arch/s390/include/asm/syscall.h @@ -0,0 +1,80 @@ +/* + * Access to user system call parameters and results + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ + +#ifndef _ASM_SYSCALL_H +#define _ASM_SYSCALL_H 1 + +#include + +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + if (regs->trap != __LC_SVC_OLD_PSW) + return -1; + return regs->gprs[2]; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + regs->gprs[2] = regs->orig_gpr2; +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + return (regs->gprs[2] >= -4096UL) ? -regs->gprs[2] : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->gprs[2]; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + regs->gprs[2] = error ? 
-error : val; +} + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + BUG_ON(i + n > 6); +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_31BIT)) { + if (i + n == 6) + args[--n] = (u32) regs->args[0]; + while (n-- > 0) + args[n] = (u32) regs->gprs[2 + i + n]; + } +#endif + if (i + n == 6) + args[--n] = regs->args[0]; + memcpy(args, &regs->gprs[2 + i], n * sizeof(args[0])); +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ + BUG_ON(i + n > 6); + if (i + n == 6) + regs->args[0] = args[--n]; + memcpy(&regs->gprs[2 + i], args, n * sizeof(args[0])); +} + +#endif /* _ASM_SYSCALL_H */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 91a8f93ad355..ea40a9d690fc 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -86,6 +86,7 @@ static inline struct thread_info *current_thread_info(void) * thread information flags bit numbers */ #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_RESTART_SVC 4 /* restart svc with new svc number */ @@ -100,6 +101,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 20 /* restore signal mask in do_signal() */ #define _TIF_SYSCALL_TRACE (1< #include #include +#include #include #include @@ -639,40 +640,44 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, } #endif -asmlinkage void -syscall_trace(struct pt_regs *regs, int entryexit) +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { - if (unlikely(current->audit_context) && entryexit) - audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]), 
regs->gprs[2]); - - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - goto out; - if (!(current->ptrace & PT_PTRACED)) - goto out; - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); + long ret; /* - * If the debuffer has set an invalid system call number, - * we prepare to skip the system call restart handling. + * The sysc_tracesys code in entry.S stored the system + * call number to gprs[2]. */ - if (!entryexit && regs->gprs[2] >= NR_syscalls) + ret = regs->gprs[2]; + if (test_thread_flag(TIF_SYSCALL_TRACE) && + (tracehook_report_syscall_entry(regs) || + regs->gprs[2] >= NR_syscalls)) { + /* + * Tracing decided this syscall should not happen or the + * debugger stored an invalid system call number. Skip + * the system call and the system call restart handling. + */ regs->trap = -1; - - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; + ret = -1; } - out: - if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(test_thread_flag(TIF_31BIT)?AUDIT_ARCH_S390:AUDIT_ARCH_S390X, - regs->gprs[2], regs->orig_gpr2, regs->gprs[3], - regs->gprs[4], regs->gprs[5]); + + if (unlikely(current->audit_context)) + audit_syscall_entry(test_thread_flag(TIF_31BIT) ? 
+ AUDIT_ARCH_S390 : AUDIT_ARCH_S390X, + regs->gprs[2], regs->orig_gpr2, + regs->gprs[3], regs->gprs[4], + regs->gprs[5]); + return ret; +} + +asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) +{ + if (unlikely(current->audit_context)) + audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]), + regs->gprs[2]); + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, 0); } /* diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index b97682040215..4f7fc3059a8e 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -507,6 +508,12 @@ void do_signal(struct pt_regs *regs) */ if (current->thread.per_info.single_step) set_thread_flag(TIF_SINGLE_STEP); + + /* + * Let tracing know that we've done the handler setup. + */ + tracehook_signal_handler(signr, &info, &ka, regs, + test_thread_flag(TIF_SINGLE_STEP)); } return; } @@ -526,3 +533,9 @@ void do_signal(struct pt_regs *regs) set_thread_flag(TIF_RESTART_SVC); } } + +void do_notify_resume(struct pt_regs *regs) +{ + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); +} -- cgit v1.2.3 From b2300b9efe1b8174833e17f37e975c9da00c388a Mon Sep 17 00:00:00 2001 From: Hongjie Yang Date: Fri, 10 Oct 2008 21:33:21 +0200 Subject: [S390] dcssblk: add >2G DCSSs support and stacked contiguous DCSSs support. The DCSS block device driver is modified to add >2G DCSSs support and allow a DCSS block device to map to a set of contiguous DCSSs. The extmem code is also modified to use new Diagnose x'64' subcodes for >2G DCSSs. 
Signed-off-by: Hongjie Yang Signed-off-by: Martin Schwidefsky --- arch/s390/mm/extmem.c | 251 +++++++++++++++++---- drivers/s390/block/dcssblk.c | 515 ++++++++++++++++++++++++++++++++----------- 2 files changed, 596 insertions(+), 170 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index f231f5ec74b6..580fc64cc735 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -43,20 +43,40 @@ #define DCSS_FINDSEG 0x0c #define DCSS_LOADNOLY 0x10 #define DCSS_SEGEXT 0x18 +#define DCSS_LOADSHRX 0x20 +#define DCSS_LOADNSRX 0x24 +#define DCSS_FINDSEGX 0x2c +#define DCSS_SEGEXTX 0x38 #define DCSS_FINDSEGA 0x0c struct qrange { - unsigned int start; // 3byte start address, 1 byte type - unsigned int end; // 3byte end address, 1 byte reserved + unsigned long start; /* last byte type */ + unsigned long end; /* last byte reserved */ }; struct qout64 { + unsigned long segstart; + unsigned long segend; + int segcnt; + int segrcnt; + struct qrange range[6]; +}; + +#ifdef CONFIG_64BIT +struct qrange_old { + unsigned int start; /* last byte type */ + unsigned int end; /* last byte reserved */ +}; + +/* output area format for the Diag x'64' old subcode x'18' */ +struct qout64_old { int segstart; int segend; int segcnt; int segrcnt; - struct qrange range[6]; + struct qrange_old range[6]; }; +#endif struct qin64 { char qopcode; @@ -86,6 +106,55 @@ static DEFINE_MUTEX(dcss_lock); static LIST_HEAD(dcss_list); static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC", "EW/EN-MIXED" }; +static int loadshr_scode, loadnsr_scode, findseg_scode; +static int segext_scode, purgeseg_scode; +static int scode_set; + +/* set correct Diag x'64' subcodes. 
*/ +static int +dcss_set_subcodes(void) +{ +#ifdef CONFIG_64BIT + char *name = kmalloc(8 * sizeof(char), GFP_DMA); + unsigned long rx, ry; + int rc; + + if (name == NULL) + return -ENOMEM; + + rx = (unsigned long) name; + ry = DCSS_FINDSEGX; + + strcpy(name, "dummy"); + asm volatile( + " diag %0,%1,0x64\n" + "0: ipm %2\n" + " srl %2,28\n" + " j 2f\n" + "1: la %2,3\n" + "2:\n" + EX_TABLE(0b, 1b) + : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc"); + + kfree(name); + /* Diag x'64' new subcodes are supported, set to new subcodes */ + if (rc != 3) { + loadshr_scode = DCSS_LOADSHRX; + loadnsr_scode = DCSS_LOADNSRX; + purgeseg_scode = DCSS_PURGESEG; + findseg_scode = DCSS_FINDSEGX; + segext_scode = DCSS_SEGEXTX; + return 0; + } +#endif + /* Diag x'64' new subcodes are not supported, set to old subcodes */ + loadshr_scode = DCSS_LOADNOLY; + loadnsr_scode = DCSS_LOADNSR; + purgeseg_scode = DCSS_PURGESEG; + findseg_scode = DCSS_FINDSEG; + segext_scode = DCSS_SEGEXT; + return 0; +} /* * Create the 8 bytes, ebcdic VM segment name from @@ -135,25 +204,45 @@ segment_by_name (char *name) * Perform a function on a dcss segment. 
*/ static inline int -dcss_diag (__u8 func, void *parameter, +dcss_diag(int *func, void *parameter, unsigned long *ret1, unsigned long *ret2) { unsigned long rx, ry; int rc; + if (scode_set == 0) { + rc = dcss_set_subcodes(); + if (rc < 0) + return rc; + scode_set = 1; + } rx = (unsigned long) parameter; - ry = (unsigned long) func; - asm volatile( + ry = (unsigned long) *func; + #ifdef CONFIG_64BIT - " sam31\n" - " diag %0,%1,0x64\n" - " sam64\n" + /* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */ + if (*func > DCSS_SEGEXT) + asm volatile( + " diag %0,%1,0x64\n" + " ipm %2\n" + " srl %2,28\n" + : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc"); + /* 31-bit Diag x'64' old subcode, switch to 31-bit addressing mode */ + else + asm volatile( + " sam31\n" + " diag %0,%1,0x64\n" + " sam64\n" + " ipm %2\n" + " srl %2,28\n" + : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc"); #else + asm volatile( " diag %0,%1,0x64\n" -#endif " ipm %2\n" " srl %2,28\n" : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc"); +#endif *ret1 = rx; *ret2 = ry; return rc; @@ -190,14 +279,45 @@ query_segment_type (struct dcss_segment *seg) qin->qoutlen = sizeof(struct qout64); memcpy (qin->qname, seg->dcss_name, 8); - diag_cc = dcss_diag (DCSS_SEGEXT, qin, &dummy, &vmrc); + diag_cc = dcss_diag(&segext_scode, qin, &dummy, &vmrc); + if (diag_cc < 0) { + rc = diag_cc; + goto out_free; + } if (diag_cc > 1) { PRINT_WARN ("segment_type: diag returned error %ld\n", vmrc); rc = dcss_diag_translate_rc (vmrc); goto out_free; } +#ifdef CONFIG_64BIT + /* Only old format of output area of Diagnose x'64' is supported, + copy data for the new format. 
*/ + if (segext_scode == DCSS_SEGEXT) { + struct qout64_old *qout_old; + qout_old = kzalloc(sizeof(struct qout64_old), GFP_DMA); + if (qout_old == NULL) { + rc = -ENOMEM; + goto out_free; + } + memcpy(qout_old, qout, sizeof(struct qout64_old)); + qout->segstart = (unsigned long) qout_old->segstart; + qout->segend = (unsigned long) qout_old->segend; + qout->segcnt = qout_old->segcnt; + qout->segrcnt = qout_old->segrcnt; + + if (qout->segcnt > 6) + qout->segrcnt = 6; + for (i = 0; i < qout->segrcnt; i++) { + qout->range[i].start = + (unsigned long) qout_old->range[i].start; + qout->range[i].end = + (unsigned long) qout_old->range[i].end; + } + kfree(qout_old); + } +#endif if (qout->segcnt > 6) { rc = -ENOTSUPP; goto out_free; @@ -268,6 +388,30 @@ segment_type (char* name) return seg.vm_segtype; } +/* + * check if segment collides with other segments that are currently loaded + * returns 1 if this is the case, 0 if no collision was found + */ +static int +segment_overlaps_others (struct dcss_segment *seg) +{ + struct list_head *l; + struct dcss_segment *tmp; + + BUG_ON(!mutex_is_locked(&dcss_lock)); + list_for_each(l, &dcss_list) { + tmp = list_entry(l, struct dcss_segment, list); + if ((tmp->start_addr >> 20) > (seg->end >> 20)) + continue; + if ((tmp->end >> 20) < (seg->start_addr >> 20)) + continue; + if (seg == tmp) + continue; + return 1; + } + return 0; +} + /* * real segment loading function, called from segment_load */ @@ -276,7 +420,8 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long { struct dcss_segment *seg = kmalloc(sizeof(struct dcss_segment), GFP_DMA); - int dcss_command, rc, diag_cc; + int rc, diag_cc; + unsigned long start_addr, end_addr, dummy; if (seg == NULL) { rc = -ENOMEM; @@ -287,6 +432,13 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long if (rc < 0) goto out_free; + if (loadshr_scode == DCSS_LOADSHRX) { + if (segment_overlaps_others(seg)) { + rc = -EBUSY; + goto out_free; + } + 
} + rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1); if (rc) @@ -316,20 +468,28 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long } if (do_nonshared) - dcss_command = DCSS_LOADNSR; + diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name, + &start_addr, &end_addr); else - dcss_command = DCSS_LOADNOLY; - - diag_cc = dcss_diag(dcss_command, seg->dcss_name, - &seg->start_addr, &seg->end); + diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name, + &start_addr, &end_addr); + if (diag_cc < 0) { + dcss_diag(&purgeseg_scode, seg->dcss_name, + &dummy, &dummy); + rc = diag_cc; + goto out_resource; + } if (diag_cc > 1) { PRINT_WARN ("segment_load: could not load segment %s - " - "diag returned error (%ld)\n",name,seg->end); - rc = dcss_diag_translate_rc (seg->end); - dcss_diag(DCSS_PURGESEG, seg->dcss_name, - &seg->start_addr, &seg->end); + "diag returned error (%ld)\n", + name, end_addr); + rc = dcss_diag_translate_rc(end_addr); + dcss_diag(&purgeseg_scode, seg->dcss_name, + &dummy, &dummy); goto out_resource; } + seg->start_addr = start_addr; + seg->end = end_addr; seg->do_nonshared = do_nonshared; atomic_set(&seg->ref_count, 1); list_add(&seg->list, &dcss_list); @@ -423,8 +583,8 @@ int segment_modify_shared (char *name, int do_nonshared) { struct dcss_segment *seg; - unsigned long dummy; - int dcss_command, rc, diag_cc; + unsigned long start_addr, end_addr, dummy; + int rc, diag_cc; mutex_lock(&dcss_lock); seg = segment_by_name (name); @@ -445,38 +605,51 @@ segment_modify_shared (char *name, int do_nonshared) goto out_unlock; } release_resource(seg->res); - if (do_nonshared) { - dcss_command = DCSS_LOADNSR; + if (do_nonshared) seg->res->flags &= ~IORESOURCE_READONLY; - } else { - dcss_command = DCSS_LOADNOLY; + else if (seg->vm_segtype == SEG_TYPE_SR || seg->vm_segtype == SEG_TYPE_ER) seg->res->flags |= IORESOURCE_READONLY; - } + if (request_resource(&iomem_resource, seg->res)) { PRINT_WARN("segment_modify_shared: could not 
reload segment %s" " - overlapping resources\n", name); rc = -EBUSY; kfree(seg->res); - goto out_del; + goto out_del_mem; + } + + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); + if (do_nonshared) + diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name, + &start_addr, &end_addr); + else + diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name, + &start_addr, &end_addr); + if (diag_cc < 0) { + rc = diag_cc; + goto out_del_res; } - dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy); - diag_cc = dcss_diag(dcss_command, seg->dcss_name, - &seg->start_addr, &seg->end); if (diag_cc > 1) { PRINT_WARN ("segment_modify_shared: could not reload segment %s" - " - diag returned error (%ld)\n",name,seg->end); - rc = dcss_diag_translate_rc (seg->end); - goto out_del; + " - diag returned error (%ld)\n", + name, end_addr); + rc = dcss_diag_translate_rc(end_addr); + goto out_del_res; } + seg->start_addr = start_addr; + seg->end = end_addr; seg->do_nonshared = do_nonshared; rc = 0; goto out_unlock; - out_del: + out_del_res: + release_resource(seg->res); + kfree(seg->res); + out_del_mem: vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); list_del(&seg->list); - dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy); + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); kfree(seg); out_unlock: mutex_unlock(&dcss_lock); @@ -510,7 +683,7 @@ segment_unload(char *name) kfree(seg->res); vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); list_del(&seg->list); - dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy); + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); kfree(seg); out_unlock: mutex_unlock(&dcss_lock); @@ -545,7 +718,7 @@ segment_save(char *name) endpfn = (seg->end) >> PAGE_SHIFT; sprintf(cmd1, "DEFSEG %s", name); for (i=0; i<seg->segcnt; i++) { - sprintf(cmd1+strlen(cmd1), " %X-%X %s", + sprintf(cmd1+strlen(cmd1), " %lX-%lX %s", seg->range[i].start >> PAGE_SHIFT, seg->range[i].end >> PAGE_SHIFT, 
segtype_string[seg->range[i].start & 0xff]); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index ea4272c8c677..a7ff167d5b81 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -31,7 +31,6 @@ #define PRINT_WARN(x...) printk(KERN_WARNING DCSSBLK_NAME " warning: " x) #define PRINT_ERR(x...) printk(KERN_ERR DCSSBLK_NAME " error: " x) - static int dcssblk_open(struct inode *inode, struct file *filp); static int dcssblk_release(struct inode *inode, struct file *filp); static int dcssblk_make_request(struct request_queue *q, struct bio *bio); @@ -48,6 +47,30 @@ static struct block_device_operations dcssblk_devops = { .direct_access = dcssblk_direct_access, }; +struct dcssblk_dev_info { + struct list_head lh; + struct device dev; + char segment_name[BUS_ID_SIZE]; + atomic_t use_count; + struct gendisk *gd; + unsigned long start; + unsigned long end; + int segment_type; + unsigned char save_pending; + unsigned char is_shared; + struct request_queue *dcssblk_queue; + int num_of_segments; + struct list_head seg_list; +}; + +struct segment_info { + struct list_head lh; + char segment_name[BUS_ID_SIZE]; + unsigned long start; + unsigned long end; + int segment_type; +}; + static ssize_t dcssblk_add_store(struct device * dev, struct device_attribute *attr, const char * buf, size_t count); static ssize_t dcssblk_remove_store(struct device * dev, struct device_attribute *attr, const char * buf, @@ -58,30 +81,20 @@ static ssize_t dcssblk_save_show(struct device *dev, struct device_attribute *at static ssize_t dcssblk_shared_store(struct device * dev, struct device_attribute *attr, const char * buf, size_t count); static ssize_t dcssblk_shared_show(struct device *dev, struct device_attribute *attr, char *buf); +static ssize_t dcssblk_seglist_show(struct device *dev, + struct device_attribute *attr, + char *buf); static DEVICE_ATTR(add, S_IWUSR, NULL, dcssblk_add_store); static DEVICE_ATTR(remove, S_IWUSR, NULL, 
dcssblk_remove_store); -static DEVICE_ATTR(save, S_IWUSR | S_IRUGO, dcssblk_save_show, +static DEVICE_ATTR(save, S_IWUSR | S_IRUSR, dcssblk_save_show, dcssblk_save_store); -static DEVICE_ATTR(shared, S_IWUSR | S_IRUGO, dcssblk_shared_show, +static DEVICE_ATTR(shared, S_IWUSR | S_IRUSR, dcssblk_shared_show, dcssblk_shared_store); +static DEVICE_ATTR(seglist, S_IRUSR, dcssblk_seglist_show, NULL); static struct device *dcssblk_root_dev; -struct dcssblk_dev_info { - struct list_head lh; - struct device dev; - char segment_name[BUS_ID_SIZE]; - atomic_t use_count; - struct gendisk *gd; - unsigned long start; - unsigned long end; - int segment_type; - unsigned char save_pending; - unsigned char is_shared; - struct request_queue *dcssblk_queue; -}; - static LIST_HEAD(dcssblk_devices); static struct rw_semaphore dcssblk_devices_sem; @@ -91,8 +104,15 @@ static struct rw_semaphore dcssblk_devices_sem; static void dcssblk_release_segment(struct device *dev) { - PRINT_DEBUG("segment release fn called for %s\n", dev_name(dev)); - kfree(container_of(dev, struct dcssblk_dev_info, dev)); + struct dcssblk_dev_info *dev_info; + struct segment_info *entry, *temp; + + dev_info = container_of(dev, struct dcssblk_dev_info, dev); + list_for_each_entry_safe(entry, temp, &dev_info->seg_list, lh) { + list_del(&entry->lh); + kfree(entry); + } + kfree(dev_info); module_put(THIS_MODULE); } @@ -142,6 +162,169 @@ dcssblk_get_device_by_name(char *name) return NULL; } +/* + * get the struct segment_info from seg_list + * for the given name. + * down_read(&dcssblk_devices_sem) must be held. + */ +static struct segment_info * +dcssblk_get_segment_by_name(char *name) +{ + struct dcssblk_dev_info *dev_info; + struct segment_info *entry; + + list_for_each_entry(dev_info, &dcssblk_devices, lh) { + list_for_each_entry(entry, &dev_info->seg_list, lh) { + if (!strcmp(name, entry->segment_name)) + return entry; + } + } + return NULL; +} + +/* + * get the highest address of the multi-segment block. 
+ */ +static unsigned long +dcssblk_find_highest_addr(struct dcssblk_dev_info *dev_info) +{ + unsigned long highest_addr; + struct segment_info *entry; + + highest_addr = 0; + list_for_each_entry(entry, &dev_info->seg_list, lh) { + if (highest_addr < entry->end) + highest_addr = entry->end; + } + return highest_addr; +} + +/* + * get the lowest address of the multi-segment block. + */ +static unsigned long +dcssblk_find_lowest_addr(struct dcssblk_dev_info *dev_info) +{ + int set_first; + unsigned long lowest_addr; + struct segment_info *entry; + + set_first = 0; + lowest_addr = 0; + list_for_each_entry(entry, &dev_info->seg_list, lh) { + if (set_first == 0) { + lowest_addr = entry->start; + set_first = 1; + } else { + if (lowest_addr > entry->start) + lowest_addr = entry->start; + } + } + return lowest_addr; +} + +/* + * Check continuity of segments. + */ +static int +dcssblk_is_continuous(struct dcssblk_dev_info *dev_info) +{ + int i, j, rc; + struct segment_info *sort_list, *entry, temp; + + if (dev_info->num_of_segments <= 1) + return 0; + + sort_list = kzalloc( + sizeof(struct segment_info) * dev_info->num_of_segments, + GFP_KERNEL); + if (sort_list == NULL) + return -ENOMEM; + i = 0; + list_for_each_entry(entry, &dev_info->seg_list, lh) { + memcpy(&sort_list[i], entry, sizeof(struct segment_info)); + i++; + } + + /* sort segments */ + for (i = 0; i < dev_info->num_of_segments; i++) + for (j = 0; j < dev_info->num_of_segments; j++) + if (sort_list[j].start > sort_list[i].start) { + memcpy(&temp, &sort_list[i], + sizeof(struct segment_info)); + memcpy(&sort_list[i], &sort_list[j], + sizeof(struct segment_info)); + memcpy(&sort_list[j], &temp, + sizeof(struct segment_info)); + } + + /* check continuity */ + for (i = 0; i < dev_info->num_of_segments - 1; i++) { + if ((sort_list[i].end + 1) != sort_list[i+1].start) { + PRINT_ERR("Segment %s is not contiguous with " + "segment %s\n", + sort_list[i].segment_name, + sort_list[i+1].segment_name); + rc = -EINVAL; + goto 
out; + } + /* EN and EW are allowed in a block device */ + if (sort_list[i].segment_type != sort_list[i+1].segment_type) { + if (!(sort_list[i].segment_type & SEGMENT_EXCLUSIVE) || + (sort_list[i].segment_type == SEG_TYPE_ER) || + !(sort_list[i+1].segment_type & + SEGMENT_EXCLUSIVE) || + (sort_list[i+1].segment_type == SEG_TYPE_ER)) { + PRINT_ERR("Segment %s has different type from " + "segment %s\n", + sort_list[i].segment_name, + sort_list[i+1].segment_name); + rc = -EINVAL; + goto out; + } + } + } + rc = 0; +out: + kfree(sort_list); + return rc; +} + +/* + * Load a segment + */ +static int +dcssblk_load_segment(char *name, struct segment_info **seg_info) +{ + int rc; + + /* already loaded? */ + down_read(&dcssblk_devices_sem); + *seg_info = dcssblk_get_segment_by_name(name); + up_read(&dcssblk_devices_sem); + if (*seg_info != NULL) + return -EEXIST; + + /* get a struct segment_info */ + *seg_info = kzalloc(sizeof(struct segment_info), GFP_KERNEL); + if (*seg_info == NULL) + return -ENOMEM; + + strcpy((*seg_info)->segment_name, name); + + /* load the segment */ + rc = segment_load(name, SEGMENT_SHARED, + &(*seg_info)->start, &(*seg_info)->end); + if (rc < 0) { + segment_warning(rc, (*seg_info)->segment_name); + kfree(*seg_info); + } else { + INIT_LIST_HEAD(&(*seg_info)->lh); + (*seg_info)->segment_type = rc; + } + return rc; +} + static void dcssblk_unregister_callback(struct device *dev) { device_unregister(dev); @@ -165,6 +348,7 @@ static ssize_t dcssblk_shared_store(struct device *dev, struct device_attribute *attr, const char *inbuf, size_t count) { struct dcssblk_dev_info *dev_info; + struct segment_info *entry, *temp; int rc; if ((count > 1) && (inbuf[1] != '\n') && (inbuf[1] != '\0')) @@ -172,46 +356,46 @@ dcssblk_shared_store(struct device *dev, struct device_attribute *attr, const ch down_write(&dcssblk_devices_sem); dev_info = container_of(dev, struct dcssblk_dev_info, dev); if (atomic_read(&dev_info->use_count)) { - PRINT_ERR("share: segment %s is 
busy!\n", - dev_info->segment_name); rc = -EBUSY; goto out; } if (inbuf[0] == '1') { - // reload segment in shared mode - rc = segment_modify_shared(dev_info->segment_name, - SEGMENT_SHARED); - if (rc < 0) { - BUG_ON(rc == -EINVAL); - if (rc != -EAGAIN) - goto removeseg; - } else { - dev_info->is_shared = 1; - switch (dev_info->segment_type) { - case SEG_TYPE_SR: - case SEG_TYPE_ER: - case SEG_TYPE_SC: - set_disk_ro(dev_info->gd,1); + /* reload segments in shared mode */ + list_for_each_entry(entry, &dev_info->seg_list, lh) { + rc = segment_modify_shared(entry->segment_name, + SEGMENT_SHARED); + if (rc < 0) { + BUG_ON(rc == -EINVAL); + if (rc != -EAGAIN) + goto removeseg; } } + dev_info->is_shared = 1; + switch (dev_info->segment_type) { + case SEG_TYPE_SR: + case SEG_TYPE_ER: + case SEG_TYPE_SC: + set_disk_ro(dev_info->gd, 1); + } } else if (inbuf[0] == '0') { - // reload segment in exclusive mode + /* reload segments in exclusive mode */ if (dev_info->segment_type == SEG_TYPE_SC) { PRINT_ERR("Segment type SC (%s) cannot be loaded in " - "non-shared mode\n", dev_info->segment_name); + "non-shared mode\n", dev_info->segment_name); rc = -EINVAL; goto out; } - rc = segment_modify_shared(dev_info->segment_name, - SEGMENT_EXCLUSIVE); - if (rc < 0) { - BUG_ON(rc == -EINVAL); - if (rc != -EAGAIN) - goto removeseg; - } else { - dev_info->is_shared = 0; - set_disk_ro(dev_info->gd, 0); + list_for_each_entry(entry, &dev_info->seg_list, lh) { + rc = segment_modify_shared(entry->segment_name, + SEGMENT_EXCLUSIVE); + if (rc < 0) { + BUG_ON(rc == -EINVAL); + if (rc != -EAGAIN) + goto removeseg; + } } + dev_info->is_shared = 0; + set_disk_ro(dev_info->gd, 0); } else { rc = -EINVAL; goto out; @@ -220,8 +404,14 @@ dcssblk_shared_store(struct device *dev, struct device_attribute *attr, const ch goto out; removeseg: - PRINT_ERR("Could not reload segment %s, removing it now!\n", - dev_info->segment_name); + PRINT_ERR("Could not reload segment(s) of the device %s, removing " + 
"segment(s) now!\n", + dev_info->segment_name); + temp = entry; + list_for_each_entry(entry, &dev_info->seg_list, lh) { + if (entry != temp) + segment_unload(entry->segment_name); + } list_del(&dev_info->lh); del_gendisk(dev_info->gd); @@ -254,6 +444,7 @@ static ssize_t dcssblk_save_store(struct device *dev, struct device_attribute *attr, const char *inbuf, size_t count) { struct dcssblk_dev_info *dev_info; + struct segment_info *entry; if ((count > 1) && (inbuf[1] != '\n') && (inbuf[1] != '\0')) return -EINVAL; @@ -263,14 +454,16 @@ dcssblk_save_store(struct device *dev, struct device_attribute *attr, const char if (inbuf[0] == '1') { if (atomic_read(&dev_info->use_count) == 0) { // device is idle => we save immediately - PRINT_INFO("Saving segment %s\n", + PRINT_INFO("Saving segment(s) of the device %s\n", dev_info->segment_name); - segment_save(dev_info->segment_name); + list_for_each_entry(entry, &dev_info->seg_list, lh) { + segment_save(entry->segment_name); + } } else { // device is busy => we save it when it becomes // idle in dcssblk_release - PRINT_INFO("Segment %s is currently busy, it will " - "be saved when it becomes idle...\n", + PRINT_INFO("Device %s is currently busy, segment(s) " + "will be saved when it becomes idle...\n", dev_info->segment_name); dev_info->save_pending = 1; } @@ -279,7 +472,8 @@ dcssblk_save_store(struct device *dev, struct device_attribute *attr, const char // device is busy & the user wants to undo his save // request dev_info->save_pending = 0; - PRINT_INFO("Pending save for segment %s deactivated\n", + PRINT_INFO("Pending save for segment(s) of the device " + "%s deactivated\n", dev_info->segment_name); } } else { @@ -290,67 +484,124 @@ dcssblk_save_store(struct device *dev, struct device_attribute *attr, const char return count; } +/* + * device attribute for showing all segments in a device + */ +static ssize_t +dcssblk_seglist_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int i; + + struct 
dcssblk_dev_info *dev_info; + struct segment_info *entry; + + down_read(&dcssblk_devices_sem); + dev_info = container_of(dev, struct dcssblk_dev_info, dev); + i = 0; + buf[0] = '\0'; + list_for_each_entry(entry, &dev_info->seg_list, lh) { + strcpy(&buf[i], entry->segment_name); + i += strlen(entry->segment_name); + buf[i] = '\n'; + i++; + } + up_read(&dcssblk_devices_sem); + return i; +} + /* * device attribute for adding devices */ static ssize_t dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - int rc, i; + int rc, i, j, num_of_segments; struct dcssblk_dev_info *dev_info; + struct segment_info *seg_info, *temp; char *local_buf; unsigned long seg_byte_size; dev_info = NULL; + seg_info = NULL; if (dev != dcssblk_root_dev) { rc = -EINVAL; goto out_nobuf; } + if ((count < 1) || (buf[0] == '\0') || (buf[0] == '\n')) { + rc = -ENAMETOOLONG; + goto out_nobuf; + } + local_buf = kmalloc(count + 1, GFP_KERNEL); if (local_buf == NULL) { rc = -ENOMEM; goto out_nobuf; } + /* * parse input */ + num_of_segments = 0; for (i = 0; ((buf[i] != '\0') && (buf[i] != '\n') && i < count); i++) { - local_buf[i] = toupper(buf[i]); + for (j = i; (buf[j] != ':') && + (buf[j] != '\0') && + (buf[j] != '\n') && + j < count; j++) { + local_buf[j-i] = toupper(buf[j]); + } + local_buf[j-i] = '\0'; + if (((j - i) == 0) || ((j - i) > 8)) { + rc = -ENAMETOOLONG; + goto seg_list_del; + } + + rc = dcssblk_load_segment(local_buf, &seg_info); + if (rc < 0) + goto seg_list_del; + /* + * get a struct dcssblk_dev_info + */ + if (num_of_segments == 0) { + dev_info = kzalloc(sizeof(struct dcssblk_dev_info), + GFP_KERNEL); + if (dev_info == NULL) { + rc = -ENOMEM; + goto out; + } + strcpy(dev_info->segment_name, local_buf); + dev_info->segment_type = seg_info->segment_type; + INIT_LIST_HEAD(&dev_info->seg_list); + } + list_add_tail(&seg_info->lh, &dev_info->seg_list); + num_of_segments++; + i = j; + + if ((buf[j] == '\0') || (buf[j] == '\n')) + break; } - 
local_buf[i] = '\0'; - if ((i == 0) || (i > 8)) { + + /* no trailing colon at the end of the input */ + if ((i > 0) && (buf[i-1] == ':')) { rc = -ENAMETOOLONG; - goto out; - } - /* - * already loaded? - */ - down_read(&dcssblk_devices_sem); - dev_info = dcssblk_get_device_by_name(local_buf); - up_read(&dcssblk_devices_sem); - if (dev_info != NULL) { - PRINT_WARN("Segment %s already loaded!\n", local_buf); - rc = -EEXIST; - goto out; - } - /* - * get a struct dcssblk_dev_info - */ - dev_info = kzalloc(sizeof(struct dcssblk_dev_info), GFP_KERNEL); - if (dev_info == NULL) { - rc = -ENOMEM; - goto out; + goto seg_list_del; } + strlcpy(local_buf, buf, i + 1); + dev_info->num_of_segments = num_of_segments; + rc = dcssblk_is_continuous(dev_info); + if (rc < 0) + goto seg_list_del; + + dev_info->start = dcssblk_find_lowest_addr(dev_info); + dev_info->end = dcssblk_find_highest_addr(dev_info); - strcpy(dev_info->segment_name, local_buf); - dev_set_name(&dev_info->dev, local_buf); + dev_set_name(&dev_info->dev, dev_info->segment_name); dev_info->dev.release = dcssblk_release_segment; INIT_LIST_HEAD(&dev_info->lh); - dev_info->gd = alloc_disk(DCSSBLK_MINORS_PER_DISK); if (dev_info->gd == NULL) { rc = -ENOMEM; - goto free_dev_info; + goto seg_list_del; } dev_info->gd->major = dcssblk_major; dev_info->gd->fops = &dcssblk_devops; @@ -360,65 +611,52 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char dev_info->gd->driverfs_dev = &dev_info->dev; blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request); blk_queue_hardsect_size(dev_info->dcssblk_queue, 4096); - /* - * load the segment - */ - rc = segment_load(local_buf, SEGMENT_SHARED, - &dev_info->start, &dev_info->end); - if (rc < 0) { - segment_warning(rc, dev_info->segment_name); - goto dealloc_gendisk; - } + seg_byte_size = (dev_info->end - dev_info->start + 1); set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors - PRINT_INFO("Loaded segment %s, size = %lu Byte, " + 
PRINT_INFO("Loaded segment(s) %s, size = %lu Byte, " "capacity = %lu (512 Byte) sectors\n", local_buf, seg_byte_size, seg_byte_size >> 9); - dev_info->segment_type = rc; dev_info->save_pending = 0; dev_info->is_shared = 1; dev_info->dev.parent = dcssblk_root_dev; /* - * get minor, add to list + *get minor, add to list */ down_write(&dcssblk_devices_sem); - if (dcssblk_get_device_by_name(local_buf)) { - up_write(&dcssblk_devices_sem); + if (dcssblk_get_segment_by_name(local_buf)) { rc = -EEXIST; - goto unload_seg; + goto release_gd; } rc = dcssblk_assign_free_minor(dev_info); - if (rc) { - up_write(&dcssblk_devices_sem); - PRINT_ERR("No free minor number available! " - "Unloading segment...\n"); - goto unload_seg; - } + if (rc) + goto release_gd; sprintf(dev_info->gd->disk_name, "dcssblk%d", MINOR(disk_devt(dev_info->gd))); list_add_tail(&dev_info->lh, &dcssblk_devices); if (!try_module_get(THIS_MODULE)) { rc = -ENODEV; - goto list_del; + goto dev_list_del; } /* * register the device */ rc = device_register(&dev_info->dev); if (rc) { - PRINT_ERR("Segment %s could not be registered RC=%d\n", - local_buf, rc); module_put(THIS_MODULE); - goto list_del; + goto dev_list_del; } get_device(&dev_info->dev); rc = device_create_file(&dev_info->dev, &dev_attr_shared); if (rc) goto unregister_dev; rc = device_create_file(&dev_info->dev, &dev_attr_save); + if (rc) + goto unregister_dev; + rc = device_create_file(&dev_info->dev, &dev_attr_seglist); if (rc) goto unregister_dev; @@ -434,7 +672,6 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char set_disk_ro(dev_info->gd,0); break; } - PRINT_DEBUG("Segment %s loaded successfully\n", local_buf); up_write(&dcssblk_devices_sem); rc = count; goto out; @@ -445,20 +682,27 @@ unregister_dev: dev_info->gd->queue = NULL; put_disk(dev_info->gd); device_unregister(&dev_info->dev); - segment_unload(dev_info->segment_name); + list_for_each_entry(seg_info, &dev_info->seg_list, lh) { + 
segment_unload(seg_info->segment_name); + } put_device(&dev_info->dev); up_write(&dcssblk_devices_sem); goto out; -list_del: +dev_list_del: list_del(&dev_info->lh); - up_write(&dcssblk_devices_sem); -unload_seg: - segment_unload(local_buf); -dealloc_gendisk: +release_gd: blk_cleanup_queue(dev_info->dcssblk_queue); dev_info->gd->queue = NULL; put_disk(dev_info->gd); -free_dev_info: + up_write(&dcssblk_devices_sem); +seg_list_del: + if (dev_info == NULL) + goto out; + list_for_each_entry_safe(seg_info, temp, &dev_info->seg_list, lh) { + list_del(&seg_info->lh); + segment_unload(seg_info->segment_name); + kfree(seg_info); + } kfree(dev_info); out: kfree(local_buf); @@ -473,6 +717,7 @@ static ssize_t dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct dcssblk_dev_info *dev_info; + struct segment_info *entry; int rc, i; char *local_buf; @@ -499,26 +744,28 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch dev_info = dcssblk_get_device_by_name(local_buf); if (dev_info == NULL) { up_write(&dcssblk_devices_sem); - PRINT_WARN("Segment %s is not loaded!\n", local_buf); + PRINT_WARN("Device %s is not loaded!\n", local_buf); rc = -ENODEV; goto out_buf; } if (atomic_read(&dev_info->use_count) != 0) { up_write(&dcssblk_devices_sem); - PRINT_WARN("Segment %s is in use!\n", local_buf); + PRINT_WARN("Device %s is in use!\n", local_buf); rc = -EBUSY; goto out_buf; } - list_del(&dev_info->lh); + list_del(&dev_info->lh); del_gendisk(dev_info->gd); blk_cleanup_queue(dev_info->dcssblk_queue); dev_info->gd->queue = NULL; put_disk(dev_info->gd); device_unregister(&dev_info->dev); - segment_unload(dev_info->segment_name); - PRINT_DEBUG("Segment %s unloaded successfully\n", - dev_info->segment_name); + + /* unload all related segments */ + list_for_each_entry(entry, &dev_info->seg_list, lh) + segment_unload(entry->segment_name); + put_device(&dev_info->dev); up_write(&dcssblk_devices_sem); @@ -550,6 
+797,7 @@ static int dcssblk_release(struct inode *inode, struct file *filp) { struct dcssblk_dev_info *dev_info; + struct segment_info *entry; int rc; dev_info = inode->i_bdev->bd_disk->private_data; @@ -560,9 +808,11 @@ dcssblk_release(struct inode *inode, struct file *filp) down_write(&dcssblk_devices_sem); if (atomic_dec_and_test(&dev_info->use_count) && (dev_info->save_pending)) { - PRINT_INFO("Segment %s became idle and is being saved now\n", + PRINT_INFO("Device %s became idle and is being saved now\n", dev_info->segment_name); - segment_save(dev_info->segment_name); + list_for_each_entry(entry, &dev_info->seg_list, lh) { + segment_save(entry->segment_name); + } dev_info->save_pending = 0; } up_write(&dcssblk_devices_sem); @@ -602,7 +852,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio) case SEG_TYPE_SC: /* cannot write to these segments */ if (bio_data_dir(bio) == WRITE) { - PRINT_WARN("rejecting write to ro segment %s\n", + PRINT_WARN("rejecting write to ro device %s\n", dev_name(&dev_info->dev)); goto fail; } @@ -658,7 +908,7 @@ static void dcssblk_check_params(void) { int rc, i, j, k; - char buf[9]; + char buf[DCSSBLK_PARM_LEN + 1]; struct dcssblk_dev_info *dev_info; for (i = 0; (i < DCSSBLK_PARM_LEN) && (dcssblk_segments[i] != '\0'); @@ -666,15 +916,16 @@ dcssblk_check_params(void) for (j = i; (dcssblk_segments[j] != ',') && (dcssblk_segments[j] != '\0') && (dcssblk_segments[j] != '(') && - (j - i) < 8; j++) + (j < DCSSBLK_PARM_LEN); j++) { buf[j-i] = dcssblk_segments[j]; } buf[j-i] = '\0'; rc = dcssblk_add_store(dcssblk_root_dev, NULL, buf, j-i); if ((rc >= 0) && (dcssblk_segments[j] == '(')) { - for (k = 0; buf[k] != '\0'; k++) + for (k = 0; (buf[k] != ':') && (buf[k] != '\0'); k++) buf[k] = toupper(buf[k]); + buf[k] = '\0'; if (!strncmp(&dcssblk_segments[j], "(local)", 7)) { down_read(&dcssblk_devices_sem); dev_info = dcssblk_get_device_by_name(buf); @@ -741,10 +992,12 @@ module_exit(dcssblk_exit); module_param_string(segments, 
dcssblk_segments, DCSSBLK_PARM_LEN, 0444); MODULE_PARM_DESC(segments, "Name of DCSS segment(s) to be loaded, " - "comma-separated list, each name max. 8 chars.\n" - "Adding \"(local)\" to segment name equals echoing 0 to " - "/sys/devices/dcssblk/<segment name>/shared after loading " - "the segment - \n" - "e.g. segments=\"mydcss1,mydcss2,mydcss3(local)\""); + "comma-separated list, names in each set separated " + "by commas are separated by colons, each set contains " + "names of contiguous segments and each name max. 8 chars.\n" + "Adding \"(local)\" to the end of each set equals echoing 0 " + "to /sys/devices/dcssblk/<device name>/shared after loading " + "the contiguous segments - \n" + "e.g. segments=\"mydcss1,mydcss2:mydcss3,mydcss4(local)\""); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 5a0d0e65379256b4da2c9092e197a2c761f51c01 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 10 Oct 2008 21:33:22 +0200 Subject: [S390] Move private simple udelay function to arch/s390/lib/delay.c. Move cio's private simple udelay function to lib/delay.c and turn it into something much more readable. So we have all implementations at one place. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/delay.h | 1 + arch/s390/lib/delay.c | 13 +++++++++++++ drivers/s390/cio/cio.c | 17 ++--------------- 3 files changed, 16 insertions(+), 15 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/delay.h b/arch/s390/include/asm/delay.h index 78357314c450..a356c958e260 100644 --- a/arch/s390/include/asm/delay.h +++ b/arch/s390/include/asm/delay.h @@ -15,6 +15,7 @@ #define _S390_DELAY_H extern void __udelay(unsigned long usecs); +extern void udelay_simple(unsigned long usecs); extern void __delay(unsigned long loops); #define udelay(n) __udelay(n) diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index 0953cee05efc..6ccb9fab055a 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -92,3 +92,16 @@ out: local_irq_restore(flags); preempt_enable(); } + +/* + * Simple udelay variant. To be used on startup and reboot + * when the interrupt handler isn't working. + */ +void udelay_simple(unsigned long usecs) +{ + u64 end; + + end = get_clock() + ((u64) usecs << 12); + while (get_clock() < end) + cpu_relax(); +} diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index c0cb72547256..3db2c386546f 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -859,19 +859,6 @@ __disable_subchannel_easy(struct subchannel_id schid, struct schib *schib) return -EBUSY; /* uhm... 
*/ } -/* we can't use the normal udelay here, since it enables external interrupts */ - -static void udelay_reset(unsigned long usecs) -{ - uint64_t start_cc, end_cc; - - asm volatile ("STCK %0" : "=m" (start_cc)); - do { - cpu_relax(); - asm volatile ("STCK %0" : "=m" (end_cc)); - } while (((end_cc - start_cc)/4096) < usecs); -} - static int __clear_io_subchannel_easy(struct subchannel_id schid) { @@ -887,7 +874,7 @@ __clear_io_subchannel_easy(struct subchannel_id schid) if (schid_equal(&ti.schid, &schid)) return 0; } - udelay_reset(100); + udelay_simple(100); } return -EBUSY; } @@ -895,7 +882,7 @@ __clear_io_subchannel_easy(struct subchannel_id schid) static void __clear_chsc_subchannel_easy(void) { /* It seems we can only wait for a bit here :/ */ - udelay_reset(100); + udelay_simple(100); } static int pgm_check_occured; -- cgit v1.2.3 From ab1d848fd6a9151b02c6cbf4bddce6e24707b094 Mon Sep 17 00:00:00 2001 From: Nigel Hislop Date: Fri, 10 Oct 2008 21:33:25 +0200 Subject: [S390] Add ioctl support for EMC Symmetrix Subsystem Control I/O EMC Symmetrix Subsystem Control I/O through CKD dasd requires a specific parameter list sent to the array via a Perform Subsystem Function CCW. The Symmetrix response is retrieved from the array via a Read Subsystem Data CCW. 
Signed-off-by: Nigel Hislop Signed-off-by: Hannes Reinecke Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/dasd.h | 13 ++++++ drivers/s390/block/dasd_eckd.c | 101 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/dasd.h b/arch/s390/include/asm/dasd.h index 3f002e13d024..55b2b80cdf6e 100644 --- a/arch/s390/include/asm/dasd.h +++ b/arch/s390/include/asm/dasd.h @@ -3,6 +3,8 @@ * Author(s)......: Holger Smolinski * Bugreports.to..: * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 + * EMC Symmetrix ioctl Copyright EMC Corporation, 2008 + * Author.........: Nigel Hislop * * This file is the interface of the DASD device driver, which is exported to user space * any future changes wrt the API will result in a change of the APIVERSION reported @@ -202,6 +204,16 @@ typedef struct attrib_data_t { #define DASD_SEQ_PRESTAGE 0x4 #define DASD_REC_ACCESS 0x5 +/* + * Perform EMC Symmetrix I/O + */ +typedef struct dasd_symmio_parms { + unsigned char reserved[8]; /* compat with older releases */ + unsigned long long psf_data; /* char * cast to u64 */ + unsigned long long rssd_result; /* char * cast to u64 */ + int psf_data_len; + int rssd_result_len; +} __attribute__ ((packed)) dasd_symmio_parms_t; /******************************************************************************** * SECTION: Definition of IOCTLs @@ -247,6 +259,7 @@ typedef struct attrib_data_t { /* Set Attributes (cache operations) */ #define BIODASDSATTR _IOW(DASD_IOCTL_LETTER,2,attrib_data_t) +#define BIODASDSYMMIO _IOWR(DASD_IOCTL_LETTER, 240, dasd_symmio_parms_t) #endif /* DASD_H */ diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 8095629bc493..49f9d221e23d 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -6,6 +6,8 @@ * Martin Schwidefsky * Bugreports.to..: * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 + * EMC 
Symmetrix ioctl Copyright EMC Corporation, 2008 + * Author.........: Nigel Hislop * */ @@ -2083,6 +2085,103 @@ dasd_eckd_set_attrib(struct dasd_device *device, void __user *argp) return 0; } +/* + * Issue syscall I/O to EMC Symmetrix array. + * CCWs are PSF and RSSD + */ +static int dasd_symm_io(struct dasd_device *device, void __user *argp) +{ + struct dasd_symmio_parms usrparm; + char *psf_data, *rssd_result; + struct dasd_ccw_req *cqr; + struct ccw1 *ccw; + int rc; + + /* Copy parms from caller */ + rc = -EFAULT; + if (copy_from_user(&usrparm, argp, sizeof(usrparm))) + goto out; +#ifndef CONFIG_64BIT + /* Make sure pointers are sane even on 31 bit. */ + if ((usrparm.psf_data >> 32) != 0 || (usrparm.rssd_result >> 32) != 0) { + rc = -EINVAL; + goto out; + } +#endif + /* alloc I/O data area */ + psf_data = kzalloc(usrparm.psf_data_len, GFP_KERNEL | GFP_DMA); + rssd_result = kzalloc(usrparm.rssd_result_len, GFP_KERNEL | GFP_DMA); + if (!psf_data || !rssd_result) { + rc = -ENOMEM; + goto out_free; + } + + /* get syscall header from user space */ + rc = -EFAULT; + if (copy_from_user(psf_data, + (void __user *)(unsigned long) usrparm.psf_data, + usrparm.psf_data_len)) + goto out_free; + + /* sanity check on syscall header */ + if (psf_data[0] != 0x17 && psf_data[1] != 0xce) { + rc = -EINVAL; + goto out_free; + } + + /* setup CCWs for PSF + RSSD */ + cqr = dasd_smalloc_request("ECKD", 2 , 0, device); + if (IS_ERR(cqr)) { + DEV_MESSAGE(KERN_WARNING, device, "%s", + "Could not allocate initialization request"); + rc = PTR_ERR(cqr); + goto out_free; + } + + cqr->startdev = device; + cqr->memdev = device; + cqr->retries = 3; + cqr->expires = 10 * HZ; + cqr->buildclk = get_clock(); + cqr->status = DASD_CQR_FILLED; + + /* Build the ccws */ + ccw = cqr->cpaddr; + + /* PSF ccw */ + ccw->cmd_code = DASD_ECKD_CCW_PSF; + ccw->count = usrparm.psf_data_len; + ccw->flags |= CCW_FLAG_CC; + ccw->cda = (__u32)(addr_t) psf_data; + + ccw++; + + /* RSSD ccw */ + ccw->cmd_code = 
DASD_ECKD_CCW_RSSD; + ccw->count = usrparm.rssd_result_len; + ccw->flags = CCW_FLAG_SLI ; + ccw->cda = (__u32)(addr_t) rssd_result; + + rc = dasd_sleep_on(cqr); + if (rc) + goto out_sfree; + + rc = -EFAULT; + if (copy_to_user((void __user *)(unsigned long) usrparm.rssd_result, + rssd_result, usrparm.rssd_result_len)) + goto out_sfree; + rc = 0; + +out_sfree: + dasd_sfree_request(cqr, cqr->memdev); +out_free: + kfree(rssd_result); + kfree(psf_data); +out: + DBF_DEV_EVENT(DBF_WARNING, device, "Symmetrix ioctl: rc=%d", rc); + return rc; +} + static int dasd_eckd_ioctl(struct dasd_block *block, unsigned int cmd, void __user *argp) { @@ -2101,6 +2200,8 @@ dasd_eckd_ioctl(struct dasd_block *block, unsigned int cmd, void __user *argp) return dasd_eckd_reserve(device); case BIODASDSLCK: return dasd_eckd_steal_lock(device); + case BIODASDSYMMIO: + return dasd_symm_io(device, argp); default: return -ENOIOCTLCMD; } -- cgit v1.2.3 From 15e86b0c752d50e910b2cca6e83ce74c4440d06c Mon Sep 17 00:00:00 2001 From: Florian Funke Date: Fri, 10 Oct 2008 21:33:26 +0200 Subject: [S390] introduce dirty bit for kvm live migration This patch defines a dirty bit in the PGSTE that can be used to implement dirty pages logging for KVM's live migration. The bit is set in the ptep_rcp_copy function, which is called to save dirty and referenced information from the storage key in the PGSTE. The bit can be tested and reset by KVM using the kvm_s390_test_and_clear_page_dirty function that is introduced by this patch. 
Acked-by: Carsten Otte Signed-off-by: Florian Funke Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 45 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 0bdb704ae051..1a928f84afd6 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -281,6 +281,9 @@ extern char empty_zero_page[PAGE_SIZE]; #define RCP_GR_BIT 50 #define RCP_GC_BIT 49 +/* User dirty bit for KVM's migration feature */ +#define KVM_UD_BIT 47 + #ifndef __s390x__ /* Bits in the segment table address-space-control-element */ @@ -575,12 +578,16 @@ static inline void ptep_rcp_copy(pte_t *ptep) unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); skey = page_get_storage_key(page_to_phys(page)); - if (skey & _PAGE_CHANGED) + if (skey & _PAGE_CHANGED) { set_bit_simple(RCP_GC_BIT, pgste); + set_bit_simple(KVM_UD_BIT, pgste); + } if (skey & _PAGE_REFERENCED) set_bit_simple(RCP_GR_BIT, pgste); - if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) + if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) { SetPageDirty(page); + set_bit_simple(KVM_UD_BIT, pgste); + } if (test_and_clear_bit_simple(RCP_HR_BIT, pgste)) SetPageReferenced(page); #endif @@ -744,6 +751,40 @@ static inline pte_t pte_mkspecial(pte_t pte) return pte; } +#ifdef CONFIG_PGSTE +/* + * Get (and clear) the user dirty bit for a PTE. 
+ */ +static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, + pte_t *ptep) +{ + int dirty; + unsigned long *pgste; + struct page *page; + unsigned int skey; + + if (!mm->context.pgstes) + return -EINVAL; + rcp_lock(ptep); + pgste = (unsigned long *) (ptep + PTRS_PER_PTE); + page = virt_to_page(pte_val(*ptep)); + skey = page_get_storage_key(page_to_phys(page)); + if (skey & _PAGE_CHANGED) { + set_bit_simple(RCP_GC_BIT, pgste); + set_bit_simple(KVM_UD_BIT, pgste); + } + if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) { + SetPageDirty(page); + set_bit_simple(KVM_UD_BIT, pgste); + } + dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste); + if (skey & _PAGE_CHANGED) + page_clear_dirty(page); + rcp_unlock(ptep); + return dirty; +} +#endif + #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) -- cgit v1.2.3 From 4a672cfa3a7fcbc6f2adc558f34148be1096c561 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 10 Oct 2008 21:33:29 +0200 Subject: [S390] fix initialization of stp chsc_sstpc returns -EIO on error and 0 on success but stp_reset checks against 1 instead of 0. chsc_sstpc used to return 1 on success; this one call site was not updated when that changed. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 06acb1a18bbc..b94e9e3b694a 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -1356,7 +1356,7 @@ static void __init stp_reset(void) stp_page = alloc_bootmem_pages(PAGE_SIZE); rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); - if (rc == 1) + if (rc == 0) set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); else if (stp_online) { printk(KERN_WARNING "Running on non STP capable machine.\n"); -- cgit v1.2.3