From ac94a2911e84a7b3d29d725f8f43b07db1c916f2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 15 Jan 2021 19:56:12 +0100 Subject: s390: update defconfigs Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 1 + arch/s390/configs/defconfig | 1 + arch/s390/configs/zfcpdump_defconfig | 1 + 3 files changed, 3 insertions(+) (limited to 'arch/s390/configs') diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index c4f6ff98a612..c1c4f97897cd 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -40,6 +40,7 @@ CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_LIVEPATCH=y +CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 51135893cffe..467a06d92be6 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -38,6 +38,7 @@ CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_LIVEPATCH=y +CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 1ef211dae77a..0200ccf10ace 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -8,6 +8,7 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_COMPAT_BRK is not set +CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y # CONFIG_COMPAT is not set CONFIG_NR_CPUS=2 -- cgit v1.2.3 From 56e62a73702836017564eaacd5212e4d0fa1c01d Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sat, 21 Nov 2020 11:14:56 +0100 Subject: s390: convert to generic entry This patch converts s390 to use the generic entry infrastructure from kernel/entry/*. 
There are a few special things on s390: - PIF_PER_TRAP is moved to TIF_PER_TRAP as the generic code doesn't know about our PIF flags in exit_to_user_mode_loop(). - The old code had several ways to restart syscalls: a) PIF_SYSCALL_RESTART, which was only set during execve to force a restart after upgrading a process (usually qemu-kvm) to pgste page table extensions. b) PIF_SYSCALL, which is set by do_signal() to indicate that the current syscall should be restarted. This is changed so that do_signal() now also uses PIF_SYSCALL_RESTART. Continuing to use PIF_SYSCALL doesn't work with the generic code, and changing it to PIF_SYSCALL_RESTART makes PIF_SYSCALL and PIF_SYSCALL_RESTART more distinct. - On s390 calling sys_sigreturn or sys_rt_sigreturn is implemented by executing a svc instruction on the process stack which causes a fault. While handling that fault the fault code sets PIF_SYSCALL to hand over processing to the syscall code on exit to usermode. The patch introduces PIF_SYSCALL_RET_SET, which is set if ptrace sets a return value for a syscall. The s390x ptrace ABI uses r2 both for the syscall number and return value, so ptrace cannot set the syscall number + return value at the same time. The flag makes handling that a bit easier. do_syscall() will just skip executing the syscall if PIF_SYSCALL_RET_SET is set. CONFIG_DEBUG_USER_ASCE was removed in favour of the generic CONFIG_DEBUG_ENTRY. CR1/7/13 will be checked both on kernel entry and exit to contain the correct ASCEs. 
Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + arch/s390/Kconfig.debug | 10 +- arch/s390/configs/debug_defconfig | 2 +- arch/s390/configs/defconfig | 1 - arch/s390/include/asm/cputime.h | 2 + arch/s390/include/asm/elf.h | 7 +- arch/s390/include/asm/entry-common.h | 60 +++ arch/s390/include/asm/fpu/api.h | 2 + arch/s390/include/asm/idle.h | 4 +- arch/s390/include/asm/lowcore.h | 4 +- arch/s390/include/asm/nmi.h | 1 + arch/s390/include/asm/processor.h | 52 ++- arch/s390/include/asm/ptrace.h | 9 +- arch/s390/include/asm/syscall.h | 11 +- arch/s390/include/asm/thread_info.h | 3 + arch/s390/include/asm/uaccess.h | 2 +- arch/s390/include/asm/vtime.h | 14 + arch/s390/include/uapi/asm/ptrace.h | 5 +- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/asm-offsets.c | 19 +- arch/s390/kernel/compat_signal.c | 1 + arch/s390/kernel/entry.S | 803 ++++------------------------------- arch/s390/kernel/entry.h | 12 +- arch/s390/kernel/fpu.c | 88 ++++ arch/s390/kernel/idle.c | 24 ++ arch/s390/kernel/irq.c | 89 +++- arch/s390/kernel/nmi.c | 19 +- arch/s390/kernel/process.c | 30 +- arch/s390/kernel/ptrace.c | 117 +---- arch/s390/kernel/setup.c | 3 +- arch/s390/kernel/signal.c | 12 +- arch/s390/kernel/smp.c | 2 +- arch/s390/kernel/sys_s390.c | 102 ----- arch/s390/kernel/syscall.c | 172 ++++++++ arch/s390/kernel/traps.c | 65 +++ arch/s390/kernel/uprobes.c | 6 +- arch/s390/kvm/kvm-s390.c | 3 + arch/s390/kvm/vsie.c | 3 + arch/s390/lib/uaccess.c | 12 +- arch/s390/mm/fault.c | 2 +- 40 files changed, 754 insertions(+), 1022 deletions(-) create mode 100644 arch/s390/include/asm/entry-common.h delete mode 100644 arch/s390/kernel/sys_s390.c create mode 100644 arch/s390/kernel/syscall.c (limited to 'arch/s390/configs') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c72874f09741..41a2c58c6e7a 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -123,6 +123,7 @@ config S390 select GENERIC_ALLOCATOR select GENERIC_CPU_AUTOPROBE select 
GENERIC_CPU_VULNERABILITIES + select GENERIC_ENTRY select GENERIC_FIND_FIRST_BIT select GENERIC_GETTIMEOFDAY select GENERIC_PTDUMP diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 6bfaceebbbc0..ef96c25fa921 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -6,10 +6,12 @@ config TRACE_IRQFLAGS_SUPPORT config EARLY_PRINTK def_bool y -config DEBUG_USER_ASCE - bool "Debug User ASCE" +config DEBUG_ENTRY + bool "Debug low-level entry code" + depends on DEBUG_KERNEL help - Check on exit to user space that address space control - elements are setup correctly. + This option enables sanity checks in s390 low-level entry code. + Some of these sanity checks may slow down kernel entries and + exits or otherwise impact performance. If unsure, say N. diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index c1c4f97897cd..2d8dcce6e028 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -833,7 +833,6 @@ CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y CONFIG_FTRACE_STARTUP_TEST=y # CONFIG_EVENT_TRACE_STARTUP_TEST is not set -CONFIG_DEBUG_USER_ASCE=y CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y @@ -857,3 +856,4 @@ CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BITOPS=m CONFIG_TEST_BPF=m +CONFIG_DEBUG_ENTRY=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 467a06d92be6..3eadcda4aca9 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -781,7 +781,6 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y -CONFIG_DEBUG_USER_ASCE=y CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index cb729d111e20..1d389847b588 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -35,4 +35,6 @@ u64 
arch_cpu_idle_time(int cpu); #define arch_idle_time(cpu) arch_cpu_idle_time(cpu) +void account_idle_time_irq(void); + #endif /* _S390_CPUTIME_H */ diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 5775fc22f410..66d51ad090ab 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -233,8 +233,7 @@ extern char elf_platform[]; do { \ set_personality(PER_LINUX | \ (current->personality & (~PER_MASK))); \ - current->thread.sys_call_table = \ - (unsigned long) &sys_call_table; \ + current->thread.sys_call_table = sys_call_table; \ } while (0) #else /* CONFIG_COMPAT */ #define SET_PERSONALITY(ex) \ @@ -245,11 +244,11 @@ do { \ if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \ set_thread_flag(TIF_31BIT); \ current->thread.sys_call_table = \ - (unsigned long) &sys_call_table_emu; \ + sys_call_table_emu; \ } else { \ clear_thread_flag(TIF_31BIT); \ current->thread.sys_call_table = \ - (unsigned long) &sys_call_table; \ + sys_call_table; \ } \ } while (0) #endif /* CONFIG_COMPAT */ diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h new file mode 100644 index 000000000000..75cebc80474e --- /dev/null +++ b/arch/s390/include/asm/entry-common.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_S390_ENTRY_COMMON_H +#define ARCH_S390_ENTRY_COMMON_H + +#include +#include +#include +#include +#include +#include + +#define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP) + +void do_per_trap(struct pt_regs *regs); +void do_syscall(struct pt_regs *regs); + +typedef void (*pgm_check_func)(struct pt_regs *regs); + +extern pgm_check_func pgm_check_table[128]; + +#ifdef CONFIG_DEBUG_ENTRY +static __always_inline void arch_check_user_regs(struct pt_regs *regs) +{ + debug_user_asce(0); +} + +#define arch_check_user_regs arch_check_user_regs +#endif /* CONFIG_DEBUG_ENTRY */ + +static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs, + unsigned long 
ti_work) +{ + if (ti_work & _TIF_PER_TRAP) { + clear_thread_flag(TIF_PER_TRAP); + do_per_trap(regs); + } + + if (ti_work & _TIF_GUARDED_STORAGE) + gs_load_bc_cb(regs); +} + +#define arch_exit_to_user_mode_work arch_exit_to_user_mode_work + +static __always_inline void arch_exit_to_user_mode(void) +{ + if (test_cpu_flag(CIF_FPU)) + __load_fpu_regs(); + + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) + debug_user_asce(1); +} + +#define arch_exit_to_user_mode arch_exit_to_user_mode + +static inline bool on_thread_stack(void) +{ + return !(((unsigned long)(current->stack) ^ current_stack_pointer()) & ~(THREAD_SIZE - 1)); +} + +#endif diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index 34a7ae68485c..a959b815a58b 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -47,6 +47,8 @@ #include void save_fpu_regs(void); +void load_fpu_regs(void); +void __load_fpu_regs(void); static inline int test_fp_ctl(u32 fpc) { diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h index 6d4226dcf42a..b04f6a794cdf 100644 --- a/arch/s390/include/asm/idle.h +++ b/arch/s390/include/asm/idle.h @@ -20,11 +20,13 @@ struct s390_idle_data { unsigned long long clock_idle_exit; unsigned long long timer_idle_enter; unsigned long long timer_idle_exit; + unsigned long mt_cycles_enter[8]; }; extern struct device_attribute dev_attr_idle_count; extern struct device_attribute dev_attr_idle_time_us; -void psw_idle(struct s390_idle_data *, unsigned long); +void psw_idle(struct s390_idle_data *data, unsigned long psw_mask); +void psw_idle_exit(void); #endif /* _S390_IDLE_H */ diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 69ce9191eaf1..4d65c8e4e6d0 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -81,8 +81,8 @@ struct lowcore { psw_t return_mcck_psw; /* 0x02a0 */ /* CPU accounting and timing values. 
*/ - __u64 sync_enter_timer; /* 0x02b0 */ - __u64 async_enter_timer; /* 0x02b8 */ + __u64 sys_enter_timer; /* 0x02b0 */ + __u8 pad_0x02b8[0x02c0-0x02b8]; /* 0x02b8 */ __u64 mcck_enter_timer; /* 0x02c0 */ __u64 exit_timer; /* 0x02c8 */ __u64 user_timer; /* 0x02d0 */ diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index 5afee80cff58..20e51c9ff240 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -99,6 +99,7 @@ int nmi_alloc_per_cpu(struct lowcore *lc); void nmi_free_per_cpu(struct lowcore *lc); void s390_handle_mcck(void); +void __s390_handle_mcck(void); int s390_do_machine_check(struct pt_regs *regs); #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 2058a435add4..fa67b66bf144 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -38,6 +38,11 @@ #include #include #include +#include + +typedef long (*sys_call_ptr_t)(unsigned long, unsigned long, + unsigned long, unsigned long, + unsigned long, unsigned long); static inline void set_cpu_flag(int flag) { @@ -101,31 +106,32 @@ extern void __bpon(void); */ struct thread_struct { unsigned int acrs[NUM_ACRS]; - unsigned long ksp; /* kernel stack pointer */ - unsigned long user_timer; /* task cputime in user space */ - unsigned long guest_timer; /* task cputime in kvm guest */ - unsigned long system_timer; /* task cputime in kernel space */ - unsigned long hardirq_timer; /* task cputime in hardirq context */ - unsigned long softirq_timer; /* task cputime in softirq context */ - unsigned long sys_call_table; /* system call table address */ - unsigned long gmap_addr; /* address of last gmap fault. 
*/ - unsigned int gmap_write_flag; /* gmap fault write indication */ - unsigned int gmap_int_code; /* int code of last gmap fault */ - unsigned int gmap_pfault; /* signal of a pending guest pfault */ + unsigned long ksp; /* kernel stack pointer */ + unsigned long user_timer; /* task cputime in user space */ + unsigned long guest_timer; /* task cputime in kvm guest */ + unsigned long system_timer; /* task cputime in kernel space */ + unsigned long hardirq_timer; /* task cputime in hardirq context */ + unsigned long softirq_timer; /* task cputime in softirq context */ + const sys_call_ptr_t *sys_call_table; /* system call table address */ + unsigned long gmap_addr; /* address of last gmap fault. */ + unsigned int gmap_write_flag; /* gmap fault write indication */ + unsigned int gmap_int_code; /* int code of last gmap fault */ + unsigned int gmap_pfault; /* signal of a pending guest pfault */ + /* Per-thread information related to debugging */ - struct per_regs per_user; /* User specified PER registers */ - struct per_event per_event; /* Cause of the last PER trap */ - unsigned long per_flags; /* Flags to control debug behavior */ - unsigned int system_call; /* system call number in signal */ - unsigned long last_break; /* last breaking-event-address. */ - /* pfault_wait is used to block the process on a pfault event */ + struct per_regs per_user; /* User specified PER registers */ + struct per_event per_event; /* Cause of the last PER trap */ + unsigned long per_flags; /* Flags to control debug behavior */ + unsigned int system_call; /* system call number in signal */ + unsigned long last_break; /* last breaking-event-address. 
*/ + /* pfault_wait is used to block the process on a pfault event */ unsigned long pfault_wait; struct list_head list; /* cpu runtime instrumentation */ struct runtime_instr_cb *ri_cb; - struct gs_cb *gs_cb; /* Current guarded storage cb */ - struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */ - unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ + struct gs_cb *gs_cb; /* Current guarded storage cb */ + struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */ + unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ /* * Warning: 'fpu' is dynamically-sized. It *MUST* be at * the end. @@ -184,6 +190,7 @@ static inline void release_thread(struct task_struct *tsk) { } /* Free guarded storage control block */ void guarded_storage_release(struct task_struct *tsk); +void gs_load_bc_cb(struct pt_regs *regs); unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) \ @@ -324,6 +331,11 @@ extern void memcpy_absolute(void *, void *, size_t); extern int s390_isolate_bp(void); extern int s390_isolate_bp_guest(void); +static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) +{ + return arch_irqs_disabled_flags(regs->psw.mask); +} + #endif /* __ASSEMBLY__ */ #endif /* __ASM_S390_PROCESSOR_H */ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 73ca7f7cac33..f828be78937f 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -11,13 +11,13 @@ #include #define PIF_SYSCALL 0 /* inside a system call */ -#define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */ -#define PIF_SYSCALL_RESTART 2 /* restart the current system call */ +#define PIF_SYSCALL_RESTART 1 /* restart the current system call */ +#define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */ #define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ #define _PIF_SYSCALL BIT(PIF_SYSCALL) -#define _PIF_PER_TRAP BIT(PIF_PER_TRAP) #define _PIF_SYSCALL_RESTART 
BIT(PIF_SYSCALL_RESTART) +#define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET) #define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT) #ifndef __ASSEMBLY__ @@ -68,6 +68,9 @@ enum { &(*(struct psw_bits *)(&(__psw))); \ })) +#define PGM_INT_CODE_MASK 0x7f +#define PGM_INT_CODE_PER 0x80 + /* * The pt_regs struct defines the way the registers are stored on * the stack during a system call. diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index d9d5de0f67ff..9107e3dab68c 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -14,8 +14,8 @@ #include #include -extern const unsigned long sys_call_table[]; -extern const unsigned long sys_call_table_emu[]; +extern const sys_call_ptr_t sys_call_table[]; +extern const sys_call_ptr_t sys_call_table_emu[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) @@ -56,6 +56,7 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { + set_pt_regs_flag(regs, PIF_SYSCALL_RET_SET); regs->gprs[2] = error ? 
error : val; } @@ -97,4 +98,10 @@ static inline int syscall_get_arch(struct task_struct *task) #endif return AUDIT_ARCH_S390X; } + +static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs) +{ + return false; +} + #endif /* _ASM_SYSCALL_H */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 3c5b1f909b6d..28696ca7680d 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -36,6 +36,7 @@ */ struct thread_info { unsigned long flags; /* low level flags */ + unsigned long syscall_work; /* SYSCALL_WORK_ flags */ }; /* @@ -68,6 +69,7 @@ void arch_setup_new_exec(void); #define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_ISOLATE_BP 8 /* Run process with isolated BP */ #define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */ +#define TIF_PER_TRAP 10 /* Need to handle PER trap on exit to usermode */ #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ @@ -91,6 +93,7 @@ void arch_setup_new_exec(void); #define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING) #define _TIF_ISOLATE_BP BIT(TIF_ISOLATE_BP) #define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST) +#define _TIF_PER_TRAP BIT(TIF_PER_TRAP) #define _TIF_31BIT BIT(TIF_31BIT) #define _TIF_SINGLE_STEP BIT(TIF_SINGLE_STEP) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index c6707885e7c2..4756d2937e54 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -18,7 +18,7 @@ #include #include -void debug_user_asce(void); +void debug_user_asce(int exit); static inline int __range_ok(unsigned long addr, unsigned long size) { diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h index fac6a67988eb..fe17e448c0c5 100644 --- a/arch/s390/include/asm/vtime.h +++ b/arch/s390/include/asm/vtime.h @@ -4,4 +4,18 @@ #define __ARCH_HAS_VTIME_TASK_SWITCH +static inline void update_timer_sys(void) 
+{ + S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer; + S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.sys_enter_timer; + S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer; +} + +static inline void update_timer_mcck(void) +{ + S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer; + S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.mcck_enter_timer; + S390_lowcore.last_update_timer = S390_lowcore.mcck_enter_timer; +} + #endif /* _S390_VTIME_H */ diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index 543dd70e12c8..ad64d673b5e6 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -179,8 +179,9 @@ #define ACR_SIZE 4 -#define PTRACE_OLDSETOPTIONS 21 - +#define PTRACE_OLDSETOPTIONS 21 +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 #ifndef __ASSEMBLY__ #include #include diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index dd73b7f07423..c97818a382f3 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -34,7 +34,7 @@ CFLAGS_dumpstack.o += -fno-optimize-sibling-calls CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o -obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o +obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 79724d861dc9..d22bb28ef50c 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -26,26 +26,14 @@ int main(void) BLANK(); /* thread struct offsets */ OFFSET(__THREAD_ksp, thread_struct, ksp); - 
OFFSET(__THREAD_sysc_table, thread_struct, sys_call_table); - OFFSET(__THREAD_last_break, thread_struct, last_break); - OFFSET(__THREAD_FPU_fpc, thread_struct, fpu.fpc); - OFFSET(__THREAD_FPU_regs, thread_struct, fpu.regs); - OFFSET(__THREAD_per_cause, thread_struct, per_event.cause); - OFFSET(__THREAD_per_address, thread_struct, per_event.address); - OFFSET(__THREAD_per_paid, thread_struct, per_event.paid); - OFFSET(__THREAD_trap_tdb, thread_struct, trap_tdb); BLANK(); /* thread info offsets */ OFFSET(__TI_flags, task_struct, thread_info.flags); BLANK(); /* pt_regs offsets */ - OFFSET(__PT_ARGS, pt_regs, args); OFFSET(__PT_PSW, pt_regs, psw); OFFSET(__PT_GPRS, pt_regs, gprs); OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2); - OFFSET(__PT_INT_CODE, pt_regs, int_code); - OFFSET(__PT_INT_PARM, pt_regs, int_parm); - OFFSET(__PT_INT_PARM_LONG, pt_regs, int_parm_long); OFFSET(__PT_FLAGS, pt_regs, flags); OFFSET(__PT_CR1, pt_regs, cr1); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); @@ -64,6 +52,7 @@ int main(void) OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit); OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter); OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit); + OFFSET(__MT_CYCLES_ENTER, s390_idle_data, mt_cycles_enter); BLANK(); /* hardware defined lowcore locations 0x000 - 0x1ff */ OFFSET(__LC_EXT_PARAMS, lowcore, ext_params); @@ -115,13 +104,9 @@ int main(void) OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags); OFFSET(__LC_RETURN_PSW, lowcore, return_psw); OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw); - OFFSET(__LC_SYNC_ENTER_TIMER, lowcore, sync_enter_timer); - OFFSET(__LC_ASYNC_ENTER_TIMER, lowcore, async_enter_timer); + OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer); OFFSET(__LC_MCCK_ENTER_TIMER, lowcore, mcck_enter_timer); OFFSET(__LC_EXIT_TIMER, lowcore, exit_timer); - OFFSET(__LC_USER_TIMER, lowcore, user_timer); - OFFSET(__LC_SYSTEM_TIMER, lowcore, system_timer); - OFFSET(__LC_STEAL_TIMER, lowcore, steal_timer); 
OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer); OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); OFFSET(__LC_INT_CLOCK, lowcore, int_clock); diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 38d4bdbc34b9..1d0e17ec93eb 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -118,6 +118,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, ¤t->thread.fpu); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ + clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); return 0; } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index f1ba197b10c0..785425b59ac1 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -51,38 +51,8 @@ STACK_SHIFT = PAGE_SHIFT + THREAD_SIZE_ORDER STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE -_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING | \ - _TIF_NOTIFY_SIGNAL) -_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ - _TIF_SYSCALL_TRACEPOINT) -_CIF_WORK = (_CIF_FPU) -_PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) - _LPP_OFFSET = __LC_LPP - .macro TRACE_IRQS_ON -#ifdef CONFIG_TRACE_IRQFLAGS - basr %r2,%r0 - brasl %r14,trace_hardirqs_on_caller -#endif - .endm - - .macro TRACE_IRQS_OFF -#ifdef CONFIG_TRACE_IRQFLAGS - basr %r2,%r0 - brasl %r14,trace_hardirqs_off_caller -#endif - .endm - - .macro LOCKDEP_SYS_EXIT -#ifdef CONFIG_LOCKDEP - tm __PT_PSW+1(%r11),0x01 # returning to user ? 
- jz .+10 - brasl %r14,lockdep_sys_exit -#endif - .endm - .macro CHECK_STACK savearea #ifdef CONFIG_CHECK_STACK tml %r15,STACK_SIZE - CONFIG_STACK_GUARD @@ -91,12 +61,6 @@ _LPP_OFFSET = __LC_LPP #endif .endm - .macro DEBUG_USER_ASCE -#ifdef CONFIG_DEBUG_USER_ASCE - brasl %r14,debug_user_asce -#endif - .endm - .macro CHECK_VMAP_STACK savearea,oklabel #ifdef CONFIG_VMAP_STACK lgr %r14,%r15 @@ -117,9 +81,9 @@ _LPP_OFFSET = __LC_LPP #endif .endm - .macro SWITCH_ASYNC savearea,timer,clock + .macro SWITCH_KERNEL savearea tmhh %r8,0x0001 # interrupting from user ? - jnz 4f + jnz 1f #if IS_ENABLED(CONFIG_KVM) lgr %r14,%r9 larl %r13,.Lsie_gmap @@ -130,92 +94,16 @@ _LPP_OFFSET = __LC_LPP lghi %r11,\savearea # inside critical section, do cleanup brasl %r14,.Lcleanup_sie #endif -0: larl %r13,.Lpsw_idle_exit - cgr %r13,%r9 - jne 3f - - larl %r1,smp_cpu_mtid - llgf %r1,0(%r1) - ltgr %r1,%r1 - jz 2f # no SMT, skip mt_cycles calculation - .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15) - larl %r3,mt_cycles - ag %r3,__LC_PERCPU_OFFSET - la %r4,__SF_EMPTY+16(%r15) -1: lg %r0,0(%r3) - slg %r0,0(%r4) - alg %r0,64(%r4) - stg %r0,0(%r3) - la %r3,8(%r3) - la %r4,8(%r4) - brct %r1,1b - -2: mvc __CLOCK_IDLE_EXIT(8,%r2), \clock - mvc __TIMER_IDLE_EXIT(8,%r2), \timer - # account system time going idle - ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT - - lg %r13,__LC_STEAL_TIMER - alg %r13,__CLOCK_IDLE_ENTER(%r2) - slg %r13,__LC_LAST_UPDATE_CLOCK - stg %r13,__LC_STEAL_TIMER - - mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2) - - lg %r13,__LC_SYSTEM_TIMER - alg %r13,__LC_LAST_UPDATE_TIMER - slg %r13,__TIMER_IDLE_ENTER(%r2) - stg %r13,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) - - nihh %r8,0xfcfd # clear wait state and irq bits -3: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? 
- slgr %r14,%r15 - srag %r14,%r14,STACK_SHIFT - jnz 5f - CHECK_STACK \savearea +0: CHECK_STACK \savearea + lgr %r11,%r15 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 6f -4: UPDATE_VTIME %r14,%r15,\timer - BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP -5: lg %r15,__LC_ASYNC_STACK # load async stack -6: la %r11,STACK_FRAME_OVERHEAD(%r15) - .endm - - .macro UPDATE_VTIME w1,w2,enter_timer - lg \w1,__LC_EXIT_TIMER - lg \w2,__LC_LAST_UPDATE_TIMER - slg \w1,\enter_timer - slg \w2,__LC_EXIT_TIMER - alg \w1,__LC_USER_TIMER - alg \w2,__LC_SYSTEM_TIMER - stg \w1,__LC_USER_TIMER - stg \w2,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer - .endm - - .macro RESTORE_SM_CLEAR_PER - stg %r8,__LC_RETURN_PSW - ni __LC_RETURN_PSW,0xbf - ssm __LC_RETURN_PSW - .endm - - .macro ENABLE_INTS - stosm __SF_EMPTY(%r15),3 - .endm - - .macro ENABLE_INTS_TRACE - TRACE_IRQS_ON - ENABLE_INTS - .endm - - .macro DISABLE_INTS - stnsm __SF_EMPTY(%r15),0xfc - .endm - - .macro DISABLE_INTS_TRACE - DISABLE_INTS - TRACE_IRQS_OFF + stg %r11,__SF_BACKCHAIN(%r15) + j 2f +1: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP + lctlg %c1,%c1,__LC_KERNEL_ASCE + lg %r15,__LC_KERNEL_STACK + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) +2: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm .macro STCK savearea @@ -267,18 +155,17 @@ _LPP_OFFSET = __LC_LPP "jnz .+8; .long 0xb2e8d000", 82 .endm - GEN_BR_THUNK %r9 GEN_BR_THUNK %r14 GEN_BR_THUNK %r14,%r11 .section .kprobes.text, "ax" .Ldummy: /* - * This nop exists only in order to avoid that __switch_to starts at + * This nop exists only in order to avoid that __bpon starts at * the beginning of the kprobes text section. In that case we would * have several symbols at the same address. E.g. objdump would take * an arbitrary symbol name when disassembling this code. - * With the added nop in between the __switch_to symbol is unique + * With the added nop in between the __bpon symbol is unique * again. 
*/ nop 0 @@ -327,10 +214,6 @@ ENTRY(sie64a) stg %r3,__SF_SIE_SAVEAREA(%r15) # save guest register save area xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags - TSTMSK __LC_CPU_FLAGS,_CIF_FPU # load guest fp/vx registers ? - jno .Lsie_load_guest_gprs - brasl %r14,load_fpu_regs # load guest fp/vx regs -.Lsie_load_guest_gprs: lmg %r0,%r13,0(%r3) # load guest gprs 0-13 lg %r14,__LC_GMAP # get gmap pointer ltgr %r14,%r14 @@ -370,7 +253,6 @@ sie_exit: stmg %r0,%r13,0(%r14) # save guest gprs 0-13 xgr %r0,%r0 # clear guest registers to xgr %r1,%r1 # prevent speculative use - xgr %r2,%r2 xgr %r3,%r3 xgr %r4,%r4 xgr %r5,%r5 @@ -397,249 +279,68 @@ EXPORT_SYMBOL(sie_exit) */ ENTRY(system_call) - stpt __LC_SYNC_ENTER_TIMER + stpt __LC_SYS_ENTER_TIMER stmg %r8,%r15,__LC_SAVE_AREA_SYNC BPOFF - lg %r12,__LC_CURRENT - lghi %r14,_PIF_SYSCALL + lghi %r14,0 .Lsysc_per: lctlg %c1,%c1,__LC_KERNEL_ASCE - lghi %r13,__TASK_thread + lg %r12,__LC_CURRENT lg %r15,__LC_KERNEL_STACK - la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs - UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER - BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP - stmg %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC - mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW - mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC - stg %r14,__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - ENABLE_INTS -.Lsysc_do_svc: + stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP # clear user controlled register to prevent speculative use xgr %r0,%r0 - # load address of system call table - lg %r10,__THREAD_sysc_table(%r13,%r12) - llgh %r8,__PT_INT_CODE+2(%r11) - slag %r8,%r8,3 # shift and test for svc 0 - jnz .Lsysc_nr_ok - # svc 0: system call number in %r1 - llgfr %r1,%r1 # clear high word in r1 - sth %r1,__PT_INT_CODE+2(%r11) - cghi %r1,NR_syscalls - jnl .Lsysc_nr_ok - slag %r8,%r1,3 -.Lsysc_nr_ok: - stg 
%r2,__PT_ORIG_GPR2(%r11) - stg %r7,STACK_FRAME_OVERHEAD(%r15) - lg %r9,0(%r8,%r10) # get system call add. - TSTMSK __TI_flags(%r12),_TIF_TRACE - jnz .Lsysc_tracesys - BASR_EX %r14,%r9 # call sys_xxxx - stg %r2,__PT_R2(%r11) # store return value - -.Lsysc_return: -#ifdef CONFIG_DEBUG_RSEQ - lgr %r2,%r11 - brasl %r14,rseq_syscall -#endif - LOCKDEP_SYS_EXIT -.Lsysc_tif: - DISABLE_INTS - TSTMSK __PT_FLAGS(%r11),_PIF_WORK - jnz .Lsysc_work - TSTMSK __TI_flags(%r12),_TIF_WORK - jnz .Lsysc_work # check for work - DEBUG_USER_ASCE + xgr %r1,%r1 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r8,%r8 + xgr %r9,%r9 + xgr %r10,%r10 + xgr %r11,%r11 + la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs + lgr %r3,%r14 + brasl %r14,__do_syscall lctlg %c1,%c1,__LC_USER_ASCE - BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP - TSTMSK __LC_CPU_FLAGS, _CIF_FPU - jz .Lsysc_skip_fpu - brasl %r14,load_fpu_regs -.Lsysc_skip_fpu: - mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) stpt __LC_EXIT_TIMER - lmg %r0,%r15,__PT_R0(%r11) b __LC_RETURN_LPSWE - -# -# One of the work bits is on. Find out which one. 
-# -.Lsysc_work: - ENABLE_INTS - TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED - jo .Lsysc_reschedule - TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART - jo .Lsysc_syscall_restart -#ifdef CONFIG_UPROBES - TSTMSK __TI_flags(%r12),_TIF_UPROBE - jo .Lsysc_uprobe_notify -#endif - TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE - jo .Lsysc_guarded_storage - TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP - jo .Lsysc_singlestep -#ifdef CONFIG_LIVEPATCH - TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING - jo .Lsysc_patch_pending # handle live patching just before - # signals and possible syscall restart -#endif - TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART - jo .Lsysc_syscall_restart - TSTMSK __TI_flags(%r12),(_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL) - jnz .Lsysc_sigpending - TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME - jo .Lsysc_notify_resume - j .Lsysc_return - -# -# _TIF_NEED_RESCHED is set, call schedule -# -.Lsysc_reschedule: - larl %r14,.Lsysc_return - jg schedule - -# -# _TIF_SIGPENDING is set, call do_signal -# -.Lsysc_sigpending: - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_signal - TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL - jno .Lsysc_return -.Lsysc_do_syscall: - lghi %r13,__TASK_thread - lmg %r2,%r7,__PT_R2(%r11) # load svc arguments - lghi %r1,0 # svc 0 returns -ENOSYS - j .Lsysc_do_svc - -# -# _TIF_NOTIFY_RESUME is set, call do_notify_resume -# -.Lsysc_notify_resume: - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,.Lsysc_return - jg do_notify_resume - -# -# _TIF_UPROBE is set, call uprobe_notify_resume -# -#ifdef CONFIG_UPROBES -.Lsysc_uprobe_notify: - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,.Lsysc_return - jg uprobe_notify_resume -#endif - -# -# _TIF_GUARDED_STORAGE is set, call guarded_storage_load -# -.Lsysc_guarded_storage: - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,.Lsysc_return - jg gs_load_bc_cb -# -# _TIF_PATCH_PENDING is set, call klp_update_patch_state -# -#ifdef CONFIG_LIVEPATCH -.Lsysc_patch_pending: - lg %r2,__LC_CURRENT # pass pointer to 
task struct - larl %r14,.Lsysc_return - jg klp_update_patch_state -#endif - -# -# _PIF_PER_TRAP is set, call do_per_trap -# -.Lsysc_singlestep: - ni __PT_FLAGS+7(%r11),255-_PIF_PER_TRAP - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,.Lsysc_return - jg do_per_trap - -# -# _PIF_SYSCALL_RESTART is set, repeat the current system call -# -.Lsysc_syscall_restart: - ni __PT_FLAGS+7(%r11),255-_PIF_SYSCALL_RESTART - lmg %r1,%r7,__PT_R1(%r11) # load svc arguments - lg %r2,__PT_ORIG_GPR2(%r11) - j .Lsysc_do_svc - -# -# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before -# and after the system call -# -.Lsysc_tracesys: - lgr %r2,%r11 # pass pointer to pt_regs - la %r3,0 - llgh %r0,__PT_INT_CODE+2(%r11) - stg %r0,__PT_R2(%r11) - brasl %r14,do_syscall_trace_enter - lghi %r0,NR_syscalls - clgr %r0,%r2 - jnh .Lsysc_tracenogo - sllg %r8,%r2,3 - lg %r9,0(%r8,%r10) - lmg %r3,%r7,__PT_R3(%r11) - stg %r7,STACK_FRAME_OVERHEAD(%r15) - lg %r2,__PT_ORIG_GPR2(%r11) - BASR_EX %r14,%r9 # call sys_xxx - stg %r2,__PT_R2(%r11) # store return value -.Lsysc_tracenogo: - TSTMSK __TI_flags(%r12),_TIF_TRACE - jz .Lsysc_return - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,.Lsysc_return - jg do_syscall_trace_exit ENDPROC(system_call) # # a new process exits the kernel with ret_from_fork # ENTRY(ret_from_fork) - la %r11,STACK_FRAME_OVERHEAD(%r15) - lg %r12,__LC_CURRENT - brasl %r14,schedule_tail - tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? 
- jne .Lsysc_tracenogo - # it's a kernel thread - lmg %r9,%r10,__PT_R9(%r11) # load gprs - la %r2,0(%r10) - BASR_EX %r14,%r9 - j .Lsysc_tracenogo + lgr %r3,%r11 + brasl %r14,__ret_from_fork + lctlg %c1,%c1,__LC_USER_ASCE + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + stpt __LC_EXIT_TIMER + b __LC_RETURN_LPSWE ENDPROC(ret_from_fork) -ENTRY(kernel_thread_starter) - la %r2,0(%r10) - BASR_EX %r14,%r9 - j .Lsysc_tracenogo -ENDPROC(kernel_thread_starter) - /* * Program check handler routine */ ENTRY(pgm_check_handler) - stpt __LC_SYNC_ENTER_TIMER + stpt __LC_SYS_ENTER_TIMER BPOFF stmg %r8,%r15,__LC_SAVE_AREA_SYNC - lg %r10,__LC_LAST_BREAK - srag %r11,%r10,12 - jnz 0f - /* if __LC_LAST_BREAK is < 4096, it contains one of - * the lpswe addresses in lowcore. Set it to 1 (initial state) - * to prevent leaking that address to userspace. - */ - lghi %r10,1 -0: lg %r12,__LC_CURRENT - lghi %r11,0 + lg %r12,__LC_CURRENT + lghi %r10,0 lmg %r8,%r9,__LC_PGM_OLD_PSW tmhh %r8,0x0001 # coming from user space? jno .Lpgm_skip_asce lctlg %c1,%c1,__LC_KERNEL_ASCE - j 3f + j 3f # -> fault in user space .Lpgm_skip_asce: #if IS_ENABLED(CONFIG_KVM) # cleanup critical section for program checks in sie64a @@ -653,7 +354,7 @@ ENTRY(pgm_check_handler) ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit - lghi %r11,_PIF_GUEST_FAULT + lghi %r10,_PIF_GUEST_FAULT #endif 1: tmhh %r8,0x4000 # PER bit set in old PSW ? 
jnz 2f # -> enabled, can't be a double fault @@ -661,82 +362,37 @@ ENTRY(pgm_check_handler) jnz .Lpgm_svcper # -> single stepped svc 2: CHECK_STACK __LC_SAVE_AREA_SYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - # CHECK_VMAP_STACK branches to stack_overflow or 5f - CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,5f -3: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER - BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP + # CHECK_VMAP_STACK branches to stack_overflow or 4f + CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f +3: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP lg %r15,__LC_KERNEL_STACK - lgr %r14,%r12 - aghi %r14,__TASK_thread # pointer to thread_struct - lghi %r13,__LC_PGM_TDB - tm __LC_PGM_ILC+2,0x02 # check for transaction abort - jz 4f - mvc __THREAD_trap_tdb(256,%r14),0(%r13) -4: stg %r10,__THREAD_last_break(%r14) -5: lgr %r13,%r11 - la %r11,STACK_FRAME_OVERHEAD(%r15) +4: la %r11,STACK_FRAME_OVERHEAD(%r15) + stg %r10,__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + stmg %r8,%r9,__PT_PSW(%r11) + # clear user controlled registers to prevent speculative use xgr %r0,%r0 xgr %r1,%r1 - xgr %r2,%r2 xgr %r3,%r3 xgr %r4,%r4 xgr %r5,%r5 xgr %r6,%r6 xgr %r7,%r7 - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC - stmg %r8,%r9,__PT_PSW(%r11) - mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC - mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE - stg %r13,__PT_FLAGS(%r11) - stg %r10,__PT_ARGS(%r11) - tm __LC_PGM_ILC+3,0x80 # check for per exception - jz 6f - tmhh %r8,0x0001 # kernel per event ? 
- jz .Lpgm_kprobe - oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP - mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS - mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE - mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -6: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - RESTORE_SM_CLEAR_PER - larl %r1,pgm_check_table - llgh %r10,__PT_INT_CODE+2(%r11) - nill %r10,0x007f - sll %r10,3 - je .Lpgm_return - lg %r9,0(%r10,%r1) # load address of handler routine - lgr %r2,%r11 # pass pointer to pt_regs - BASR_EX %r14,%r9 # branch to interrupt-handler -.Lpgm_return: - LOCKDEP_SYS_EXIT - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jno .Lpgm_restore - TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL - jo .Lsysc_do_syscall - j .Lsysc_tif -.Lpgm_restore: - DISABLE_INTS - TSTMSK __LC_CPU_FLAGS, _CIF_FPU - jz .Lpgm_skip_fpu - brasl %r14,load_fpu_regs -.Lpgm_skip_fpu: - mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + lgr %r2,%r11 + brasl %r14,__do_pgm_check + tmhh %r8,0x0001 # returning to user space? + jno .Lpgm_exit_kernel + lctlg %c1,%c1,__LC_USER_ASCE + BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER - lmg %r0,%r15,__PT_R0(%r11) +.Lpgm_exit_kernel: + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) b __LC_RETURN_LPSWE -# -# PER event in supervisor state, must be kprobes -# -.Lpgm_kprobe: - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - RESTORE_SM_CLEAR_PER - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_per_trap - j .Lpgm_return - # # single stepped system call # @@ -744,26 +400,26 @@ ENTRY(pgm_check_handler) mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW larl %r14,.Lsysc_per stg %r14,__LC_RETURN_PSW+8 - lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP + lghi %r14,1 lpswe __LC_RETURN_PSW # branch to .Lsysc_per ENDPROC(pgm_check_handler) /* - * IO interrupt handler routine + * Interrupt handler macro used for external and IO interrupts. 
*/ -ENTRY(io_int_handler) +.macro INT_HANDLER name,lc_old_psw,handler +ENTRY(\name) STCK __LC_INT_CLOCK - stpt __LC_ASYNC_ENTER_TIMER + stpt __LC_SYS_ENTER_TIMER BPOFF stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT - lmg %r8,%r9,__LC_IO_OLD_PSW - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK + lmg %r8,%r9,\lc_old_psw + SWITCH_KERNEL __LC_SAVE_AREA_ASYNC stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 xgr %r1,%r1 - xgr %r2,%r2 xgr %r3,%r3 xgr %r4,%r4 xgr %r5,%r5 @@ -772,322 +428,48 @@ ENTRY(io_int_handler) xgr %r10,%r10 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) - tm __PT_PSW+1(%r11),0x01 # coming from user space? - jno .Lio_skip_asce + tm %r8,0x0001 # coming from user space? + jno 1f lctlg %c1,%c1,__LC_KERNEL_ASCE -.Lio_skip_asce: - mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID - xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - TRACE_IRQS_OFF -.Lio_loop: - lgr %r2,%r11 # pass pointer to pt_regs - lghi %r3,IO_INTERRUPT - tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? - jz .Lio_call - lghi %r3,THIN_INTERRUPT -.Lio_call: - brasl %r14,do_IRQ - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPAR - jz .Lio_return - tpi 0 - jz .Lio_return - mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID - j .Lio_loop -.Lio_return: - LOCKDEP_SYS_EXIT - TSTMSK __TI_flags(%r12),_TIF_WORK - jnz .Lio_work # there is work to do (signals etc.) - TSTMSK __LC_CPU_FLAGS,_CIF_WORK - jnz .Lio_work -.Lio_restore: - TRACE_IRQS_ON +1: lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,\handler mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jno .Lio_exit_kernel - DEBUG_USER_ASCE + tmhh %r8,0x0001 # returning to user ? 
+ jno 2f lctlg %c1,%c1,__LC_USER_ASCE BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER -.Lio_exit_kernel: - lmg %r0,%r15,__PT_R0(%r11) +2: lmg %r0,%r15,__PT_R0(%r11) b __LC_RETURN_LPSWE -.Lio_done: - -# -# There is work todo, find out in which context we have been interrupted: -# 1) if we return to user space we can do all _TIF_WORK work -# 2) if we return to kernel code and kvm is enabled check if we need to -# modify the psw to leave SIE -# 3) if we return to kernel code and preemptive scheduling is enabled check -# the preemption counter and if it is zero call preempt_schedule_irq -# Before any work can be done, a switch to the kernel stack is required. -# -.Lio_work: - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jo .Lio_work_user # yes -> do resched & signal -#ifdef CONFIG_PREEMPTION - # check for preemptive scheduling - icm %r0,15,__LC_PREEMPT_COUNT - jnz .Lio_restore # preemption is disabled - TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED - jno .Lio_restore - # switch to kernel stack - lg %r1,__PT_R15(%r11) - aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) - la %r11,STACK_FRAME_OVERHEAD(%r1) - lgr %r15,%r1 - brasl %r14,preempt_schedule_irq - j .Lio_return -#else - j .Lio_restore -#endif - -# -# Need to do work before returning to userspace, switch to kernel stack -# -.Lio_work_user: - lg %r1,__LC_KERNEL_STACK - mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) - la %r11,STACK_FRAME_OVERHEAD(%r1) - lgr %r15,%r1 - -# -# One of the work bits is on. Find out which one. 
-# - TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED - jo .Lio_reschedule -#ifdef CONFIG_LIVEPATCH - TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING - jo .Lio_patch_pending -#endif - TSTMSK __TI_flags(%r12),(_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL) - jnz .Lio_sigpending - TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME - jo .Lio_notify_resume - TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE - jo .Lio_guarded_storage - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - jo .Lio_vxrs - j .Lio_return - -# -# CIF_FPU is set, restore floating-point controls and floating-point registers. -# -.Lio_vxrs: - larl %r14,.Lio_return - jg load_fpu_regs - -# -# _TIF_GUARDED_STORAGE is set, call guarded_storage_load -# -.Lio_guarded_storage: - ENABLE_INTS_TRACE - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,gs_load_bc_cb - DISABLE_INTS_TRACE - j .Lio_return +ENDPROC(\name) +.endm -# -# _TIF_NEED_RESCHED is set, call schedule -# -.Lio_reschedule: - ENABLE_INTS_TRACE - brasl %r14,schedule # call scheduler - DISABLE_INTS_TRACE - j .Lio_return - -# -# _TIF_PATCH_PENDING is set, call klp_update_patch_state -# -#ifdef CONFIG_LIVEPATCH -.Lio_patch_pending: - lg %r2,__LC_CURRENT # pass pointer to task struct - larl %r14,.Lio_return - jg klp_update_patch_state -#endif - -# -# _TIF_SIGPENDING or is set, call do_signal -# -.Lio_sigpending: - ENABLE_INTS_TRACE - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_signal - DISABLE_INTS_TRACE - j .Lio_return - -# -# _TIF_NOTIFY_RESUME or is set, call do_notify_resume -# -.Lio_notify_resume: - ENABLE_INTS_TRACE - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_notify_resume - DISABLE_INTS_TRACE - j .Lio_return -ENDPROC(io_int_handler) - -/* - * External interrupt handler routine - */ -ENTRY(ext_int_handler) - STCK __LC_INT_CLOCK - stpt __LC_ASYNC_ENTER_TIMER - BPOFF - stmg %r8,%r15,__LC_SAVE_AREA_ASYNC - lg %r12,__LC_CURRENT - lmg %r8,%r9,__LC_EXT_OLD_PSW - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK - stmg %r0,%r7,__PT_R0(%r11) - # clear 
user controlled registers to prevent speculative use - xgr %r0,%r0 - xgr %r1,%r1 - xgr %r2,%r2 - xgr %r3,%r3 - xgr %r4,%r4 - xgr %r5,%r5 - xgr %r6,%r6 - xgr %r7,%r7 - xgr %r10,%r10 - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC - stmg %r8,%r9,__PT_PSW(%r11) - tm __PT_PSW+1(%r11),0x01 # coming from user space? - jno .Lext_skip_asce - lctlg %c1,%c1,__LC_KERNEL_ASCE -.Lext_skip_asce: - lghi %r1,__LC_EXT_PARAMS2 - mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR - mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS - mvc __PT_INT_PARM_LONG(8,%r11),0(%r1) - xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - TRACE_IRQS_OFF - lgr %r2,%r11 # pass pointer to pt_regs - lghi %r3,EXT_INTERRUPT - brasl %r14,do_IRQ - j .Lio_return -ENDPROC(ext_int_handler) +INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq +INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq /* * Load idle PSW. */ ENTRY(psw_idle) stg %r3,__SF_EMPTY(%r15) - larl %r1,.Lpsw_idle_exit + larl %r1,psw_idle_exit stg %r1,__SF_EMPTY+8(%r15) larl %r1,smp_cpu_mtid llgf %r1,0(%r1) ltgr %r1,%r1 jz .Lpsw_idle_stcctm - .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15) + .insn rsy,0xeb0000000017,%r1,5,__MT_CYCLES_ENTER(%r2) .Lpsw_idle_stcctm: oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT BPON STCK __CLOCK_IDLE_ENTER(%r2) stpt __TIMER_IDLE_ENTER(%r2) lpswe __SF_EMPTY(%r15) -.Lpsw_idle_exit: +.globl psw_idle_exit +psw_idle_exit: BR_EX %r14 ENDPROC(psw_idle) -/* - * Store floating-point controls and floating-point or vector register - * depending whether the vector facility is available. A critical section - * cleanup assures that the registers are stored even if interrupted for - * some other work. The CIF_FPU flag is set to trigger a lazy restore - * of the register contents at return from io or a system call. 
- */ -ENTRY(save_fpu_regs) - stnsm __SF_EMPTY(%r15),0xfc - lg %r2,__LC_CURRENT - aghi %r2,__TASK_thread - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - jo .Lsave_fpu_regs_exit - stfpc __THREAD_FPU_fpc(%r2) - lg %r3,__THREAD_FPU_regs(%r2) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - jz .Lsave_fpu_regs_fp # no -> store FP regs - VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3) - VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3) - j .Lsave_fpu_regs_done # -> set CIF_FPU flag -.Lsave_fpu_regs_fp: - std 0,0(%r3) - std 1,8(%r3) - std 2,16(%r3) - std 3,24(%r3) - std 4,32(%r3) - std 5,40(%r3) - std 6,48(%r3) - std 7,56(%r3) - std 8,64(%r3) - std 9,72(%r3) - std 10,80(%r3) - std 11,88(%r3) - std 12,96(%r3) - std 13,104(%r3) - std 14,112(%r3) - std 15,120(%r3) -.Lsave_fpu_regs_done: - oi __LC_CPU_FLAGS+7,_CIF_FPU -.Lsave_fpu_regs_exit: - ssm __SF_EMPTY(%r15) - BR_EX %r14 -.Lsave_fpu_regs_end: -ENDPROC(save_fpu_regs) -EXPORT_SYMBOL(save_fpu_regs) - -/* - * Load floating-point controls and floating-point or vector registers. - * A critical section cleanup assures that the register contents are - * loaded even if interrupted for some other work. 
- * - * There are special calling conventions to fit into sysc and io return work: - * %r15: - * The function requires: - * %r4 - */ -load_fpu_regs: - stnsm __SF_EMPTY(%r15),0xfc - lg %r4,__LC_CURRENT - aghi %r4,__TASK_thread - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - jno .Lload_fpu_regs_exit - lfpc __THREAD_FPU_fpc(%r4) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area - jz .Lload_fpu_regs_fp # -> no VX, load FP regs - VLM %v0,%v15,0,%r4 - VLM %v16,%v31,256,%r4 - j .Lload_fpu_regs_done -.Lload_fpu_regs_fp: - ld 0,0(%r4) - ld 1,8(%r4) - ld 2,16(%r4) - ld 3,24(%r4) - ld 4,32(%r4) - ld 5,40(%r4) - ld 6,48(%r4) - ld 7,56(%r4) - ld 8,64(%r4) - ld 9,72(%r4) - ld 10,80(%r4) - ld 11,88(%r4) - ld 12,96(%r4) - ld 13,104(%r4) - ld 14,112(%r4) - ld 15,120(%r4) -.Lload_fpu_regs_done: - ni __LC_CPU_FLAGS+7,255-_CIF_FPU -.Lload_fpu_regs_exit: - ssm __SF_EMPTY(%r15) - BR_EX %r14 -.Lload_fpu_regs_end: -ENDPROC(load_fpu_regs) - /* * Machine check handler routines */ @@ -1146,11 +528,8 @@ ENTRY(mcck_int_handler) mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID jo 3f - la %r14,__LC_SYNC_ENTER_TIMER - clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER - jl 0f - la %r14,__LC_ASYNC_ENTER_TIMER -0: clc 0(8,%r14),__LC_EXIT_TIMER + la %r14,__LC_SYS_ENTER_TIMER + clc 0(8,%r14),__LC_EXIT_TIMER jl 1f la %r14,__LC_EXIT_TIMER 1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER @@ -1165,14 +544,13 @@ ENTRY(mcck_int_handler) TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic 4: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER,__LC_MCCK_CLOCK + SWITCH_KERNEL __LC_GPREGS_SAVE_AREA+64 .Lmcck_skip: lghi %r14,__LC_GPREGS_SAVE_AREA+64 stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 xgr %r1,%r1 - xgr %r2,%r2 xgr %r3,%r3 xgr %r4,%r4 xgr %r5,%r5 @@ -1183,7 +561,6 @@ ENTRY(mcck_int_handler) stmg %r8,%r9,__PT_PSW(%r11) la %r14,4095 
mvc __PT_CR1(8,%r11),__LC_CREGS_SAVE_AREA-4095+8(%r14) - lctlg %c1,%c1,__LC_KERNEL_ASCE xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs @@ -1195,9 +572,7 @@ ENTRY(mcck_int_handler) xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 - TRACE_IRQS_OFF brasl %r14,s390_handle_mcck - TRACE_IRQS_ON .Lmcck_return: lctlg %c1,%c1,__PT_CR1(%r11) lmg %r0,%r10,__PT_R0(%r11) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index a16c33b32ab0..3d0c0ac5c20e 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -17,8 +17,9 @@ void io_int_handler(void); void mcck_int_handler(void); void restart_int_handler(void); -asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); -asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); +void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs); +void __do_pgm_check(struct pt_regs *regs); +void __do_syscall(struct pt_regs *regs, int per_trap); void do_protection_exception(struct pt_regs *regs); void do_dat_exception(struct pt_regs *regs); @@ -48,9 +49,7 @@ void translation_exception(struct pt_regs *regs); void vector_exception(struct pt_regs *regs); void monitor_event_exception(struct pt_regs *regs); -void do_per_trap(struct pt_regs *regs); void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); -void syscall_trace(struct pt_regs *regs, int entryexit); void kernel_stack_overflow(struct pt_regs * regs); void do_signal(struct pt_regs *regs); void handle_signal32(struct ksignal *ksig, sigset_t *oldset, @@ -58,7 +57,8 @@ void handle_signal32(struct ksignal *ksig, sigset_t *oldset, void do_notify_resume(struct pt_regs *regs); void __init init_IRQ(void); -void do_IRQ(struct pt_regs *regs, int irq); +void do_io_irq(struct pt_regs *regs); +void do_ext_irq(struct pt_regs *regs); void do_restart(void); void __init startup_init(void); void die(struct pt_regs 
*regs, const char *str); @@ -82,8 +82,6 @@ long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user DECLARE_PER_CPU(u64, mt_cycles[8]); -void gs_load_bc_cb(struct pt_regs *regs); - unsigned long stack_alloc(void); void stack_free(unsigned long stack); diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index 0da378e2eb25..d864c9a325e2 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -175,3 +175,91 @@ void __kernel_fpu_end(struct kernel_fpu *state, u32 flags) : "1", "cc"); } EXPORT_SYMBOL(__kernel_fpu_end); + +void __load_fpu_regs(void) +{ + struct fpu *state = &current->thread.fpu; + unsigned long *regs = current->thread.fpu.regs; + + asm volatile("lfpc %0" : : "Q" (state->fpc)); + if (likely(MACHINE_HAS_VX)) { + asm volatile("lgr 1,%0\n" + "VLM 0,15,0,1\n" + "VLM 16,31,256,1\n" + : + : "d" (regs) + : "1", "cc", "memory"); + } else { + asm volatile("ld 0,%0" : : "Q" (regs[0])); + asm volatile("ld 1,%0" : : "Q" (regs[1])); + asm volatile("ld 2,%0" : : "Q" (regs[2])); + asm volatile("ld 3,%0" : : "Q" (regs[3])); + asm volatile("ld 4,%0" : : "Q" (regs[4])); + asm volatile("ld 5,%0" : : "Q" (regs[5])); + asm volatile("ld 6,%0" : : "Q" (regs[6])); + asm volatile("ld 7,%0" : : "Q" (regs[7])); + asm volatile("ld 8,%0" : : "Q" (regs[8])); + asm volatile("ld 9,%0" : : "Q" (regs[9])); + asm volatile("ld 10,%0" : : "Q" (regs[10])); + asm volatile("ld 11,%0" : : "Q" (regs[11])); + asm volatile("ld 12,%0" : : "Q" (regs[12])); + asm volatile("ld 13,%0" : : "Q" (regs[13])); + asm volatile("ld 14,%0" : : "Q" (regs[14])); + asm volatile("ld 15,%0" : : "Q" (regs[15])); + } + clear_cpu_flag(CIF_FPU); +} +EXPORT_SYMBOL(__load_fpu_regs); + +void load_fpu_regs(void) +{ + raw_local_irq_disable(); + __load_fpu_regs(); + raw_local_irq_enable(); +} +EXPORT_SYMBOL(load_fpu_regs); + +void save_fpu_regs(void) +{ + unsigned long flags, *regs; + struct fpu *state; + + local_irq_save(flags); + + if (test_cpu_flag(CIF_FPU)) + goto out; + + state = 
&current->thread.fpu; + regs = current->thread.fpu.regs; + + asm volatile("stfpc %0" : "=Q" (state->fpc)); + if (likely(MACHINE_HAS_VX)) { + asm volatile("lgr 1,%0\n" + "VSTM 0,15,0,1\n" + "VSTM 16,31,256,1\n" + : + : "d" (regs) + : "1", "cc", "memory"); + } else { + asm volatile("std 0,%0" : "=Q" (regs[0])); + asm volatile("std 1,%0" : "=Q" (regs[1])); + asm volatile("std 2,%0" : "=Q" (regs[2])); + asm volatile("std 3,%0" : "=Q" (regs[3])); + asm volatile("std 4,%0" : "=Q" (regs[4])); + asm volatile("std 5,%0" : "=Q" (regs[5])); + asm volatile("std 6,%0" : "=Q" (regs[6])); + asm volatile("std 7,%0" : "=Q" (regs[7])); + asm volatile("std 8,%0" : "=Q" (regs[8])); + asm volatile("std 9,%0" : "=Q" (regs[9])); + asm volatile("std 10,%0" : "=Q" (regs[10])); + asm volatile("std 11,%0" : "=Q" (regs[11])); + asm volatile("std 12,%0" : "=Q" (regs[12])); + asm volatile("std 13,%0" : "=Q" (regs[13])); + asm volatile("std 14,%0" : "=Q" (regs[14])); + asm volatile("std 15,%0" : "=Q" (regs[15])); + } + set_cpu_flag(CIF_FPU); +out: + local_irq_restore(flags); +} +EXPORT_SYMBOL(save_fpu_regs); diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index a5d4d80d6ede..812073ea073e 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -14,12 +14,36 @@ #include #include #include +#include #include #include #include "entry.h" static DEFINE_PER_CPU(struct s390_idle_data, s390_idle); +void account_idle_time_irq(void) +{ + struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); + u64 cycles_new[8]; + int i; + + clear_cpu_flag(CIF_ENABLED_WAIT); + if (smp_cpu_mtid) { + stcctm(MT_DIAG, smp_cpu_mtid, cycles_new); + for (i = 0; i < smp_cpu_mtid; i++) + this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); + } + + idle->clock_idle_exit = S390_lowcore.int_clock; + idle->timer_idle_exit = S390_lowcore.sys_enter_timer; + + S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock; + S390_lowcore.last_update_clock = 
idle->clock_idle_exit; + + S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter; + S390_lowcore.last_update_timer = idle->timer_idle_exit; +} + void arch_cpu_idle(void) { struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index f8a8b9428ae2..c6d40bcf4a68 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -95,19 +96,97 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"}, }; -void do_IRQ(struct pt_regs *regs, int irq) +static void do_IRQ(struct pt_regs *regs, int irq) { - struct pt_regs *old_regs; - - old_regs = set_irq_regs(regs); - irq_enter(); if (tod_after_eq(S390_lowcore.int_clock, S390_lowcore.clock_comparator)) /* Serve timer interrupts first. */ clock_comparator_work(); generic_handle_irq(irq); +} + +static int on_async_stack(void) +{ + unsigned long frame = current_frame_address(); + + return !!!((S390_lowcore.async_stack - frame) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)); +} + +static void do_irq_async(struct pt_regs *regs, int irq) +{ + if (on_async_stack()) + do_IRQ(regs, irq); + else + CALL_ON_STACK(do_IRQ, S390_lowcore.async_stack, 2, regs, irq); +} + +static int irq_pending(struct pt_regs *regs) +{ + int cc; + + asm volatile("tpi 0\n" + "ipm %0" : "=d" (cc) : : "cc"); + return cc >> 28; +} + +void noinstr do_io_irq(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + struct pt_regs *old_regs = set_irq_regs(regs); + int from_idle; + + irq_enter(); + + if (user_mode(regs)) + update_timer_sys(); + + from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit; + if (from_idle) + account_idle_time_irq(); + + do { + memcpy(&regs->int_code, &S390_lowcore.subchannel_id, 12); + if (S390_lowcore.io_int_word & BIT(31)) + do_irq_async(regs, THIN_INTERRUPT); + else + 
do_irq_async(regs, IO_INTERRUPT); + } while (MACHINE_IS_LPAR && irq_pending(regs)); + + irq_exit(); + set_irq_regs(old_regs); + irqentry_exit(regs, state); + + if (from_idle) + regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); +} + +void noinstr do_ext_irq(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + struct pt_regs *old_regs = set_irq_regs(regs); + int from_idle; + + irq_enter(); + + if (user_mode(regs)) + update_timer_sys(); + + memcpy(&regs->int_code, &S390_lowcore.ext_cpu_addr, 4); + regs->int_parm = S390_lowcore.ext_params; + regs->int_parm_long = *(unsigned long *)S390_lowcore.ext_params2; + + from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit; + if (from_idle) + account_idle_time_irq(); + + do_irq_async(regs, EXT_INTERRUPT); + irq_exit(); set_irq_regs(old_regs); + irqentry_exit(regs, state); + + if (from_idle) + regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); } static void show_msi_interrupt(struct seq_file *p, int irq) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 86c8d5370e7f..11f8c296f60d 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -131,12 +131,11 @@ static notrace void s390_handle_damage(void) NOKPROBE_SYMBOL(s390_handle_damage); /* - * Main machine check handler function. Will be called with interrupts enabled - * or disabled and machine checks enabled or disabled. + * Main machine check handler function. Will be called with interrupts disabled + * and machine checks enabled. */ -void s390_handle_mcck(void) +void __s390_handle_mcck(void) { - unsigned long flags; struct mcck_struct mcck; /* @@ -144,12 +143,10 @@ void s390_handle_mcck(void) * machine checks. Afterwards delete the old state and enable machine * checks again. 
*/ - local_irq_save(flags); local_mcck_disable(); mcck = *this_cpu_ptr(&cpu_mcck); memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck)); local_mcck_enable(); - local_irq_restore(flags); if (mcck.channel_report) crw_handle_channel_report(); @@ -181,8 +178,13 @@ void s390_handle_mcck(void) do_exit(SIGSEGV); } } -EXPORT_SYMBOL_GPL(s390_handle_mcck); +void noinstr s390_handle_mcck(void) +{ + trace_hardirqs_off(); + __s390_handle_mcck(); + trace_hardirqs_on(); +} /* * returns 0 if all required registers are available * returns 1 otherwise @@ -344,6 +346,9 @@ int notrace s390_do_machine_check(struct pt_regs *regs) int mcck_pending = 0; nmi_enter(); + + if (user_mode(regs)) + update_timer_mcck(); inc_irq_stat(NMI_NMI); mci.val = S390_lowcore.mcck_interruption_code; mcck = this_cpu_ptr(&cpu_mcck); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index bc3ca54edfb4..367bd000f6d1 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -43,9 +44,22 @@ #include #include "entry.h" -asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); +void ret_from_fork(void) asm("ret_from_fork"); -extern void kernel_thread_starter(void); +void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs) +{ + void (*func)(void *arg); + + schedule_tail(prev); + + if (!user_mode(regs)) { + /* Kernel thread */ + func = (void *)regs->gprs[9]; + func((void *)regs->gprs[10]); + } + clear_pt_regs_flag(regs, PIF_SYSCALL); + syscall_exit_to_user_mode(regs); +} void flush_thread(void) { @@ -108,10 +122,12 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, p->thread.last_break = 1; frame->sf.back_chain = 0; + frame->sf.gprs[5] = (unsigned long)frame + sizeof(struct stack_frame); + frame->sf.gprs[6] = (unsigned long)p; /* new return point is ret_from_fork */ - frame->sf.gprs[8] = (unsigned long) ret_from_fork; + frame->sf.gprs[8] = (unsigned long)ret_from_fork; 
/* fake return stack for resume(), don't go back to schedule */ - frame->sf.gprs[9] = (unsigned long) frame; + frame->sf.gprs[9] = (unsigned long)frame; /* Store access registers to kernel stack of new process. */ if (unlikely(p->flags & PF_KTHREAD)) { @@ -120,10 +136,10 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; frame->childregs.psw.addr = - (unsigned long) kernel_thread_starter; + (unsigned long)__ret_from_fork; frame->childregs.gprs[9] = new_stackp; /* function */ frame->childregs.gprs[10] = arg; - frame->childregs.gprs[11] = (unsigned long) do_exit; + frame->childregs.gprs[11] = (unsigned long)do_exit; frame->childregs.orig_gpr2 = -1; return 0; @@ -153,7 +169,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, return 0; } -asmlinkage void execve_tail(void) +void execve_tail(void) { current->thread.fpu.fpc = 0; asm volatile("sfpc %0" : : "d" (0)); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index a76dd27fb2e8..18b3416fd663 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -7,6 +7,7 @@ * Martin Schwidefsky (schwidefsky@de.ibm.com) */ +#include "asm/ptrace.h" #include #include #include @@ -37,9 +38,6 @@ #include "compat_ptrace.h" #endif -#define CREATE_TRACE_POINTS -#include - void update_cr_regs(struct task_struct *task) { struct pt_regs *regs = task_pt_regs(task); @@ -140,7 +138,7 @@ void ptrace_disable(struct task_struct *task) memset(&task->thread.per_user, 0, sizeof(task->thread.per_user)); memset(&task->thread.per_event, 0, sizeof(task->thread.per_event)); clear_tsk_thread_flag(task, TIF_SINGLE_STEP); - clear_pt_regs_flag(task_pt_regs(task), PIF_PER_TRAP); + clear_tsk_thread_flag(task, TIF_PER_TRAP); task->thread.per_flags = 0; } @@ -322,25 +320,6 @@ static inline void __poke_user_per(struct task_struct *child, child->thread.per_user.end = data; } -static 
void fixup_int_code(struct task_struct *child, addr_t data) -{ - struct pt_regs *regs = task_pt_regs(child); - int ilc = regs->int_code >> 16; - u16 insn; - - if (ilc > 6) - return; - - if (ptrace_access_vm(child, regs->psw.addr - (regs->int_code >> 16), - &insn, sizeof(insn), FOLL_FORCE) != sizeof(insn)) - return; - - /* double check that tracee stopped on svc instruction */ - if ((insn >> 8) != 0xa) - return; - - regs->int_code = 0x20000 | (data & 0xffff); -} /* * Write a word to the user area of a process at location addr. This * operation does have an additional problem compared to peek_user. @@ -374,10 +353,12 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) } if (test_pt_regs_flag(regs, PIF_SYSCALL) && - addr == offsetof(struct user, regs.gprs[2])) - fixup_int_code(child, data); - *(addr_t *)((addr_t) &regs->psw + addr) = data; + addr == offsetof(struct user, regs.gprs[2])) { + struct pt_regs *regs = task_pt_regs(child); + regs->int_code = 0x20000 | (data & 0xffff); + } + *(addr_t *)((addr_t) &regs->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { /* * access registers are stored in the thread structure @@ -742,10 +723,12 @@ static int __poke_user_compat(struct task_struct *child, regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | (__u64)(tmp & PSW32_ADDR_AMODE); } else { - if (test_pt_regs_flag(regs, PIF_SYSCALL) && - addr == offsetof(struct compat_user, regs.gprs[2])) - fixup_int_code(child, data); + addr == offsetof(struct compat_user, regs.gprs[2])) { + struct pt_regs *regs = task_pt_regs(child); + + regs->int_code = 0x20000 | (data & 0xffff); + } /* gpr 0-15 */ *(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp; } @@ -862,82 +845,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, } #endif -asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) -{ - unsigned long mask = -1UL; - long ret = -1; - - if (is_compat_task()) - mask = 0xffffffff; - - /* - * The sysc_tracesys code in
entry.S stored the system - * call number to gprs[2]. - */ - if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) { - /* - * Tracing decided this syscall should not happen. Skip - * the system call and the system call restart handling. - */ - goto skip; - } - -#ifdef CONFIG_SECCOMP - /* Do the secure computing check after ptrace. */ - if (unlikely(test_thread_flag(TIF_SECCOMP))) { - struct seccomp_data sd; - - if (is_compat_task()) { - sd.instruction_pointer = regs->psw.addr & 0x7fffffff; - sd.arch = AUDIT_ARCH_S390; - } else { - sd.instruction_pointer = regs->psw.addr; - sd.arch = AUDIT_ARCH_S390X; - } - - sd.nr = regs->int_code & 0xffff; - sd.args[0] = regs->orig_gpr2 & mask; - sd.args[1] = regs->gprs[3] & mask; - sd.args[2] = regs->gprs[4] & mask; - sd.args[3] = regs->gprs[5] & mask; - sd.args[4] = regs->gprs[6] & mask; - sd.args[5] = regs->gprs[7] & mask; - - if (__secure_computing(&sd) == -1) - goto skip; - } -#endif /* CONFIG_SECCOMP */ - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->int_code & 0xffff); - - - audit_syscall_entry(regs->int_code & 0xffff, regs->orig_gpr2 & mask, - regs->gprs[3] &mask, regs->gprs[4] &mask, - regs->gprs[5] &mask); - - if ((signed long)regs->gprs[2] >= NR_syscalls) { - regs->gprs[2] = -ENOSYS; - ret = -ENOSYS; - } - return regs->gprs[2]; -skip: - clear_pt_regs_flag(regs, PIF_SYSCALL); - return ret; -} - -asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) -{ - audit_syscall_exit(regs); - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_exit(regs, regs->gprs[2]); - - if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, 0); -} - /* * user_regset definitions. 
*/ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 1fbed91c73bc..c7feda84edbb 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -411,8 +411,7 @@ static void __init setup_lowcore_dat_off(void) memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list, sizeof(lc->alt_stfle_fac_list)); nmi_alloc_boot_cpu(lc); - lc->sync_enter_timer = S390_lowcore.sync_enter_timer; - lc->async_enter_timer = S390_lowcore.async_enter_timer; + lc->sys_enter_timer = S390_lowcore.sys_enter_timer; lc->exit_timer = S390_lowcore.exit_timer; lc->user_timer = S390_lowcore.user_timer; lc->system_timer = S390_lowcore.system_timer; diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index b27b6c1f058d..fce1b2a28a40 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -170,6 +170,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) fpregs_load(&user_sregs.fpregs, &current->thread.fpu); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ + clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); return 0; } @@ -459,7 +460,8 @@ static void handle_signal(struct ksignal *ksig, sigset_t *oldset, * the kernel can handle, and then we build all the user-level signal handling * stack-frames in one go after that. */ -void do_signal(struct pt_regs *regs) + +void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) { struct ksignal ksig; sigset_t *oldset = sigmask_to_save(); @@ -472,7 +474,7 @@ void do_signal(struct pt_regs *regs) current->thread.system_call = test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0; - if (test_thread_flag(TIF_SIGPENDING) && get_signal(&ksig)) { + if (has_signal && get_signal(&ksig)) { /* Whee! Actually deliver the signal.
*/ if (current->thread.system_call) { regs->int_code = current->thread.system_call; @@ -498,6 +500,7 @@ void do_signal(struct pt_regs *regs) } /* No longer in a system call */ clear_pt_regs_flag(regs, PIF_SYSCALL); + clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); rseq_signal_deliver(&ksig, regs); if (is_compat_task()) handle_signal32(&ksig, oldset, regs); @@ -508,6 +511,7 @@ void do_signal(struct pt_regs *regs) /* No handlers present - check for system call restart */ clear_pt_regs_flag(regs, PIF_SYSCALL); + clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); if (current->thread.system_call) { regs->int_code = current->thread.system_call; switch (regs->gprs[2]) { @@ -520,9 +524,9 @@ void do_signal(struct pt_regs *regs) case -ERESTARTNOINTR: /* Restart system call with magic TIF bit. */ regs->gprs[2] = regs->orig_gpr2; - set_pt_regs_flag(regs, PIF_SYSCALL); + set_pt_regs_flag(regs, PIF_SYSCALL_RESTART); if (test_thread_flag(TIF_SINGLE_STEP)) - clear_pt_regs_flag(regs, PIF_PER_TRAP); + clear_thread_flag(TIF_PER_TRAP); break; } } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 27c763014114..c5abbb94ac6e 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -499,7 +499,7 @@ static void smp_handle_ext_call(void) if (test_bit(ec_call_function_single, &bits)) generic_smp_call_function_single_interrupt(); if (test_bit(ec_mcck_pending, &bits)) - s390_handle_mcck(); + __s390_handle_mcck(); } static void do_ext_call_interrupt(struct ext_code ext_code, diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c deleted file mode 100644 index 202fa73ac167..000000000000 --- a/arch/s390/kernel/sys_s390.c +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * S390 version - * Copyright IBM Corp. 
1999, 2000 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Thomas Spatzier (tspat@de.ibm.com) - * - * Derived from "arch/i386/kernel/sys_i386.c" - * - * This file contains various random system calls that - * have a non-standard calling sequence on the Linux/s390 - * platform. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "entry.h" - -/* - * Perform the mmap() system call. Linux for S/390 isn't able to handle more - * than 5 system call parameters, so this system call uses a memory block - * for parameter passing. - */ - -struct s390_mmap_arg_struct { - unsigned long addr; - unsigned long len; - unsigned long prot; - unsigned long flags; - unsigned long fd; - unsigned long offset; -}; - -SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg) -{ - struct s390_mmap_arg_struct a; - int error = -EFAULT; - - if (copy_from_user(&a, arg, sizeof(a))) - goto out; - error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -out: - return error; -} - -#ifdef CONFIG_SYSVIPC -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls. - */ -SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, - unsigned long, third, void __user *, ptr) -{ - if (call >> 16) - return -EINVAL; - /* The s390 sys_ipc variant has only five parameters instead of six - * like the generic variant. The only difference is the handling of - * the SEMTIMEDOP subcall where on s390 the third parameter is used - * as a pointer to a struct timespec where the generic variant uses - * the fifth parameter. - * Therefore we can call the generic variant by simply passing the - * third parameter also as fifth parameter. 
- */ - return ksys_ipc(call, first, second, third, ptr, third); -} -#endif /* CONFIG_SYSVIPC */ - -SYSCALL_DEFINE1(s390_personality, unsigned int, personality) -{ - unsigned int ret = current->personality; - - if (personality(current->personality) == PER_LINUX32 && - personality(personality) == PER_LINUX) - personality |= PER_LINUX32; - - if (personality != 0xffffffff) - set_personality(personality); - - if (personality(ret) == PER_LINUX32) - ret &= ~PER_LINUX32; - - return ret; -} - -SYSCALL_DEFINE0(ni_syscall) -{ - return -ENOSYS; -} diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c new file mode 100644 index 000000000000..25c0fb19b0a5 --- /dev/null +++ b/arch/s390/kernel/syscall.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Thomas Spatzier (tspat@de.ibm.com) + * + * Derived from "arch/i386/kernel/sys_i386.c" + * + * This file contains various random system calls that + * have a non-standard calling sequence on the Linux/s390 + * platform. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "entry.h" + +/* + * Perform the mmap() system call. Linux for S/390 isn't able to handle more + * than 5 system call parameters, so this system call uses a memory block + * for parameter passing. 
+ */ + +struct s390_mmap_arg_struct { + unsigned long addr; + unsigned long len; + unsigned long prot; + unsigned long flags; + unsigned long fd; + unsigned long offset; +}; + +SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg) +{ + struct s390_mmap_arg_struct a; + int error = -EFAULT; + + if (copy_from_user(&a, arg, sizeof(a))) + goto out; + error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); +out: + return error; +} + +#ifdef CONFIG_SYSVIPC +/* + * sys_ipc() is the de-multiplexer for the SysV IPC calls. + */ +SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, + unsigned long, third, void __user *, ptr) +{ + if (call >> 16) + return -EINVAL; + /* The s390 sys_ipc variant has only five parameters instead of six + * like the generic variant. The only difference is the handling of + * the SEMTIMEDOP subcall where on s390 the third parameter is used + * as a pointer to a struct timespec where the generic variant uses + * the fifth parameter. + * Therefore we can call the generic variant by simply passing the + * third parameter also as fifth parameter. 
+ */ + return ksys_ipc(call, first, second, third, ptr, third); +} +#endif /* CONFIG_SYSVIPC */ + +SYSCALL_DEFINE1(s390_personality, unsigned int, personality) +{ + unsigned int ret = current->personality; + + if (personality(current->personality) == PER_LINUX32 && + personality(personality) == PER_LINUX) + personality |= PER_LINUX32; + + if (personality != 0xffffffff) + set_personality(personality); + + if (personality(ret) == PER_LINUX32) + ret &= ~PER_LINUX32; + + return ret; +} + +SYSCALL_DEFINE0(ni_syscall) +{ + return -ENOSYS; +} + +void do_syscall(struct pt_regs *regs) +{ + unsigned long nr; + + nr = regs->int_code & 0xffff; + if (!nr) { + nr = regs->gprs[1] & 0xffff; + regs->int_code &= ~0xffffUL; + regs->int_code |= nr; + } + + regs->gprs[2] = nr; + + nr = syscall_enter_from_user_mode_work(regs, nr); + + /* + * In the s390 ptrace ABI, both the syscall number and the return value + * use gpr2. However, userspace puts the syscall number either in the + * svc instruction itself, or uses gpr1. To make at least skipping syscalls + * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here + * and if set, the syscall will be skipped. 
+ */ + if (!test_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)) { + regs->gprs[2] = -ENOSYS; + if (likely(nr < NR_syscalls)) { + regs->gprs[2] = current->thread.sys_call_table[nr]( + regs->orig_gpr2, regs->gprs[3], + regs->gprs[4], regs->gprs[5], + regs->gprs[6], regs->gprs[7]); + } + } else { + clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET); + } + syscall_exit_to_user_mode_work(regs); +} + +void noinstr __do_syscall(struct pt_regs *regs, int per_trap) +{ + enter_from_user_mode(regs); + + memcpy(&regs->gprs[8], S390_lowcore.save_area_sync, 8 * sizeof(unsigned long)); + memcpy(&regs->int_code, &S390_lowcore.svc_ilc, sizeof(regs->int_code)); + regs->psw = S390_lowcore.svc_old_psw; + + update_timer_sys(); + + local_irq_enable(); + regs->orig_gpr2 = regs->gprs[2]; + + if (per_trap) + set_thread_flag(TIF_PER_TRAP); + + for (;;) { + regs->flags = 0; + set_pt_regs_flag(regs, PIF_SYSCALL); + do_syscall(regs); + if (!test_pt_regs_flag(regs, PIF_SYSCALL_RESTART)) + break; + local_irq_enable(); + } + exit_to_user_mode(); +} diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 8d1e8a1a97df..db7dd59b570c 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -13,6 +13,8 @@ * 'Traps.c' handles hardware traps and faults after we have saved some * state in 'asm.s'.
*/ +#include "asm/irqflags.h" +#include "asm/ptrace.h" #include #include #include @@ -23,7 +25,9 @@ #include #include #include +#include #include +#include #include "entry.h" static inline void __user *get_trap_ip(struct pt_regs *regs) @@ -288,3 +292,64 @@ void __init trap_init(void) local_mcck_enable(); test_monitor_call(); } + +void noinstr __do_pgm_check(struct pt_regs *regs) +{ + unsigned long last_break = S390_lowcore.breaking_event_addr; + unsigned int trapnr, syscall_redirect = 0; + irqentry_state_t state; + + regs->int_code = *(u32 *)&S390_lowcore.pgm_ilc; + regs->int_parm_long = S390_lowcore.trans_exc_code; + + state = irqentry_enter(regs); + + if (user_mode(regs)) { + update_timer_sys(); + if (last_break < 4096) + last_break = 1; + current->thread.last_break = last_break; + regs->args[0] = last_break; + } + + if (S390_lowcore.pgm_code & 0x0200) { + /* transaction abort */ + memcpy(&current->thread.trap_tdb, &S390_lowcore.pgm_tdb, 256); + } + + if (S390_lowcore.pgm_code & PGM_INT_CODE_PER) { + if (user_mode(regs)) { + struct per_event *ev = &current->thread.per_event; + + set_thread_flag(TIF_PER_TRAP); + ev->address = S390_lowcore.per_address; + ev->cause = *(u16 *)&S390_lowcore.per_code; + ev->paid = S390_lowcore.per_access_id; + } else { + /* PER event in kernel is kprobes */ + __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); + do_per_trap(regs); + goto out; + } + } + + if (!irqs_disabled_flags(regs->psw.mask)) + trace_hardirqs_on(); + __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); + + trapnr = regs->int_code & PGM_INT_CODE_MASK; + if (trapnr) + pgm_check_table[trapnr](regs); + syscall_redirect = user_mode(regs) && test_pt_regs_flag(regs, PIF_SYSCALL); +out: + local_irq_disable(); + irqentry_exit(regs, state); + + if (syscall_redirect) { + enter_from_user_mode(regs); + local_irq_enable(); + regs->orig_gpr2 = regs->gprs[2]; + do_syscall(regs); + exit_to_user_mode(); + } +} diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index
5007fac01bb5..bbf8622bbf5d 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -32,7 +32,7 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) return -EINVAL; if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) return -EINVAL; - clear_pt_regs_flag(regs, PIF_PER_TRAP); + clear_thread_flag(TIF_PER_TRAP); auprobe->saved_per = psw_bits(regs->psw).per; auprobe->saved_int_code = regs->int_code; regs->int_code = UPROBE_TRAP_NR; @@ -103,7 +103,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) /* fix per address */ current->thread.per_event.address = utask->vaddr; /* trigger per event */ - set_pt_regs_flag(regs, PIF_PER_TRAP); + set_thread_flag(TIF_PER_TRAP); } return 0; } @@ -259,7 +259,7 @@ static void sim_stor_event(struct pt_regs *regs, void *addr, int len) return; current->thread.per_event.address = regs->psw.addr; current->thread.per_event.cause = PER_EVENT_STORE >> 16; - set_pt_regs_flag(regs, PIF_PER_TRAP); + set_thread_flag(TIF_PER_TRAP); } /* diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index dbafd057ca6a..759bbc012b6c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "kvm-s390.h" #include "gaccess.h" @@ -4147,6 +4148,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) vcpu->run->s.regs.gprs, sizeof(sie_page->pv_grregs)); } + if (test_cpu_flag(CIF_FPU)) + load_fpu_regs(); exit_reason = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); if (kvm_s390_pv_cpu_is_protected(vcpu)) { diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index c5d0a58b2c29..bd803e091918 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "kvm-s390.h" #include "gaccess.h" @@ -1028,6 +1029,8 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) */ vcpu->arch.sie_block->prog0c |= 
PROG_IN_SIE; barrier(); + if (test_cpu_flag(CIF_FPU)) + load_fpu_regs(); if (!kvm_s390_vcpu_sie_inhibited(vcpu)) rc = sie64a(scb_s, vcpu->run->s.regs.gprs); barrier(); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index e8f642446fed..2fece1fd210a 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -16,8 +16,8 @@ #include #include -#ifdef CONFIG_DEBUG_USER_ASCE -void debug_user_asce(void) +#ifdef CONFIG_DEBUG_ENTRY +void debug_user_asce(int exit) { unsigned long cr1, cr7; @@ -25,12 +25,14 @@ void debug_user_asce(void) __ctl_store(cr7, 7, 7); if (cr1 == S390_lowcore.kernel_asce && cr7 == S390_lowcore.user_asce) return; - panic("incorrect ASCE on kernel exit\n" + panic("incorrect ASCE on kernel %s\n" "cr1: %016lx cr7: %016lx\n" "kernel: %016llx user: %016llx\n", - cr1, cr7, S390_lowcore.kernel_asce, S390_lowcore.user_asce); + exit ? "exit" : "entry", cr1, cr7, + S390_lowcore.kernel_asce, S390_lowcore.user_asce); + } -#endif /*CONFIG_DEBUG_USER_ASCE */ +#endif /*CONFIG_DEBUG_ENTRY */ #ifndef CONFIG_HAVE_MARCH_Z10_FEATURES static DEFINE_STATIC_KEY_FALSE(have_mvcos); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index b8210103de14..e30c7c781172 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -385,7 +385,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) * The instruction that caused the program check has * been nullified. Don't signal single step via SIGTRAP. */ - clear_pt_regs_flag(regs, PIF_PER_TRAP); + clear_thread_flag(TIF_PER_TRAP); if (kprobe_page_fault(regs, 14)) return 0; -- cgit v1.2.3 From d010b378736898d7a65a9f9105088f1d335da48d Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Thu, 28 Jan 2021 16:33:54 +0100 Subject: s390: update defconfigs Disable CONFIG_TMPFS_INODE64 which is currently broken on s390x because size of ino_t on s390x is 4 bytes. 
This fixes the following error with kdump: [ 9.415082] [608]: Remounting '/' read-only in with options 'size=238372k,nr_inodes=59593,inode64'. [ 9.415093] rootfs: Cannot use inode64 with <64bit inums in kernel [ 9.415093] [ 9.415100] [608]: Failed to remount '/' read-only: Invalid argument Fixes: 5c60ed283e1d ("s390: update defconfigs") Signed-off-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 1 - arch/s390/configs/defconfig | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/s390/configs') diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 2d8dcce6e028..5233da9e3ea6 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -631,7 +631,6 @@ CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y -CONFIG_TMPFS_INODE64=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 3eadcda4aca9..b32e0e68891e 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -619,7 +619,6 @@ CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y -CONFIG_TMPFS_INODE64=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m -- cgit v1.2.3 From 9f9b312db40b3860dfd874e9e4e8aa2c3efcbe2b Mon Sep 17 00:00:00 2001 From: Marc Hartmayer Date: Fri, 5 Feb 2021 15:22:03 +0000 Subject: s390/debug_config: enable kmemleak detector ...but set it to off by default. Use the kernel command line option `kmemleak=on` to enable it. 
Signed-off-by: Marc Hartmayer Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390/configs') diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 5233da9e3ea6..75f0c977c73c 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -792,6 +792,8 @@ CONFIG_DEBUG_OBJECTS_RCU_HEAD=y CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y CONFIG_SLUB_DEBUG_ON=y CONFIG_SLUB_STATS=y +CONFIG_DEBUG_KMEMLEAK=y +CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_VMACACHE=y -- cgit v1.2.3 From 42d7ccca37108991dc9cffa729f27328995532a3 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Thu, 4 Feb 2021 16:22:05 +0100 Subject: s390/defconfig: add some NFT modules Since Fedora 33 the virtualization stack of Fedora requires a couple of netfilter modules to function properly. Let's add these to defconfig and debug_defconfig. Signed-off-by: Halil Pasic Reported-by: Marc Hartmayer Tested-by: Bjoern Walk Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 6 ++++++ arch/s390/configs/defconfig | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'arch/s390/configs') diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 75f0c977c73c..2ae38ef35d52 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -178,13 +178,17 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=y CONFIG_NFT_CT=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_OBJREF=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m +CONFIG_NFT_FIB_INET=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_AUDIT=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -276,6 +280,7 @@ CONFIG_IP_VS_NQ=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NF_TABLES_IPV4=y 
+CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m @@ -296,6 +301,7 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_TABLES_IPV6=y +CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index b32e0e68891e..057ad15bdc63 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -169,13 +169,17 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=y CONFIG_NFT_CT=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_OBJREF=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m +CONFIG_NFT_FIB_INET=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_AUDIT=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -267,6 +271,7 @@ CONFIG_IP_VS_NQ=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NF_TABLES_IPV4=y +CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m @@ -287,6 +292,7 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_TABLES_IPV6=y +CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m -- cgit v1.2.3 From eeab78b05d202f15e58ab10675a4f736a1c9bd29 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 5 Feb 2021 16:19:32 +0100 Subject: s390/vdso: implement generic vdso time namespace support Implement generic vdso time namespace support which also enables time namespaces for s390. This is quite similar to what arm64 has. 
Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + arch/s390/configs/zfcpdump_defconfig | 1 + arch/s390/include/asm/vdso.h | 2 + arch/s390/include/asm/vdso/gettimeofday.h | 7 ++ arch/s390/kernel/vdso.c | 102 ++++++++++++++++++++++++++++-- arch/s390/kernel/vdso64/vdso64.lds.S | 5 +- 6 files changed, 110 insertions(+), 8 deletions(-) (limited to 'arch/s390/configs') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 41a2c58c6e7a..5de9f409e4d0 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -129,6 +129,7 @@ config S390 select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL + select GENERIC_VDSO_TIME_NS select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 0200ccf10ace..acf982a2ae4c 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -3,6 +3,7 @@ CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y # CONFIG_CPU_ISOLATION is not set # CONFIG_UTS_NS is not set +# CONFIG_TIME_NS is not set # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index e4ea142a082c..b45e3dddd2c2 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -7,6 +7,8 @@ /* Default link address for the vDSO */ #define VDSO64_LBASE 0 +#define __VVAR_PAGES 2 + #define VDSO_VERSION_STRING LINUX_2.6.29 #ifndef __ASSEMBLY__ diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index c92b0dec0d79..ed89ef742530 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -67,4 +67,11 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts) return r2; } +#ifdef CONFIG_TIME_NS +static __always_inline 
const struct vdso_data *__arch_get_timens_vdso_data(void) +{ + return _timens_data; +} +#endif + #endif diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 31920b76ae6d..dd967af29d2b 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -15,12 +15,15 @@ #include #include #include +#include #include #include extern char vdso64_start[], vdso64_end[]; static unsigned int vdso_pages; +static struct vm_special_mapping vvar_mapping; + static union { struct vdso_data data[CS_BASES]; u8 page[PAGE_SIZE]; @@ -28,6 +31,12 @@ static union { struct vdso_data *vdso_data = vdso_data_store.data; +enum vvar_pages { + VVAR_DATA_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, + VVAR_NR_PAGES, +}; + unsigned int __read_mostly vdso_enabled = 1; static int __init vdso_setup(char *str) @@ -40,12 +49,89 @@ static int __init vdso_setup(char *str) } __setup("vdso=", vdso_setup); +#ifdef CONFIG_TIME_NS +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return (struct vdso_data *)(vvar_page); +} + +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops(). + * For more details check_vma_flags() and __access_remote_vm() + */ + WARN(1, "vvar_page accessed remotely"); + return NULL; +} + +/* + * The VVAR page layout depends on whether a task belongs to the root or + * non-root time namespace. Whenever a task changes its namespace, the VVAR + * page tables are cleared and then they will be re-faulted with a + * corresponding layout. + * See also the comment near timens_setup_vdso_data() for details. 
+ */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + + mmap_read_lock(mm); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long size = vma->vm_end - vma->vm_start; + + if (!vma_is_special_mapping(vma, &vvar_mapping)) + continue; + zap_page_range(vma, vma->vm_start, size); + break; + } + mmap_read_unlock(mm); + return 0; +} +#else +static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} +#endif + static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { - if (vmf->pgoff == 0) - return vmf_insert_pfn(vma, vmf->address, virt_to_pfn(vdso_data)); - return VM_FAULT_SIGBUS; + struct page *timens_page = find_timens_vvar_page(vma); + unsigned long pfn; + + switch (vmf->pgoff) { + case VVAR_DATA_PAGE_OFFSET: + if (timens_page) + pfn = page_to_pfn(timens_page); + else + pfn = virt_to_pfn(vdso_data); + break; +#ifdef CONFIG_TIME_NS + case VVAR_TIMENS_PAGE_OFFSET: + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and + * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET + * offset. + * See also the comment near timens_setup_vdso_data(). 
+ */ + if (!timens_page) + return VM_FAULT_SIGBUS; + pfn = virt_to_pfn(vdso_data); + break; +#endif /* CONFIG_TIME_NS */ + default: + return VM_FAULT_SIGBUS; + } + return vmf_insert_pfn(vma, vmf->address, pfn); } static int vdso_mremap(const struct vm_special_mapping *sm, @@ -80,23 +166,25 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) struct vm_area_struct *vma; int rc; + BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); if (!vdso_enabled || is_compat_task()) return 0; if (mmap_write_lock_killable(mm)) return -EINTR; vdso_text_len = vdso_pages << PAGE_SHIFT; - vdso_mapping_len = vdso_text_len + PAGE_SIZE; + vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE; vvar_start = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); rc = vvar_start; if (IS_ERR_VALUE(vvar_start)) goto out; - vma = _install_special_mapping(mm, vvar_start, PAGE_SIZE, - VM_READ|VM_MAYREAD|VM_PFNMAP, + vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE, + VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| + VM_PFNMAP, &vvar_mapping); rc = PTR_ERR(vma); if (IS_ERR(vma)) goto out; - vdso_text_start = vvar_start + PAGE_SIZE; + vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE; /* VM_MAYWRITE for COW so gdb can set breakpoints */ vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, VM_READ|VM_EXEC| diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index 99063b4c6e27..518f1ea405f4 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -13,7 +13,10 @@ ENTRY(_start) SECTIONS { - PROVIDE(_vdso_data = . - PAGE_SIZE); + PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); +#ifdef CONFIG_TIME_NS + PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); +#endif . = VDSO64_LBASE + SIZEOF_HEADERS; .hash : { *(.hash) } :text -- cgit v1.2.3