From 9285ec4c8b61d4930a575081abeba2cd4f449a74 Mon Sep 17 00:00:00 2001 From: Jason A. Donenfeld Date: Fri, 21 Jun 2019 22:32:48 +0200 Subject: timekeeping: Use proper clock specifier names in functions This makes boot uniformly boottime and tai uniformly clocktai, to address the remaining oversights. Signed-off-by: Jason A. Donenfeld Signed-off-by: Thomas Gleixner Reviewed-by: Arnd Bergmann Link: https://lkml.kernel.org/r/20190621203249.3909-2-Jason@zx2c4.com --- arch/x86/kvm/pmu.c | 4 ++-- arch/x86/kvm/x86.c | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index dd745b58ffd8..1aea628ef6b8 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -264,10 +264,10 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) ctr_val = rdtsc(); break; case VMWARE_BACKDOOR_PMC_REAL_TIME: - ctr_val = ktime_get_boot_ns(); + ctr_val = ktime_get_boottime_ns(); break; case VMWARE_BACKDOOR_PMC_APPARENT_TIME: - ctr_val = ktime_get_boot_ns() + + ctr_val = ktime_get_boottime_ns() + vcpu->kvm->arch.kvmclock_offset; break; default: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 83aefd759846..81a0914a1ec1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1731,7 +1731,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); offset = kvm_compute_tsc_offset(vcpu, data); - ns = ktime_get_boot_ns(); + ns = ktime_get_boottime_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; if (vcpu->arch.virtual_tsc_khz) { @@ -2073,7 +2073,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) spin_lock(&ka->pvclock_gtod_sync_lock); if (!ka->use_master_clock) { spin_unlock(&ka->pvclock_gtod_sync_lock); - return ktime_get_boot_ns() + ka->kvmclock_offset; + return ktime_get_boottime_ns() + ka->kvmclock_offset; } hv_clock.tsc_timestamp = ka->master_cycle_now; @@ -2089,7 +2089,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) &hv_clock.tsc_to_system_mul); ret = __pvclock_read_cycles(&hv_clock, rdtsc()); } else - ret = ktime_get_boot_ns() + ka->kvmclock_offset; + ret = ktime_get_boottime_ns() + ka->kvmclock_offset; put_cpu(); @@ -2188,7 +2188,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) } if (!use_master_clock) { host_tsc = rdtsc(); - kernel_ns = ktime_get_boot_ns(); + kernel_ns = ktime_get_boottime_ns(); } tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); @@ -9018,7 +9018,7 @@ int kvm_arch_hardware_enable(void) * before any KVM threads can be running. Unfortunately, we can't * bring the TSCs fully up to date with real time, as we aren't yet far * enough into CPU bringup that we know how much real time has actually - * elapsed; our helper function, ktime_get_boot_ns() will be using boot + * elapsed; our helper function, ktime_get_boottime_ns() will be using boot * variables that haven't been updated yet. * * So we simply find the maximum observed TSC above, then record the @@ -9246,7 +9246,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) mutex_init(&kvm->arch.apic_map_lock); spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); - kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); + kvm->arch.kvmclock_offset = -ktime_get_boottime_ns(); pvclock_update_vm_gtod_copy(kvm); kvm->arch.guest_can_read_msr_platform_info = true; -- cgit v1.2.3 From 28b1a824a4f44da46983cd2c3249f910bd4b797b Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jun 2019 10:52:31 +0100 Subject: arm64: vdso: Substitute gettimeofday() with C implementation To take advantage of the commonly defined vdso interface for gettimeofday() the architectural code requires an adaptation. Re-implement the gettimeofday VDSO in C in order to use lib/vdso. With the new implementation arm64 gains support for CLOCK_BOOTTIME and CLOCK_TAI. [ tglx: Reformatted the function line breaks ] Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Tested-by: Shijith Thotton Tested-by: Andre Przywara Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Link: https://lkml.kernel.org/r/20190621095252.32307-5-vincenzo.frascino@arm.com --- arch/arm64/Kconfig | 2 + arch/arm64/include/asm/vdso/gettimeofday.h | 84 ++++++++ arch/arm64/include/asm/vdso/vsyscall.h | 53 +++++ arch/arm64/kernel/asm-offsets.c | 33 ++- arch/arm64/kernel/vdso.c | 51 +---- arch/arm64/kernel/vdso/Makefile | 34 ++- arch/arm64/kernel/vdso/gettimeofday.S | 323 ----------------------------- arch/arm64/kernel/vdso/vgettimeofday.c | 27 +++ 8 files changed, 220 insertions(+), 387 deletions(-) create mode 100644 arch/arm64/include/asm/vdso/gettimeofday.h create mode 100644 arch/arm64/include/asm/vdso/vsyscall.h create mode 100644 arch/arm64/kernel/vdso/vgettimeofday.c (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 697ea0510729..952c9f8cf3b8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -107,6 +107,7 @@ config ARM64 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select GENERIC_GETTIMEOFDAY select HANDLE_DOMAIN_IRQ select HARDIRQS_SW_RESEND select HAVE_PCI @@ -160,6 +161,7 @@ config ARM64 select HAVE_SYSCALL_TRACEPOINTS select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_GENERIC_VDSO select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..447ef417de45 --- /dev/null +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 ARM Limited + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include +#include + +#define VDSO_HAS_CLOCK_GETRES 1 + +static __always_inline +int gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + register struct timezone *tz asm("x1") = _tz; + register struct __kernel_old_timeval *tv asm("x0") = _tv; + register long ret asm ("x0"); + register long nr asm("x8") = __NR_gettimeofday; + + asm volatile( + " svc #0\n" + : "=r" (ret) + : "r" (tv), "r" (tz), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + register struct __kernel_timespec *ts asm("x1") = _ts; + register clockid_t clkid asm("x0") = _clkid; + register long ret asm ("x0"); + register long nr asm("x8") = __NR_clock_gettime; + + asm volatile( + " svc #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline +int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + register struct __kernel_timespec *ts asm("x1") = _ts; + register clockid_t clkid asm("x0") = _clkid; + register long ret asm ("x0"); + register long nr asm("x8") = __NR_clock_getres; + + asm volatile( + " svc #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +{ + u64 res; + + asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); + + return res; +} + +static __always_inline +const struct vdso_data *__arch_get_vdso_data(void) +{ + return _vdso_data; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..0c731bfc7c8c --- /dev/null +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include +#include + +#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48) + +extern struct vdso_data *vdso_data; + +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. + */ +static __always_inline +struct vdso_data *__arm64_get_k_vdso_data(void) +{ + return vdso_data; +} +#define __arch_get_k_vdso_data __arm64_get_k_vdso_data + +static __always_inline +int __arm64_get_clock_mode(struct timekeeper *tk) +{ + u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct; + + return use_syscall; +} +#define __arch_get_clock_mode __arm64_get_clock_mode + +static __always_inline +int __arm64_use_vsyscall(struct vdso_data *vdata) +{ + return !vdata[CS_HRES_COARSE].clock_mode; +} +#define __arch_use_vsyscall __arm64_use_vsyscall + +static __always_inline +void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) +{ + vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK; + vdata[CS_RAW].mask = VDSO_PRECISION_MASK; +} +#define __arch_update_vsyscall __arm64_update_vsyscall + +/* The asm-generic header needs to be included after the definitions above */ +#include + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 02f08768c298..14c99b7a0c0e 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -14,13 +14,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include #include @@ -89,17 +89,28 @@ int main(void) DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC); DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); BLANK(); - DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last)); - DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec)); - DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec)); - DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); - DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec)); - DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec)); - DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count)); - DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult)); - DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift)); + DEFINE(VDSO_SEQ, offsetof(struct vdso_data, seq)); + DEFINE(VDSO_CLK_MODE, offsetof(struct vdso_data, clock_mode)); + DEFINE(VDSO_CYCLE_LAST, offsetof(struct vdso_data, cycle_last)); + DEFINE(VDSO_MASK, offsetof(struct vdso_data, mask)); + DEFINE(VDSO_MULT, offsetof(struct vdso_data, mult)); + DEFINE(VDSO_SHIFT, offsetof(struct vdso_data, shift)); + DEFINE(VDSO_REALTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].sec)); + DEFINE(VDSO_REALTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].nsec)); + DEFINE(VDSO_MONO_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].sec)); + DEFINE(VDSO_MONO_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].nsec)); + DEFINE(VDSO_MONO_RAW_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].sec)); + DEFINE(VDSO_MONO_RAW_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].nsec)); + DEFINE(VDSO_BOOTTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].sec)); + DEFINE(VDSO_BOOTTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].nsec)); + DEFINE(VDSO_TAI_SEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].sec)); + DEFINE(VDSO_TAI_NSEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].nsec)); + DEFINE(VDSO_RT_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].sec)); + DEFINE(VDSO_RT_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].nsec)); + DEFINE(VDSO_MONO_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].sec)); + DEFINE(VDSO_MONO_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].nsec)); DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); - DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall)); + DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); BLANK(); DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec)); DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec)); diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 663b166241d0..478ec865a413 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -20,11 +20,13 @@ #include #include #include +#include +#include +#include #include #include #include -#include extern char vdso_start[], vdso_end[]; static unsigned long vdso_pages __ro_after_init; @@ -33,10 +35,10 @@ static unsigned long vdso_pages __ro_after_init; * The vDSO data page. */ static union { - struct vdso_data data; + struct vdso_data data[CS_BASES]; u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = &vdso_data_store.data; +struct vdso_data *vdso_data = vdso_data_store.data; #ifdef CONFIG_COMPAT /* @@ -269,46 +271,3 @@ up_fail: up_write(&mm->mmap_sem); return PTR_ERR(ret); } - -/* - * Update the vDSO data page to keep in sync with kernel timekeeping. - */ -void update_vsyscall(struct timekeeper *tk) -{ - u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct; - - ++vdso_data->tb_seq_count; - smp_wmb(); - - vdso_data->use_syscall = use_syscall; - vdso_data->xtime_coarse_sec = tk->xtime_sec; - vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >> - tk->tkr_mono.shift; - vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; - vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; - - /* Read without the seqlock held by clock_getres() */ - WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution); - - if (!use_syscall) { - /* tkr_mono.cycle_last == tkr_raw.cycle_last */ - vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; - vdso_data->raw_time_sec = tk->raw_sec; - vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec; - vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; - vdso_data->cs_mono_mult = tk->tkr_mono.mult; - vdso_data->cs_raw_mult = tk->tkr_raw.mult; - /* tkr_mono.shift == tkr_raw.shift */ - vdso_data->cs_shift = tk->tkr_mono.shift; - } - - smp_wmb(); - ++vdso_data->tb_seq_count; -} - -void update_vsyscall_tz(void) -{ - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; - vdso_data->tz_dsttime = sys_tz.tz_dsttime; -} diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index fa230ff09aa1..3acfc813e966 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -6,7 +6,12 @@ # Heavily based on the vDSO Makefiles for other archs. # -obj-vdso := gettimeofday.o note.o sigreturn.o +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_AARCH64_JUMP_SLOT|R_AARCH64_GLOB_DAT|R_AARCH64_ABS64 +include $(srctree)/lib/vdso/Makefile + +obj-vdso := vgettimeofday.o note.o sigreturn.o # Build rules targets := $(obj-vdso) vdso.so vdso.so.dbg @@ -15,6 +20,24 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ --build-id -n -T +ccflags-y := -fno-common -fno-builtin -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING + +VDSO_LDFLAGS := -Bsymbolic + +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os +KBUILD_CFLAGS += $(DISABLE_LTO) +KASAN_SANITIZE := n +UBSAN_SANITIZE := n +OBJECT_FILES_NON_STANDARD := y +KCOV_INSTRUMENT := n + +ifeq ($(c-gettimeofday-y),) +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny +else +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y) +endif + # Disable gcov profiling for VDSO code GCOV_PROFILE := n @@ -28,6 +51,7 @@ $(obj)/vdso.o : $(obj)/vdso.so # Link rule for the .so file, .lds has to be first $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE $(call if_changed,ld) + $(call if_changed,vdso_check) # Strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -42,13 +66,9 @@ quiet_cmd_vdsosym = VDSOSYM $@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) -# Assembly rules for the .S files -$(obj-vdso): %.o: %.S FORCE - $(call if_changed_dep,vdsoas) - # Actual build commands -quiet_cmd_vdsoas = VDSOA $@ - cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< +quiet_cmd_vdsocc = VDSOCC $@ + cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $< # Install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index 80f780f56e0d..e69de29bb2d1 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -1,323 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Userspace implementations of gettimeofday() and friends. - * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - */ - -#include -#include -#include - -#define NSEC_PER_SEC_LO16 0xca00 -#define NSEC_PER_SEC_HI16 0x3b9a - -vdso_data .req x6 -seqcnt .req w7 -w_tmp .req w8 -x_tmp .req x8 - -/* - * Conventions for macro arguments: - * - An argument is write-only if its name starts with "res". - * - All other arguments are read-only, unless otherwise specified. - */ - - .macro seqcnt_acquire -9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT] - tbnz seqcnt, #0, 9999b - dmb ishld - .endm - - .macro seqcnt_check fail - dmb ishld - ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT] - cmp w_tmp, seqcnt - b.ne \fail - .endm - - .macro syscall_check fail - ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL] - cbnz w_tmp, \fail - .endm - - .macro get_nsec_per_sec res - mov \res, #NSEC_PER_SEC_LO16 - movk \res, #NSEC_PER_SEC_HI16, lsl #16 - .endm - - /* - * Returns the clock delta, in nanoseconds left-shifted by the clock - * shift. - */ - .macro get_clock_shifted_nsec res, cycle_last, mult - /* Read the virtual counter. */ - isb - mrs x_tmp, cntvct_el0 - /* Calculate cycle delta and convert to ns. */ - sub \res, x_tmp, \cycle_last - /* We can only guarantee 56 bits of precision. */ - movn x_tmp, #0xff00, lsl #48 - and \res, x_tmp, \res - mul \res, \res, \mult - /* - * Fake address dependency from the value computed from the counter - * register to subsequent data page accesses so that the sequence - * locking also orders the read of the counter. - */ - and x_tmp, \res, xzr - add vdso_data, vdso_data, x_tmp - .endm - - /* - * Returns in res_{sec,nsec} the REALTIME timespec, based on the - * "wall time" (xtime) and the clock_mono delta. - */ - .macro get_ts_realtime res_sec, res_nsec, \ - clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec - add \res_nsec, \clock_nsec, \xtime_nsec - udiv x_tmp, \res_nsec, \nsec_to_sec - add \res_sec, \xtime_sec, x_tmp - msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec - .endm - - /* - * Returns in res_{sec,nsec} the timespec based on the clock_raw delta, - * used for CLOCK_MONOTONIC_RAW. - */ - .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec - udiv \res_sec, \clock_nsec, \nsec_to_sec - msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec - .endm - - /* sec and nsec are modified in place. */ - .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec - /* Add timespec. */ - add \sec, \sec, \ts_sec - add \nsec, \nsec, \ts_nsec - - /* Normalise the new timespec. */ - cmp \nsec, \nsec_to_sec - b.lt 9999f - sub \nsec, \nsec, \nsec_to_sec - add \sec, \sec, #1 -9999: - cmp \nsec, #0 - b.ge 9998f - add \nsec, \nsec, \nsec_to_sec - sub \sec, \sec, #1 -9998: - .endm - - .macro clock_gettime_return, shift=0 - .if \shift == 1 - lsr x11, x11, x12 - .endif - stp x10, x11, [x1, #TSPEC_TV_SEC] - mov x0, xzr - ret - .endm - - .macro jump_slot jumptable, index, label - .if (. - \jumptable) != 4 * (\index) - .error "Jump slot index mismatch" - .endif - b \label - .endm - - .text - -/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */ -ENTRY(__kernel_gettimeofday) - .cfi_startproc - adr vdso_data, _vdso_data - /* If tv is NULL, skip to the timezone code. */ - cbz x0, 2f - - /* Compute the time of day. */ -1: seqcnt_acquire - syscall_check fail=4f - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - /* w11 = cs_mono_mult, w12 = cs_shift */ - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - - get_nsec_per_sec res=x9 - lsl x9, x9, x12 - - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 - seqcnt_check fail=1b - get_ts_realtime res_sec=x10, res_nsec=x11, \ - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - - /* Convert ns to us. */ - mov x13, #1000 - lsl x13, x13, x12 - udiv x11, x11, x13 - stp x10, x11, [x0, #TVAL_TV_SEC] -2: - /* If tz is NULL, return 0. */ - cbz x1, 3f - ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST] - stp w4, w5, [x1, #TZ_MINWEST] -3: - mov x0, xzr - ret -4: - /* Syscall fallback. */ - mov x8, #__NR_gettimeofday - svc #0 - ret - .cfi_endproc -ENDPROC(__kernel_gettimeofday) - -#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE - -/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */ -ENTRY(__kernel_clock_gettime) - .cfi_startproc - cmp w0, #JUMPSLOT_MAX - b.hi syscall - adr vdso_data, _vdso_data - adr x_tmp, jumptable - add x_tmp, x_tmp, w0, uxtw #2 - br x_tmp - - ALIGN -jumptable: - jump_slot jumptable, CLOCK_REALTIME, realtime - jump_slot jumptable, CLOCK_MONOTONIC, monotonic - b syscall - b syscall - jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw - jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse - jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse - - .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1) - .error "Wrong jumptable size" - .endif - - ALIGN -realtime: - seqcnt_acquire - syscall_check fail=syscall - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - /* w11 = cs_mono_mult, w12 = cs_shift */ - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - - /* All computations are done with left-shifted nsecs. */ - get_nsec_per_sec res=x9 - lsl x9, x9, x12 - - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 - seqcnt_check fail=realtime - get_ts_realtime res_sec=x10, res_nsec=x11, \ - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - clock_gettime_return, shift=1 - - ALIGN -monotonic: - seqcnt_acquire - syscall_check fail=syscall - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - /* w11 = cs_mono_mult, w12 = cs_shift */ - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] - - /* All computations are done with left-shifted nsecs. */ - lsl x4, x4, x12 - get_nsec_per_sec res=x9 - lsl x9, x9, x12 - - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 - seqcnt_check fail=monotonic - get_ts_realtime res_sec=x10, res_nsec=x11, \ - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - - add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9 - clock_gettime_return, shift=1 - - ALIGN -monotonic_raw: - seqcnt_acquire - syscall_check fail=syscall - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - /* w11 = cs_raw_mult, w12 = cs_shift */ - ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] - ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] - - /* All computations are done with left-shifted nsecs. */ - get_nsec_per_sec res=x9 - lsl x9, x9, x12 - - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 - seqcnt_check fail=monotonic_raw - get_ts_clock_raw res_sec=x10, res_nsec=x11, \ - clock_nsec=x15, nsec_to_sec=x9 - - add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 - clock_gettime_return, shift=1 - - ALIGN -realtime_coarse: - seqcnt_acquire - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] - seqcnt_check fail=realtime_coarse - clock_gettime_return - - ALIGN -monotonic_coarse: - seqcnt_acquire - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] - ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] - seqcnt_check fail=monotonic_coarse - - /* Computations are done in (non-shifted) nsecs. */ - get_nsec_per_sec res=x9 - add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 - clock_gettime_return - - ALIGN -syscall: /* Syscall fallback. */ - mov x8, #__NR_clock_gettime - svc #0 - ret - .cfi_endproc -ENDPROC(__kernel_clock_gettime) - -/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */ -ENTRY(__kernel_clock_getres) - .cfi_startproc - cmp w0, #CLOCK_REALTIME - ccmp w0, #CLOCK_MONOTONIC, #0x4, ne - ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne - b.ne 1f - - adr vdso_data, _vdso_data - ldr w2, [vdso_data, #CLOCK_REALTIME_RES] - b 2f -1: - cmp w0, #CLOCK_REALTIME_COARSE - ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne - b.ne 4f - ldr x2, 5f -2: - cbz x1, 3f - stp xzr, x2, [x1] - -3: /* res == NULL. */ - mov w0, wzr - ret - -4: /* Syscall fallback. */ - mov x8, #__NR_clock_getres - svc #0 - ret -5: - .quad CLOCK_COARSE_RES - .cfi_endproc -ENDPROC(__kernel_clock_getres) diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c new file mode 100644 index 000000000000..747635501a14 --- /dev/null +++ b/arch/arm64/kernel/vdso/vgettimeofday.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM64 userspace implementations of gettimeofday() and similar. + * + * Copyright (C) 2018 ARM Limited + * + */ +#include +#include + +int __kernel_clock_gettime(clockid_t clock, + struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime(clock, ts); +} + +int __kernel_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +int __kernel_clock_getres(clockid_t clock_id, + struct __kernel_timespec *res) +{ + return __cvdso_clock_getres(clock_id, res); +} -- cgit v1.2.3 From 98cd3c3f83fbba27a6bacd75ad12e8388a61a32a Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 21 Jun 2019 10:52:32 +0100 Subject: arm64: vdso: Build vDSO with -ffixed-x18 The vDSO needs to be built with x18 reserved in order to accommodate userspace platform ABIs built on top of Linux that use the register to carry inter-procedural state, as provided for by the AAPCS. An example of such a platform ABI is the one that will be used by an upcoming version of Android. Although this change is currently a no-op due to the fact that the vDSO is currently implemented in pure assembly on arm64, it is necessary in order to prepare for using the generic C implementation of the vDSO. [ tglx: Massaged changelog ] Signed-off-by: Peter Collingbourne Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Tested-by: Shijith Thotton Tested-by: Andre Przywara Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Cc: Mark Salyzyn Link: https://lkml.kernel.org/r/20190621095252.32307-6-vincenzo.frascino@arm.com --- arch/arm64/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 3acfc813e966..ec81d28aeb5d 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -20,7 +20,7 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ --build-id -n -T -ccflags-y := -fno-common -fno-builtin -fno-stack-protector +ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING VDSO_LDFLAGS := -Bsymbolic -- cgit v1.2.3 From 53c489e1dfeb6092b9fb14eb73c2cbcb07224798 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jun 2019 10:52:33 +0100 Subject: arm64: compat: Add missing syscall numbers vDSO requires gettimeofday() and clock_gettime() syscalls to implement the fallback mechanism. Add the missing syscall numbers to unistd.h for arm64. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Tested-by: Shijith Thotton Tested-by: Andre Przywara Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Link: https://lkml.kernel.org/r/20190621095252.32307-7-vincenzo.frascino@arm.com --- arch/arm64/include/asm/unistd.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index c9f8dd421c5f..2a23614198f1 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -22,8 +22,13 @@ #define __NR_compat_exit 1 #define __NR_compat_read 3 #define __NR_compat_write 4 +#define __NR_compat_gettimeofday 78 #define __NR_compat_sigreturn 119 #define __NR_compat_rt_sigreturn 173 +#define __NR_compat_clock_getres 247 +#define __NR_compat_clock_gettime 263 +#define __NR_compat_clock_gettime64 403 +#define __NR_compat_clock_getres_time64 406 /* * The following SVCs are ARM private. -- cgit v1.2.3 From 206c0dfa3c55bf31f9d78da3d7384b9343745153 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jun 2019 10:52:34 +0100 Subject: arm64: compat: Expose signal related structures The compat signal data structures are required as part of the compat vDSO implementation in order to provide the unwinding information for the sigreturn trampolines. Expose these data structures as part of signal32.h. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Tested-by: Shijith Thotton Tested-by: Andre Przywara Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Link: https://lkml.kernel.org/r/20190621095252.32307-8-vincenzo.frascino@arm.com --- arch/arm64/include/asm/signal32.h | 46 +++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/signal32.c | 46 --------------------------------------- 2 files changed, 46 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h index 0418c67f2b8b..bd43d1cf724b 100644 --- a/arch/arm64/include/asm/signal32.h +++ b/arch/arm64/include/asm/signal32.h @@ -9,6 +9,52 @@ #ifdef CONFIG_COMPAT #include +struct compat_sigcontext { + /* We always set these two fields to 0 */ + compat_ulong_t trap_no; + compat_ulong_t error_code; + + compat_ulong_t oldmask; + compat_ulong_t arm_r0; + compat_ulong_t arm_r1; + compat_ulong_t arm_r2; + compat_ulong_t arm_r3; + compat_ulong_t arm_r4; + compat_ulong_t arm_r5; + compat_ulong_t arm_r6; + compat_ulong_t arm_r7; + compat_ulong_t arm_r8; + compat_ulong_t arm_r9; + compat_ulong_t arm_r10; + compat_ulong_t arm_fp; + compat_ulong_t arm_ip; + compat_ulong_t arm_sp; + compat_ulong_t arm_lr; + compat_ulong_t arm_pc; + compat_ulong_t arm_cpsr; + compat_ulong_t fault_address; +}; + +struct compat_ucontext { + compat_ulong_t uc_flags; + compat_uptr_t uc_link; + compat_stack_t uc_stack; + struct compat_sigcontext uc_mcontext; + compat_sigset_t uc_sigmask; + int __unused[32 - (sizeof(compat_sigset_t) / sizeof(int))]; + compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8))); +}; + +struct compat_sigframe { + struct compat_ucontext uc; + compat_ulong_t retcode[2]; +}; + +struct compat_rt_sigframe { + struct compat_siginfo info; + struct compat_sigframe sig; +}; + int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs); int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 331d1e5acad4..8a9a5ceb63b7 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -19,42 +19,6 @@ #include #include -struct compat_sigcontext { - /* We always set these two fields to 0 */ - compat_ulong_t trap_no; - compat_ulong_t error_code; - - compat_ulong_t oldmask; - compat_ulong_t arm_r0; - compat_ulong_t arm_r1; - compat_ulong_t arm_r2; - compat_ulong_t arm_r3; - compat_ulong_t arm_r4; - compat_ulong_t arm_r5; - compat_ulong_t arm_r6; - compat_ulong_t arm_r7; - compat_ulong_t arm_r8; - compat_ulong_t arm_r9; - compat_ulong_t arm_r10; - compat_ulong_t arm_fp; - compat_ulong_t arm_ip; - compat_ulong_t arm_sp; - compat_ulong_t arm_lr; - compat_ulong_t arm_pc; - compat_ulong_t arm_cpsr; - compat_ulong_t fault_address; -}; - -struct compat_ucontext { - compat_ulong_t uc_flags; - compat_uptr_t uc_link; - compat_stack_t uc_stack; - struct compat_sigcontext uc_mcontext; - compat_sigset_t uc_sigmask; - int __unused[32 - (sizeof (compat_sigset_t) / sizeof (int))]; - compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8))); -}; - struct compat_vfp_sigframe { compat_ulong_t magic; compat_ulong_t size; @@ -81,16 +45,6 @@ struct compat_aux_sigframe { unsigned long end_magic; } __attribute__((__aligned__(8))); -struct compat_sigframe { - struct compat_ucontext uc; - compat_ulong_t retcode[2]; -}; - -struct compat_rt_sigframe { - struct compat_siginfo info; - struct compat_sigframe sig; -}; - #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) -- cgit v1.2.3 From f14d8025d263f3c8236775df724a7c1f14e0dc94 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jun 2019 10:52:35 +0100 Subject: arm64: compat: Generate asm offsets for signals Update asm-offsets for arm64 to generate the correct offsets for compat signals. They will be useful for the implementation of the compat sigreturn trampolines in vDSO context. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Tested-by: Shijith Thotton Tested-by: Andre Przywara Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Link: https://lkml.kernel.org/r/20190621095252.32307-9-vincenzo.frascino@arm.com --- arch/arm64/kernel/asm-offsets.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 14c99b7a0c0e..e6f7409a78a4 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -66,6 +67,11 @@ int main(void) DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); +#ifdef CONFIG_COMPAT + DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0)); + DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_rt_sigframe, sig.uc.uc_mcontext.arm_r0)); + BLANK(); +#endif DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); BLANK(); DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); -- cgit v1.2.3 From a7f71a2c8903f8599312f75233946287c558d9f6 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jun 2019 10:52:37 +0100 Subject: arm64: compat: Add vDSO Provide the arm64 compat (AArch32) vDSO in kernel/vdso32 in a similar way to what happens in kernel/vdso. The compat vDSO leverages on an adaptation of the arm architecture code with few changes: - Use of lib/vdso for gettimeofday - Implement a syscall based fallback - Introduce clock_getres() for the compat library - Implement trampolines - Implement elf note To build the compat vDSO a 32 bit compiler is required and needs to be specified via CONFIG_CROSS_COMPILE_COMPAT_VDSO. The code is not yet enabled as other prerequisites are missing. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Tested-by: Shijith Thotton Tested-by: Andre Przywara Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Link: https://lkml.kernel.org/r/20190621095252.32307-11-vincenzo.frascino@arm.com --- arch/arm64/include/asm/vdso/compat_barrier.h | 51 ++++++ arch/arm64/include/asm/vdso/compat_gettimeofday.h | 108 +++++++++++++ arch/arm64/kernel/vdso32/.gitignore | 2 + arch/arm64/kernel/vdso32/Makefile | 186 ++++++++++++++++++++++ arch/arm64/kernel/vdso32/note.c | 15 ++ arch/arm64/kernel/vdso32/sigreturn.S | 62 ++++++++ arch/arm64/kernel/vdso32/vdso.S | 19 +++ arch/arm64/kernel/vdso32/vdso.lds.S | 82 ++++++++++ arch/arm64/kernel/vdso32/vgettimeofday.c | 59 +++++++ 9 files changed, 584 insertions(+) create mode 100644 arch/arm64/include/asm/vdso/compat_barrier.h create mode 100644 arch/arm64/include/asm/vdso/compat_gettimeofday.h create mode 100644 arch/arm64/kernel/vdso32/.gitignore create mode 100644 arch/arm64/kernel/vdso32/Makefile create mode 100644 arch/arm64/kernel/vdso32/note.c create mode 100644 arch/arm64/kernel/vdso32/sigreturn.S create mode 100644 arch/arm64/kernel/vdso32/vdso.S create mode 100644 arch/arm64/kernel/vdso32/vdso.lds.S create mode 100644 arch/arm64/kernel/vdso32/vgettimeofday.c (limited to 'arch') diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h new file mode 100644 index 000000000000..ea24ea856b07 --- /dev/null +++ b/arch/arm64/include/asm/vdso/compat_barrier.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 ARM Limited + */ +#ifndef __COMPAT_BARRIER_H +#define __COMPAT_BARRIER_H + +#ifndef __ASSEMBLY__ +/* + * Warning: This code is meant to be used with + * ENABLE_COMPAT_VDSO only. + */ +#ifndef ENABLE_COMPAT_VDSO +#error This header is meant to be used with ENABLE_COMPAT_VDSO only +#endif + +#ifdef dmb +#undef dmb +#endif + +#if __LINUX_ARM_ARCH__ >= 7 +#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") +#elif __LINUX_ARM_ARCH__ == 6 +#define dmb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ + : : "r" (0) : "memory") +#else +#define dmb(x) __asm__ __volatile__ ("" : : : "memory") +#endif + +#if __LINUX_ARM_ARCH__ >= 8 +#define aarch32_smp_mb() dmb(ish) +#define aarch32_smp_rmb() dmb(ishld) +#define aarch32_smp_wmb() dmb(ishst) +#else +#define aarch32_smp_mb() dmb(ish) +#define aarch32_smp_rmb() aarch32_smp_mb() +#define aarch32_smp_wmb() dmb(ishst) +#endif + + +#undef smp_mb +#undef smp_rmb +#undef smp_wmb + +#define smp_mb() aarch32_smp_mb() +#define smp_rmb() aarch32_smp_rmb() +#define smp_wmb() aarch32_smp_wmb() + +#endif /* !__ASSEMBLY__ */ + +#endif /* __COMPAT_BARRIER_H */ diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h new file mode 100644 index 000000000000..93dbd935b66d --- /dev/null +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 ARM Limited + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include +#include + +#include + +#define VDSO_HAS_CLOCK_GETRES 1 + +static __always_inline +int gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + register struct timezone *tz asm("r1") = _tz; + register struct __kernel_old_timeval *tv asm("r0") = _tv; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_compat_gettimeofday; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (tv), "r" (tz), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + register struct __kernel_timespec *ts asm("r1") = _ts; + register clockid_t clkid asm("r0") = _clkid; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_compat_clock_gettime64; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline +int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + register struct __kernel_timespec *ts asm("r1") = _ts; + register clockid_t clkid asm("r0") = _clkid; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_compat_clock_getres_time64; + + /* The checks below are required for ABI consistency with arm */ + if ((_clkid >= MAX_CLOCKS) && (_ts == NULL)) + return -EINVAL; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +{ + u64 res; + + isb(); + asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (res)); + + return res; +} + +static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +{ + const struct vdso_data *ret; + + /* + * This simply puts &_vdso_data into ret. The reason why we don't use + * `ret = _vdso_data` is that the compiler tends to optimise this in a + * very suboptimal way: instead of keeping &_vdso_data in a register, + * it goes through a relocation almost every time _vdso_data must be + * accessed (even in subfunctions). This is both time and space + * consuming: each relocation uses a word in the code section, and it + * has to be loaded at runtime. + * + * This trick hides the assignment from the compiler. Since it cannot + * track where the pointer comes from, it will only use one relocation + * where __arch_get_vdso_data() is called, and then keep the result in + * a register. + */ + asm volatile("mov %0, %1" : "=r"(ret) : "r"(_vdso_data)); + + return ret; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/arm64/kernel/vdso32/.gitignore b/arch/arm64/kernel/vdso32/.gitignore new file mode 100644 index 000000000000..4fea950fa5ed --- /dev/null +++ b/arch/arm64/kernel/vdso32/.gitignore @@ -0,0 +1,2 @@ +vdso.lds +vdso.so.raw diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile new file mode 100644 index 000000000000..288c14d30b45 --- /dev/null +++ b/arch/arm64/kernel/vdso32/Makefile @@ -0,0 +1,186 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for vdso32 +# + +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32 +include $(srctree)/lib/vdso/Makefile + +COMPATCC := $(CROSS_COMPILE_COMPAT)gcc + +# Same as cc-*option, but using COMPATCC instead of CC +cc32-option = $(call try-run,\ + $(COMPATCC) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) +cc32-disable-warning = $(call try-run,\ + $(COMPATCC) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) +cc32-ldoption = $(call try-run,\ + $(COMPATCC) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2)) + +# We cannot use the global flags to compile the vDSO files, the main reason +# being that the 32-bit compiler may be older than the main (64-bit) compiler +# and therefore may not understand flags set using $(cc-option ...). Besides, +# arch-specific options should be taken from the arm Makefile instead of the +# arm64 one. +# As a result we set our own flags here. + +# From top-level Makefile +# NOSTDINC_FLAGS +VDSO_CPPFLAGS := -nostdinc -isystem $(shell $(COMPATCC) -print-file-name=include) +VDSO_CPPFLAGS += $(LINUXINCLUDE) +VDSO_CPPFLAGS += $(KBUILD_CPPFLAGS) + +# Common C and assembly flags +# From top-level Makefile +VDSO_CAFLAGS := $(VDSO_CPPFLAGS) +VDSO_CAFLAGS += $(call cc32-option,-fno-PIE) +ifdef CONFIG_DEBUG_INFO +VDSO_CAFLAGS += -g +endif +ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(COMPATCC)), y) +VDSO_CAFLAGS += -DCC_HAVE_ASM_GOTO +endif + +# From arm Makefile +VDSO_CAFLAGS += $(call cc32-option,-fno-dwarf2-cfi-asm) +VDSO_CAFLAGS += -mabi=aapcs-linux -mfloat-abi=soft +ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) +VDSO_CAFLAGS += -mbig-endian +else +VDSO_CAFLAGS += -mlittle-endian +endif + +# From arm vDSO Makefile +VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector +VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING + +# Try to compile for ARMv8. If the compiler is too old and doesn't support it, +# fall back to v7. There is no easy way to check for what architecture the code +# is being compiled, so define a macro specifying that (see arch/arm/Makefile). +VDSO_CAFLAGS += $(call cc32-option,-march=armv8-a -D__LINUX_ARM_ARCH__=8,\ + -march=armv7-a -D__LINUX_ARM_ARCH__=7) + +VDSO_CFLAGS := $(VDSO_CAFLAGS) +VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1 +# KBUILD_CFLAGS from top-level Makefile +VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ + -fno-strict-aliasing -fno-common \ + -Werror-implicit-function-declaration \ + -Wno-format-security \ + -std=gnu89 +VDSO_CFLAGS += -O2 +# Some useful compiler-dependent flags from top-level Makefile +VDSO_CFLAGS += $(call cc32-option,-Wdeclaration-after-statement,) +VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign) +VDSO_CFLAGS += $(call cc32-option,-fno-strict-overflow) +VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes) +VDSO_CFLAGS += $(call cc32-option,-Werror=date-time) +VDSO_CFLAGS += $(call cc32-option,-Werror=incompatible-pointer-types) + +# The 32-bit compiler does not provide 128-bit integers, which are used in +# some headers that are indirectly included from the vDSO code. +# This hack makes the compiler happy and should trigger a warning/error if +# variables of such type are referenced. +VDSO_CFLAGS += -D__uint128_t='void*' +# Silence some warnings coming from headers that operate on long's +# (on GCC 4.8 or older, there is unfortunately no way to silence this warning) +VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow) +VDSO_CFLAGS += -Wno-int-to-pointer-cast + +VDSO_AFLAGS := $(VDSO_CAFLAGS) +VDSO_AFLAGS += -D__ASSEMBLY__ + +VDSO_LDFLAGS := $(VDSO_CPPFLAGS) +# From arm vDSO Makefile +VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 +VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 +VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft +VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--build-id) +VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd) + + +# Borrow vdsomunge.c from the arm vDSO +# We have to use a relative path because scripts/Makefile.host prefixes +# $(hostprogs-y) with $(obj) +munge := ../../../arm/vdso/vdsomunge +hostprogs-y := $(munge) + +c-obj-vdso := note.o +c-obj-vdso-gettimeofday := vgettimeofday.o +asm-obj-vdso := sigreturn.o + +ifneq ($(c-gettimeofday-y),) +VDSO_CFLAGS_gettimeofday_o += -include $(c-gettimeofday-y) +endif + +VDSO_CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os + +# Build rules +targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso.so.dbg vdso.so.raw +c-obj-vdso := $(addprefix $(obj)/, $(c-obj-vdso)) +c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday)) +asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso)) +obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) + +obj-y += vdso.o +extra-y += vdso.lds +CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +# Force dependency (vdso.s includes vdso.so through incbin) +$(obj)/vdso.o: $(obj)/vdso.so + +include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE + $(call if_changed,vdsosym) + +# Strip rule for vdso.so +$(obj)/vdso.so: OBJCOPYFLAGS := -S +$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE + $(call if_changed,objcopy) + +$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE + $(call if_changed,vdsomunge) + +# Link rule for the .so file, .lds has to be first +$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE + $(call if_changed,vdsold) + $(call if_changed,vdso_check) + +# Compilation rules for the vDSO sources +$(c-obj-vdso): %.o: %.c FORCE + $(call if_changed_dep,vdsocc) +$(c-obj-vdso-gettimeofday): %.o: %.c FORCE + $(call if_changed_dep,vdsocc_gettimeofday) +$(asm-obj-vdso): %.o: %.S FORCE + $(call if_changed_dep,vdsoas) + +# Actual build commands +quiet_cmd_vdsold = VDSOL $@ + cmd_vdsold = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \ + -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ +quiet_cmd_vdsocc = VDSOC $@ + cmd_vdsocc = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $< +quiet_cmd_vdsocc_gettimeofday = VDSOC_GTD $@ + cmd_vdsocc_gettimeofday = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $< +quiet_cmd_vdsoas = VDSOA $@ + cmd_vdsoas = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $< + +quiet_cmd_vdsomunge = MUNGE $@ + cmd_vdsomunge = $(obj)/$(munge) $< $@ + +# Generate vDSO offsets using helper script (borrowed from the 64-bit vDSO) +gen-vdsosym := $(srctree)/$(src)/../vdso/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ +# The AArch64 nm should be able to read an AArch32 binary + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +# Install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so + +vdso.so: $(obj)/vdso.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso.so diff --git a/arch/arm64/kernel/vdso32/note.c b/arch/arm64/kernel/vdso32/note.c new file mode 100644 index 000000000000..eff5bf9efb8b --- /dev/null +++ b/arch/arm64/kernel/vdso32/note.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2012-2018 ARM Limited + * + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include +#include +#include +#include + +ELFNOTE32("Linux", 0, LINUX_VERSION_CODE); +BUILD_SALT; diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S new file mode 100644 index 000000000000..1a81277c2d09 --- /dev/null +++ b/arch/arm64/kernel/vdso32/sigreturn.S @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file provides both A32 and T32 versions, in accordance with the + * arm sigreturn code. + * + * Copyright (C) 2018 ARM Limited + */ + +#include +#include +#include + +#define ARM_ENTRY(name) \ + ENTRY(name) + +#define ARM_ENDPROC(name) \ + .type name, %function; \ + END(name) + + .text + + .arm + .fnstart + .save {r0-r15} + .pad #COMPAT_SIGFRAME_REGS_OFFSET + nop +ARM_ENTRY(__kernel_sigreturn_arm) + mov r7, #__NR_compat_sigreturn + svc #0 + .fnend +ARM_ENDPROC(__kernel_sigreturn_arm) + + .fnstart + .save {r0-r15} + .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET + nop +ARM_ENTRY(__kernel_rt_sigreturn_arm) + mov r7, #__NR_compat_rt_sigreturn + svc #0 + .fnend +ARM_ENDPROC(__kernel_rt_sigreturn_arm) + + .thumb + .fnstart + .save {r0-r15} + .pad #COMPAT_SIGFRAME_REGS_OFFSET + nop +ARM_ENTRY(__kernel_sigreturn_thumb) + mov r7, #__NR_compat_sigreturn + svc #0 + .fnend +ARM_ENDPROC(__kernel_sigreturn_thumb) + + .fnstart + .save {r0-r15} + .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET + nop +ARM_ENTRY(__kernel_rt_sigreturn_thumb) + mov r7, #__NR_compat_rt_sigreturn + svc #0 + .fnend +ARM_ENDPROC(__kernel_rt_sigreturn_thumb) diff --git a/arch/arm64/kernel/vdso32/vdso.S b/arch/arm64/kernel/vdso32/vdso.S new file mode 100644 index 000000000000..e72ac7bc4c04 --- /dev/null +++ b/arch/arm64/kernel/vdso32/vdso.S @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 ARM Limited + */ + +#include +#include +#include +#include + + .globl vdso32_start, vdso32_end + .section .rodata + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/arm64/kernel/vdso32/vdso.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S new file mode 100644 index 000000000000..a3944927eaeb --- /dev/null +++ b/arch/arm64/kernel/vdso32/vdso.lds.S @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Adapted from arm64 version. + * + * GNU linker script for the VDSO library. + * Heavily based on the vDSO linker scripts for other archs. + * + * Copyright (C) 2012-2018 ARM Limited + */ + +#include +#include +#include + +OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm") +OUTPUT_ARCH(arm) + +SECTIONS +{ + PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE); + . = VDSO_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + + .text : { *(.text*) } :text =0xe7f001f2 + + .got : { *(.got) } + .rel.plt : { *(.rel.plt) } + + /DISCARD/ : { + *(.note.GNU-stack) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ +} + +VERSION +{ + LINUX_2.6 { + global: + __vdso_clock_gettime; + __vdso_gettimeofday; + __vdso_clock_getres; + __kernel_sigreturn_arm; + __kernel_sigreturn_thumb; + __kernel_rt_sigreturn_arm; + __kernel_rt_sigreturn_thumb; + __vdso_clock_gettime64; + local: *; + }; +} + +/* + * Make the sigreturn code visible to the kernel. + */ +VDSO_compat_sigreturn_arm = __kernel_sigreturn_arm; +VDSO_compat_sigreturn_thumb = __kernel_sigreturn_thumb; +VDSO_compat_rt_sigreturn_arm = __kernel_rt_sigreturn_arm; +VDSO_compat_rt_sigreturn_thumb = __kernel_rt_sigreturn_thumb; diff --git a/arch/arm64/kernel/vdso32/vgettimeofday.c b/arch/arm64/kernel/vdso32/vgettimeofday.c new file mode 100644 index 000000000000..54fc1c2ce93f --- /dev/null +++ b/arch/arm64/kernel/vdso32/vgettimeofday.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM64 compat userspace implementations of gettimeofday() and similar. + * + * Copyright (C) 2018 ARM Limited + * + */ +#include +#include + +int __vdso_clock_gettime(clockid_t clock, + struct old_timespec32 *ts) +{ + /* The checks below are required for ABI consistency with arm */ + if ((u32)ts >= TASK_SIZE_32) + return -EFAULT; + + return __cvdso_clock_gettime32(clock, ts); +} + +int __vdso_clock_gettime64(clockid_t clock, + struct __kernel_timespec *ts) +{ + /* The checks below are required for ABI consistency with arm */ + if ((u32)ts >= TASK_SIZE_32) + return -EFAULT; + + return __cvdso_clock_gettime(clock, ts); +} + +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +int __vdso_clock_getres(clockid_t clock_id, + struct old_timespec32 *res) +{ + /* The checks below are required for ABI consistency with arm */ + if ((u32)res >= TASK_SIZE_32) + return -EFAULT; + + return __cvdso_clock_getres_time32(clock_id, res); +} + +/* Avoid unresolved references emitted by GCC */ + +void __aeabi_unwind_cpp_pr0(void) +{ +} + +void __aeabi_unwind_cpp_pr1(void) +{ +} + +void __aeabi_unwind_cpp_pr2(void) +{ +} -- cgit v1.2.3 From c7aa2d71020d74d4c673922e295b07f6adafd6e0 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jun 2019 10:52:38 +0100 Subject: arm64: vdso: Refactor vDSO code Most of the code for initializing the vDSOs in arm64 and compat will be shared, hence refactoring of the current code is required to avoid duplication and to simplify maintainability. No functional change. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Tested-by: Shijith Thotton Tested-by: Andre Przywara Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Link: https://lkml.kernel.org/r/20190621095252.32307-12-vincenzo.frascino@arm.com --- arch/arm64/kernel/vdso.c | 215 +++++++++++++++++++++++++++++++---------------- 1 file changed, 144 insertions(+), 71 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 478ec865a413..be23efc3f60d 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -29,7 +29,31 @@ #include extern char vdso_start[], vdso_end[]; -static unsigned long vdso_pages __ro_after_init; + +/* vdso_lookup arch_index */ +enum arch_vdso_type { + ARM64_VDSO = 0, +}; +#define VDSO_TYPES (ARM64_VDSO + 1) + +struct __vdso_abi { + const char *name; + const char *vdso_code_start; + const char *vdso_code_end; + unsigned long vdso_pages; + /* Data Mapping */ + struct vm_special_mapping *dm; + /* Code Mapping */ + struct vm_special_mapping *cm; +}; + +static struct __vdso_abi vdso_lookup[VDSO_TYPES] __ro_after_init = { + { + .name = "vdso", + .vdso_code_start = vdso_start, + .vdso_code_end = vdso_end, + }, +}; /* * The vDSO data page. @@ -40,10 +64,110 @@ static union { } vdso_data_store __page_aligned_data; struct vdso_data *vdso_data = vdso_data_store.data; +static int __vdso_remap(enum arch_vdso_type arch_index, + const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; + unsigned long vdso_size = vdso_lookup[arch_index].vdso_code_end - + vdso_lookup[arch_index].vdso_code_start; + + if (vdso_size != new_size) + return -EINVAL; + + current->mm->context.vdso = (void *)new_vma->vm_start; + + return 0; +} + +static int __vdso_init(enum arch_vdso_type arch_index) +{ + int i; + struct page **vdso_pagelist; + unsigned long pfn; + + if (memcmp(vdso_lookup[arch_index].vdso_code_start, "\177ELF", 4)) { + pr_err("vDSO is not a valid ELF object!\n"); + return -EINVAL; + } + + vdso_lookup[arch_index].vdso_pages = ( + vdso_lookup[arch_index].vdso_code_end - + vdso_lookup[arch_index].vdso_code_start) >> + PAGE_SHIFT; + + /* Allocate the vDSO pagelist, plus a page for the data. */ + vdso_pagelist = kcalloc(vdso_lookup[arch_index].vdso_pages + 1, + sizeof(struct page *), + GFP_KERNEL); + if (vdso_pagelist == NULL) + return -ENOMEM; + + /* Grab the vDSO data page. */ + vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data)); + + + /* Grab the vDSO code pages. */ + pfn = sym_to_pfn(vdso_lookup[arch_index].vdso_code_start); + + for (i = 0; i < vdso_lookup[arch_index].vdso_pages; i++) + vdso_pagelist[i + 1] = pfn_to_page(pfn + i); + + vdso_lookup[arch_index].dm->pages = &vdso_pagelist[0]; + vdso_lookup[arch_index].cm->pages = &vdso_pagelist[1]; + + return 0; +} + +static int __setup_additional_pages(enum arch_vdso_type arch_index, + struct mm_struct *mm, + struct linux_binprm *bprm, + int uses_interp) +{ + unsigned long vdso_base, vdso_text_len, vdso_mapping_len; + void *ret; + + vdso_text_len = vdso_lookup[arch_index].vdso_pages << PAGE_SHIFT; + /* Be sure to map the data page */ + vdso_mapping_len = vdso_text_len + PAGE_SIZE; + + vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); + if (IS_ERR_VALUE(vdso_base)) { + ret = ERR_PTR(vdso_base); + goto up_fail; + } + + ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, + VM_READ|VM_MAYREAD, + vdso_lookup[arch_index].dm); + if (IS_ERR(ret)) + goto up_fail; + + vdso_base += PAGE_SIZE; + mm->context.vdso = (void *)vdso_base; + ret = _install_special_mapping(mm, vdso_base, vdso_text_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_lookup[arch_index].cm); + if (IS_ERR(ret)) + goto up_fail; + + return 0; + +up_fail: + mm->context.vdso = NULL; + return PTR_ERR(ret); +} + #ifdef CONFIG_COMPAT /* * Create and map the vectors page for AArch32 tasks. */ +/* + * aarch32_vdso_pages: + * 0 - kuser helpers + * 1 - sigreturn code + */ #define C_VECTORS 0 #define C_SIGPAGE 1 #define C_PAGES (C_SIGPAGE + 1) @@ -172,18 +296,18 @@ out: static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { - unsigned long new_size = new_vma->vm_end - new_vma->vm_start; - unsigned long vdso_size = vdso_end - vdso_start; - - if (vdso_size != new_size) - return -EINVAL; - - current->mm->context.vdso = (void *)new_vma->vm_start; - - return 0; + return __vdso_remap(ARM64_VDSO, sm, new_vma); } -static struct vm_special_mapping vdso_spec[2] __ro_after_init = { +/* + * aarch64_vdso_pages: + * 0 - vvar + * 1 - vdso + */ +#define A_VVAR 0 +#define A_VDSO 1 +#define A_PAGES (A_VDSO + 1) +static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = { { .name = "[vvar]", }, @@ -195,37 +319,10 @@ static struct vm_special_mapping vdso_spec[2] __ro_after_init = { static int __init vdso_init(void) { - int i; - struct page **vdso_pagelist; - unsigned long pfn; - - if (memcmp(vdso_start, "\177ELF", 4)) { - pr_err("vDSO is not a valid ELF object!\n"); - return -EINVAL; - } - - vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; - - /* Allocate the vDSO pagelist, plus a page for the data. */ - vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), - GFP_KERNEL); - if (vdso_pagelist == NULL) - return -ENOMEM; - - /* Grab the vDSO data page. */ - vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data)); - + vdso_lookup[ARM64_VDSO].dm = &vdso_spec[A_VVAR]; + vdso_lookup[ARM64_VDSO].cm = &vdso_spec[A_VDSO]; - /* Grab the vDSO code pages. */ - pfn = sym_to_pfn(vdso_start); - - for (i = 0; i < vdso_pages; i++) - vdso_pagelist[i + 1] = pfn_to_page(pfn + i); - - vdso_spec[0].pages = &vdso_pagelist[0]; - vdso_spec[1].pages = &vdso_pagelist[1]; - - return 0; + return __vdso_init(ARM64_VDSO); } arch_initcall(vdso_init); @@ -233,41 +330,17 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - unsigned long vdso_base, vdso_text_len, vdso_mapping_len; - void *ret; - - vdso_text_len = vdso_pages << PAGE_SHIFT; - /* Be sure to map the data page */ - vdso_mapping_len = vdso_text_len + PAGE_SIZE; + int ret; if (down_write_killable(&mm->mmap_sem)) return -EINTR; - vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); - if (IS_ERR_VALUE(vdso_base)) { - ret = ERR_PTR(vdso_base); - goto up_fail; - } - ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, - VM_READ|VM_MAYREAD, - &vdso_spec[0]); - if (IS_ERR(ret)) - goto up_fail; - - vdso_base += PAGE_SIZE; - mm->context.vdso = (void *)vdso_base; - ret = _install_special_mapping(mm, vdso_base, vdso_text_len, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - &vdso_spec[1]); - if (IS_ERR(ret)) - goto up_fail; + ret = __setup_additional_pages(ARM64_VDSO, + mm, + bprm, + uses_interp); up_write(&mm->mmap_sem); - return 0; -up_fail: - mm->context.vdso = NULL; - up_write(&mm->mmap_sem); - return PTR_ERR(ret); + return ret; } -- cgit v1.2.3 From 7c1deeeb01308426a27a70d5a506aa5fae66dc62 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jun 2019 10:52:39 +0100 Subject: arm64: compat: VDSO setup for compat layer If CONFIG_GENERIC_COMPAT_VDSO is enabled, compat vDSO is installed in a compat (32 bit) process instead of sigpage. Add the necessary code to setup the vDSO required pages. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Tested-by: Shijith Thotton Tested-by: Andre Przywara Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Link: https://lkml.kernel.org/r/20190621095252.32307-13-vincenzo.frascino@arm.com --- arch/arm64/kernel/vdso.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index be23efc3f60d..354b11e27c07 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -29,12 +29,22 @@ #include extern char vdso_start[], vdso_end[]; +#ifdef CONFIG_COMPAT_VDSO +extern char vdso32_start[], vdso32_end[]; +#endif /* CONFIG_COMPAT_VDSO */ /* vdso_lookup arch_index */ enum arch_vdso_type { ARM64_VDSO = 0, +#ifdef CONFIG_COMPAT_VDSO + ARM64_VDSO32 = 1, +#endif /* CONFIG_COMPAT_VDSO */ }; +#ifdef CONFIG_COMPAT_VDSO +#define VDSO_TYPES (ARM64_VDSO32 + 1) +#else #define VDSO_TYPES (ARM64_VDSO + 1) +#endif /* CONFIG_COMPAT_VDSO */ struct __vdso_abi { const char *name; @@ -53,6 +63,13 @@ static struct __vdso_abi vdso_lookup[VDSO_TYPES] __ro_after_init = { .vdso_code_start = vdso_start, .vdso_code_end = vdso_end, }, +#ifdef CONFIG_COMPAT_VDSO + { + .name = "vdso32", + .vdso_code_start = vdso32_start, + .vdso_code_end = vdso32_end, + }, +#endif /* CONFIG_COMPAT_VDSO */ }; /* @@ -163,24 +180,52 @@ up_fail: /* * Create and map the vectors page for AArch32 tasks. */ +#ifdef CONFIG_COMPAT_VDSO +static int aarch32_vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + return __vdso_remap(ARM64_VDSO32, sm, new_vma); +} +#endif /* CONFIG_COMPAT_VDSO */ + /* * aarch32_vdso_pages: * 0 - kuser helpers * 1 - sigreturn code + * or (CONFIG_COMPAT_VDSO): + * 0 - kuser helpers + * 1 - vdso data + * 2 - vdso code */ #define C_VECTORS 0 +#ifdef CONFIG_COMPAT_VDSO +#define C_VVAR 1 +#define C_VDSO 2 +#define C_PAGES (C_VDSO + 1) +#else #define C_SIGPAGE 1 #define C_PAGES (C_SIGPAGE + 1) +#endif /* CONFIG_COMPAT_VDSO */ static struct page *aarch32_vdso_pages[C_PAGES] __ro_after_init; -static const struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = { +static struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = { { .name = "[vectors]", /* ABI */ .pages = &aarch32_vdso_pages[C_VECTORS], }, +#ifdef CONFIG_COMPAT_VDSO + { + .name = "[vvar]", + }, + { + .name = "[vdso]", + .mremap = aarch32_vdso_mremap, + }, +#else { .name = "[sigpage]", /* ABI */ .pages = &aarch32_vdso_pages[C_SIGPAGE], }, +#endif /* CONFIG_COMPAT_VDSO */ }; static int aarch32_alloc_kuser_vdso_page(void) @@ -203,7 +248,33 @@ static int aarch32_alloc_kuser_vdso_page(void) return 0; } -static int __init aarch32_alloc_vdso_pages(void) +#ifdef CONFIG_COMPAT_VDSO +static int __aarch32_alloc_vdso_pages(void) +{ + int ret; + + vdso_lookup[ARM64_VDSO32].dm = &aarch32_vdso_spec[C_VVAR]; + vdso_lookup[ARM64_VDSO32].cm = &aarch32_vdso_spec[C_VDSO]; + + ret = __vdso_init(ARM64_VDSO32); + if (ret) + return ret; + + ret = aarch32_alloc_kuser_vdso_page(); + if (ret) { + unsigned long c_vvar = + (unsigned long)page_to_virt(aarch32_vdso_pages[C_VVAR]); + unsigned long c_vdso = + (unsigned long)page_to_virt(aarch32_vdso_pages[C_VDSO]); + + free_page(c_vvar); + free_page(c_vdso); + } + + return ret; +} +#else +static int __aarch32_alloc_vdso_pages(void) { extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; @@ -224,6 +295,12 @@ static int __init aarch32_alloc_vdso_pages(void) return ret; } +#endif /* CONFIG_COMPAT_VDSO */ + +static int __init aarch32_alloc_vdso_pages(void) +{ + return __aarch32_alloc_vdso_pages(); +} arch_initcall(aarch32_alloc_vdso_pages); static int aarch32_kuser_helpers_setup(struct mm_struct *mm) @@ -245,6 +322,7 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm) return PTR_ERR_OR_ZERO(ret); } +#ifndef CONFIG_COMPAT_VDSO static int aarch32_sigreturn_setup(struct mm_struct *mm) { unsigned long addr; @@ -272,6 +350,7 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm) out: return PTR_ERR_OR_ZERO(ret); } +#endif /* !CONFIG_COMPAT_VDSO */ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { @@ -285,7 +364,14 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (ret) goto out; +#ifdef CONFIG_COMPAT_VDSO + ret = __setup_additional_pages(ARM64_VDSO32, + mm, + bprm, + uses_interp); +#else ret = aarch32_sigreturn_setup(mm); +#endif /* CONFIG_COMPAT_VDSO */ out: up_write(&mm->mmap_sem); -- cgit v1.2.3 From 1e3f17f55aec6510f88ff65dcbaae13435af0ba6 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jun 2019 10:52:40 +0100 Subject: arm64: elf: VDSO code page discovery Like in normal vDSOs, when compat vDSOs are enabled the auxiliary vector symbol AT_SYSINFO_EHDR needs to point to the address of the vDSO code, to allow the dynamic linker to find it. Add the necessary code to the elf arm64 module to make this possible. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Tested-by: Shijith Thotton Tested-by: Andre Przywara Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Link: https://lkml.kernel.org/r/20190621095252.32307-14-vincenzo.frascino@arm.com --- arch/arm64/include/asm/elf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 325d9515c0f8..3c7037c6ba9b 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -202,7 +202,21 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; ({ \ set_thread_flag(TIF_32BIT); \ }) +#ifdef CONFIG_GENERIC_COMPAT_VDSO +#define COMPAT_ARCH_DLINFO \ +do { \ + /* \ + * Note that we use Elf64_Off instead of elf_addr_t because \ + * elf_addr_t in compat is defined as Elf32_Addr and casting \ + * current->mm->context.vdso to it triggers a cast warning of \ + * cast from pointer to integer of different size. \ + */ \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (Elf64_Off)current->mm->context.vdso); \ +} while (0) +#else #define COMPAT_ARCH_DLINFO +#endif extern int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp); #define compat_arch_setup_additional_pages \ -- cgit v1.2.3 From f01703b3d2e6faf7233cedf78f1e2d31b39fa90f Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jun 2019 10:52:41 +0100 Subject: arm64: compat: Get sigreturn trampolines from vDSO When the compat vDSO is enabled, the sigreturn trampolines are not anymore available through [sigpage] but through [vdso]. Add the relevant code the enable the feature. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Tested-by: Shijith Thotton Tested-by: Andre Przywara Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Link: https://lkml.kernel.org/r/20190621095252.32307-15-vincenzo.frascino@arm.com --- arch/arm64/include/asm/vdso.h | 3 +++ arch/arm64/kernel/signal32.c | 26 ++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index 1f94ec19903c..9c15e0a06301 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -17,6 +17,9 @@ #ifndef __ASSEMBLY__ #include +#ifdef CONFIG_COMPAT_VDSO +#include +#endif #define VDSO_SYMBOL(base, name) \ ({ \ diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 8a9a5ceb63b7..12a585386c2f 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -18,6 +18,7 @@ #include #include #include +#include struct compat_vfp_sigframe { compat_ulong_t magic; @@ -341,6 +342,30 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, retcode = ptr_to_compat(ka->sa.sa_restorer); } else { /* Set up sigreturn pointer */ +#ifdef CONFIG_COMPAT_VDSO + void *vdso_base = current->mm->context.vdso; + void *vdso_trampoline; + + if (ka->sa.sa_flags & SA_SIGINFO) { + if (thumb) { + vdso_trampoline = VDSO_SYMBOL(vdso_base, + compat_rt_sigreturn_thumb); + } else { + vdso_trampoline = VDSO_SYMBOL(vdso_base, + compat_rt_sigreturn_arm); + } + } else { + if (thumb) { + vdso_trampoline = VDSO_SYMBOL(vdso_base, + compat_sigreturn_thumb); + } else { + vdso_trampoline = VDSO_SYMBOL(vdso_base, + compat_sigreturn_arm); + } + } + + retcode = ptr_to_compat(vdso_trampoline) + thumb; +#else unsigned int idx = thumb << 1; if (ka->sa.sa_flags & SA_SIGINFO) @@ -348,6 +373,7 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, retcode = (unsigned long)current->mm->context.vdso + (idx << 2) + thumb; +#endif } regs->regs[0] = usig; -- cgit v1.2.3 From bfe801ebe84f42b4666d3f0adde90f504d56e35b Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jun 2019 10:52:42 +0100 Subject: arm64: vdso: Enable vDSO compat support Add vDSO compat support to the arm64 build system. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Tested-by: Shijith Thotton Tested-by: Andre Przywara Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Link: https://lkml.kernel.org/r/20190621095252.32307-16-vincenzo.frascino@arm.com --- arch/arm64/Kconfig | 1 + arch/arm64/Makefile | 23 +++++++++++++++++++++-- arch/arm64/kernel/Makefile | 6 +++++- 3 files changed, 27 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 952c9f8cf3b8..f5eb592b8579 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -108,6 +108,7 @@ config ARM64 select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY + select GENERIC_COMPAT_VDSO if (!CPU_BIG_ENDIAN && COMPAT) select HANDLE_DOMAIN_IRQ select HARDIRQS_SW_RESEND select HAVE_PCI diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index e9d2e578cbe6..e3d3fd0a4268 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -49,10 +49,26 @@ $(warning Detected assembler with broken .inst; disassembly will be unreliable) endif endif -KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) +ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y) + CROSS_COMPILE_COMPAT ?= $(CONFIG_CROSS_COMPILE_COMPAT_VDSO:"%"=%) + + ifeq ($(CONFIG_CC_IS_CLANG), y) + $(warning CROSS_COMPILE_COMPAT is clang, the compat vDSO will not be built) + else ifeq ($(CROSS_COMPILE_COMPAT),) + $(warning CROSS_COMPILE_COMPAT not defined or empty, the compat vDSO will not be built) + else ifeq ($(shell which $(CROSS_COMPILE_COMPAT)gcc 2> /dev/null),) + $(error $(CROSS_COMPILE_COMPAT)gcc not found, check CROSS_COMPILE_COMPAT) + else + export CROSS_COMPILE_COMPAT + export CONFIG_COMPAT_VDSO := y + compat_vdso := -DCONFIG_COMPAT_VDSO=1 + endif +endif + +KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) $(compat_vdso) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += $(call cc-disable-warning, psabi) -KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) +KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) $(compat_vdso) KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) KBUILD_AFLAGS += $(call cc-option,-mabi=lp64) @@ -164,6 +180,9 @@ ifeq ($(KBUILD_EXTMOD),) prepare: vdso_prepare vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h + $(if $(CONFIG_COMPAT_VDSO),$(Q)$(MAKE) \ + $(build)=arch/arm64/kernel/vdso32 \ + include/generated/vdso32-offsets.h) endif define archhelp diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 9e7dcb2c31c7..478491f07b4f 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -28,7 +28,10 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE $(call if_changed,objcopy) obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ - sigreturn32.o sys_compat.o + sys_compat.o +ifneq ($(CONFIG_COMPAT_VDSO), y) +obj-$(CONFIG_COMPAT) += sigreturn32.o +endif obj-$(CONFIG_KUSER_HELPERS) += kuser32.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o obj-$(CONFIG_MODULES) += module.o @@ -62,6 +65,7 @@ obj-$(CONFIG_ARM64_SSBD) += ssbd.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o obj-y += vdso/ probes/ +obj-$(CONFIG_COMPAT_VDSO) += vdso32/ head-y := head.o extra-y += $(head-y) vmlinux.lds -- cgit v1.2.3 From 7ac8707479886c75f353bfb6a8273f423cfccb23 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jun 2019 10:52:49 +0100 Subject: x86/vdso: Switch to generic vDSO implementation The x86 vDSO library requires some adaptations to take advantage of the newly introduced generic vDSO library. Introduce the following changes: - Modification of vdso.c to be compliant with the common vdso datapage - Use of lib/vdso for gettimeofday [ tglx: Massaged changelog and cleaned up the function signature formatting ] Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Cc: Shijith Thotton Cc: Andre Przywara Link: https://lkml.kernel.org/r/20190621095252.32307-23-vincenzo.frascino@arm.com --- arch/x86/Kconfig | 3 + arch/x86/entry/vdso/Makefile | 9 ++ arch/x86/entry/vdso/vclock_gettime.c | 245 ++++--------------------------- arch/x86/entry/vdso/vdsox32.lds.S | 1 + arch/x86/entry/vsyscall/Makefile | 2 - arch/x86/entry/vsyscall/vsyscall_gtod.c | 83 ----------- arch/x86/include/asm/pvclock.h | 2 +- arch/x86/include/asm/vdso/gettimeofday.h | 191 ++++++++++++++++++++++++ arch/x86/include/asm/vdso/vsyscall.h | 44 ++++++ arch/x86/include/asm/vgtod.h | 75 +--------- arch/x86/include/asm/vvar.h | 7 +- arch/x86/kernel/pvclock.c | 1 + 12 files changed, 284 insertions(+), 379 deletions(-) delete mode 100644 arch/x86/entry/vsyscall/vsyscall_gtod.c create mode 100644 arch/x86/include/asm/vdso/gettimeofday.h create mode 100644 arch/x86/include/asm/vdso/vsyscall.h (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2bbbd4d1ba31..51a98d6eae8e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -17,6 +17,7 @@ config X86_32 select HAVE_DEBUG_STACKOVERFLOW select MODULES_USE_ELF_REL select OLD_SIGACTION + select GENERIC_VDSO_32 config X86_64 def_bool y @@ -121,6 +122,7 @@ config X86 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select GENERIC_GETTIMEOFDAY select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 select HAVE_ACPI_APEI if ACPI select HAVE_ACPI_APEI_NMI if ACPI @@ -202,6 +204,7 @@ config X86 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER + select HAVE_GENERIC_VDSO select HOTPLUG_SMT if SMP select IRQ_FORCED_THREADING select NEED_SG_DMA_LENGTH diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 42fe42e82baf..39106111be86 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -3,6 +3,12 @@ # Building vDSO images for x86. # +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE| +ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE +include $(srctree)/lib/vdso/Makefile + KBUILD_CFLAGS += $(DISABLE_LTO) KASAN_SANITIZE := n UBSAN_SANITIZE := n @@ -51,6 +57,7 @@ VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \ $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) + $(call if_changed,vdso_check) HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi hostprogs-y += vdso2c @@ -121,6 +128,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE $(call if_changed,vdso) + $(call if_changed,vdso_check) CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1 @@ -160,6 +168,7 @@ $(obj)/vdso32.so.dbg: FORCE \ $(obj)/vdso32/system_call.o \ $(obj)/vdso32/sigreturn.o $(call if_changed,vdso) + $(call if_changed,vdso_check) # # The DSO images are built using a special linker script. diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 4aed41f638bb..f01a3f0787ca 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -1,251 +1,60 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright 2006 Andi Kleen, SUSE Labs. - * * Fast user context implementation of clock_gettime, gettimeofday, and time. * + * Copyright 2006 Andi Kleen, SUSE Labs. + * Copyright 2019 ARM Limited + * * 32 Bit compat layer by Stefani Seibold * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany - * - * The code should have no internal unresolved relocations. - * Check with readelf after changing. */ - -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include -#define gtod (&VVAR(vsyscall_gtod_data)) +#include "../../../../lib/vdso/gettimeofday.c" -extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); -extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); +extern int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); extern time_t __vdso_time(time_t *t); -#ifdef CONFIG_PARAVIRT_CLOCK -extern u8 pvclock_page[PAGE_SIZE] - __attribute__((visibility("hidden"))); -#endif - -#ifdef CONFIG_HYPERV_TSCPAGE -extern u8 hvclock_page[PAGE_SIZE] - __attribute__((visibility("hidden"))); -#endif - -#ifndef BUILD_VDSO32 - -notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) -{ - long ret; - asm ("syscall" : "=a" (ret), "=m" (*ts) : - "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : - "rcx", "r11"); - return ret; -} - -#else - -notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { - long ret; - - asm ( - "mov %%ebx, %%edx \n" - "mov %[clock], %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret), "=m" (*ts) - : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) - : "edx"); - return ret; + return __cvdso_gettimeofday(tv, tz); } -#endif - -#ifdef CONFIG_PARAVIRT_CLOCK -static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void) -{ - return (const struct pvclock_vsyscall_time_info *)&pvclock_page; -} +int gettimeofday(struct __kernel_old_timeval *, struct timezone *) + __attribute__((weak, alias("__vdso_gettimeofday"))); -static notrace u64 vread_pvclock(void) +time_t __vdso_time(time_t *t) { - const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; - u32 version; - u64 ret; - - /* - * Note: The kernel and hypervisor must guarantee that cpu ID - * number maps 1:1 to per-CPU pvclock time info. - * - * Because the hypervisor is entirely unaware of guest userspace - * preemption, it cannot guarantee that per-CPU pvclock time - * info is updated if the underlying CPU changes or that that - * version is increased whenever underlying CPU changes. - * - * On KVM, we are guaranteed that pvti updates for any vCPU are - * atomic as seen by *all* vCPUs. This is an even stronger - * guarantee than we get with a normal seqlock. - * - * On Xen, we don't appear to have that guarantee, but Xen still - * supplies a valid seqlock using the version field. - * - * We only do pvclock vdso timing at all if - * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to - * mean that all vCPUs have matching pvti and that the TSC is - * synced, so we can just look at vCPU 0's pvti. - */ - - do { - version = pvclock_read_begin(pvti); - - if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) - return U64_MAX; - - ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); - } while (pvclock_read_retry(pvti, version)); - - return ret; + return __cvdso_time(t); } -#endif -#ifdef CONFIG_HYPERV_TSCPAGE -static notrace u64 vread_hvclock(void) -{ - const struct ms_hyperv_tsc_page *tsc_pg = - (const struct ms_hyperv_tsc_page *)&hvclock_page; - return hv_read_tsc_page(tsc_pg); -} -#endif +time_t time(time_t *t) __attribute__((weak, alias("__vdso_time"))); -notrace static inline u64 vgetcyc(int mode) -{ - if (mode == VCLOCK_TSC) - return (u64)rdtsc_ordered(); - /* - * For any memory-mapped vclock type, we need to make sure that gcc - * doesn't cleverly hoist a load before the mode check. Otherwise we - * might end up touching the memory-mapped page even if the vclock in - * question isn't enabled, which will segfault. Hence the barriers. - */ -#ifdef CONFIG_PARAVIRT_CLOCK - if (mode == VCLOCK_PVCLOCK) { - barrier(); - return vread_pvclock(); - } -#endif -#ifdef CONFIG_HYPERV_TSCPAGE - if (mode == VCLOCK_HVCLOCK) { - barrier(); - return vread_hvclock(); - } -#endif - return U64_MAX; -} +#if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64) +/* both 64-bit and x32 use these */ +extern int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); -notrace static int do_hres(clockid_t clk, struct timespec *ts) +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) { - struct vgtod_ts *base = >od->basetime[clk]; - u64 cycles, last, sec, ns; - unsigned int seq; - - do { - seq = gtod_read_begin(gtod); - cycles = vgetcyc(gtod->vclock_mode); - ns = base->nsec; - last = gtod->cycle_last; - if (unlikely((s64)cycles < 0)) - return vdso_fallback_gettime(clk, ts); - if (cycles > last) - ns += (cycles - last) * gtod->mult; - ns >>= gtod->shift; - sec = base->sec; - } while (unlikely(gtod_read_retry(gtod, seq))); - - /* - * Do this outside the loop: a race inside the loop could result - * in __iter_div_u64_rem() being extremely slow. - */ - ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; - - return 0; + return __cvdso_clock_gettime(clock, ts); } -notrace static void do_coarse(clockid_t clk, struct timespec *ts) -{ - struct vgtod_ts *base = >od->basetime[clk]; - unsigned int seq; +int clock_gettime(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_gettime"))); - do { - seq = gtod_read_begin(gtod); - ts->tv_sec = base->sec; - ts->tv_nsec = base->nsec; - } while (unlikely(gtod_read_retry(gtod, seq))); -} +#else +/* i386 only */ +extern int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts); -notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) +int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts) { - unsigned int msk; - - /* Sort out negative (CPU/FD) and invalid clocks */ - if (unlikely((unsigned int) clock >= MAX_CLOCKS)) - return vdso_fallback_gettime(clock, ts); - - /* - * Convert the clockid to a bitmask and use it to check which - * clocks are handled in the VDSO directly. - */ - msk = 1U << clock; - if (likely(msk & VGTOD_HRES)) { - return do_hres(clock, ts); - } else if (msk & VGTOD_COARSE) { - do_coarse(clock, ts); - return 0; - } - return vdso_fallback_gettime(clock, ts); + return __cvdso_clock_gettime32(clock, ts); } -int clock_gettime(clockid_t, struct timespec *) +int clock_gettime(clockid_t, struct old_timespec32 *) __attribute__((weak, alias("__vdso_clock_gettime"))); -notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) -{ - if (likely(tv != NULL)) { - struct timespec *ts = (struct timespec *) tv; - - do_hres(CLOCK_REALTIME, ts); - tv->tv_usec /= 1000; - } - if (unlikely(tz != NULL)) { - tz->tz_minuteswest = gtod->tz_minuteswest; - tz->tz_dsttime = gtod->tz_dsttime; - } - - return 0; -} -int gettimeofday(struct timeval *, struct timezone *) - __attribute__((weak, alias("__vdso_gettimeofday"))); - -/* - * This will break when the xtime seconds get inaccurate, but that is - * unlikely - */ -notrace time_t __vdso_time(time_t *t) -{ - /* This is atomic on x86 so we don't need any locks. */ - time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec); - - if (t) - *t = result; - return result; -} -time_t time(time_t *t) - __attribute__((weak, alias("__vdso_time"))); +#endif diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S index 05cd1c5c4a15..16a8050a4fb6 100644 --- a/arch/x86/entry/vdso/vdsox32.lds.S +++ b/arch/x86/entry/vdso/vdsox32.lds.S @@ -21,6 +21,7 @@ VERSION { __vdso_gettimeofday; __vdso_getcpu; __vdso_time; + __vdso_clock_getres; local: *; }; } diff --git a/arch/x86/entry/vsyscall/Makefile b/arch/x86/entry/vsyscall/Makefile index 1ac4dd116c26..93c1b3e949a7 100644 --- a/arch/x86/entry/vsyscall/Makefile +++ b/arch/x86/entry/vsyscall/Makefile @@ -2,7 +2,5 @@ # # Makefile for the x86 low level vsyscall code # -obj-y := vsyscall_gtod.o - obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c deleted file mode 100644 index cfcdba082feb..000000000000 --- a/arch/x86/entry/vsyscall/vsyscall_gtod.c +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 Andrea Arcangeli SuSE - * Copyright 2003 Andi Kleen, SuSE Labs. - * - * Modified for x86 32 bit architecture by - * Stefani Seibold - * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany - * - * Thanks to hpa@transmeta.com for some useful hint. - * Special thanks to Ingo Molnar for his early experience with - * a different vsyscall implementation for Linux/IA32 and for the name. - * - */ - -#include -#include -#include - -int vclocks_used __read_mostly; - -DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); - -void update_vsyscall_tz(void) -{ - vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest; - vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime; -} - -void update_vsyscall(struct timekeeper *tk) -{ - int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; - struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; - struct vgtod_ts *base; - u64 nsec; - - /* Mark the new vclock used. */ - BUILD_BUG_ON(VCLOCK_MAX >= 32); - WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode)); - - gtod_write_begin(vdata); - - /* copy vsyscall data */ - vdata->vclock_mode = vclock_mode; - vdata->cycle_last = tk->tkr_mono.cycle_last; - vdata->mask = tk->tkr_mono.mask; - vdata->mult = tk->tkr_mono.mult; - vdata->shift = tk->tkr_mono.shift; - - base = &vdata->basetime[CLOCK_REALTIME]; - base->sec = tk->xtime_sec; - base->nsec = tk->tkr_mono.xtime_nsec; - - base = &vdata->basetime[CLOCK_TAI]; - base->sec = tk->xtime_sec + (s64)tk->tai_offset; - base->nsec = tk->tkr_mono.xtime_nsec; - - base = &vdata->basetime[CLOCK_MONOTONIC]; - base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - nsec = tk->tkr_mono.xtime_nsec; - nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift); - while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { - nsec -= ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; - base->sec++; - } - base->nsec = nsec; - - base = &vdata->basetime[CLOCK_REALTIME_COARSE]; - base->sec = tk->xtime_sec; - base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; - - base = &vdata->basetime[CLOCK_MONOTONIC_COARSE]; - base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; - nsec += tk->wall_to_monotonic.tv_nsec; - while (nsec >= NSEC_PER_SEC) { - nsec -= NSEC_PER_SEC; - base->sec++; - } - base->nsec = nsec; - - gtod_write_end(vdata); -} diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index b6033680d458..19b695ff2c68 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PVCLOCK_H #define _ASM_X86_PVCLOCK_H -#include +#include #include /* some helper functions for xen and kvm pv clock sources */ diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..0e2650fc191b --- /dev/null +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Fast user context implementation of clock_gettime, gettimeofday, and time. + * + * Copyright (C) 2019 ARM Limited. + * Copyright 2006 Andi Kleen, SUSE Labs. + * 32 Bit compat layer by Stefani Seibold + * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +#include +#include +#include + +#define __vdso_data (VVAR(_vdso_data)) + +#define VDSO_HAS_TIME 1 + +#ifdef CONFIG_PARAVIRT_CLOCK +extern u8 pvclock_page[PAGE_SIZE] + __attribute__((visibility("hidden"))); +#endif + +#ifdef CONFIG_HYPERV_TSCPAGE +extern u8 hvclock_page[PAGE_SIZE] + __attribute__((visibility("hidden"))); +#endif + +#ifndef BUILD_VDSO32 + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ("syscall" : "=a" (ret), "=m" (*_ts) : + "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) : + "rcx", "r11"); + + return ret; +} + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory"); + + return ret; +} + +#else + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + long ret; + + asm( + "mov %%ebx, %%edx \n" + "mov %2, %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret) + : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz) + : "memory", "edx"); + + return ret; +} + +#endif + +#ifdef CONFIG_PARAVIRT_CLOCK +static const struct pvclock_vsyscall_time_info *get_pvti0(void) +{ + return (const struct pvclock_vsyscall_time_info *)&pvclock_page; +} + +static u64 vread_pvclock(void) +{ + const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; + u32 version; + u64 ret; + + /* + * Note: The kernel and hypervisor must guarantee that cpu ID + * number maps 1:1 to per-CPU pvclock time info. + * + * Because the hypervisor is entirely unaware of guest userspace + * preemption, it cannot guarantee that per-CPU pvclock time + * info is updated if the underlying CPU changes or that that + * version is increased whenever underlying CPU changes. + * + * On KVM, we are guaranteed that pvti updates for any vCPU are + * atomic as seen by *all* vCPUs. This is an even stronger + * guarantee than we get with a normal seqlock. + * + * On Xen, we don't appear to have that guarantee, but Xen still + * supplies a valid seqlock using the version field. + * + * We only do pvclock vdso timing at all if + * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to + * mean that all vCPUs have matching pvti and that the TSC is + * synced, so we can just look at vCPU 0's pvti. + */ + + do { + version = pvclock_read_begin(pvti); + + if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) + return U64_MAX; + + ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); + } while (pvclock_read_retry(pvti, version)); + + return ret; +} +#endif + +#ifdef CONFIG_HYPERV_TSCPAGE +static u64 vread_hvclock(void) +{ + const struct ms_hyperv_tsc_page *tsc_pg = + (const struct ms_hyperv_tsc_page *)&hvclock_page; + + return hv_read_tsc_page(tsc_pg); +} +#endif + +static inline u64 __arch_get_hw_counter(s32 clock_mode) +{ + if (clock_mode == VCLOCK_TSC) + return (u64)rdtsc_ordered(); + /* + * For any memory-mapped vclock type, we need to make sure that gcc + * doesn't cleverly hoist a load before the mode check. Otherwise we + * might end up touching the memory-mapped page even if the vclock in + * question isn't enabled, which will segfault. Hence the barriers. + */ +#ifdef CONFIG_PARAVIRT_CLOCK + if (clock_mode == VCLOCK_PVCLOCK) { + barrier(); + return vread_pvclock(); + } +#endif +#ifdef CONFIG_HYPERV_TSCPAGE + if (clock_mode == VCLOCK_HVCLOCK) { + barrier(); + return vread_hvclock(); + } +#endif + return U64_MAX; +} + +static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +{ + return __vdso_data; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..0026ab2123ce --- /dev/null +++ b/arch/x86/include/asm/vdso/vsyscall.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +#include + +int vclocks_used __read_mostly; + +DEFINE_VVAR(struct vdso_data, _vdso_data); +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. + */ +static __always_inline +struct vdso_data *__x86_get_k_vdso_data(void) +{ + return _vdso_data; +} +#define __arch_get_k_vdso_data __x86_get_k_vdso_data + +static __always_inline +int __x86_get_clock_mode(struct timekeeper *tk) +{ + int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; + + /* Mark the new vclock used. */ + BUILD_BUG_ON(VCLOCK_MAX >= 32); + WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode)); + + return vclock_mode; +} +#define __arch_get_clock_mode __x86_get_clock_mode + +/* The asm-generic header needs to be included after the definitions above */ +#include + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 913a133f8e6f..a2638c6124ed 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -3,7 +3,9 @@ #define _ASM_X86_VGTOD_H #include -#include +#include +#include +#include #include @@ -13,81 +15,10 @@ typedef u64 gtod_long_t; typedef unsigned long gtod_long_t; #endif -/* - * There is one of these objects in the vvar page for each - * vDSO-accelerated clockid. For high-resolution clocks, this encodes - * the time corresponding to vsyscall_gtod_data.cycle_last. For coarse - * clocks, this encodes the actual time. - * - * To confuse the reader, for high-resolution clocks, nsec is left-shifted - * by vsyscall_gtod_data.shift. - */ -struct vgtod_ts { - u64 sec; - u64 nsec; -}; - -#define VGTOD_BASES (CLOCK_TAI + 1) -#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI)) -#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE)) - -/* - * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time - * so be carefull by modifying this structure. - */ -struct vsyscall_gtod_data { - unsigned int seq; - - int vclock_mode; - u64 cycle_last; - u64 mask; - u32 mult; - u32 shift; - - struct vgtod_ts basetime[VGTOD_BASES]; - - int tz_minuteswest; - int tz_dsttime; -}; -extern struct vsyscall_gtod_data vsyscall_gtod_data; - extern int vclocks_used; static inline bool vclock_was_used(int vclock) { return READ_ONCE(vclocks_used) & (1 << vclock); } -static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s) -{ - unsigned int ret; - -repeat: - ret = READ_ONCE(s->seq); - if (unlikely(ret & 1)) { - cpu_relax(); - goto repeat; - } - smp_rmb(); - return ret; -} - -static inline int gtod_read_retry(const struct vsyscall_gtod_data *s, - unsigned int start) -{ - smp_rmb(); - return unlikely(s->seq != start); -} - -static inline void gtod_write_begin(struct vsyscall_gtod_data *s) -{ - ++s->seq; - smp_wmb(); -} - -static inline void gtod_write_end(struct vsyscall_gtod_data *s) -{ - smp_wmb(); - ++s->seq; -} - #endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index e474f5c6e387..32f5d9a0b90e 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h @@ -32,19 +32,20 @@ extern char __vvar_page; #define DECLARE_VVAR(offset, type, name) \ - extern type vvar_ ## name __attribute__((visibility("hidden"))); + extern type vvar_ ## name[CS_BASES] \ + __attribute__((visibility("hidden"))); #define VVAR(name) (vvar_ ## name) #define DEFINE_VVAR(type, name) \ - type name \ + type name[CS_BASES] \ __attribute__((section(".vvar_" #name), aligned(16))) __visible #endif /* DECLARE_VVAR(offset, type, name) */ -DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) +DECLARE_VVAR(128, struct vdso_data, _vdso_data) #undef DECLARE_VVAR diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 0ff3e294d0e5..10125358b9c4 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -3,6 +3,7 @@ */ +#include #include #include #include -- cgit v1.2.3 From f66501dc53e72079045a6a17e023b41316ede220 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jun 2019 10:52:50 +0100 Subject: x86/vdso: Add clock_getres() entry point The generic vDSO library provides an implementation of clock_getres() that can be leveraged by each architecture. Add the clock_getres() VDSO entry point on x86. [ tglx: Massaged changelog and cleaned up the function signature formatting ] Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Cc: Shijith Thotton Cc: Andre Przywara Link: https://lkml.kernel.org/r/20190621095252.32307-24-vincenzo.frascino@arm.com --- arch/x86/entry/vdso/vclock_gettime.c | 17 +++++++++++++++++ arch/x86/entry/vdso/vdso.lds.S | 2 ++ arch/x86/entry/vdso/vdso32/vdso32.lds.S | 1 + arch/x86/include/asm/vdso/gettimeofday.h | 31 +++++++++++++++++++++++++++++++ 4 files changed, 51 insertions(+) (limited to 'arch') diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index f01a3f0787ca..1a648b509d46 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -36,6 +36,7 @@ time_t time(time_t *t) __attribute__((weak, alias("__vdso_time"))); #if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64) /* both 64-bit and x32 use these */ extern int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); +extern int __vdso_clock_getres(clockid_t clock, struct __kernel_timespec *res); int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) { @@ -45,9 +46,18 @@ int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) int clock_gettime(clockid_t, struct __kernel_timespec *) __attribute__((weak, alias("__vdso_clock_gettime"))); +int __vdso_clock_getres(clockid_t clock, + struct __kernel_timespec *res) +{ + return __cvdso_clock_getres(clock, res); +} +int clock_getres(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_getres"))); + #else /* i386 only */ extern int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts); +extern int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res); int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts) { @@ -57,4 +67,11 @@ int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts) int clock_gettime(clockid_t, struct old_timespec32 *) __attribute__((weak, alias("__vdso_clock_gettime"))); +int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res) +{ + return __cvdso_clock_getres_time32(clock, res); +} + +int clock_getres(clockid_t, struct old_timespec32 *) + __attribute__((weak, alias("__vdso_clock_getres"))); #endif diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S index d3a2dce4cfa9..36b644e16272 100644 --- a/arch/x86/entry/vdso/vdso.lds.S +++ b/arch/x86/entry/vdso/vdso.lds.S @@ -25,6 +25,8 @@ VERSION { __vdso_getcpu; time; __vdso_time; + clock_getres; + __vdso_clock_getres; local: *; }; } diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S index 422764a81d32..991b26cc855b 100644 --- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S +++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S @@ -26,6 +26,7 @@ VERSION __vdso_clock_gettime; __vdso_gettimeofday; __vdso_time; + __vdso_clock_getres; }; LINUX_2.5 { diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 0e2650fc191b..f92752d6cbcf 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -24,6 +24,8 @@ #define VDSO_HAS_TIME 1 +#define VDSO_HAS_CLOCK_GETRES 1 + #ifdef CONFIG_PARAVIRT_CLOCK extern u8 pvclock_page[PAGE_SIZE] __attribute__((visibility("hidden"))); @@ -60,6 +62,18 @@ long gettimeofday_fallback(struct __kernel_old_timeval *_tv, return ret; } +static __always_inline +long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ("syscall" : "=a" (ret), "=m" (*_ts) : + "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) : + "rcx", "r11"); + + return ret; +} + #else static __always_inline @@ -97,6 +111,23 @@ long gettimeofday_fallback(struct __kernel_old_timeval *_tv, return ret; } +static __always_inline long +clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + #endif #ifdef CONFIG_PARAVIRT_CLOCK -- cgit v1.2.3 From 22ca962288c0a1c9729e8e440b9bb9ad05df6db6 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 21 Jun 2019 10:52:51 +0100 Subject: x86/vdso: Add clock_gettime64() entry point Linux 5.1 gained the new clock_gettime64() syscall to address the Y2038 problem on 32bit systems. The x86 VDSO is missing support for this variant of clock_gettime(). Update the x86 specific vDSO library accordingly so it exposes the new time getter. [ tglx: Massaged changelog ] Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Cc: Shijith Thotton Cc: Andre Przywara Link: https://lkml.kernel.org/r/20190621095252.32307-25-vincenzo.frascino@arm.com --- arch/x86/entry/vdso/vclock_gettime.c | 8 ++++++++ arch/x86/entry/vdso/vdso32/vdso32.lds.S | 1 + 2 files changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 1a648b509d46..d9ff616bb0f6 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -67,6 +67,14 @@ int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts) int clock_gettime(clockid_t, struct old_timespec32 *) __attribute__((weak, alias("__vdso_clock_gettime"))); +int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime(clock, ts); +} + +int clock_gettime64(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_gettime64"))); + int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res) { return __cvdso_clock_getres_time32(clock, res); diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S index 991b26cc855b..c7720995ab1a 100644 --- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S +++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S @@ -27,6 +27,7 @@ VERSION __vdso_gettimeofday; __vdso_time; __vdso_clock_getres; + __vdso_clock_gettime64; }; LINUX_2.5 { -- cgit v1.2.3 From ecf9db3d1f1a8fd2c335148891c3b044e9ce0628 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 22 Jun 2019 15:08:18 -0700 Subject: x86/vdso: Give the [ph]vclock_page declarations real types Clean up the vDSO code a bit by giving pvclock_page and hvclock_page their actual types instead of u8[PAGE_SIZE]. This shouldn't materially affect the generated code. Heavily based on a patch from Linus. [ tglx: Adapted to the unified VDSO code ] Co-developed-by: Linus Torvalds Signed-off-by: Linus Torvalds Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/6920c5188f8658001af1fc56fd35b815706d300c.1561241273.git.luto@kernel.org --- arch/x86/include/asm/vdso/gettimeofday.h | 36 +++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index f92752d6cbcf..5b63f1f78a1f 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -26,13 +26,33 @@ #define VDSO_HAS_CLOCK_GETRES 1 +/* + * Declare the memory-mapped vclock data pages. These come from hypervisors. + * If we ever reintroduce something like direct access to an MMIO clock like + * the HPET again, it will go here as well. + * + * A load from any of these pages will segfault if the clock in question is + * disabled, so appropriate compiler barriers and checks need to be used + * to prevent stray loads. + * + * These declarations MUST NOT be const. The compiler will assume that + * an extern const variable has genuinely constant contents, and the + * resulting code won't work, since the whole point is that these pages + * change over time, possibly while we're accessing them. + */ + #ifdef CONFIG_PARAVIRT_CLOCK -extern u8 pvclock_page[PAGE_SIZE] +/* + * This is the vCPU 0 pvclock page. We only use pvclock from the vDSO + * if the hypervisor tells us that all vCPUs can get valid data from the + * vCPU 0 page. + */ +extern struct pvclock_vsyscall_time_info pvclock_page __attribute__((visibility("hidden"))); #endif #ifdef CONFIG_HYPERV_TSCPAGE -extern u8 hvclock_page[PAGE_SIZE] +extern struct ms_hyperv_tsc_page hvclock_page __attribute__((visibility("hidden"))); #endif @@ -131,14 +151,9 @@ clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) #endif #ifdef CONFIG_PARAVIRT_CLOCK -static const struct pvclock_vsyscall_time_info *get_pvti0(void) -{ - return (const struct pvclock_vsyscall_time_info *)&pvclock_page; -} - static u64 vread_pvclock(void) { - const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; + const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti; u32 version; u64 ret; @@ -180,10 +195,7 @@ static u64 vread_pvclock(void) #ifdef CONFIG_HYPERV_TSCPAGE static u64 vread_hvclock(void) { - const struct ms_hyperv_tsc_page *tsc_pg = - (const struct ms_hyperv_tsc_page *)&hvclock_page; - - return hv_read_tsc_page(tsc_pg); + return hv_read_tsc_page(&hvclock_page); } #endif -- cgit v1.2.3 From 5a354412567d7de81d69b6ac61c3b7fcebbe497e Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Thu, 13 Jun 2019 13:51:02 +0100 Subject: clocksource/drivers/arm_arch_timer: Extract elf_hwcap use to arch-helper Different mechanisms are used to test and set elf_hwcaps between ARM and ARM64, this results in the use of ifdeferry in this file when setting/testing for the EVTSTRM hwcap. Let's improve readability by extracting this to an arch helper. Signed-off-by: Andrew Murray Acked-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Daniel Lezcano --- arch/arm/include/asm/arch_timer.h | 10 ++++++++++ arch/arm64/include/asm/arch_timer.h | 13 +++++++++++++ drivers/clocksource/arm_arch_timer.c | 15 ++------------- 3 files changed, 25 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 4b66ecd6be99..99175812d903 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -124,6 +125,15 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) isb(); } +static inline void arch_timer_set_evtstrm_feature(void) +{ + elf_hwcap |= HWCAP_EVTSTRM; +} + +static inline bool arch_timer_have_evtstrm_feature(void) +{ + return elf_hwcap & HWCAP_EVTSTRM; +} #endif #endif diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 50b3ab7ded4f..a847a3ee6cab 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -20,6 +20,7 @@ #define __ASM_ARCH_TIMER_H #include +#include #include #include @@ -240,4 +241,16 @@ static inline int arch_timer_arch_init(void) return 0; } +static inline void arch_timer_set_evtstrm_feature(void) +{ + cpu_set_named_feature(EVTSTRM); +#ifdef CONFIG_COMPAT + compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM; +#endif +} + +static inline bool arch_timer_have_evtstrm_feature(void) +{ + return cpu_have_named_feature(EVTSTRM); +} #endif diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 5c69c9a9a6a4..3c8afcd3444c 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -804,14 +804,7 @@ static void arch_timer_evtstrm_enable(int divider) cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT) | ARCH_TIMER_VIRT_EVT_EN; arch_timer_set_cntkctl(cntkctl); -#ifdef CONFIG_ARM64 - cpu_set_named_feature(EVTSTRM); -#else - elf_hwcap |= HWCAP_EVTSTRM; -#endif -#ifdef CONFIG_COMPAT - compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM; -#endif + arch_timer_set_evtstrm_feature(); cpumask_set_cpu(smp_processor_id(), &evtstrm_available); } @@ -1040,11 +1033,7 @@ static int arch_timer_cpu_pm_notify(struct notifier_block *self, } else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) { arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl)); -#ifdef CONFIG_ARM64 - if (cpu_have_named_feature(EVTSTRM)) -#else - if (elf_hwcap & HWCAP_EVTSTRM) -#endif + if (arch_timer_have_evtstrm_feature()) cpumask_set_cpu(smp_processor_id(), &evtstrm_available); } return NOTIFY_OK; -- cgit v1.2.3 From 94fee4d43752b6022428d9de402632904968e15b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 24 Jun 2019 14:58:12 +0100 Subject: arm64: vdso: Remove unnecessary asm-offsets.c definitions Since the VDSO code has moved to C from assembly, there is no need to define and maintain the corresponding asm offsets. Fixes: 28b1a824a4f4 ("arm64: vdso: Substitute gettimeofday() with C implementation") Signed-off-by: Catalin Marinas Signed-off-by: Thomas Gleixner Cc: Vincenzo Frascino Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Cc: Shijith Thotton Cc: Andre Przywara Link: https://lkml.kernel.org/r/20190624135812.GC29120@arrakis.emea.arm.com --- arch/arm64/kernel/asm-offsets.c | 39 --------------------------------------- 1 file changed, 39 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index e6f7409a78a4..214685760e1c 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -86,44 +85,6 @@ int main(void) BLANK(); DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET); BLANK(); - DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); - DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); - DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW); - DEFINE(CLOCK_REALTIME_RES, offsetof(struct vdso_data, hrtimer_res)); - DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); - DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE); - DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC); - DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); - BLANK(); - DEFINE(VDSO_SEQ, offsetof(struct vdso_data, seq)); - DEFINE(VDSO_CLK_MODE, offsetof(struct vdso_data, clock_mode)); - DEFINE(VDSO_CYCLE_LAST, offsetof(struct vdso_data, cycle_last)); - DEFINE(VDSO_MASK, offsetof(struct vdso_data, mask)); - DEFINE(VDSO_MULT, offsetof(struct vdso_data, mult)); - DEFINE(VDSO_SHIFT, offsetof(struct vdso_data, shift)); - DEFINE(VDSO_REALTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].sec)); - DEFINE(VDSO_REALTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].nsec)); - DEFINE(VDSO_MONO_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].sec)); - DEFINE(VDSO_MONO_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].nsec)); - DEFINE(VDSO_MONO_RAW_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].sec)); - DEFINE(VDSO_MONO_RAW_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].nsec)); - DEFINE(VDSO_BOOTTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].sec)); - DEFINE(VDSO_BOOTTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].nsec)); - DEFINE(VDSO_TAI_SEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].sec)); - DEFINE(VDSO_TAI_NSEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].nsec)); - DEFINE(VDSO_RT_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].sec)); - DEFINE(VDSO_RT_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].nsec)); - DEFINE(VDSO_MONO_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].sec)); - DEFINE(VDSO_MONO_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].nsec)); - DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); - DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); - BLANK(); - DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec)); - DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec)); - BLANK(); - DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); - DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); - BLANK(); DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); BLANK(); -- cgit v1.2.3 From 6a5b78b32d10cd7901f639870eca304b270769f9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 24 Jun 2019 15:00:19 +0100 Subject: arm64: compat: No need for pre-ARMv7 barriers on an ARMv8 system Remove the deprecated (pre-ARMv7) compat barriers as they would not be used on an ARMv8 system. Fixes: a7f71a2c8903 ("arm64: compat: Add vDSO") Signed-off-by: Catalin Marinas Signed-off-by: Thomas Gleixner Cc: Vincenzo Frascino Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Cc: Shijith Thotton Cc: Andre Przywara Link: https://lkml.kernel.org/r/20190624140018.GD29120@arrakis.emea.arm.com --- arch/arm64/include/asm/vdso/compat_barrier.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h index ea24ea856b07..fb60a88b5ed4 100644 --- a/arch/arm64/include/asm/vdso/compat_barrier.h +++ b/arch/arm64/include/asm/vdso/compat_barrier.h @@ -18,14 +18,7 @@ #undef dmb #endif -#if __LINUX_ARM_ARCH__ >= 7 #define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") -#elif __LINUX_ARM_ARCH__ == 6 -#define dmb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ - : : "r" (0) : "memory") -#else -#define dmb(x) __asm__ __volatile__ ("" : : : "memory") -#endif #if __LINUX_ARM_ARCH__ >= 8 #define aarch32_smp_mb() dmb(ish) -- cgit v1.2.3 From 9d90b93bf325e015bbae31b83f16da5e4e17effa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 26 Jun 2019 12:02:00 +0200 Subject: lib/vdso: Make delta calculation work correctly The x86 vdso implementation on which the generic vdso library is based on has subtle (unfortunately undocumented) twists: 1) The code assumes that the clocksource mask is U64_MAX which means that no bits are masked. Which is true for any valid x86 VDSO clocksource. Stupidly it still did the mask operation for no reason and at the wrong place right after reading the clocksource. 2) It contains a sanity check to catch the case where slightly unsynchronized TSC values can be observed which would cause the delta calculation to make a huge jump. It therefore checks whether the current TSC value is larger than the value on which the current conversion is based on. If it's not larger the base value is used to prevent time jumps. #1 Is not only stupid for the X86 case because it does the masking for no reason it is also completely wrong for clocksources with a smaller mask which can legitimately wrap around during a conversion period. The core timekeeping code does it correct by applying the mask after the delta calculation: (now - base) & mask #2 is equally broken for clocksources which have smaller masks and can wrap around during a conversion period because there the now > base check is just wrong and causes stale time stamps and time going backwards issues. Unbreak it by: 1) Removing the mask operation from the clocksource read which makes the fallback detection work for all clocksources 2) Replacing the conditional delta calculation with a overrideable inline function. #2 could reuse clocksource_delta() from the timekeeping code but that results in a significant performance hit for the x86 VSDO. The timekeeping core code must have the non optimized version as it has to operate correctly with clocksources which have smaller masks as well to handle the case where TSC is discarded as timekeeper clocksource and replaced by HPET or pmtimer. For the VDSO there is no replacement clocksource. If TSC is unusable the syscall is enforced which does the right thing. To accommodate to the needs of various architectures provide an override-able inline function which defaults to the regular delta calculation with masking: (now - base) & mask Override it for x86 with the non-masking and checking version. This unbreaks the ARM64 syscall fallback operation, allows to use clocksources with arbitrary width and preserves the performance optimization for x86. Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Cc: linux-arch@vger.kernel.org Cc: LAK Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: catalin.marinas@arm.com Cc: Will Deacon Cc: Arnd Bergmann Cc: linux@armlinux.org.uk Cc: Ralf Baechle Cc: paul.burton@mips.com Cc: Daniel Lezcano Cc: salyzyn@android.com Cc: pcc@google.com Cc: shuah@kernel.org Cc: 0x7f454c46@gmail.com Cc: linux@rasmusvillemoes.dk Cc: huw@codeweavers.com Cc: sthotton@marvell.com Cc: andre.przywara@arm.com Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1906261159230.32342@nanos.tec.linutronix.de --- arch/x86/include/asm/vdso/gettimeofday.h | 27 +++++++++++++++++++++++++++ lib/vdso/gettimeofday.c | 19 +++++++++++++++---- 2 files changed, 42 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 5b63f1f78a1f..a14039a59abd 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -229,6 +229,33 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) return __vdso_data; } +/* + * x86 specific delta calculation. + * + * The regular implementation assumes that clocksource reads are globally + * monotonic. The TSC can be slightly off across sockets which can cause + * the regular delta calculation (@cycles - @last) to return a huge time + * jump. + * + * Therefore it needs to be verified that @cycles are greater than + * @last. If not then use @last, which is the base time of the current + * conversion period. + * + * This variant also removes the masking of the subtraction because the + * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX + * which would result in a pointless operation. The compiler cannot + * optimize it away as the mask comes from the vdso data and is not compile + * time constant. + */ +static __always_inline +u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +{ + if (cycles > last) + return (cycles - last) * mult; + return 0; +} +#define vdso_calc_delta vdso_calc_delta + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index ef28cc5d7bff..2d1c1f241fd9 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -26,6 +26,18 @@ #include #endif /* ENABLE_COMPAT_VDSO */ +#ifndef vdso_calc_delta +/* + * Default implementation which works for all sane clocksources. That + * obviously excludes x86/TSC. + */ +static __always_inline +u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +{ + return ((cycles - last) & mask) * mult; +} +#endif + static int do_hres(const struct vdso_data *vd, clockid_t clk, struct __kernel_timespec *ts) { @@ -35,14 +47,13 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk, do { seq = vdso_read_begin(vd); - cycles = __arch_get_hw_counter(vd->clock_mode) & - vd->mask; + cycles = __arch_get_hw_counter(vd->clock_mode); ns = vdso_ts->nsec; last = vd->cycle_last; if (unlikely((s64)cycles < 0)) return clock_gettime_fallback(clk, ts); - if (cycles > last) - ns += (cycles - last) * vd->mult; + + ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); ns >>= vd->shift; sec = vdso_ts->sec; } while (unlikely(vdso_read_retry(vd, seq))); -- cgit v1.2.3 From 27e11a9fe2e2e7e0d13f854e89a71e488678fb17 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 25 Jun 2019 17:18:03 +0100 Subject: arm64: Fix __arch_get_hw_counter() implementation Provide the following fixes for the __arch_get_hw_counter() implementation on arm64: - Fallback on syscall when an unstable counter is detected. - Introduce isb()s before and after the counter read to avoid speculation of the counter value and of the seq lock respectively. The second isb() is a temporary solution that will be revisited in 5.3-rc1. These fixes restore the semantics that __arch_counter_get_cntvct() had on arm64. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: catalin.marinas@arm.com Cc: will.deacon@arm.com Cc: arnd@arndb.de Cc: linux@armlinux.org.uk Cc: ralf@linux-mips.org Cc: paul.burton@mips.com Cc: daniel.lezcano@linaro.org Cc: salyzyn@android.com Cc: pcc@google.com Cc: shuah@kernel.org Cc: 0x7f454c46@gmail.com Cc: linux@rasmusvillemoes.dk Cc: huw@codeweavers.com Cc: sthotton@marvell.com Cc: andre.przywara@arm.com Cc: Catalin Marinas Cc: Will Deacon Link: https://lkml.kernel.org/r/20190625161804.38713-2-vincenzo.frascino@arm.com --- arch/arm64/include/asm/vdso/gettimeofday.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 447ef417de45..b08f476b72b4 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -10,6 +10,8 @@ #include #include +#define __VDSO_USE_SYSCALL ULLONG_MAX + #define VDSO_HAS_CLOCK_GETRES 1 static __always_inline @@ -68,7 +70,24 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) { u64 res; + /* + * clock_mode == 0 implies that vDSO are enabled otherwise + * fallback on syscall. + */ + if (clock_mode) + return __VDSO_USE_SYSCALL; + + /* + * This isb() is required to prevent that the counter value + * is speculated. + */ + isb(); asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); + /* + * This isb() is required to prevent that the seq lock is + * speculated.# + */ + isb(); return res; } -- cgit v1.2.3 From 6241c4dc6ec56a7627b972959da8b492b765b209 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 25 Jun 2019 17:18:04 +0100 Subject: arm64: compat: Fix __arch_get_hw_counter() implementation Provide the following fixes for the __arch_get_hw_counter() implementation on arm64: - Fallback on syscall when an unstable counter is detected. - Introduce isb()s before and after the counter read to avoid speculation of the counter value and of the seq lock respectively. The second isb() is a temporary solution that will be revisited in 5.3-rc1. These fixes restore the semantics that __arch_counter_get_cntvct() had on arm64. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: catalin.marinas@arm.com Cc: will.deacon@arm.com Cc: arnd@arndb.de Cc: linux@armlinux.org.uk Cc: ralf@linux-mips.org Cc: paul.burton@mips.com Cc: daniel.lezcano@linaro.org Cc: salyzyn@android.com Cc: pcc@google.com Cc: shuah@kernel.org Cc: 0x7f454c46@gmail.com Cc: linux@rasmusvillemoes.dk Cc: huw@codeweavers.com Cc: sthotton@marvell.com Cc: andre.przywara@arm.com Cc: Catalin Marinas Cc: Will Deacon Link: https://lkml.kernel.org/r/20190625161804.38713-3-vincenzo.frascino@arm.com --- arch/arm64/include/asm/vdso/compat_gettimeofday.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index 93dbd935b66d..f4812777f5c5 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -12,6 +12,8 @@ #include +#define __VDSO_USE_SYSCALL ULLONG_MAX + #define VDSO_HAS_CLOCK_GETRES 1 static __always_inline @@ -74,8 +76,24 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) { u64 res; + /* + * clock_mode == 0 implies that vDSO are enabled otherwise + * fallback on syscall. + */ + if (clock_mode) + return __VDSO_USE_SYSCALL; + + /* + * This isb() is required to prevent that the counter value + * is speculated. + */ isb(); asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (res)); + /* + * This isb() is required to prevent that the seq lock is + * speculated. + */ + isb(); return res; } -- cgit v1.2.3 From 3acf4be235280f14d838581a750532219d67facc Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Wed, 26 Jun 2019 12:36:32 +0100 Subject: arm64: vdso: Fix compilation with clang older than 8 clang versions older than 8 do not support -mcmodel=tiny. Add a check to the vDSO Makefile for arm64 to remove the flag when these versions of the compiler are detected. Reported-by: Qian Cai Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Tested-by: Qian Cai Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: catalin.marinas@arm.com Cc: will.deacon@arm.com Cc: arnd@arndb.de Cc: linux@armlinux.org.uk Cc: ralf@linux-mips.org Cc: paul.burton@mips.com Cc: daniel.lezcano@linaro.org Cc: salyzyn@android.com Cc: pcc@google.com Cc: shuah@kernel.org Cc: 0x7f454c46@gmail.com Cc: linux@rasmusvillemoes.dk Cc: huw@codeweavers.com Cc: sthotton@marvell.com Cc: andre.przywara@arm.com Cc: luto@kernel.org Link: https://lkml.kernel.org/r/20190626113632.9295-1-vincenzo.frascino@arm.com --- arch/arm64/kernel/vdso/Makefile | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index ec81d28aeb5d..4ab863045188 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -38,6 +38,13 @@ else CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y) endif +# Clang versions less than 8 do not support -mcmodel=tiny +ifeq ($(CONFIG_CC_IS_CLANG), y) + ifeq ($(shell test $(CONFIG_CLANG_VERSION) -lt 80000; echo $$?),0) + CFLAGS_REMOVE_vgettimeofday.o += -mcmodel=tiny + endif +endif + # Disable gcov profiling for VDSO code GCOV_PROFILE := n -- cgit v1.2.3 From fd1fea6834d0f9f93062ae6685862908a9baed39 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 1 Jul 2019 04:25:56 +0000 Subject: clocksource/drivers: Make Hyper-V clocksource ISA agnostic Hyper-V clock/timer code and data structures are currently mixed in with other code in the ISA independent drivers/hv directory as well as the ISA dependent Hyper-V code under arch/x86. Consolidate this code and data structures into a Hyper-V clocksource driver to better follow the Linux model. In doing so, separate out the ISA dependent portions so the new clocksource driver works for x86 and for the in-process Hyper-V on ARM64 code. To start, move the existing clockevents code to create the new clocksource driver. Update the VMbus driver to call initialization and cleanup routines since the Hyper-V synthetic timers are not independently enumerated in ACPI. No behavior is changed and no new functionality is added. Suggested-by: Marc Zyngier Signed-off-by: Michael Kelley Signed-off-by: Thomas Gleixner Reviewed-by: Vitaly Kuznetsov Cc: "bp@alien8.de" Cc: "will.deacon@arm.com" Cc: "catalin.marinas@arm.com" Cc: "mark.rutland@arm.com" Cc: "linux-arm-kernel@lists.infradead.org" Cc: "gregkh@linuxfoundation.org" Cc: "linux-hyperv@vger.kernel.org" Cc: "olaf@aepfle.de" Cc: "apw@canonical.com" Cc: "jasowang@redhat.com" Cc: "marcelo.cerri@canonical.com" Cc: Sunil Muthuswamy Cc: KY Srinivasan Cc: "sashal@kernel.org" Cc: "vincenzo.frascino@arm.com" Cc: "linux-arch@vger.kernel.org" Cc: "linux-mips@vger.kernel.org" Cc: "linux-kselftest@vger.kernel.org" Cc: "arnd@arndb.de" Cc: "linux@armlinux.org.uk" Cc: "ralf@linux-mips.org" Cc: "paul.burton@mips.com" Cc: "daniel.lezcano@linaro.org" Cc: "salyzyn@android.com" Cc: "pcc@google.com" Cc: "shuah@kernel.org" Cc: "0x7f454c46@gmail.com" <0x7f454c46@gmail.com> Cc: "linux@rasmusvillemoes.dk" Cc: "huw@codeweavers.com" Cc: "sfr@canb.auug.org.au" Cc: "pbonzini@redhat.com" Cc: "rkrcmar@redhat.com" Cc: "kvm@vger.kernel.org" Link: https://lkml.kernel.org/r/1561955054-1838-2-git-send-email-mikelley@microsoft.com --- MAINTAINERS | 2 + arch/x86/include/asm/hyperv-tlfs.h | 6 ++ arch/x86/kernel/cpu/mshyperv.c | 4 +- drivers/clocksource/Makefile | 1 + drivers/clocksource/hyperv_timer.c | 200 +++++++++++++++++++++++++++++++++++++ drivers/hv/Kconfig | 3 + drivers/hv/hv.c | 156 +---------------------------- drivers/hv/hyperv_vmbus.h | 3 - drivers/hv/vmbus_drv.c | 42 ++++---- include/clocksource/hyperv_timer.h | 27 +++++ 10 files changed, 268 insertions(+), 176 deletions(-) create mode 100644 drivers/clocksource/hyperv_timer.c create mode 100644 include/clocksource/hyperv_timer.h (limited to 'arch') diff --git a/MAINTAINERS b/MAINTAINERS index a2812972b7b0..bfde42a76a95 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7313,6 +7313,7 @@ F: arch/x86/include/asm/trace/hyperv.h F: arch/x86/include/asm/hyperv-tlfs.h F: arch/x86/kernel/cpu/mshyperv.c F: arch/x86/hyperv +F: drivers/clocksource/hyperv_timer.c F: drivers/hid/hid-hyperv.c F: drivers/hv/ F: drivers/input/serio/hyperv-keyboard.c @@ -7323,6 +7324,7 @@ F: drivers/uio/uio_hv_generic.c F: drivers/video/fbdev/hyperv_fb.c F: drivers/iommu/hyperv_iommu.c F: net/vmw_vsock/hyperv_transport.c +F: include/clocksource/hyperv_timer.h F: include/linux/hyperv.h F: include/uapi/linux/hyperv.h F: tools/hv/ diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index cdf44aa9a501..af78cd72b8f3 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -401,6 +401,12 @@ enum HV_GENERIC_SET_FORMAT { #define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 +/* + * The Hyper-V TimeRefCount register and the TSC + * page provide a guest VM clock with 100ns tick rate + */ +#define HV_CLOCK_HZ (NSEC_PER_SEC/100) + typedef struct _HV_REFERENCE_TSC_PAGE { __u32 tsc_sequence; __u32 res1; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 7df29f08871b..1e5f7a03ddf5 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -80,6 +81,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) inc_irq_stat(hyperv_stimer0_count); if (hv_stimer0_handler) hv_stimer0_handler(); + add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0); ack_APIC_irq(); exiting_irq(); @@ -89,7 +91,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)) { *vector = HYPERV_STIMER0_VECTOR; - *irq = 0; /* Unused on x86/x64 */ + *irq = -1; /* Unused on x86/x64 */ hv_stimer0_handler = handler; return 0; } diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 5582252efb31..2e7936e7833f 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -86,3 +86,4 @@ obj-$(CONFIG_ATCPIT100_TIMER) += timer-atcpit100.o obj-$(CONFIG_RISCV_TIMER) += timer-riscv.o obj-$(CONFIG_CSKY_MP_TIMER) += timer-mp-csky.o obj-$(CONFIG_GX6605S_TIMER) += timer-gx6605s.o +obj-$(CONFIG_HYPERV_TIMER) += hyperv_timer.o diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c new file mode 100644 index 000000000000..68a28af31561 --- /dev/null +++ b/drivers/clocksource/hyperv_timer.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Clocksource driver for the synthetic counter and timers + * provided by the Hyper-V hypervisor to guest VMs, as described + * in the Hyper-V Top Level Functional Spec (TLFS). This driver + * is instruction set architecture independent. + * + * Copyright (C) 2019, Microsoft, Inc. + * + * Author: Michael Kelley + */ + +#include +#include +#include +#include +#include +#include +#include + +static struct clock_event_device __percpu *hv_clock_event; + +/* + * If false, we're using the old mechanism for stimer0 interrupts + * where it sends a VMbus message when it expires. The old + * mechanism is used when running on older versions of Hyper-V + * that don't support Direct Mode. While Hyper-V provides + * four stimer's per CPU, Linux uses only stimer0. + */ +static bool direct_mode_enabled; + +static int stimer0_irq; +static int stimer0_vector; +static int stimer0_message_sint; + +/* + * ISR for when stimer0 is operating in Direct Mode. Direct Mode + * does not use VMbus or any VMbus messages, so process here and not + * in the VMbus driver code. + */ +void hv_stimer0_isr(void) +{ + struct clock_event_device *ce; + + ce = this_cpu_ptr(hv_clock_event); + ce->event_handler(ce); +} +EXPORT_SYMBOL_GPL(hv_stimer0_isr); + +static int hv_ce_set_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + u64 current_tick; + + current_tick = hyperv_cs->read(NULL); + current_tick += delta; + hv_init_timer(0, current_tick); + return 0; +} + +static int hv_ce_shutdown(struct clock_event_device *evt) +{ + hv_init_timer(0, 0); + hv_init_timer_config(0, 0); + if (direct_mode_enabled) + hv_disable_stimer0_percpu_irq(stimer0_irq); + + return 0; +} + +static int hv_ce_set_oneshot(struct clock_event_device *evt) +{ + union hv_stimer_config timer_cfg; + + timer_cfg.as_uint64 = 0; + timer_cfg.enable = 1; + timer_cfg.auto_enable = 1; + if (direct_mode_enabled) { + /* + * When it expires, the timer will directly interrupt + * on the specified hardware vector/IRQ. + */ + timer_cfg.direct_mode = 1; + timer_cfg.apic_vector = stimer0_vector; + hv_enable_stimer0_percpu_irq(stimer0_irq); + } else { + /* + * When it expires, the timer will generate a VMbus message, + * to be handled by the normal VMbus interrupt handler. + */ + timer_cfg.direct_mode = 0; + timer_cfg.sintx = stimer0_message_sint; + } + hv_init_timer_config(0, timer_cfg.as_uint64); + return 0; +} + +/* + * hv_stimer_init - Per-cpu initialization of the clockevent + */ +void hv_stimer_init(unsigned int cpu) +{ + struct clock_event_device *ce; + + /* + * Synthetic timers are always available except on old versions of + * Hyper-V on x86. In that case, just return as Linux will use a + * clocksource based on emulated PIT or LAPIC timer hardware. + */ + if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE)) + return; + + ce = per_cpu_ptr(hv_clock_event, cpu); + ce->name = "Hyper-V clockevent"; + ce->features = CLOCK_EVT_FEAT_ONESHOT; + ce->cpumask = cpumask_of(cpu); + ce->rating = 1000; + ce->set_state_shutdown = hv_ce_shutdown; + ce->set_state_oneshot = hv_ce_set_oneshot; + ce->set_next_event = hv_ce_set_next_event; + + clockevents_config_and_register(ce, + HV_CLOCK_HZ, + HV_MIN_DELTA_TICKS, + HV_MAX_MAX_DELTA_TICKS); +} +EXPORT_SYMBOL_GPL(hv_stimer_init); + +/* + * hv_stimer_cleanup - Per-cpu cleanup of the clockevent + */ +void hv_stimer_cleanup(unsigned int cpu) +{ + struct clock_event_device *ce; + + /* Turn off clockevent device */ + if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) { + ce = per_cpu_ptr(hv_clock_event, cpu); + hv_ce_shutdown(ce); + } +} +EXPORT_SYMBOL_GPL(hv_stimer_cleanup); + +/* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */ +int hv_stimer_alloc(int sint) +{ + int ret; + + hv_clock_event = alloc_percpu(struct clock_event_device); + if (!hv_clock_event) + return -ENOMEM; + + direct_mode_enabled = ms_hyperv.misc_features & + HV_STIMER_DIRECT_MODE_AVAILABLE; + if (direct_mode_enabled) { + ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector, + hv_stimer0_isr); + if (ret) { + free_percpu(hv_clock_event); + hv_clock_event = NULL; + return ret; + } + } + + stimer0_message_sint = sint; + return 0; +} +EXPORT_SYMBOL_GPL(hv_stimer_alloc); + +/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */ +void hv_stimer_free(void) +{ + if (direct_mode_enabled && (stimer0_irq != 0)) { + hv_remove_stimer0_irq(stimer0_irq); + stimer0_irq = 0; + } + free_percpu(hv_clock_event); + hv_clock_event = NULL; +} +EXPORT_SYMBOL_GPL(hv_stimer_free); + +/* + * Do a global cleanup of clockevents for the cases of kexec and + * vmbus exit + */ +void hv_stimer_global_cleanup(void) +{ + int cpu; + struct clock_event_device *ce; + + if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) { + for_each_present_cpu(cpu) { + ce = per_cpu_ptr(hv_clock_event, cpu); + clockevents_unbind_device(ce, cpu); + } + } + hv_stimer_free(); +} +EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 1c1a2514d6f3..c423e57ae888 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -10,6 +10,9 @@ config HYPERV Select this option to run Linux as a Hyper-V client operating system. +config HYPERV_TIMER + def_bool HYPERV + config HYPERV_TSCPAGE def_bool HYPERV && X86_64 diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index a1ea482183e8..6188fb7dda42 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -16,27 +16,13 @@ #include #include #include +#include #include #include "hyperv_vmbus.h" /* The one and only */ struct hv_context hv_context; -/* - * If false, we're using the old mechanism for stimer0 interrupts - * where it sends a VMbus message when it expires. The old - * mechanism is used when running on older versions of Hyper-V - * that don't support Direct Mode. While Hyper-V provides - * four stimer's per CPU, Linux uses only stimer0. - */ -static bool direct_mode_enabled; -static int stimer0_irq; -static int stimer0_vector; - -#define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */ -#define HV_MAX_MAX_DELTA_TICKS 0xffffffff -#define HV_MIN_DELTA_TICKS 1 - /* * hv_init - Main initialization routine. * @@ -47,9 +33,6 @@ int hv_init(void) hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context); if (!hv_context.cpu_context) return -ENOMEM; - - direct_mode_enabled = ms_hyperv.misc_features & - HV_STIMER_DIRECT_MODE_AVAILABLE; return 0; } @@ -88,89 +71,6 @@ int hv_post_message(union hv_connection_id connection_id, return status & 0xFFFF; } -/* - * ISR for when stimer0 is operating in Direct Mode. Direct Mode - * does not use VMbus or any VMbus messages, so process here and not - * in the VMbus driver code. - */ - -static void hv_stimer0_isr(void) -{ - struct hv_per_cpu_context *hv_cpu; - - hv_cpu = this_cpu_ptr(hv_context.cpu_context); - hv_cpu->clk_evt->event_handler(hv_cpu->clk_evt); - add_interrupt_randomness(stimer0_vector, 0); -} - -static int hv_ce_set_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - u64 current_tick; - - WARN_ON(!clockevent_state_oneshot(evt)); - - current_tick = hyperv_cs->read(NULL); - current_tick += delta; - hv_init_timer(0, current_tick); - return 0; -} - -static int hv_ce_shutdown(struct clock_event_device *evt) -{ - hv_init_timer(0, 0); - hv_init_timer_config(0, 0); - if (direct_mode_enabled) - hv_disable_stimer0_percpu_irq(stimer0_irq); - - return 0; -} - -static int hv_ce_set_oneshot(struct clock_event_device *evt) -{ - union hv_stimer_config timer_cfg; - - timer_cfg.as_uint64 = 0; - timer_cfg.enable = 1; - timer_cfg.auto_enable = 1; - if (direct_mode_enabled) { - /* - * When it expires, the timer will directly interrupt - * on the specified hardware vector/IRQ. - */ - timer_cfg.direct_mode = 1; - timer_cfg.apic_vector = stimer0_vector; - hv_enable_stimer0_percpu_irq(stimer0_irq); - } else { - /* - * When it expires, the timer will generate a VMbus message, - * to be handled by the normal VMbus interrupt handler. - */ - timer_cfg.direct_mode = 0; - timer_cfg.sintx = VMBUS_MESSAGE_SINT; - } - hv_init_timer_config(0, timer_cfg.as_uint64); - return 0; -} - -static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu) -{ - dev->name = "Hyper-V clockevent"; - dev->features = CLOCK_EVT_FEAT_ONESHOT; - dev->cpumask = cpumask_of(cpu); - dev->rating = 1000; - /* - * Avoid settint dev->owner = THIS_MODULE deliberately as doing so will - * result in clockevents_config_and_register() taking additional - * references to the hv_vmbus module making it impossible to unload. - */ - - dev->set_state_shutdown = hv_ce_shutdown; - dev->set_state_oneshot = hv_ce_set_oneshot; - dev->set_next_event = hv_ce_set_next_event; -} - - int hv_synic_alloc(void) { int cpu; @@ -199,14 +99,6 @@ int hv_synic_alloc(void) tasklet_init(&hv_cpu->msg_dpc, vmbus_on_msg_dpc, (unsigned long) hv_cpu); - hv_cpu->clk_evt = kzalloc(sizeof(struct clock_event_device), - GFP_KERNEL); - if (hv_cpu->clk_evt == NULL) { - pr_err("Unable to allocate clock event device\n"); - goto err; - } - hv_init_clockevent_device(hv_cpu->clk_evt, cpu); - hv_cpu->synic_message_page = (void *)get_zeroed_page(GFP_ATOMIC); if (hv_cpu->synic_message_page == NULL) { @@ -229,11 +121,6 @@ int hv_synic_alloc(void) INIT_LIST_HEAD(&hv_cpu->chan_list); } - if (direct_mode_enabled && - hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector, - hv_stimer0_isr)) - goto err; - return 0; err: /* @@ -252,7 +139,6 @@ void hv_synic_free(void) struct hv_per_cpu_context *hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); - kfree(hv_cpu->clk_evt); free_page((unsigned long)hv_cpu->synic_event_page); free_page((unsigned long)hv_cpu->synic_message_page); free_page((unsigned long)hv_cpu->post_msg_page); @@ -311,36 +197,9 @@ int hv_synic_init(unsigned int cpu) hv_set_synic_state(sctrl.as_uint64); - /* - * Register the per-cpu clockevent source. - */ - if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) - clockevents_config_and_register(hv_cpu->clk_evt, - HV_TIMER_FREQUENCY, - HV_MIN_DELTA_TICKS, - HV_MAX_MAX_DELTA_TICKS); - return 0; -} - -/* - * hv_synic_clockevents_cleanup - Cleanup clockevent devices - */ -void hv_synic_clockevents_cleanup(void) -{ - int cpu; + hv_stimer_init(cpu); - if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE)) - return; - - if (direct_mode_enabled) - hv_remove_stimer0_irq(stimer0_irq); - - for_each_present_cpu(cpu) { - struct hv_per_cpu_context *hv_cpu - = per_cpu_ptr(hv_context.cpu_context, cpu); - - clockevents_unbind_device(hv_cpu->clk_evt, cpu); - } + return 0; } /* @@ -388,14 +247,7 @@ int hv_synic_cleanup(unsigned int cpu) if (channel_found && vmbus_connection.conn_state == CONNECTED) return -EBUSY; - /* Turn off clockevent device */ - if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) { - struct hv_per_cpu_context *hv_cpu - = this_cpu_ptr(hv_context.cpu_context); - - clockevents_unbind_device(hv_cpu->clk_evt, cpu); - hv_ce_shutdown(hv_cpu->clk_evt); - } + hv_stimer_cleanup(cpu); hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index b8e1ff05f110..362e70e9d145 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -138,7 +138,6 @@ struct hv_per_cpu_context { * per-cpu list of the channels based on their CPU affinity. */ struct list_head chan_list; - struct clock_event_device *clk_evt; }; struct hv_context { @@ -176,8 +175,6 @@ extern int hv_synic_init(unsigned int cpu); extern int hv_synic_cleanup(unsigned int cpu); -extern void hv_synic_clockevents_cleanup(void); - /* Interface */ void hv_ringbuffer_pre_init(struct vmbus_channel *channel); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 92b1874b3eb3..72d5a7cde7ea 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "hyperv_vmbus.h" struct vmbus_dynid { @@ -955,17 +956,6 @@ static void vmbus_onmessage_work(struct work_struct *work) kfree(ctx); } -static void hv_process_timer_expiration(struct hv_message *msg, - struct hv_per_cpu_context *hv_cpu) -{ - struct clock_event_device *dev = hv_cpu->clk_evt; - - if (dev->event_handler) - dev->event_handler(dev); - - vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED); -} - void vmbus_on_msg_dpc(unsigned long data) { struct hv_per_cpu_context *hv_cpu = (void *)data; @@ -1159,9 +1149,10 @@ static void vmbus_isr(void) /* Check if there are actual msgs to be processed */ if (msg->header.message_type != HVMSG_NONE) { - if (msg->header.message_type == HVMSG_TIMER_EXPIRED) - hv_process_timer_expiration(msg, hv_cpu); - else + if (msg->header.message_type == HVMSG_TIMER_EXPIRED) { + hv_stimer0_isr(); + vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED); + } else tasklet_schedule(&hv_cpu->msg_dpc); } @@ -1263,14 +1254,19 @@ static int vmbus_bus_init(void) ret = hv_synic_alloc(); if (ret) goto err_alloc; + + ret = hv_stimer_alloc(VMBUS_MESSAGE_SINT); + if (ret < 0) + goto err_alloc; + /* - * Initialize the per-cpu interrupt state and - * connect to the host. + * Initialize the per-cpu interrupt state and stimer state. + * Then connect to the host. */ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online", hv_synic_init, hv_synic_cleanup); if (ret < 0) - goto err_alloc; + goto err_cpuhp; hyperv_cpuhp_online = ret; ret = vmbus_connect(); @@ -1318,6 +1314,8 @@ static int vmbus_bus_init(void) err_connect: cpuhp_remove_state(hyperv_cpuhp_online); +err_cpuhp: + hv_stimer_free(); err_alloc: hv_synic_free(); hv_remove_vmbus_irq(); @@ -2064,7 +2062,7 @@ static struct acpi_driver vmbus_acpi_driver = { static void hv_kexec_handler(void) { - hv_synic_clockevents_cleanup(); + hv_stimer_global_cleanup(); vmbus_initiate_unload(false); vmbus_connection.conn_state = DISCONNECTED; /* Make sure conn_state is set as hv_synic_cleanup checks for it */ @@ -2075,6 +2073,8 @@ static void hv_kexec_handler(void) static void hv_crash_handler(struct pt_regs *regs) { + int cpu; + vmbus_initiate_unload(true); /* * In crash handler we can't schedule synic cleanup for all CPUs, @@ -2082,7 +2082,9 @@ static void hv_crash_handler(struct pt_regs *regs) * for kdump. */ vmbus_connection.conn_state = DISCONNECTED; - hv_synic_cleanup(smp_processor_id()); + cpu = smp_processor_id(); + hv_stimer_cleanup(cpu); + hv_synic_cleanup(cpu); hyperv_cleanup(); }; @@ -2131,7 +2133,7 @@ static void __exit vmbus_exit(void) hv_remove_kexec_handler(); hv_remove_crash_handler(); vmbus_connection.conn_state = DISCONNECTED; - hv_synic_clockevents_cleanup(); + hv_stimer_global_cleanup(); vmbus_disconnect(); hv_remove_vmbus_irq(); for_each_online_cpu(cpu) { diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h new file mode 100644 index 000000000000..0cd73f7bc992 --- /dev/null +++ b/include/clocksource/hyperv_timer.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Definitions for the clocksource provided by the Hyper-V + * hypervisor to guest VMs, as described in the Hyper-V Top + * Level Functional Spec (TLFS). + * + * Copyright (C) 2019, Microsoft, Inc. + * + * Author: Michael Kelley + */ + +#ifndef __CLKSOURCE_HYPERV_TIMER_H +#define __CLKSOURCE_HYPERV_TIMER_H + +#define HV_MAX_MAX_DELTA_TICKS 0xffffffff +#define HV_MIN_DELTA_TICKS 1 + +/* Routines called by the VMbus driver */ +extern int hv_stimer_alloc(int sint); +extern void hv_stimer_free(void); +extern void hv_stimer_init(unsigned int cpu); +extern void hv_stimer_cleanup(unsigned int cpu); +extern void hv_stimer_global_cleanup(void); +extern void hv_stimer0_isr(void); + +#endif -- cgit v1.2.3 From dd2cb348613b44f9d948b068775e159aad298599 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 1 Jul 2019 04:26:06 +0000 Subject: clocksource/drivers: Continue making Hyper-V clocksource ISA agnostic Continue consolidating Hyper-V clock and timer code into an ISA independent Hyper-V clocksource driver. Move the existing clocksource code under drivers/hv and arch/x86 to the new clocksource driver while separating out the ISA dependencies. Update Hyper-V initialization to call initialization and cleanup routines since the Hyper-V synthetic clock is not independently enumerated in ACPI. Update Hyper-V clocksource users in KVM and VDSO to get definitions from the new include file. No behavior is changed and no new functionality is added. Suggested-by: Marc Zyngier Signed-off-by: Michael Kelley Signed-off-by: Thomas Gleixner Reviewed-by: Vitaly Kuznetsov Cc: "bp@alien8.de" Cc: "will.deacon@arm.com" Cc: "catalin.marinas@arm.com" Cc: "mark.rutland@arm.com" Cc: "linux-arm-kernel@lists.infradead.org" Cc: "gregkh@linuxfoundation.org" Cc: "linux-hyperv@vger.kernel.org" Cc: "olaf@aepfle.de" Cc: "apw@canonical.com" Cc: "jasowang@redhat.com" Cc: "marcelo.cerri@canonical.com" Cc: Sunil Muthuswamy Cc: KY Srinivasan Cc: "sashal@kernel.org" Cc: "vincenzo.frascino@arm.com" Cc: "linux-arch@vger.kernel.org" Cc: "linux-mips@vger.kernel.org" Cc: "linux-kselftest@vger.kernel.org" Cc: "arnd@arndb.de" Cc: "linux@armlinux.org.uk" Cc: "ralf@linux-mips.org" Cc: "paul.burton@mips.com" Cc: "daniel.lezcano@linaro.org" Cc: "salyzyn@android.com" Cc: "pcc@google.com" Cc: "shuah@kernel.org" Cc: "0x7f454c46@gmail.com" <0x7f454c46@gmail.com> Cc: "linux@rasmusvillemoes.dk" Cc: "huw@codeweavers.com" Cc: "sfr@canb.auug.org.au" Cc: "pbonzini@redhat.com" Cc: "rkrcmar@redhat.com" Cc: "kvm@vger.kernel.org" Link: https://lkml.kernel.org/r/1561955054-1838-3-git-send-email-mikelley@microsoft.com --- arch/x86/entry/vdso/vma.c | 2 +- arch/x86/hyperv/hv_init.c | 91 +------------------- arch/x86/include/asm/mshyperv.h | 81 +++--------------- arch/x86/include/asm/vdso/gettimeofday.h | 2 +- arch/x86/kvm/x86.c | 1 + drivers/clocksource/hyperv_timer.c | 139 +++++++++++++++++++++++++++++++ drivers/hv/hv_util.c | 1 + include/clocksource/hyperv_timer.h | 80 ++++++++++++++++++ 8 files changed, 237 insertions(+), 160 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 8db1f594e8b1..349a61d8bf34 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #if defined(CONFIG_X86_64) unsigned int __read_mostly vdso64_enabled = 1; diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 1608050e9df9..0e033ef11a9f 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -17,64 +17,13 @@ #include #include #include -#include #include #include #include - -#ifdef CONFIG_HYPERV_TSCPAGE - -static struct ms_hyperv_tsc_page *tsc_pg; - -struct ms_hyperv_tsc_page *hv_get_tsc_page(void) -{ - return tsc_pg; -} -EXPORT_SYMBOL_GPL(hv_get_tsc_page); - -static u64 read_hv_clock_tsc(struct clocksource *arg) -{ - u64 current_tick = hv_read_tsc_page(tsc_pg); - - if (current_tick == U64_MAX) - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); - - return current_tick; -} - -static struct clocksource hyperv_cs_tsc = { - .name = "hyperv_clocksource_tsc_page", - .rating = 400, - .read = read_hv_clock_tsc, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; -#endif - -static u64 read_hv_clock_msr(struct clocksource *arg) -{ - u64 current_tick; - /* - * Read the partition counter to get the current tick count. This count - * is set to 0 when the partition is created and is incremented in - * 100 nanosecond units. - */ - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); - return current_tick; -} - -static struct clocksource hyperv_cs_msr = { - .name = "hyperv_clocksource_msr", - .rating = 400, - .read = read_hv_clock_msr, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; +#include void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); -struct clocksource *hyperv_cs; -EXPORT_SYMBOL_GPL(hyperv_cs); u32 *hv_vp_index; EXPORT_SYMBOL_GPL(hv_vp_index); @@ -343,42 +292,8 @@ void __init hyperv_init(void) x86_init.pci.arch_init = hv_pci_init; - /* - * Register Hyper-V specific clocksource. - */ -#ifdef CONFIG_HYPERV_TSCPAGE - if (ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) { - union hv_x64_msr_hypercall_contents tsc_msr; - - tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); - if (!tsc_pg) - goto register_msr_cs; - - hyperv_cs = &hyperv_cs_tsc; - - rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); - - tsc_msr.enable = 1; - tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg); - - wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); - - hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK; - - clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); - return; - } -register_msr_cs: -#endif - /* - * For 32 bit guests just use the MSR based mechanism for reading - * the partition counter. - */ - - hyperv_cs = &hyperv_cs_msr; - if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE) - clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); - + /* Register Hyper-V specific clocksource */ + hv_init_clocksource(); return; remove_cpuhp_state: diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index cc60e617931c..f4fa8a9d5d0b 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -105,6 +105,17 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) #define hv_get_crash_ctl(val) \ rdmsrl(HV_X64_MSR_CRASH_CTL, val) +#define hv_get_time_ref_count(val) \ + rdmsrl(HV_X64_MSR_TIME_REF_COUNT, val) + +#define hv_get_reference_tsc(val) \ + rdmsrl(HV_X64_MSR_REFERENCE_TSC, val) +#define hv_set_reference_tsc(val) \ + wrmsrl(HV_X64_MSR_REFERENCE_TSC, val) +#define hv_set_clocksource_vdso(val) \ + ((val).archdata.vclock_mode = VCLOCK_HVCLOCK) +#define hv_get_raw_timer() rdtsc_ordered() + void hyperv_callback_vector(void); void hyperv_reenlightenment_vector(void); #ifdef CONFIG_TRACING @@ -133,7 +144,6 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {} #if IS_ENABLED(CONFIG_HYPERV) -extern struct clocksource *hyperv_cs; extern void *hv_hypercall_pg; extern void __percpu **hyperv_pcpu_input_arg; @@ -387,73 +397,4 @@ static inline int hyperv_flush_guest_mapping_range(u64 as, } #endif /* CONFIG_HYPERV */ -#ifdef CONFIG_HYPERV_TSCPAGE -struct ms_hyperv_tsc_page *hv_get_tsc_page(void); -static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, - u64 *cur_tsc) -{ - u64 scale, offset; - u32 sequence; - - /* - * The protocol for reading Hyper-V TSC page is specified in Hypervisor - * Top-Level Functional Specification ver. 3.0 and above. To get the - * reference time we must do the following: - * - READ ReferenceTscSequence - * A special '0' value indicates the time source is unreliable and we - * need to use something else. The currently published specification - * versions (up to 4.0b) contain a mistake and wrongly claim '-1' - * instead of '0' as the special value, see commit c35b82ef0294. - * - ReferenceTime = - * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset - * - READ ReferenceTscSequence again. In case its value has changed - * since our first reading we need to discard ReferenceTime and repeat - * the whole sequence as the hypervisor was updating the page in - * between. - */ - do { - sequence = READ_ONCE(tsc_pg->tsc_sequence); - if (!sequence) - return U64_MAX; - /* - * Make sure we read sequence before we read other values from - * TSC page. - */ - smp_rmb(); - - scale = READ_ONCE(tsc_pg->tsc_scale); - offset = READ_ONCE(tsc_pg->tsc_offset); - *cur_tsc = rdtsc_ordered(); - - /* - * Make sure we read sequence after we read all other values - * from TSC page. - */ - smp_rmb(); - - } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence); - - return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset; -} - -static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) -{ - u64 cur_tsc; - - return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc); -} - -#else -static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void) -{ - return NULL; -} - -static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, - u64 *cur_tsc) -{ - BUG(); - return U64_MAX; -} -#endif #endif diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index a14039a59abd..ae91429129a6 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -18,7 +18,7 @@ #include #include #include -#include +#include #define __vdso_data (VVAR(_vdso_data)) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8ec676029365..5e1db26b5e15 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -67,6 +67,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index 68a28af31561..ba2c79e6a0ee 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include #include @@ -198,3 +200,140 @@ void hv_stimer_global_cleanup(void) hv_stimer_free(); } EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); + +/* + * Code and definitions for the Hyper-V clocksources. Two + * clocksources are defined: one that reads the Hyper-V defined MSR, and + * the other that uses the TSC reference page feature as defined in the + * TLFS. The MSR version is for compatibility with old versions of + * Hyper-V and 32-bit x86. The TSC reference page version is preferred. + */ + +struct clocksource *hyperv_cs; +EXPORT_SYMBOL_GPL(hyperv_cs); + +#ifdef CONFIG_HYPERV_TSCPAGE + +static struct ms_hyperv_tsc_page *tsc_pg; + +struct ms_hyperv_tsc_page *hv_get_tsc_page(void) +{ + return tsc_pg; +} +EXPORT_SYMBOL_GPL(hv_get_tsc_page); + +static u64 notrace read_hv_sched_clock_tsc(void) +{ + u64 current_tick = hv_read_tsc_page(tsc_pg); + + if (current_tick == U64_MAX) + hv_get_time_ref_count(current_tick); + + return current_tick; +} + +static u64 read_hv_clock_tsc(struct clocksource *arg) +{ + return read_hv_sched_clock_tsc(); +} + +static struct clocksource hyperv_cs_tsc = { + .name = "hyperv_clocksource_tsc_page", + .rating = 400, + .read = read_hv_clock_tsc, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; +#endif + +static u64 notrace read_hv_sched_clock_msr(void) +{ + u64 current_tick; + /* + * Read the partition counter to get the current tick count. This count + * is set to 0 when the partition is created and is incremented in + * 100 nanosecond units. + */ + hv_get_time_ref_count(current_tick); + return current_tick; +} + +static u64 read_hv_clock_msr(struct clocksource *arg) +{ + return read_hv_sched_clock_msr(); +} + +static struct clocksource hyperv_cs_msr = { + .name = "hyperv_clocksource_msr", + .rating = 400, + .read = read_hv_clock_msr, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +#ifdef CONFIG_HYPERV_TSCPAGE +static bool __init hv_init_tsc_clocksource(void) +{ + u64 tsc_msr; + phys_addr_t phys_addr; + + if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) + return false; + + tsc_pg = vmalloc(PAGE_SIZE); + if (!tsc_pg) + return false; + + hyperv_cs = &hyperv_cs_tsc; + phys_addr = page_to_phys(vmalloc_to_page(tsc_pg)); + + /* + * The Hyper-V TLFS specifies to preserve the value of reserved + * bits in registers. So read the existing value, preserve the + * low order 12 bits, and add in the guest physical address + * (which already has at least the low 12 bits set to zero since + * it is page aligned). Also set the "enable" bit, which is bit 0. + */ + hv_get_reference_tsc(tsc_msr); + tsc_msr &= GENMASK_ULL(11, 0); + tsc_msr = tsc_msr | 0x1 | (u64)phys_addr; + hv_set_reference_tsc(tsc_msr); + + hv_set_clocksource_vdso(hyperv_cs_tsc); + clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); + + /* sched_clock_register is needed on ARM64 but is a no-op on x86 */ + sched_clock_register(read_hv_sched_clock_tsc, 64, HV_CLOCK_HZ); + return true; +} +#else +static bool __init hv_init_tsc_clocksource(void) +{ + return false; +} +#endif + + +void __init hv_init_clocksource(void) +{ + /* + * Try to set up the TSC page clocksource. If it succeeds, we're + * done. Otherwise, set up the MSR clocksoruce. At least one of + * these will always be available except on very old versions of + * Hyper-V on x86. In that case we won't have a Hyper-V + * clocksource, but Linux will still run with a clocksource based + * on the emulated PIT or LAPIC timer. + */ + if (hv_init_tsc_clocksource()) + return; + + if (!(ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE)) + return; + + hyperv_cs = &hyperv_cs_msr; + clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); + + /* sched_clock_register is needed on ARM64 but is a no-op on x86 */ + sched_clock_register(read_hv_sched_clock_msr, 64, HV_CLOCK_HZ); +} +EXPORT_SYMBOL_GPL(hv_init_clocksource); diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 7d3d31f099ea..e32681ee7b9f 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "hyperv_vmbus.h" diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h index 0cd73f7bc992..a821deb8ecb2 100644 --- a/include/clocksource/hyperv_timer.h +++ b/include/clocksource/hyperv_timer.h @@ -13,6 +13,10 @@ #ifndef __CLKSOURCE_HYPERV_TIMER_H #define __CLKSOURCE_HYPERV_TIMER_H +#include +#include +#include + #define HV_MAX_MAX_DELTA_TICKS 0xffffffff #define HV_MIN_DELTA_TICKS 1 @@ -24,4 +28,80 @@ extern void hv_stimer_cleanup(unsigned int cpu); extern void hv_stimer_global_cleanup(void); extern void hv_stimer0_isr(void); +#if IS_ENABLED(CONFIG_HYPERV) +extern struct clocksource *hyperv_cs; +extern void hv_init_clocksource(void); +#endif /* CONFIG_HYPERV */ + +#ifdef CONFIG_HYPERV_TSCPAGE +extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void); + +static inline notrace u64 +hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc) +{ + u64 scale, offset; + u32 sequence; + + /* + * The protocol for reading Hyper-V TSC page is specified in Hypervisor + * Top-Level Functional Specification ver. 3.0 and above. To get the + * reference time we must do the following: + * - READ ReferenceTscSequence + * A special '0' value indicates the time source is unreliable and we + * need to use something else. The currently published specification + * versions (up to 4.0b) contain a mistake and wrongly claim '-1' + * instead of '0' as the special value, see commit c35b82ef0294. + * - ReferenceTime = + * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset + * - READ ReferenceTscSequence again. In case its value has changed + * since our first reading we need to discard ReferenceTime and repeat + * the whole sequence as the hypervisor was updating the page in + * between. + */ + do { + sequence = READ_ONCE(tsc_pg->tsc_sequence); + if (!sequence) + return U64_MAX; + /* + * Make sure we read sequence before we read other values from + * TSC page. + */ + smp_rmb(); + + scale = READ_ONCE(tsc_pg->tsc_scale); + offset = READ_ONCE(tsc_pg->tsc_offset); + *cur_tsc = hv_get_raw_timer(); + + /* + * Make sure we read sequence after we read all other values + * from TSC page. + */ + smp_rmb(); + + } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence); + + return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset; +} + +static inline notrace u64 +hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) +{ + u64 cur_tsc; + + return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc); +} + +#else /* CONFIG_HYPERV_TSC_PAGE */ +static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void) +{ + return NULL; +} + +static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, + u64 *cur_tsc) +{ + return U64_MAX; +} +#endif /* CONFIG_HYPERV_TSCPAGE */ + #endif -- cgit v1.2.3