diff options
author | Maxime Ripard | 2023-05-09 15:03:40 +0200 |
---|---|---|
committer | Maxime Ripard | 2023-05-09 15:03:40 +0200 |
commit | ff32fcca64437f679a2bf1c0a19d5def389a18e2 (patch) | |
tree | 122863d5d6159b30fd6834cbe599f8ce1b9e8144 /arch/loongarch | |
parent | 79c87edd18ec49f5b6fb40175bd1b1fea9398fdb (diff) | |
parent | ac9a78681b921877518763ba0e89202254349d1b (diff) |
Merge drm/drm-next into drm-misc-next
Start the 6.5 release cycle.
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Diffstat (limited to 'arch/loongarch')
53 files changed, 1813 insertions, 366 deletions
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 7fd51257e0ed..d38b066fc931 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -10,6 +10,7 @@ config LOONGARCH select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI + select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST @@ -53,8 +54,8 @@ config LOONGARCH select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT - select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP select ARCH_WANT_LD_ORPHAN_WARN + select ARCH_WANT_OPTIMIZE_VMEMMAP select ARCH_WANTS_NO_INSTR select BUILDTIME_TABLE_SORT select COMMON_CLK @@ -80,6 +81,7 @@ config LOONGARCH select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select GPIOLIB + select HAS_IOPORT select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER @@ -92,6 +94,7 @@ config LOONGARCH select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_ARGS + select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN @@ -99,6 +102,7 @@ config LOONGARCH select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_ARG_ACCESS_API + select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GENERIC_VDSO @@ -117,6 +121,8 @@ config LOONGARCH select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ + select HAVE_SAMPLE_FTRACE_DIRECT + select HAVE_SAMPLE_FTRACE_DIRECT_MULTI select HAVE_SETUP_PER_CPU_AREA if NUMA select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS @@ -420,12 +426,9 @@ config NODES_SHIFT config ARCH_FORCE_MAX_ORDER int "Maximum zone order" - range 14 64 if PAGE_SIZE_64KB - default "14" if PAGE_SIZE_64KB - range 12 64 if PAGE_SIZE_16KB - default "12" if PAGE_SIZE_16KB - range 11 64 - default "11" + default "13" if PAGE_SIZE_64KB + default "11" if PAGE_SIZE_16KB + default "10" help The kernel memory allocator divides physically contiguous memory blocks into "zones", where each zone is a power of two number of @@ -434,9 +437,6 @@ config ARCH_FORCE_MAX_ORDER blocks of physically contiguous memory, then you may need to increase this value. - This config option is actually maximum order plus one. For example, - a value of 11 means that the largest free memory block is 2^10 pages. - The page size is not necessarily 4KB. Keep this in mind when choosing a value for this option. @@ -447,6 +447,22 @@ config ARCH_IOREMAP protection support. However, you can enable LoongArch DMW-based ioremap() for better performance. +config ARCH_WRITECOMBINE + bool "Enable WriteCombine (WUC) for ioremap()" + help + LoongArch maintains cache coherency in hardware, but when paired + with LS7A chipsets the WUC attribute (Weak-ordered UnCached, which + is similar to WriteCombine) is out of the scope of cache coherency + machanism for PCIe devices (this is a PCIe protocol violation, which + may be fixed in newer chipsets). + + This means WUC can only used for write-only memory regions now, so + this option is disabled by default, making WUC silently fallback to + SUC for ioremap(). You can enable this option if the kernel is ensured + to run on hardware without this bug. + + You can override this setting via writecombine=on/off boot parameter. + config ARCH_STRICT_ALIGN bool "Enable -mstrict-align to prevent unaligned accesses" if EXPERT default y diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index f71edf574101..a27e264bdaa5 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -115,6 +115,8 @@ endif libs-y += arch/loongarch/lib/ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a +drivers-y += arch/loongarch/crypto/ + # suspend and hibernation support drivers-$(CONFIG_PM) += arch/loongarch/power/ diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index e18213f01cc4..6cd26dd3c134 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -487,7 +487,6 @@ CONFIG_CHELSIO_T4=m CONFIG_E1000=y CONFIG_E1000E=y CONFIG_IGB=y -CONFIG_IXGB=y CONFIG_IXGBE=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MELLANOX is not set diff --git a/arch/loongarch/crypto/Kconfig b/arch/loongarch/crypto/Kconfig new file mode 100644 index 000000000000..200a6e8b43b1 --- /dev/null +++ b/arch/loongarch/crypto/Kconfig @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 + +menu "Accelerated Cryptographic Algorithms for CPU (loongarch)" + +config CRYPTO_CRC32_LOONGARCH + tristate "CRC32c and CRC32" + select CRC32 + select CRYPTO_HASH + help + CRC32c and CRC32 CRC algorithms + + Architecture: LoongArch with CRC32 instructions + +endmenu diff --git a/arch/loongarch/crypto/Makefile b/arch/loongarch/crypto/Makefile new file mode 100644 index 000000000000..d22613d27ce9 --- /dev/null +++ b/arch/loongarch/crypto/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for LoongArch crypto files.. +# + +obj-$(CONFIG_CRYPTO_CRC32_LOONGARCH) += crc32-loongarch.o diff --git a/arch/loongarch/crypto/crc32-loongarch.c b/arch/loongarch/crypto/crc32-loongarch.c new file mode 100644 index 000000000000..1f2a2c3839bc --- /dev/null +++ b/arch/loongarch/crypto/crc32-loongarch.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * crc32.c - CRC32 and CRC32C using LoongArch crc* instructions + * + * Module based on mips/crypto/crc32-mips.c + * + * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org> + * Copyright (C) 2018 MIPS Tech, LLC + * Copyright (C) 2020-2023 Loongson Technology Corporation Limited + */ + +#include <linux/module.h> +#include <crypto/internal/hash.h> + +#include <asm/cpu-features.h> +#include <asm/unaligned.h> + +#define _CRC32(crc, value, size, type) \ +do { \ + __asm__ __volatile__( \ + #type ".w." #size ".w" " %0, %1, %0\n\t"\ + : "+r" (crc) \ + : "r" (value) \ + : "memory"); \ +} while (0) + +#define CRC32(crc, value, size) _CRC32(crc, value, size, crc) +#define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc) + +static u32 crc32_loongarch_hw(u32 crc_, const u8 *p, unsigned int len) +{ + u32 crc = crc_; + + while (len >= sizeof(u64)) { + u64 value = get_unaligned_le64(p); + + CRC32(crc, value, d); + p += sizeof(u64); + len -= sizeof(u64); + } + + if (len & sizeof(u32)) { + u32 value = get_unaligned_le32(p); + + CRC32(crc, value, w); + p += sizeof(u32); + len -= sizeof(u32); + } + + if (len & sizeof(u16)) { + u16 value = get_unaligned_le16(p); + + CRC32(crc, value, h); + p += sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value = *p++; + + CRC32(crc, value, b); + } + + return crc; +} + +static u32 crc32c_loongarch_hw(u32 crc_, const u8 *p, unsigned int len) +{ + u32 crc = crc_; + + while (len >= sizeof(u64)) { + u64 value = get_unaligned_le64(p); + + CRC32C(crc, value, d); + p += sizeof(u64); + len -= sizeof(u64); + } + + if (len & sizeof(u32)) { + u32 value = get_unaligned_le32(p); + + CRC32C(crc, value, w); + p += sizeof(u32); + len -= sizeof(u32); + } + + if (len & sizeof(u16)) { + u16 value = get_unaligned_le16(p); + + CRC32C(crc, value, h); + p += sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value = *p++; + + CRC32C(crc, value, b); + } + + return crc; +} + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +struct chksum_ctx { + u32 key; +}; + +struct chksum_desc_ctx { + u32 crc; +}; + +static int chksum_init(struct shash_desc *desc) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = mctx->key; + + return 0; +} + +/* + * Setting the seed allows arbitrary accumulators and flexible XOR policy + * If your algorithm starts with ~0, then XOR with ~0 before you set the seed. + */ +static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(tfm); + + if (keylen != sizeof(mctx->key)) + return -EINVAL; + + mctx->key = get_unaligned_le32(key); + + return 0; +} + +static int chksum_update(struct shash_desc *desc, const u8 *data, unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = crc32_loongarch_hw(ctx->crc, data, length); + return 0; +} + +static int chksumc_update(struct shash_desc *desc, const u8 *data, unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = crc32c_loongarch_hw(ctx->crc, data, length); + return 0; +} + +static int chksum_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + put_unaligned_le32(ctx->crc, out); + return 0; +} + +static int chksumc_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + put_unaligned_le32(~ctx->crc, out); + return 0; +} + +static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) +{ + put_unaligned_le32(crc32_loongarch_hw(crc, data, len), out); + return 0; +} + +static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) +{ + put_unaligned_le32(~crc32c_loongarch_hw(crc, data, len), out); + return 0; +} + +static int chksum_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(ctx->crc, data, len, out); +} + +static int chksumc_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksumc_finup(ctx->crc, data, len, out); +} + +static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); + + return __chksum_finup(mctx->key, data, length, out); +} + +static int chksumc_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); + + return __chksumc_finup(mctx->key, data, length, out); +} + +static int chksum_cra_init(struct crypto_tfm *tfm) +{ + struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = 0; + return 0; +} + +static int chksumc_cra_init(struct crypto_tfm *tfm) +{ + struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = ~0; + return 0; +} + +static struct shash_alg crc32_alg = { + .digestsize = CHKSUM_DIGEST_SIZE, + .setkey = chksum_setkey, + .init = chksum_init, + .update = chksum_update, + .final = chksum_final, + .finup = chksum_finup, + .digest = chksum_digest, + .descsize = sizeof(struct chksum_desc_ctx), + .base = { + .cra_name = "crc32", + .cra_driver_name = "crc32-loongarch", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_alignmask = 0, + .cra_ctxsize = sizeof(struct chksum_ctx), + .cra_module = THIS_MODULE, + .cra_init = chksum_cra_init, + } +}; + +static struct shash_alg crc32c_alg = { + .digestsize = CHKSUM_DIGEST_SIZE, + .setkey = chksum_setkey, + .init = chksum_init, + .update = chksumc_update, + .final = chksumc_final, + .finup = chksumc_finup, + .digest = chksumc_digest, + .descsize = sizeof(struct chksum_desc_ctx), + .base = { + .cra_name = "crc32c", + .cra_driver_name = "crc32c-loongarch", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_alignmask = 0, + .cra_ctxsize = sizeof(struct chksum_ctx), + .cra_module = THIS_MODULE, + .cra_init = chksumc_cra_init, + } +}; + +static int __init crc32_mod_init(void) +{ + int err; + + if (!cpu_has(CPU_FEATURE_CRC32)) + return 0; + + err = crypto_register_shash(&crc32_alg); + if (err) + return err; + + err = crypto_register_shash(&crc32c_alg); + if (err) + return err; + + return 0; +} + +static void __exit crc32_mod_exit(void) +{ + if (!cpu_has(CPU_FEATURE_CRC32)) + return; + + crypto_unregister_shash(&crc32_alg); + crypto_unregister_shash(&crc32c_alg); +} + +module_init(crc32_mod_init); +module_exit(crc32_mod_exit); + +MODULE_AUTHOR("Min Zhou <zhoumin@loongson.cn>"); +MODULE_AUTHOR("Huacai Chen <chenhuacai@loongson.cn>"); +MODULE_DESCRIPTION("CRC32 and CRC32C using LoongArch crc* instructions"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 4198753aa1d0..976a810352c6 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -41,8 +41,11 @@ extern void loongarch_suspend_enter(void); static inline unsigned long acpi_get_wakeup_address(void) { +#ifdef CONFIG_SUSPEND extern void loongarch_wakeup_start(void); return (unsigned long)loongarch_wakeup_start; +#endif + return 0UL; } #endif /* _ASM_LOONGARCH_ACPI_H */ diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index 8fb699b4d40a..5c9c03bdf915 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -71,9 +71,9 @@ extern unsigned long vm_map_base; #define _ATYPE32_ int #define _ATYPE64_ __s64 #ifdef CONFIG_64BIT -#define _CONST64_(x) x ## L +#define _CONST64_(x) x ## UL #else -#define _CONST64_(x) x ## LL +#define _CONST64_(x) x ## ULL #endif #endif diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h index 0051b526ac6d..c60796869b2b 100644 --- a/arch/loongarch/include/asm/bootinfo.h +++ b/arch/loongarch/include/asm/bootinfo.h @@ -13,7 +13,6 @@ const char *get_system_type(void); extern void init_environ(void); extern void memblock_init(void); extern void platform_init(void); -extern void plat_swiotlb_setup(void); extern int __init init_numa_memory(void); struct loongson_board_info { diff --git a/arch/loongarch/include/asm/checksum.h b/arch/loongarch/include/asm/checksum.h new file mode 100644 index 000000000000..cabbf6af44c4 --- /dev/null +++ b/arch/loongarch/include/asm/checksum.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2016 ARM Ltd. + * Copyright (C) 2023 Loongson Technology Corporation Limited + */ +#ifndef __ASM_CHECKSUM_H +#define __ASM_CHECKSUM_H + +#include <linux/bitops.h> +#include <linux/in6.h> + +#define _HAVE_ARCH_IPV6_CSUM +__sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum sum); + +/* + * turns a 32-bit partial checksum (e.g. from csum_partial) into a + * 1's complement 16-bit checksum. + */ +static inline __sum16 csum_fold(__wsum sum) +{ + u32 tmp = (__force u32)sum; + + /* + * swap the two 16-bit halves of sum + * if there is a carry from adding the two 16-bit halves, + * it will carry from the lower half into the upper half, + * giving us the correct sum in the upper half. + */ + return (__force __sum16)(~(tmp + rol32(tmp, 16)) >> 16); +} +#define csum_fold csum_fold + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. ihl is the number + * of 32-bit words and is always >= 5. + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + u64 sum; + __uint128_t tmp; + int n = ihl; /* we want it signed */ + + tmp = *(const __uint128_t *)iph; + iph += 16; + n -= 4; + tmp += ((tmp >> 64) | (tmp << 64)); + sum = tmp >> 64; + do { + sum += *(const u32 *)iph; + iph += 4; + } while (--n > 0); + + sum += ror64(sum, 32); + return csum_fold((__force __wsum)(sum >> 32)); +} +#define ip_fast_csum ip_fast_csum + +extern unsigned int do_csum(const unsigned char *buff, int len); +#define do_csum do_csum + +#include <asm-generic/checksum.h> + +#endif /* __ASM_CHECKSUM_H */ diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h index ecfa6cf79806..979fde61bba8 100644 --- a/arch/loongarch/include/asm/cmpxchg.h +++ b/arch/loongarch/include/asm/cmpxchg.h @@ -62,7 +62,7 @@ static inline unsigned int __xchg_small(volatile void *ptr, unsigned int val, } static __always_inline unsigned long -__xchg(volatile void *ptr, unsigned long x, int size) +__arch_xchg(volatile void *ptr, unsigned long x, int size) { switch (size) { case 1: @@ -87,7 +87,7 @@ __xchg(volatile void *ptr, unsigned long x, int size) __typeof__(*(ptr)) __res; \ \ __res = (__typeof__(*(ptr))) \ - __xchg((ptr), (unsigned long)(x), sizeof(*(ptr))); \ + __arch_xchg((ptr), (unsigned long)(x), sizeof(*(ptr))); \ \ __res; \ }) diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index b07974218393..f6177f133477 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -42,6 +42,7 @@ #define cpu_has_fpu cpu_opt(LOONGARCH_CPU_FPU) #define cpu_has_lsx cpu_opt(LOONGARCH_CPU_LSX) #define cpu_has_lasx cpu_opt(LOONGARCH_CPU_LASX) +#define cpu_has_crc32 cpu_opt(LOONGARCH_CPU_CRC32) #define cpu_has_complex cpu_opt(LOONGARCH_CPU_COMPLEX) #define cpu_has_crypto cpu_opt(LOONGARCH_CPU_CRYPTO) #define cpu_has_lvz cpu_opt(LOONGARCH_CPU_LVZ) diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index c3da91759472..88773d849e33 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -78,25 +78,26 @@ enum cpu_type_enum { #define CPU_FEATURE_FPU 3 /* CPU has FPU */ #define CPU_FEATURE_LSX 4 /* CPU has LSX (128-bit SIMD) */ #define CPU_FEATURE_LASX 5 /* CPU has LASX (256-bit SIMD) */ -#define CPU_FEATURE_COMPLEX 6 /* CPU has Complex instructions */ -#define CPU_FEATURE_CRYPTO 7 /* CPU has Crypto instructions */ -#define CPU_FEATURE_LVZ 8 /* CPU has Virtualization extension */ -#define CPU_FEATURE_LBT_X86 9 /* CPU has X86 Binary Translation */ -#define CPU_FEATURE_LBT_ARM 10 /* CPU has ARM Binary Translation */ -#define CPU_FEATURE_LBT_MIPS 11 /* CPU has MIPS Binary Translation */ -#define CPU_FEATURE_TLB 12 /* CPU has TLB */ -#define CPU_FEATURE_CSR 13 /* CPU has CSR */ -#define CPU_FEATURE_WATCH 14 /* CPU has watchpoint registers */ -#define CPU_FEATURE_VINT 15 /* CPU has vectored interrupts */ -#define CPU_FEATURE_CSRIPI 16 /* CPU has CSR-IPI */ -#define CPU_FEATURE_EXTIOI 17 /* CPU has EXT-IOI */ -#define CPU_FEATURE_PREFETCH 18 /* CPU has prefetch instructions */ -#define CPU_FEATURE_PMP 19 /* CPU has perfermance counter */ -#define CPU_FEATURE_SCALEFREQ 20 /* CPU supports cpufreq scaling */ -#define CPU_FEATURE_FLATMODE 21 /* CPU has flat mode */ -#define CPU_FEATURE_EIODECODE 22 /* CPU has EXTIOI interrupt pin decode mode */ -#define CPU_FEATURE_GUESTID 23 /* CPU has GuestID feature */ -#define CPU_FEATURE_HYPERVISOR 24 /* CPU has hypervisor (running in VM) */ +#define CPU_FEATURE_CRC32 6 /* CPU has CRC32 instructions */ +#define CPU_FEATURE_COMPLEX 7 /* CPU has Complex instructions */ +#define CPU_FEATURE_CRYPTO 8 /* CPU has Crypto instructions */ +#define CPU_FEATURE_LVZ 9 /* CPU has Virtualization extension */ +#define CPU_FEATURE_LBT_X86 10 /* CPU has X86 Binary Translation */ +#define CPU_FEATURE_LBT_ARM 11 /* CPU has ARM Binary Translation */ +#define CPU_FEATURE_LBT_MIPS 12 /* CPU has MIPS Binary Translation */ +#define CPU_FEATURE_TLB 13 /* CPU has TLB */ +#define CPU_FEATURE_CSR 14 /* CPU has CSR */ +#define CPU_FEATURE_WATCH 15 /* CPU has watchpoint registers */ +#define CPU_FEATURE_VINT 16 /* CPU has vectored interrupts */ +#define CPU_FEATURE_CSRIPI 17 /* CPU has CSR-IPI */ +#define CPU_FEATURE_EXTIOI 18 /* CPU has EXT-IOI */ +#define CPU_FEATURE_PREFETCH 19 /* CPU has prefetch instructions */ +#define CPU_FEATURE_PMP 20 /* CPU has perfermance counter */ +#define CPU_FEATURE_SCALEFREQ 21 /* CPU supports cpufreq scaling */ +#define CPU_FEATURE_FLATMODE 22 /* CPU has flat mode */ +#define CPU_FEATURE_EIODECODE 23 /* CPU has EXTIOI interrupt pin decode mode */ +#define CPU_FEATURE_GUESTID 24 /* CPU has GuestID feature */ +#define CPU_FEATURE_HYPERVISOR 25 /* CPU has hypervisor (running in VM) */ #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) @@ -104,6 +105,7 @@ enum cpu_type_enum { #define LOONGARCH_CPU_FPU BIT_ULL(CPU_FEATURE_FPU) #define LOONGARCH_CPU_LSX BIT_ULL(CPU_FEATURE_LSX) #define LOONGARCH_CPU_LASX BIT_ULL(CPU_FEATURE_LASX) +#define LOONGARCH_CPU_CRC32 BIT_ULL(CPU_FEATURE_CRC32) #define LOONGARCH_CPU_COMPLEX BIT_ULL(CPU_FEATURE_COMPLEX) #define LOONGARCH_CPU_CRYPTO BIT_ULL(CPU_FEATURE_CRYPTO) #define LOONGARCH_CPU_LVZ BIT_ULL(CPU_FEATURE_LVZ) diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index 358b254d9c1d..192f8e35d912 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -21,6 +21,9 @@ struct sigcontext; +extern void kernel_fpu_begin(void); +extern void kernel_fpu_end(void); + extern void _init_fpu(unsigned int); extern void _save_fp(struct loongarch_fpu *); extern void _restore_fp(struct loongarch_fpu *); diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index 3418d32d4fc7..23e2ba78dcb0 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -54,9 +54,46 @@ static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs * return &fregs->regs; } +static __always_inline unsigned long +ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs) +{ + return instruction_pointer(&fregs->regs); +} + +static __always_inline void +ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip) +{ + regs_set_return_value(&fregs->regs, ip); +} + +#define ftrace_regs_get_argument(fregs, n) \ + regs_get_kernel_argument(&(fregs)->regs, n) +#define ftrace_regs_get_stack_pointer(fregs) \ + kernel_stack_pointer(&(fregs)->regs) +#define ftrace_regs_return_value(fregs) \ + regs_return_value(&(fregs)->regs) +#define ftrace_regs_set_return_value(fregs, ret) \ + regs_set_return_value(&(fregs)->regs, ret) +#define ftrace_override_function_with_return(fregs) \ + override_function_with_return(&(fregs)->regs) +#define ftrace_regs_query_register_offset(name) \ + regs_query_register_offset(name) + #define ftrace_graph_func ftrace_graph_func void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +static inline void +__arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) +{ + regs->regs[13] = addr; /* t1 */ +} + +#define arch_ftrace_set_direct_caller(fregs, addr) \ + __arch_ftrace_set_direct_caller(&(fregs)->regs, addr) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index a04fe755d719..b09887ffcd15 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -121,6 +121,8 @@ enum reg2bstrd_op { }; enum reg3_op { + asrtle_op = 0x02, + asrtgt_op = 0x03, addw_op = 0x20, addd_op = 0x21, subw_op = 0x22, @@ -176,6 +178,30 @@ enum reg3_op { amord_op = 0x70c7, amxorw_op = 0x70c8, amxord_op = 0x70c9, + fldgts_op = 0x70e8, + fldgtd_op = 0x70e9, + fldles_op = 0x70ea, + fldled_op = 0x70eb, + fstgts_op = 0x70ec, + fstgtd_op = 0x70ed, + fstles_op = 0x70ee, + fstled_op = 0x70ef, + ldgtb_op = 0x70f0, + ldgth_op = 0x70f1, + ldgtw_op = 0x70f2, + ldgtd_op = 0x70f3, + ldleb_op = 0x70f4, + ldleh_op = 0x70f5, + ldlew_op = 0x70f6, + ldled_op = 0x70f7, + stgtb_op = 0x70f8, + stgth_op = 0x70f9, + stgtw_op = 0x70fa, + stgtd_op = 0x70fb, + stleb_op = 0x70fc, + stleh_op = 0x70fd, + stlew_op = 0x70fe, + stled_op = 0x70ff, }; enum reg3sa2_op { diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index 402a7d9e3a53..545e2708fbf7 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -54,8 +54,10 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, * @offset: bus address of the memory * @size: size of the resource to map */ +extern pgprot_t pgprot_wc; + #define ioremap_wc(offset, size) \ - ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL_WUC)) + ioremap_prot((offset), (size), pgprot_val(pgprot_wc)) #define ioremap_cache(offset, size) \ ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL)) diff --git a/arch/loongarch/include/asm/local.h b/arch/loongarch/include/asm/local.h index 65fbbae9fc4d..83e995b30e47 100644 --- a/arch/loongarch/include/asm/local.h +++ b/arch/loongarch/include/asm/local.h @@ -56,8 +56,17 @@ static inline long local_sub_return(long i, local_t *l) return result; } -#define local_cmpxchg(l, o, n) \ - ((long)cmpxchg_local(&((l)->a.counter), (o), (n))) +static inline long local_cmpxchg(local_t *l, long old, long new) +{ + return cmpxchg_local(&l->a.counter, old, new); +} + +static inline bool local_try_cmpxchg(local_t *l, long *old, long new) +{ + typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old; + return try_cmpxchg_local(&l->a.counter, __old, new); +} + #define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n))) /** diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 65b7dcdea16d..b3323ab5b78d 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -117,7 +117,7 @@ static inline u32 read_cpucfg(u32 reg) #define CPUCFG1_EP BIT(22) #define CPUCFG1_RPLV BIT(23) #define CPUCFG1_HUGEPG BIT(24) -#define CPUCFG1_IOCSRBRD BIT(25) +#define CPUCFG1_CRC32 BIT(25) #define CPUCFG1_MSGINT BIT(26) #define LOONGARCH_CPUCFG2 0x2 @@ -311,8 +311,8 @@ static __always_inline void iocsr_write64(u64 val, u32 reg) #define CSR_ECFG_VS_WIDTH 3 #define CSR_ECFG_VS (_ULCAST_(0x7) << CSR_ECFG_VS_SHIFT) #define CSR_ECFG_IM_SHIFT 0 -#define CSR_ECFG_IM_WIDTH 13 -#define CSR_ECFG_IM (_ULCAST_(0x1fff) << CSR_ECFG_IM_SHIFT) +#define CSR_ECFG_IM_WIDTH 14 +#define CSR_ECFG_IM (_ULCAST_(0x3fff) << CSR_ECFG_IM_SHIFT) #define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */ #define CSR_ESTAT_ESUBCODE_SHIFT 22 @@ -322,8 +322,8 @@ static __always_inline void iocsr_write64(u64 val, u32 reg) #define CSR_ESTAT_EXC_WIDTH 6 #define CSR_ESTAT_EXC (_ULCAST_(0x3f) << CSR_ESTAT_EXC_SHIFT) #define CSR_ESTAT_IS_SHIFT 0 -#define CSR_ESTAT_IS_WIDTH 15 -#define CSR_ESTAT_IS (_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT) +#define CSR_ESTAT_IS_WIDTH 14 +#define CSR_ESTAT_IS (_ULCAST_(0x3fff) << CSR_ESTAT_IS_SHIFT) #define LOONGARCH_CSR_ERA 0x6 /* ERA */ @@ -423,9 +423,9 @@ static __always_inline void iocsr_write64(u64 val, u32 reg) #define CSR_ASID_ASID_WIDTH 10 #define CSR_ASID_ASID (_ULCAST_(0x3ff) << CSR_ASID_ASID_SHIFT) -#define LOONGARCH_CSR_PGDL 0x19 /* Page table base address when VA[47] = 0 */ +#define LOONGARCH_CSR_PGDL 0x19 /* Page table base address when VA[VALEN-1] = 0 */ -#define LOONGARCH_CSR_PGDH 0x1a /* Page table base address when VA[47] = 1 */ +#define LOONGARCH_CSR_PGDH 0x1a /* Page table base address when VA[VALEN-1] = 1 */ #define LOONGARCH_CSR_PGD 0x1b /* Page table base */ @@ -1090,7 +1090,7 @@ static __always_inline void iocsr_write64(u64 val, u32 reg) #define ECFGF_IPI (_ULCAST_(1) << ECFGB_IPI) #define ECFGF(hwirq) (_ULCAST_(1) << hwirq) -#define ESTATF_IP 0x00001fff +#define ESTATF_IP 0x00003fff #define LOONGARCH_IOCSR_FEATURES 0x8 #define IOCSRF_TEMP BIT_ULL(0) @@ -1397,7 +1397,7 @@ __BUILD_CSR_OP(tlbidx) #define EXSUBCODE_ADEF 0 /* Fetch Instruction */ #define EXSUBCODE_ADEM 1 /* Access Memory*/ #define EXCCODE_ALE 9 /* Unalign Access */ -#define EXCCODE_OOB 10 /* Out of bounds */ +#define EXCCODE_BCE 10 /* Bounds Check Error */ #define EXCCODE_SYS 11 /* System call */ #define EXCCODE_BP 12 /* Breakpoint */ #define EXCCODE_INE 13 /* Inst. Not Exist */ @@ -1408,33 +1408,38 @@ __BUILD_CSR_OP(tlbidx) #define EXCCODE_FPE 18 /* Floating Point Exception */ #define EXCSUBCODE_FPE 0 /* Floating Point Exception */ #define EXCSUBCODE_VFPE 1 /* Vector Exception */ -#define EXCCODE_WATCH 19 /* Watch address reference */ +#define EXCCODE_WATCH 19 /* WatchPoint Exception */ + #define EXCSUBCODE_WPEF 0 /* ... on Instruction Fetch */ + #define EXCSUBCODE_WPEM 1 /* ... on Memory Accesses */ #define EXCCODE_BTDIS 20 /* Binary Trans. Disabled */ #define EXCCODE_BTE 21 /* Binary Trans. Exception */ -#define EXCCODE_PSI 22 /* Guest Privileged Error */ -#define EXCCODE_HYP 23 /* Hypercall */ +#define EXCCODE_GSPR 22 /* Guest Privileged Error */ +#define EXCCODE_HVC 23 /* Hypercall */ #define EXCCODE_GCM 24 /* Guest CSR modified */ #define EXCSUBCODE_GCSC 0 /* Software caused */ #define EXCSUBCODE_GCHC 1 /* Hardware caused */ #define EXCCODE_SE 25 /* Security */ -#define EXCCODE_INT_START 64 -#define EXCCODE_SIP0 64 -#define EXCCODE_SIP1 65 -#define EXCCODE_IP0 66 -#define EXCCODE_IP1 67 -#define EXCCODE_IP2 68 -#define EXCCODE_IP3 69 -#define EXCCODE_IP4 70 -#define EXCCODE_IP5 71 -#define EXCCODE_IP6 72 -#define EXCCODE_IP7 73 -#define EXCCODE_PMC 74 /* Performance Counter */ -#define EXCCODE_TIMER 75 -#define EXCCODE_IPI 76 -#define EXCCODE_NMI 77 -#define EXCCODE_INT_END 78 -#define EXCCODE_INT_NUM (EXCCODE_INT_END - EXCCODE_INT_START) +/* Interrupt numbers */ +#define INT_SWI0 0 /* Software Interrupts */ +#define INT_SWI1 1 +#define INT_HWI0 2 /* Hardware Interrupts */ +#define INT_HWI1 3 +#define INT_HWI2 4 +#define INT_HWI3 5 +#define INT_HWI4 6 +#define INT_HWI5 7 +#define INT_HWI6 8 +#define INT_HWI7 9 +#define INT_PCOV 10 /* Performance Counter Overflow */ +#define INT_TI 11 /* Timer */ +#define INT_IPI 12 +#define INT_NMI 13 + +/* ExcCodes corresponding to interrupts */ +#define EXCCODE_INT_NUM (INT_NMI + 1) +#define EXCCODE_INT_START 64 +#define EXCCODE_INT_END (EXCCODE_INT_START + EXCCODE_INT_NUM - 1) /* FPU register names */ #define LOONGARCH_FCSR0 $r0 diff --git a/arch/loongarch/include/asm/module.lds.h b/arch/loongarch/include/asm/module.lds.h index 438f09d4ccf4..88554f92e010 100644 --- a/arch/loongarch/include/asm/module.lds.h +++ b/arch/loongarch/include/asm/module.lds.h @@ -2,8 +2,8 @@ /* Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ SECTIONS { . = ALIGN(4); - .got : { BYTE(0) } - .plt : { BYTE(0) } - .plt.idx : { BYTE(0) } - .ftrace_trampoline : { BYTE(0) } + .got 0 : { BYTE(0) } + .plt 0 : { BYTE(0) } + .plt.idx 0 : { BYTE(0) } + .ftrace_trampoline 0 : { BYTE(0) } } diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h index d761db943335..35f0958163ac 100644 --- a/arch/loongarch/include/asm/ptrace.h +++ b/arch/loongarch/include/asm/ptrace.h @@ -154,6 +154,11 @@ static inline long regs_return_value(struct pt_regs *regs) return regs->regs[4]; } +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long val) +{ + regs->regs[4] = val; +} + #define instruction_pointer(regs) ((regs)->csr_era) #define profile_pc(regs) instruction_pointer(regs) diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index d82687390b4a..416b653bccb4 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -99,7 +99,7 @@ static inline void __cpu_die(unsigned int cpu) loongson_cpu_die(cpu); } -extern void play_dead(void); +extern void __noreturn play_dead(void); #endif #endif /* __ASM_SMP_H */ diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h index cc48ed262021..82d811b5c6e9 100644 --- a/arch/loongarch/include/uapi/asm/ptrace.h +++ b/arch/loongarch/include/uapi/asm/ptrace.h @@ -47,11 +47,12 @@ struct user_fp_state { }; struct user_watch_state { - uint16_t dbg_info; + uint64_t dbg_info; struct { uint64_t addr; uint64_t mask; uint32_t ctrl; + uint32_t pad; } dbg_regs[8]; }; diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 78d4e3384305..9a72d91cd104 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -13,7 +13,7 @@ obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \ obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_EFI) += efi.o -obj-$(CONFIG_CPU_HAS_FPU) += fpu.o +obj-$(CONFIG_CPU_HAS_FPU) += fpu.o kfpu.o obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 3a3fce2d7846..5adf0f736c6d 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -60,7 +60,7 @@ static inline void set_elf_platform(int cpu, const char *plat) /* MAP BASE */ unsigned long vm_map_base; -EXPORT_SYMBOL_GPL(vm_map_base); +EXPORT_SYMBOL(vm_map_base); static void cpu_probe_addrbits(struct cpuinfo_loongarch *c) { @@ -94,13 +94,18 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR | LOONGARCH_CPU_TLB | LOONGARCH_CPU_VINT | LOONGARCH_CPU_WATCH; - elf_hwcap = HWCAP_LOONGARCH_CPUCFG | HWCAP_LOONGARCH_CRC32; + elf_hwcap = HWCAP_LOONGARCH_CPUCFG; config = read_cpucfg(LOONGARCH_CPUCFG1); if (config & CPUCFG1_UAL) { c->options |= LOONGARCH_CPU_UAL; elf_hwcap |= HWCAP_LOONGARCH_UAL; } + if (config & CPUCFG1_CRC32) { + c->options |= LOONGARCH_CPU_CRC32; + elf_hwcap |= HWCAP_LOONGARCH_CRC32; + } + config = read_cpucfg(LOONGARCH_CPUCFG2); if (config & CPUCFG2_LAM) { diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c index 4a3ef8516ccc..73858c9029cc 100644 --- a/arch/loongarch/kernel/ftrace_dyn.c +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -30,19 +30,12 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate) return 0; } -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - #ifdef CONFIG_MODULES -static inline int __get_mod(struct module **mod, unsigned long addr) +static bool reachable_by_bl(unsigned long addr, unsigned long pc) { - preempt_disable(); - *mod = __module_text_address(addr); - preempt_enable(); + long offset = (long)addr - (long)pc; - if (WARN_ON(!(*mod))) - return -EINVAL; - - return 0; + return offset >= -SZ_128M && offset < SZ_128M; } static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) @@ -58,51 +51,88 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) return NULL; } -static unsigned long get_plt_addr(struct module *mod, unsigned long addr) +/* + * Find the address the callsite must branch to in order to reach '*addr'. + * + * Due to the limited range of 'bl' instruction, modules may be placed too far + * away to branch directly and we must use a PLT. + * + * Returns true when '*addr' contains a reachable target address, or has been + * modified to contain a PLT address. Returns false otherwise. + */ +static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, struct module *mod, unsigned long *addr) { + unsigned long pc = rec->ip + LOONGARCH_INSN_SIZE; struct plt_entry *plt; - plt = get_ftrace_plt(mod, addr); + /* + * If a custom trampoline is unreachable, rely on the ftrace_regs_caller + * trampoline which knows how to indirectly reach that trampoline through + * ops->direct_call. + */ + if (*addr != FTRACE_ADDR && *addr != FTRACE_REGS_ADDR && !reachable_by_bl(*addr, pc)) + *addr = FTRACE_REGS_ADDR; + + /* + * When the target is within range of the 'bl' instruction, use 'addr' + * as-is and branch to that directly. + */ + if (reachable_by_bl(*addr, pc)) + return true; + + /* + * 'mod' is only set at module load time, but if we end up + * dealing with an out-of-range condition, we can assume it + * is due to a module being loaded far away from the kernel. + * + * NOTE: __module_text_address() must be called with preemption + * disabled, but we can rely on ftrace_lock to ensure that 'mod' + * retains its validity throughout the remainder of this code. + */ + if (!mod) { + preempt_disable(); + mod = __module_text_address(pc); + preempt_enable(); + } + + if (WARN_ON(!mod)) + return false; + + plt = get_ftrace_plt(mod, *addr); if (!plt) { - pr_err("ftrace: no module PLT for %ps\n", (void *)addr); - return -EINVAL; + pr_err("ftrace: no module PLT for %ps\n", (void *)*addr); + return false; } - return (unsigned long)plt; + *addr = (unsigned long)plt; + return true; +} +#else /* !CONFIG_MODULES */ +static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, struct module *mod, unsigned long *addr) +{ + return true; } #endif +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { u32 old, new; unsigned long pc; - long offset __maybe_unused; pc = rec->ip + LOONGARCH_INSN_SIZE; -#ifdef CONFIG_MODULES - offset = (long)pc - (long)addr; - - if (offset < -SZ_128M || offset >= SZ_128M) { - int ret; - struct module *mod; - - ret = __get_mod(&mod, pc); - if (ret) - return ret; - - addr = get_plt_addr(mod, addr); + if (!ftrace_find_callable_addr(rec, NULL, &addr)) + return -EINVAL; - old_addr = get_plt_addr(mod, old_addr); - } -#endif + if (!ftrace_find_callable_addr(rec, NULL, &old_addr)) + return -EINVAL; new = larch_insn_gen_bl(pc, addr); old = larch_insn_gen_bl(pc, old_addr); return ftrace_modify_code(pc, old, new, true); } - #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ int ftrace_update_ftrace_func(ftrace_func_t func) @@ -153,24 +183,11 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { u32 old, new; unsigned long pc; - long offset __maybe_unused; pc = rec->ip + LOONGARCH_INSN_SIZE; -#ifdef CONFIG_MODULES - offset = (long)pc - (long)addr; - - if (offset < -SZ_128M || offset >= SZ_128M) { - int ret; - struct module *mod; - - ret = __get_mod(&mod, pc); - if (ret) - return ret; - - addr = get_plt_addr(mod, addr); - } -#endif + if (!ftrace_find_callable_addr(rec, NULL, &addr)) + return -EINVAL; old = larch_insn_gen_nop(); new = larch_insn_gen_bl(pc, addr); @@ -182,24 +199,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long ad { u32 old, new; unsigned long pc; - long offset __maybe_unused; pc = rec->ip + LOONGARCH_INSN_SIZE; -#ifdef CONFIG_MODULES - offset = (long)pc - (long)addr; - - if (offset < -SZ_128M || offset >= SZ_128M) { - int ret; - struct module *mod; - - ret = __get_mod(&mod, pc); - if (ret) - return ret; - - addr = get_plt_addr(mod, addr); - } -#endif + if (!ftrace_find_callable_addr(rec, NULL, &addr)) + return -EINVAL; new = larch_insn_gen_nop(); old = larch_insn_gen_bl(pc, addr); diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S index 44ff1ff64260..78f066384657 100644 --- a/arch/loongarch/kernel/genex.S +++ b/arch/loongarch/kernel/genex.S @@ -82,6 +82,7 @@ SYM_FUNC_END(except_vec_cex) BUILD_HANDLER ade ade badv BUILD_HANDLER ale ale badv + BUILD_HANDLER bce bce none BUILD_HANDLER bp bp none BUILD_HANDLER fpe fpe fcsr BUILD_HANDLER fpu fpu none diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c index 0524bf1169b7..883e5066ae44 100644 --- a/arch/loongarch/kernel/irq.c +++ b/arch/loongarch/kernel/irq.c @@ -92,7 +92,7 @@ static int __init get_ipi_irq(void) struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); if (d) - return irq_create_mapping(d, EXCCODE_IPI - EXCCODE_INT_START); + return irq_create_mapping(d, INT_IPI); return -EINVAL; } diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c new file mode 100644 index 000000000000..5c46ae8c6cac --- /dev/null +++ b/arch/loongarch/kernel/kfpu.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 Loongson Technology Corporation Limited + */ + +#include <linux/cpu.h> +#include <linux/init.h> +#include <asm/fpu.h> +#include <asm/smp.h> + +static DEFINE_PER_CPU(bool, in_kernel_fpu); + +void kernel_fpu_begin(void) +{ + preempt_disable(); + + WARN_ON(this_cpu_read(in_kernel_fpu)); + + this_cpu_write(in_kernel_fpu, true); + + if (!is_fpu_owner()) + enable_fpu(); + else + _save_fp(¤t->thread.fpu); + + write_fcsr(LOONGARCH_FCSR0, 0); +} +EXPORT_SYMBOL_GPL(kernel_fpu_begin); + +void kernel_fpu_end(void) +{ + WARN_ON(!this_cpu_read(in_kernel_fpu)); + + if (!is_fpu_owner()) + disable_fpu(); + else + _restore_fp(¤t->thread.fpu); + + this_cpu_write(in_kernel_fpu, false); + + preempt_enable(); +} +EXPORT_SYMBOL_GPL(kernel_fpu_end); diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S index bbabf06244c2..c7d961fc72c2 100644 --- a/arch/loongarch/kernel/mcount_dyn.S +++ b/arch/loongarch/kernel/mcount_dyn.S @@ -42,7 +42,6 @@ .if \allregs PTR_S tp, sp, PT_R2 PTR_S t0, sp, PT_R12 - PTR_S t1, sp, PT_R13 PTR_S t2, sp, PT_R14 PTR_S t3, sp, PT_R15 PTR_S t4, sp, PT_R16 @@ -64,6 +63,8 @@ PTR_S zero, sp, PT_R0 .endif PTR_S ra, sp, PT_ERA /* Save trace function ra at PT_ERA */ + move t1, zero + PTR_S t1, sp, PT_R13 PTR_ADDI t8, sp, PT_SIZE PTR_S t8, sp, PT_R3 .endm @@ -104,8 +105,12 @@ ftrace_common_return: PTR_L a7, sp, PT_R11 PTR_L fp, sp, PT_R22 PTR_L t0, sp, PT_ERA + PTR_L t1, sp, PT_R13 PTR_ADDI sp, sp, PT_SIZE + bnez t1, .Ldirect jr t0 +.Ldirect: + jr t1 SYM_CODE_END(ftrace_common) SYM_CODE_START(ftrace_caller) @@ -147,3 +152,9 @@ SYM_CODE_START(return_to_handler) jr ra SYM_CODE_END(return_to_handler) #endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +SYM_CODE_START(ftrace_stub_direct_tramp) + jr t0 +SYM_CODE_END(ftrace_stub_direct_tramp) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c index 707bd32e5c4f..ff28f99b47d7 100644 --- a/arch/loongarch/kernel/perf_event.c +++ b/arch/loongarch/kernel/perf_event.c @@ -461,7 +461,7 @@ static int get_pmc_irq(void) struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); if (d) - return irq_create_mapping(d, EXCCODE_PMC - EXCCODE_INT_START); + return irq_create_mapping(d, INT_PCOV); return -EINVAL; } diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index 5c67cc4fd56d..0d82907b5404 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -76,6 +76,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (cpu_has_fpu) seq_printf(m, " fpu"); if (cpu_has_lsx) seq_printf(m, " lsx"); if (cpu_has_lasx) seq_printf(m, " lasx"); + if (cpu_has_crc32) seq_printf(m, " crc32"); if (cpu_has_complex) seq_printf(m, " complex"); if (cpu_has_crypto) seq_printf(m, " crypto"); if (cpu_has_lvz) seq_printf(m, " lvz"); diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index fa2443c7afb2..b71e17c1cc0c 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -62,7 +62,7 @@ unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; EXPORT_SYMBOL(boot_option_idle_override); #ifdef CONFIG_HOTPLUG_CPU -void arch_cpu_idle_dead(void) +void __noreturn arch_cpu_idle_dead(void) { play_dead(); } diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index 06bceae7d104..5fcffb452367 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -391,10 +391,10 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, return 0; } -static int ptrace_hbp_get_resource_info(unsigned int note_type, u16 *info) +static int ptrace_hbp_get_resource_info(unsigned int note_type, u64 *info) { u8 num; - u16 reg = 0; + u64 reg = 0; switch (note_type) { case NT_LOONGARCH_HW_BREAK: @@ -524,15 +524,16 @@ static int ptrace_hbp_set_addr(unsigned int note_type, return modify_user_hw_breakpoint(bp, &attr); } -#define PTRACE_HBP_CTRL_SZ sizeof(u32) #define PTRACE_HBP_ADDR_SZ sizeof(u64) #define PTRACE_HBP_MASK_SZ sizeof(u64) +#define PTRACE_HBP_CTRL_SZ sizeof(u32) +#define PTRACE_HBP_PAD_SZ sizeof(u32) static int hw_break_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { - u16 info; + u64 info; u32 ctrl; u64 addr, mask; int ret, idx = 0; @@ -545,7 +546,7 @@ static int hw_break_get(struct task_struct *target, membuf_write(&to, &info, sizeof(info)); - /* (address, ctrl) registers */ + /* (address, mask, ctrl) registers */ while (to.left) { ret = ptrace_hbp_get_addr(note_type, target, idx, &addr); if (ret) @@ -562,6 +563,7 @@ static int hw_break_get(struct task_struct *target, membuf_store(&to, addr); membuf_store(&to, mask); membuf_store(&to, ctrl); + membuf_zero(&to, sizeof(u32)); idx++; } @@ -582,7 +584,7 @@ static int hw_break_set(struct task_struct *target, offset = offsetof(struct user_watch_state, dbg_regs); user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, 0, offset); - /* (address, ctrl) registers */ + /* (address, mask, ctrl) registers */ limit = regset->n * regset->size; while (count && offset < limit) { if (count < PTRACE_HBP_ADDR_SZ) @@ -602,7 +604,7 @@ static int hw_break_set(struct task_struct *target, break; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &mask, - offset, offset + PTRACE_HBP_ADDR_SZ); + offset, offset + PTRACE_HBP_MASK_SZ); if (ret) return ret; @@ -611,8 +613,8 @@ static int hw_break_set(struct task_struct *target, return ret; offset += PTRACE_HBP_MASK_SZ; - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &mask, - offset, offset + PTRACE_HBP_MASK_SZ); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, + offset, offset + PTRACE_HBP_CTRL_SZ); if (ret) return ret; @@ -620,6 +622,11 @@ static int hw_break_set(struct task_struct *target, if (ret) return ret; offset += PTRACE_HBP_CTRL_SZ; + + user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + offset, offset + PTRACE_HBP_PAD_SZ); + offset += PTRACE_HBP_PAD_SZ; + idx++; } diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index bae84ccf6d36..4444b13418f0 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -160,6 +160,27 @@ static void __init smbios_parse(void) dmi_walk(find_tokens, NULL); } +#ifdef CONFIG_ARCH_WRITECOMBINE +pgprot_t pgprot_wc = PAGE_KERNEL_WUC; +#else +pgprot_t pgprot_wc = PAGE_KERNEL_SUC; +#endif + +EXPORT_SYMBOL(pgprot_wc); + +static int __init setup_writecombine(char *p) +{ + if (!strcmp(p, "on")) + pgprot_wc = PAGE_KERNEL_WUC; + else if (!strcmp(p, "off")) + pgprot_wc = PAGE_KERNEL_SUC; + else + pr_warn("Unknown writecombine setting \"%s\".\n", p); + + return 0; +} +early_param("writecombine", setup_writecombine); + static int usermem __initdata; static int __init early_parse_mem(char *p) @@ -368,8 +389,8 @@ static void __init arch_mem_init(char **cmdline_p) /* * In order to reduce the possibility of kernel panic when failed to * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate - * low memory as small as possible before plat_swiotlb_setup(), so - * make sparse_init() using top-down allocation. + * low memory as small as possible before swiotlb_init(), so make + * sparse_init() using top-down allocation. */ memblock_set_bottom_up(false); sparse_init(); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 8c6e227cb29d..ed167e244cda 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -155,11 +155,11 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action) * it goes straight through and wastes no time serializing * anything. Worst case is that we lose a reschedule ... */ -void smp_send_reschedule(int cpu) +void arch_smp_send_reschedule(int cpu) { loongson_send_ipi_single(cpu, SMP_RESCHEDULE); } -EXPORT_SYMBOL_GPL(smp_send_reschedule); +EXPORT_SYMBOL_GPL(arch_smp_send_reschedule); irqreturn_t loongson_ipi_interrupt(int irq, void *dev) { @@ -336,7 +336,7 @@ void play_dead(void) iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR); init_fn(); - unreachable(); + BUG(); } #endif diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c index 3a690f96f00c..2463d2fea21f 100644 --- a/arch/loongarch/kernel/stacktrace.c +++ b/arch/loongarch/kernel/stacktrace.c @@ -30,7 +30,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, regs->regs[1] = 0; for (unwind_start(&state, task, regs); - !unwind_done(&state); unwind_next_frame(&state)) { + !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); if (!addr || !consume_entry(cookie, addr)) break; diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index 4351f69d9950..f377e50f3c66 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -133,7 +133,7 @@ static int get_timer_irq(void) struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); if (d) - return irq_create_mapping(d, EXCCODE_TIMER - EXCCODE_INT_START); + return irq_create_mapping(d, INT_TI); return -EINVAL; } diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index de8ebe20b666..8db26e4ca447 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -3,6 +3,7 @@ * Author: Huacai Chen <chenhuacai@loongson.cn> * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include <linux/bitfield.h> #include <linux/bitops.h> #include <linux/bug.h> #include <linux/compiler.h> @@ -35,6 +36,7 @@ #include <asm/break.h> #include <asm/cpu.h> #include <asm/fpu.h> +#include <asm/inst.h> #include <asm/loongarch.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> @@ -50,6 +52,7 @@ extern asmlinkage void handle_ade(void); extern asmlinkage void handle_ale(void); +extern asmlinkage void handle_bce(void); extern asmlinkage void handle_sys(void); extern asmlinkage void handle_bp(void); extern asmlinkage void handle_ri(void); @@ -153,53 +156,210 @@ static void show_code(unsigned int *pc, bool user) pr_cont("\n"); } -static void __show_regs(const struct pt_regs *regs) +static void print_bool_fragment(const char *key, unsigned long val, bool first) { - const int field = 2 * sizeof(unsigned long); - unsigned int excsubcode; - unsigned int exccode; - int i; + /* e.g. "+PG", "-DA" */ + pr_cont("%s%c%s", first ? "" : " ", val ? '+' : '-', key); +} - show_regs_print_info(KERN_DEFAULT); +static void print_plv_fragment(const char *key, int val) +{ + /* e.g. "PLV0", "PPLV3" */ + pr_cont("%s%d", key, val); +} - /* - * Saved main processor registers - */ - for (i = 0; i < 32; ) { - if ((i % 4) == 0) - printk("$%2d :", i); - pr_cont(" %0*lx", field, regs->regs[i]); +static void print_memory_type_fragment(const char *key, unsigned long val) +{ + const char *humanized_type; - i++; - if ((i % 4) == 0) - pr_cont("\n"); + switch (val) { + case 0: + humanized_type = "SUC"; + break; + case 1: + humanized_type = "CC"; + break; + case 2: + humanized_type = "WUC"; + break; + default: + pr_cont(" %s=Reserved(%lu)", key, val); + return; } + /* e.g. " DATM=WUC" */ + pr_cont(" %s=%s", key, humanized_type); +} + +static void print_intr_fragment(const char *key, unsigned long val) +{ + /* e.g. "LIE=0-1,3,5-7" */ + pr_cont("%s=%*pbl", key, EXCCODE_INT_NUM, &val); +} + +static void print_crmd(unsigned long x) +{ + printk(" CRMD: %08lx (", x); + print_plv_fragment("PLV", (int) FIELD_GET(CSR_CRMD_PLV, x)); + print_bool_fragment("IE", FIELD_GET(CSR_CRMD_IE, x), false); + print_bool_fragment("DA", FIELD_GET(CSR_CRMD_DA, x), false); + print_bool_fragment("PG", FIELD_GET(CSR_CRMD_PG, x), false); + print_memory_type_fragment("DACF", FIELD_GET(CSR_CRMD_DACF, x)); + print_memory_type_fragment("DACM", FIELD_GET(CSR_CRMD_DACM, x)); + print_bool_fragment("WE", FIELD_GET(CSR_CRMD_WE, x), false); + pr_cont(")\n"); +} + +static void print_prmd(unsigned long x) +{ + printk(" PRMD: %08lx (", x); + print_plv_fragment("PPLV", (int) FIELD_GET(CSR_PRMD_PPLV, x)); + print_bool_fragment("PIE", FIELD_GET(CSR_PRMD_PIE, x), false); + print_bool_fragment("PWE", FIELD_GET(CSR_PRMD_PWE, x), false); + pr_cont(")\n"); +} + +static void print_euen(unsigned long x) +{ + printk(" EUEN: %08lx (", x); + print_bool_fragment("FPE", FIELD_GET(CSR_EUEN_FPEN, x), true); + print_bool_fragment("SXE", FIELD_GET(CSR_EUEN_LSXEN, x), false); + print_bool_fragment("ASXE", FIELD_GET(CSR_EUEN_LASXEN, x), false); + print_bool_fragment("BTE", FIELD_GET(CSR_EUEN_LBTEN, x), false); + pr_cont(")\n"); +} + +static void print_ecfg(unsigned long x) +{ + printk(" ECFG: %08lx (", x); + print_intr_fragment("LIE", FIELD_GET(CSR_ECFG_IM, x)); + pr_cont(" VS=%d)\n", (int) FIELD_GET(CSR_ECFG_VS, x)); +} + +static const char *humanize_exc_name(unsigned int ecode, unsigned int esubcode) +{ + /* + * LoongArch users and developers are probably more familiar with + * those names found in the ISA manual, so we are going to print out + * the latter. This will require some mapping. + */ + switch (ecode) { + case EXCCODE_RSV: return "INT"; + case EXCCODE_TLBL: return "PIL"; + case EXCCODE_TLBS: return "PIS"; + case EXCCODE_TLBI: return "PIF"; + case EXCCODE_TLBM: return "PME"; + case EXCCODE_TLBNR: return "PNR"; + case EXCCODE_TLBNX: return "PNX"; + case EXCCODE_TLBPE: return "PPI"; + case EXCCODE_ADE: + switch (esubcode) { + case EXSUBCODE_ADEF: return "ADEF"; + case EXSUBCODE_ADEM: return "ADEM"; + } + break; + case EXCCODE_ALE: return "ALE"; + case EXCCODE_BCE: return "BCE"; + case EXCCODE_SYS: return "SYS"; + case EXCCODE_BP: return "BRK"; + case EXCCODE_INE: return "INE"; + case EXCCODE_IPE: return "IPE"; + case EXCCODE_FPDIS: return "FPD"; + case EXCCODE_LSXDIS: return "SXD"; + case EXCCODE_LASXDIS: return "ASXD"; + case EXCCODE_FPE: + switch (esubcode) { + case EXCSUBCODE_FPE: return "FPE"; + case EXCSUBCODE_VFPE: return "VFPE"; + } + break; + case EXCCODE_WATCH: + switch (esubcode) { + case EXCSUBCODE_WPEF: return "WPEF"; + case EXCSUBCODE_WPEM: return "WPEM"; + } + break; + case EXCCODE_BTDIS: return "BTD"; + case EXCCODE_BTE: return "BTE"; + case EXCCODE_GSPR: return "GSPR"; + case EXCCODE_HVC: return "HVC"; + case EXCCODE_GCM: + switch (esubcode) { + case EXCSUBCODE_GCSC: return "GCSC"; + case EXCSUBCODE_GCHC: return "GCHC"; + } + break; /* - * Saved csr registers + * The manual did not mention the EXCCODE_SE case, but print out it + * nevertheless. */ - printk("era : %0*lx %pS\n", field, regs->csr_era, - (void *) regs->csr_era); - printk("ra : %0*lx %pS\n", field, regs->regs[1], - (void *) regs->regs[1]); + case EXCCODE_SE: return "SE"; + } - printk("CSR crmd: %08lx ", regs->csr_crmd); - printk("CSR prmd: %08lx ", regs->csr_prmd); - printk("CSR euen: %08lx ", regs->csr_euen); - printk("CSR ecfg: %08lx ", regs->csr_ecfg); - printk("CSR estat: %08lx ", regs->csr_estat); + return "???"; +} - pr_cont("\n"); +static void print_estat(unsigned long x) +{ + unsigned int ecode = FIELD_GET(CSR_ESTAT_EXC, x); + unsigned int esubcode = FIELD_GET(CSR_ESTAT_ESUBCODE, x); + + printk("ESTAT: %08lx [%s] (", x, humanize_exc_name(ecode, esubcode)); + print_intr_fragment("IS", FIELD_GET(CSR_ESTAT_IS, x)); + pr_cont(" ECode=%d EsubCode=%d)\n", (int) ecode, (int) esubcode); +} + +static void __show_regs(const struct pt_regs *regs) +{ + const int field = 2 * sizeof(unsigned long); + unsigned int exccode = FIELD_GET(CSR_ESTAT_EXC, regs->csr_estat); + + show_regs_print_info(KERN_DEFAULT); + + /* Print saved GPRs except $zero (substituting with PC/ERA) */ +#define GPR_FIELD(x) field, regs->regs[x] + printk("pc %0*lx ra %0*lx tp %0*lx sp %0*lx\n", + field, regs->csr_era, GPR_FIELD(1), GPR_FIELD(2), GPR_FIELD(3)); + printk("a0 %0*lx a1 %0*lx a2 %0*lx a3 %0*lx\n", + GPR_FIELD(4), GPR_FIELD(5), GPR_FIELD(6), GPR_FIELD(7)); + printk("a4 %0*lx a5 %0*lx a6 %0*lx a7 %0*lx\n", + GPR_FIELD(8), GPR_FIELD(9), GPR_FIELD(10), GPR_FIELD(11)); + printk("t0 %0*lx t1 %0*lx t2 %0*lx t3 %0*lx\n", + GPR_FIELD(12), GPR_FIELD(13), GPR_FIELD(14), GPR_FIELD(15)); + printk("t4 %0*lx t5 %0*lx t6 %0*lx t7 %0*lx\n", + GPR_FIELD(16), GPR_FIELD(17), GPR_FIELD(18), GPR_FIELD(19)); + printk("t8 %0*lx u0 %0*lx s9 %0*lx s0 %0*lx\n", + GPR_FIELD(20), GPR_FIELD(21), GPR_FIELD(22), GPR_FIELD(23)); + printk("s1 %0*lx s2 %0*lx s3 %0*lx s4 %0*lx\n", + GPR_FIELD(24), GPR_FIELD(25), GPR_FIELD(26), GPR_FIELD(27)); + printk("s5 %0*lx s6 %0*lx s7 %0*lx s8 %0*lx\n", + GPR_FIELD(28), GPR_FIELD(29), GPR_FIELD(30), GPR_FIELD(31)); + + /* The slot for $zero is reused as the syscall restart flag */ + if (regs->regs[0]) + printk("syscall restart flag: %0*lx\n", GPR_FIELD(0)); + + if (user_mode(regs)) { + printk(" ra: %0*lx\n", GPR_FIELD(1)); + printk(" ERA: %0*lx\n", field, regs->csr_era); + } else { + printk(" ra: %0*lx %pS\n", GPR_FIELD(1), (void *) regs->regs[1]); + printk(" ERA: %0*lx %pS\n", field, regs->csr_era, (void *) regs->csr_era); + } +#undef GPR_FIELD - exccode = ((regs->csr_estat) & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; - excsubcode = ((regs->csr_estat) & CSR_ESTAT_ESUBCODE) >> CSR_ESTAT_ESUBCODE_SHIFT; - printk("ExcCode : %x (SubCode %x)\n", exccode, excsubcode); + /* Print saved important CSRs */ + print_crmd(regs->csr_crmd); + print_prmd(regs->csr_prmd); + print_euen(regs->csr_euen); + print_ecfg(regs->csr_ecfg); + print_estat(regs->csr_estat); if (exccode >= EXCCODE_TLBL && exccode <= EXCCODE_ALE) - printk("BadVA : %0*lx\n", field, regs->csr_badvaddr); + printk(" BADV: %0*lx\n", field, regs->csr_badvaddr); - printk("PrId : %08x (%s)\n", read_cpucfg(LOONGARCH_CPUCFG0), - cpu_family_string()); + printk(" PRID: %08x (%s, %s)\n", read_cpucfg(LOONGARCH_CPUCFG0), + cpu_family_string(), cpu_full_name_string()); } void show_regs(struct pt_regs *regs) @@ -430,6 +590,95 @@ static void bug_handler(struct pt_regs *regs) } } +asmlinkage void noinstr do_bce(struct pt_regs *regs) +{ + bool user = user_mode(regs); + unsigned long era = exception_era(regs); + u64 badv = 0, lower = 0, upper = ULONG_MAX; + union loongarch_instruction insn; + irqentry_state_t state = irqentry_enter(regs); + + if (regs->csr_prmd & CSR_PRMD_PIE) + local_irq_enable(); + + current->thread.trap_nr = read_csr_excode(); + + die_if_kernel("Bounds check error in kernel code", regs); + + /* + * Pull out the address that failed bounds checking, and the lower / + * upper bound, by minimally looking at the faulting instruction word + * and reading from the correct register. + */ + if (__get_inst(&insn.word, (u32 *)era, user)) + goto bad_era; + + switch (insn.reg3_format.opcode) { + case asrtle_op: + if (insn.reg3_format.rd != 0) + break; /* not asrtle */ + badv = regs->regs[insn.reg3_format.rj]; + upper = regs->regs[insn.reg3_format.rk]; + break; + + case asrtgt_op: + if (insn.reg3_format.rd != 0) + break; /* not asrtgt */ + badv = regs->regs[insn.reg3_format.rj]; + lower = regs->regs[insn.reg3_format.rk]; + break; + + case ldleb_op: + case ldleh_op: + case ldlew_op: + case ldled_op: + case stleb_op: + case stleh_op: + case stlew_op: + case stled_op: + case fldles_op: + case fldled_op: + case fstles_op: + case fstled_op: + badv = regs->regs[insn.reg3_format.rj]; + upper = regs->regs[insn.reg3_format.rk]; + break; + + case ldgtb_op: + case ldgth_op: + case ldgtw_op: + case ldgtd_op: + case stgtb_op: + case stgth_op: + case stgtw_op: + case stgtd_op: + case fldgts_op: + case fldgtd_op: + case fstgts_op: + case fstgtd_op: + badv = regs->regs[insn.reg3_format.rj]; + lower = regs->regs[insn.reg3_format.rk]; + break; + } + + force_sig_bnderr((void __user *)badv, (void __user *)lower, (void __user *)upper); + +out: + if (regs->csr_prmd & CSR_PRMD_PIE) + local_irq_disable(); + + irqentry_exit(regs, state); + return; + +bad_era: + /* + * Cannot pull out the instruction word, hence cannot provide more + * info than a regular SIGSEGV in this case. + */ + force_sig(SIGSEGV); + goto out; +} + asmlinkage void noinstr do_bp(struct pt_regs *regs) { bool user = user_mode(regs); @@ -792,11 +1041,12 @@ void __init trap_init(void) long i; /* Set interrupt vector handler */ - for (i = EXCCODE_INT_START; i < EXCCODE_INT_END; i++) + for (i = EXCCODE_INT_START; i <= EXCCODE_INT_END; i++) set_handler(i * VECSIZE, handle_vint, VECSIZE); set_handler(EXCCODE_ADE * VECSIZE, handle_ade, VECSIZE); set_handler(EXCCODE_ALE * VECSIZE, handle_ale, VECSIZE); + set_handler(EXCCODE_BCE * VECSIZE, handle_bce, VECSIZE); set_handler(EXCCODE_SYS * VECSIZE, handle_sys, VECSIZE); set_handler(EXCCODE_BP * VECSIZE, handle_bp, VECSIZE); set_handler(EXCCODE_INE * VECSIZE, handle_ri, VECSIZE); diff --git a/arch/loongarch/kernel/unwind.c b/arch/loongarch/kernel/unwind.c index a463d6961344..ba324ba76fa1 100644 --- a/arch/loongarch/kernel/unwind.c +++ b/arch/loongarch/kernel/unwind.c @@ -28,5 +28,6 @@ bool default_next_frame(struct unwind_state *state) } while (!get_stack_info(state->sp, state->task, info)); + state->error = true; return false; } diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index 9095fde8e55d..55afc27320e1 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -211,7 +211,7 @@ static bool next_frame(struct unwind_state *state) pc = regs->csr_era; if (user_mode(regs) || !__kernel_text_address(pc)) - return false; + goto out; state->first = true; state->pc = pc; @@ -226,6 +226,8 @@ static bool next_frame(struct unwind_state *state) } while (!get_stack_info(state->sp, state->task, info)); +out: + state->error = true; return false; } diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index 40bde632900f..d60d4e096cfa 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -4,4 +4,6 @@ # lib-y += delay.o memset.o memcpy.o memmove.o \ - clear_user.o copy_user.o dump_tlb.o unaligned.o + clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o + +obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index 2dc48e61a2c8..fd1d62b244f2 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -13,7 +13,14 @@ .irp to, 0, 1, 2, 3, 4, 5, 6, 7 .L_fixup_handle_\to\(): - addi.d a0, a1, (\to) * (-8) + sub.d a0, a2, a0 + addi.d a0, a0, (\to) * (-8) + jr ra +.endr + +.irp to, 0, 2, 4 +.L_fixup_handle_s\to\(): + addi.d a0, a1, -\to jr ra .endr @@ -44,7 +51,7 @@ SYM_FUNC_START(__clear_user_generic) 2: move a0, a1 jr ra - _asm_extable 1b, .L_fixup_handle_0 + _asm_extable 1b, .L_fixup_handle_s0 SYM_FUNC_END(__clear_user_generic) /* @@ -54,12 +61,21 @@ SYM_FUNC_END(__clear_user_generic) * a1: size */ SYM_FUNC_START(__clear_user_fast) - beqz a1, 10f + sltui t0, a1, 9 + bnez t0, .Lsmall - ori a2, zero, 64 - blt a1, a2, 9f + add.d a2, a0, a1 +0: st.d zero, a0, 0 + + /* align up address */ + addi.d a0, a0, 8 + bstrins.d a0, zero, 2, 0 + + addi.d a3, a2, -64 + bgeu a0, a3, .Llt64 /* set 64 bytes at a time */ +.Lloop64: 1: st.d zero, a0, 0 2: st.d zero, a0, 8 3: st.d zero, a0, 16 @@ -68,24 +84,95 @@ SYM_FUNC_START(__clear_user_fast) 6: st.d zero, a0, 40 7: st.d zero, a0, 48 8: st.d zero, a0, 56 - addi.d a0, a0, 64 - addi.d a1, a1, -64 - bge a1, a2, 1b - - beqz a1, 10f + bltu a0, a3, .Lloop64 /* set the remaining bytes */ -9: st.b zero, a0, 0 - addi.d a0, a0, 1 - addi.d a1, a1, -1 - bgt a1, zero, 9b +.Llt64: + addi.d a3, a2, -32 + bgeu a0, a3, .Llt32 +9: st.d zero, a0, 0 +10: st.d zero, a0, 8 +11: st.d zero, a0, 16 +12: st.d zero, a0, 24 + addi.d a0, a0, 32 + +.Llt32: + addi.d a3, a2, -16 + bgeu a0, a3, .Llt16 +13: st.d zero, a0, 0 +14: st.d zero, a0, 8 + addi.d a0, a0, 16 + +.Llt16: + addi.d a3, a2, -8 + bgeu a0, a3, .Llt8 +15: st.d zero, a0, 0 + +.Llt8: +16: st.d zero, a2, -8 /* return */ -10: move a0, a1 + move a0, zero + jr ra + + .align 4 +.Lsmall: + pcaddi t0, 4 + slli.d a2, a1, 4 + add.d t0, t0, a2 + jr t0 + + .align 4 + move a0, zero + jr ra + + .align 4 +17: st.b zero, a0, 0 + move a0, zero + jr ra + + .align 4 +18: st.h zero, a0, 0 + move a0, zero + jr ra + + .align 4 +19: st.h zero, a0, 0 +20: st.b zero, a0, 2 + move a0, zero + jr ra + + .align 4 +21: st.w zero, a0, 0 + move a0, zero + jr ra + + .align 4 +22: st.w zero, a0, 0 +23: st.b zero, a0, 4 + move a0, zero + jr ra + + .align 4 +24: st.w zero, a0, 0 +25: st.h zero, a0, 4 + move a0, zero + jr ra + + .align 4 +26: st.w zero, a0, 0 +27: st.w zero, a0, 3 + move a0, zero + jr ra + + .align 4 +28: st.d zero, a0, 0 + move a0, zero jr ra /* fixup and ex_table */ + _asm_extable 0b, .L_fixup_handle_0 _asm_extable 1b, .L_fixup_handle_0 _asm_extable 2b, .L_fixup_handle_1 _asm_extable 3b, .L_fixup_handle_2 @@ -95,4 +182,23 @@ SYM_FUNC_START(__clear_user_fast) _asm_extable 7b, .L_fixup_handle_6 _asm_extable 8b, .L_fixup_handle_7 _asm_extable 9b, .L_fixup_handle_0 + _asm_extable 10b, .L_fixup_handle_1 + _asm_extable 11b, .L_fixup_handle_2 + _asm_extable 12b, .L_fixup_handle_3 + _asm_extable 13b, .L_fixup_handle_0 + _asm_extable 14b, .L_fixup_handle_1 + _asm_extable 15b, .L_fixup_handle_0 + _asm_extable 16b, .L_fixup_handle_1 + _asm_extable 17b, .L_fixup_handle_s0 + _asm_extable 18b, .L_fixup_handle_s0 + _asm_extable 19b, .L_fixup_handle_s0 + _asm_extable 20b, .L_fixup_handle_s2 + _asm_extable 21b, .L_fixup_handle_s0 + _asm_extable 22b, .L_fixup_handle_s0 + _asm_extable 23b, .L_fixup_handle_s4 + _asm_extable 24b, .L_fixup_handle_s0 + _asm_extable 25b, .L_fixup_handle_s4 + _asm_extable 26b, .L_fixup_handle_s0 + _asm_extable 27b, .L_fixup_handle_s4 + _asm_extable 28b, .L_fixup_handle_s0 SYM_FUNC_END(__clear_user_fast) diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index 55ac6020a1ad..b21f6d5d38f5 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -13,7 +13,14 @@ .irp to, 0, 1, 2, 3, 4, 5, 6, 7 .L_fixup_handle_\to\(): - addi.d a0, a2, (\to) * (-8) + sub.d a0, a2, a0 + addi.d a0, a0, (\to) * (-8) + jr ra +.endr + +.irp to, 0, 2, 4 +.L_fixup_handle_s\to\(): + addi.d a0, a2, -\to jr ra .endr @@ -47,8 +54,8 @@ SYM_FUNC_START(__copy_user_generic) 3: move a0, a2 jr ra - _asm_extable 1b, .L_fixup_handle_0 - _asm_extable 2b, .L_fixup_handle_0 + _asm_extable 1b, .L_fixup_handle_s0 + _asm_extable 2b, .L_fixup_handle_s0 SYM_FUNC_END(__copy_user_generic) /* @@ -59,65 +66,209 @@ SYM_FUNC_END(__copy_user_generic) * a2: n */ SYM_FUNC_START(__copy_user_fast) - beqz a2, 19f + sltui t0, a2, 9 + bnez t0, .Lsmall - ori a3, zero, 64 - blt a2, a3, 17f + add.d a3, a1, a2 + add.d a2, a0, a2 +0: ld.d t0, a1, 0 +1: st.d t0, a0, 0 - /* copy 64 bytes at a time */ -1: ld.d t0, a1, 0 -2: ld.d t1, a1, 8 -3: ld.d t2, a1, 16 -4: ld.d t3, a1, 24 -5: ld.d t4, a1, 32 -6: ld.d t5, a1, 40 -7: ld.d t6, a1, 48 -8: ld.d t7, a1, 56 -9: st.d t0, a0, 0 -10: st.d t1, a0, 8 -11: st.d t2, a0, 16 -12: st.d t3, a0, 24 -13: st.d t4, a0, 32 -14: st.d t5, a0, 40 -15: st.d t6, a0, 48 -16: st.d t7, a0, 56 + /* align up destination address */ + andi t1, a0, 7 + sub.d t0, zero, t1 + addi.d t0, t0, 8 + add.d a1, a1, t0 + add.d a0, a0, t0 - addi.d a0, a0, 64 - addi.d a1, a1, 64 - addi.d a2, a2, -64 - bge a2, a3, 1b + addi.d a4, a3, -64 + bgeu a1, a4, .Llt64 - beqz a2, 19f + /* copy 64 bytes at a time */ +.Lloop64: +2: ld.d t0, a1, 0 +3: ld.d t1, a1, 8 +4: ld.d t2, a1, 16 +5: ld.d t3, a1, 24 +6: ld.d t4, a1, 32 +7: ld.d t5, a1, 40 +8: ld.d t6, a1, 48 +9: ld.d t7, a1, 56 + addi.d a1, a1, 64 +10: st.d t0, a0, 0 +11: st.d t1, a0, 8 +12: st.d t2, a0, 16 +13: st.d t3, a0, 24 +14: st.d t4, a0, 32 +15: st.d t5, a0, 40 +16: st.d t6, a0, 48 +17: st.d t7, a0, 56 + addi.d a0, a0, 64 + bltu a1, a4, .Lloop64 /* copy the remaining bytes */ -17: ld.b t0, a1, 0 -18: st.b t0, a0, 0 - addi.d a0, a0, 1 - addi.d a1, a1, 1 - addi.d a2, a2, -1 - bgt a2, zero, 17b +.Llt64: + addi.d a4, a3, -32 + bgeu a1, a4, .Llt32 +18: ld.d t0, a1, 0 +19: ld.d t1, a1, 8 +20: ld.d t2, a1, 16 +21: ld.d t3, a1, 24 + addi.d a1, a1, 32 +22: st.d t0, a0, 0 +23: st.d t1, a0, 8 +24: st.d t2, a0, 16 +25: st.d t3, a0, 24 + addi.d a0, a0, 32 + +.Llt32: + addi.d a4, a3, -16 + bgeu a1, a4, .Llt16 +26: ld.d t0, a1, 0 +27: ld.d t1, a1, 8 + addi.d a1, a1, 16 +28: st.d t0, a0, 0 +29: st.d t1, a0, 8 + addi.d a0, a0, 16 + +.Llt16: + addi.d a4, a3, -8 + bgeu a1, a4, .Llt8 +30: ld.d t0, a1, 0 +31: st.d t0, a0, 0 + +.Llt8: +32: ld.d t0, a3, -8 +33: st.d t0, a2, -8 /* return */ -19: move a0, a2 + move a0, zero + jr ra + + .align 5 +.Lsmall: + pcaddi t0, 8 + slli.d a3, a2, 5 + add.d t0, t0, a3 + jr t0 + + .align 5 + move a0, zero + jr ra + + .align 5 +34: ld.b t0, a1, 0 +35: st.b t0, a0, 0 + move a0, zero + jr ra + + .align 5 +36: ld.h t0, a1, 0 +37: st.h t0, a0, 0 + move a0, zero + jr ra + + .align 5 +38: ld.h t0, a1, 0 +39: ld.b t1, a1, 2 +40: st.h t0, a0, 0 +41: st.b t1, a0, 2 + move a0, zero + jr ra + + .align 5 +42: ld.w t0, a1, 0 +43: st.w t0, a0, 0 + move a0, zero + jr ra + + .align 5 +44: ld.w t0, a1, 0 +45: ld.b t1, a1, 4 +46: st.w t0, a0, 0 +47: st.b t1, a0, 4 + move a0, zero + jr ra + + .align 5 +48: ld.w t0, a1, 0 +49: ld.h t1, a1, 4 +50: st.w t0, a0, 0 +51: st.h t1, a0, 4 + move a0, zero + jr ra + + .align 5 +52: ld.w t0, a1, 0 +53: ld.w t1, a1, 3 +54: st.w t0, a0, 0 +55: st.w t1, a0, 3 + move a0, zero + jr ra + + .align 5 +56: ld.d t0, a1, 0 +57: st.d t0, a0, 0 + move a0, zero jr ra /* fixup and ex_table */ + _asm_extable 0b, .L_fixup_handle_0 _asm_extable 1b, .L_fixup_handle_0 - _asm_extable 2b, .L_fixup_handle_1 - _asm_extable 3b, .L_fixup_handle_2 - _asm_extable 4b, .L_fixup_handle_3 - _asm_extable 5b, .L_fixup_handle_4 - _asm_extable 6b, .L_fixup_handle_5 - _asm_extable 7b, .L_fixup_handle_6 - _asm_extable 8b, .L_fixup_handle_7 + _asm_extable 2b, .L_fixup_handle_0 + _asm_extable 3b, .L_fixup_handle_0 + _asm_extable 4b, .L_fixup_handle_0 + _asm_extable 5b, .L_fixup_handle_0 + _asm_extable 6b, .L_fixup_handle_0 + _asm_extable 7b, .L_fixup_handle_0 + _asm_extable 8b, .L_fixup_handle_0 _asm_extable 9b, .L_fixup_handle_0 - _asm_extable 10b, .L_fixup_handle_1 - _asm_extable 11b, .L_fixup_handle_2 - _asm_extable 12b, .L_fixup_handle_3 - _asm_extable 13b, .L_fixup_handle_4 - _asm_extable 14b, .L_fixup_handle_5 - _asm_extable 15b, .L_fixup_handle_6 - _asm_extable 16b, .L_fixup_handle_7 - _asm_extable 17b, .L_fixup_handle_0 + _asm_extable 10b, .L_fixup_handle_0 + _asm_extable 11b, .L_fixup_handle_1 + _asm_extable 12b, .L_fixup_handle_2 + _asm_extable 13b, .L_fixup_handle_3 + _asm_extable 14b, .L_fixup_handle_4 + _asm_extable 15b, .L_fixup_handle_5 + _asm_extable 16b, .L_fixup_handle_6 + _asm_extable 17b, .L_fixup_handle_7 _asm_extable 18b, .L_fixup_handle_0 + _asm_extable 19b, .L_fixup_handle_0 + _asm_extable 20b, .L_fixup_handle_0 + _asm_extable 21b, .L_fixup_handle_0 + _asm_extable 22b, .L_fixup_handle_0 + _asm_extable 23b, .L_fixup_handle_1 + _asm_extable 24b, .L_fixup_handle_2 + _asm_extable 25b, .L_fixup_handle_3 + _asm_extable 26b, .L_fixup_handle_0 + _asm_extable 27b, .L_fixup_handle_0 + _asm_extable 28b, .L_fixup_handle_0 + _asm_extable 29b, .L_fixup_handle_1 + _asm_extable 30b, .L_fixup_handle_0 + _asm_extable 31b, .L_fixup_handle_0 + _asm_extable 32b, .L_fixup_handle_0 + _asm_extable 33b, .L_fixup_handle_1 + _asm_extable 34b, .L_fixup_handle_s0 + _asm_extable 35b, .L_fixup_handle_s0 + _asm_extable 36b, .L_fixup_handle_s0 + _asm_extable 37b, .L_fixup_handle_s0 + _asm_extable 38b, .L_fixup_handle_s0 + _asm_extable 39b, .L_fixup_handle_s0 + _asm_extable 40b, .L_fixup_handle_s0 + _asm_extable 41b, .L_fixup_handle_s2 + _asm_extable 42b, .L_fixup_handle_s0 + _asm_extable 43b, .L_fixup_handle_s0 + _asm_extable 44b, .L_fixup_handle_s0 + _asm_extable 45b, .L_fixup_handle_s0 + _asm_extable 46b, .L_fixup_handle_s0 + _asm_extable 47b, .L_fixup_handle_s4 + _asm_extable 48b, .L_fixup_handle_s0 + _asm_extable 49b, .L_fixup_handle_s0 + _asm_extable 50b, .L_fixup_handle_s0 + _asm_extable 51b, .L_fixup_handle_s4 + _asm_extable 52b, .L_fixup_handle_s0 + _asm_extable 53b, .L_fixup_handle_s0 + _asm_extable 54b, .L_fixup_handle_s0 + _asm_extable 55b, .L_fixup_handle_s4 + _asm_extable 56b, .L_fixup_handle_s0 + _asm_extable 57b, .L_fixup_handle_s0 SYM_FUNC_END(__copy_user_fast) diff --git a/arch/loongarch/lib/csum.c b/arch/loongarch/lib/csum.c new file mode 100644 index 000000000000..a5e84b403c3b --- /dev/null +++ b/arch/loongarch/lib/csum.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2019-2020 Arm Ltd. + +#include <linux/compiler.h> +#include <linux/kasan-checks.h> +#include <linux/kernel.h> + +#include <net/checksum.h> + +static u64 accumulate(u64 sum, u64 data) +{ + sum += data; + if (sum < data) + sum += 1; + return sum; +} + +/* + * We over-read the buffer and this makes KASAN unhappy. Instead, disable + * instrumentation and call kasan explicitly. + */ +unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len) +{ + unsigned int offset, shift, sum; + const u64 *ptr; + u64 data, sum64 = 0; + + if (unlikely(len == 0)) + return 0; + + offset = (unsigned long)buff & 7; + /* + * This is to all intents and purposes safe, since rounding down cannot + * result in a different page or cache line being accessed, and @buff + * should absolutely not be pointing to anything read-sensitive. We do, + * however, have to be careful not to piss off KASAN, which means using + * unchecked reads to accommodate the head and tail, for which we'll + * compensate with an explicit check up-front. + */ + kasan_check_read(buff, len); + ptr = (u64 *)(buff - offset); + len = len + offset - 8; + + /* + * Head: zero out any excess leading bytes. Shifting back by the same + * amount should be at least as fast as any other way of handling the + * odd/even alignment, and means we can ignore it until the very end. + */ + shift = offset * 8; + data = *ptr++; + data = (data >> shift) << shift; + + /* + * Body: straightforward aligned loads from here on (the paired loads + * underlying the quadword type still only need dword alignment). The + * main loop strictly excludes the tail, so the second loop will always + * run at least once. + */ + while (unlikely(len > 64)) { + __uint128_t tmp1, tmp2, tmp3, tmp4; + + tmp1 = *(__uint128_t *)ptr; + tmp2 = *(__uint128_t *)(ptr + 2); + tmp3 = *(__uint128_t *)(ptr + 4); + tmp4 = *(__uint128_t *)(ptr + 6); + + len -= 64; + ptr += 8; + + /* This is the "don't dump the carry flag into a GPR" idiom */ + tmp1 += (tmp1 >> 64) | (tmp1 << 64); + tmp2 += (tmp2 >> 64) | (tmp2 << 64); + tmp3 += (tmp3 >> 64) | (tmp3 << 64); + tmp4 += (tmp4 >> 64) | (tmp4 << 64); + tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64); + tmp1 += (tmp1 >> 64) | (tmp1 << 64); + tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64); + tmp3 += (tmp3 >> 64) | (tmp3 << 64); + tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64); + tmp1 += (tmp1 >> 64) | (tmp1 << 64); + tmp1 = ((tmp1 >> 64) << 64) | sum64; + tmp1 += (tmp1 >> 64) | (tmp1 << 64); + sum64 = tmp1 >> 64; + } + while (len > 8) { + __uint128_t tmp; + + sum64 = accumulate(sum64, data); + tmp = *(__uint128_t *)ptr; + + len -= 16; + ptr += 2; + + data = tmp >> 64; + sum64 = accumulate(sum64, tmp); + } + if (len > 0) { + sum64 = accumulate(sum64, data); + data = *ptr; + len -= 8; + } + /* + * Tail: zero any over-read bytes similarly to the head, again + * preserving odd/even alignment. + */ + shift = len * -8; + data = (data << shift) >> shift; + sum64 = accumulate(sum64, data); + + /* Finally, folding */ + sum64 += (sum64 >> 32) | (sum64 << 32); + sum = sum64 >> 32; + sum += (sum >> 16) | (sum << 16); + if (offset & 1) + return (u16)swab32(sum); + + return sum >> 16; +} + +__sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum csum) +{ + __uint128_t src, dst; + u64 sum = (__force u64)csum; + + src = *(const __uint128_t *)saddr->s6_addr; + dst = *(const __uint128_t *)daddr->s6_addr; + + sum += (__force u32)htonl(len); + sum += (u32)proto << 24; + src += (src >> 64) | (src << 64); + dst += (dst >> 64) | (dst << 64); + + sum = accumulate(sum, src >> 64); + sum = accumulate(sum, dst >> 64); + + sum += ((sum >> 32) | (sum << 32)); + return csum_fold((__force __wsum)(sum >> 32)); +} +EXPORT_SYMBOL(csum_ipv6_magic); diff --git a/arch/loongarch/lib/error-inject.c b/arch/loongarch/lib/error-inject.c new file mode 100644 index 000000000000..afc9e1c7c973 --- /dev/null +++ b/arch/loongarch/lib/error-inject.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/error-injection.h> +#include <linux/kprobes.h> + +void override_function_with_return(struct pt_regs *regs) +{ + instruction_pointer_set(regs, regs->regs[1]); +} +NOKPROBE_SYMBOL(override_function_with_return); diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S index 3b7e1dec7109..39ce6621c704 100644 --- a/arch/loongarch/lib/memcpy.S +++ b/arch/loongarch/lib/memcpy.S @@ -44,6 +44,66 @@ SYM_FUNC_START(__memcpy_generic) SYM_FUNC_END(__memcpy_generic) _ASM_NOKPROBE(__memcpy_generic) + .align 5 +SYM_FUNC_START_NOALIGN(__memcpy_small) + pcaddi t0, 8 + slli.d a2, a2, 5 + add.d t0, t0, a2 + jr t0 + + .align 5 +0: jr ra + + .align 5 +1: ld.b t0, a1, 0 + st.b t0, a0, 0 + jr ra + + .align 5 +2: ld.h t0, a1, 0 + st.h t0, a0, 0 + jr ra + + .align 5 +3: ld.h t0, a1, 0 + ld.b t1, a1, 2 + st.h t0, a0, 0 + st.b t1, a0, 2 + jr ra + + .align 5 +4: ld.w t0, a1, 0 + st.w t0, a0, 0 + jr ra + + .align 5 +5: ld.w t0, a1, 0 + ld.b t1, a1, 4 + st.w t0, a0, 0 + st.b t1, a0, 4 + jr ra + + .align 5 +6: ld.w t0, a1, 0 + ld.h t1, a1, 4 + st.w t0, a0, 0 + st.h t1, a0, 4 + jr ra + + .align 5 +7: ld.w t0, a1, 0 + ld.w t1, a1, 3 + st.w t0, a0, 0 + st.w t1, a0, 3 + jr ra + + .align 5 +8: ld.d t0, a1, 0 + st.d t0, a0, 0 + jr ra +SYM_FUNC_END(__memcpy_small) +_ASM_NOKPROBE(__memcpy_small) + /* * void *__memcpy_fast(void *dst, const void *src, size_t n) * @@ -52,14 +112,27 @@ _ASM_NOKPROBE(__memcpy_generic) * a2: n */ SYM_FUNC_START(__memcpy_fast) - move a3, a0 - beqz a2, 3f + sltui t0, a2, 9 + bnez t0, __memcpy_small + + add.d a3, a1, a2 + add.d a2, a0, a2 + ld.d a6, a1, 0 + ld.d a7, a3, -8 + + /* align up destination address */ + andi t1, a0, 7 + sub.d t0, zero, t1 + addi.d t0, t0, 8 + add.d a1, a1, t0 + add.d a5, a0, t0 - ori a4, zero, 64 - blt a2, a4, 2f + addi.d a4, a3, -64 + bgeu a1, a4, .Llt64 /* copy 64 bytes at a time */ -1: ld.d t0, a1, 0 +.Lloop64: + ld.d t0, a1, 0 ld.d t1, a1, 8 ld.d t2, a1, 16 ld.d t3, a1, 24 @@ -67,32 +140,54 @@ SYM_FUNC_START(__memcpy_fast) ld.d t5, a1, 40 ld.d t6, a1, 48 ld.d t7, a1, 56 - st.d t0, a0, 0 - st.d t1, a0, 8 - st.d t2, a0, 16 - st.d t3, a0, 24 - st.d t4, a0, 32 - st.d t5, a0, 40 - st.d t6, a0, 48 - st.d t7, a0, 56 - - addi.d a0, a0, 64 addi.d a1, a1, 64 - addi.d a2, a2, -64 - bge a2, a4, 1b - - beqz a2, 3f + st.d t0, a5, 0 + st.d t1, a5, 8 + st.d t2, a5, 16 + st.d t3, a5, 24 + st.d t4, a5, 32 + st.d t5, a5, 40 + st.d t6, a5, 48 + st.d t7, a5, 56 + addi.d a5, a5, 64 + bltu a1, a4, .Lloop64 /* copy the remaining bytes */ -2: ld.b t0, a1, 0 - st.b t0, a0, 0 - addi.d a0, a0, 1 - addi.d a1, a1, 1 - addi.d a2, a2, -1 - bgt a2, zero, 2b +.Llt64: + addi.d a4, a3, -32 + bgeu a1, a4, .Llt32 + ld.d t0, a1, 0 + ld.d t1, a1, 8 + ld.d t2, a1, 16 + ld.d t3, a1, 24 + addi.d a1, a1, 32 + st.d t0, a5, 0 + st.d t1, a5, 8 + st.d t2, a5, 16 + st.d t3, a5, 24 + addi.d a5, a5, 32 + +.Llt32: + addi.d a4, a3, -16 + bgeu a1, a4, .Llt16 + ld.d t0, a1, 0 + ld.d t1, a1, 8 + addi.d a1, a1, 16 + st.d t0, a5, 0 + st.d t1, a5, 8 + addi.d a5, a5, 16 + +.Llt16: + addi.d a4, a3, -8 + bgeu a1, a4, .Llt8 + ld.d t0, a1, 0 + st.d t0, a5, 0 + +.Llt8: + st.d a6, a0, 0 + st.d a7, a2, -8 /* return */ -3: move a0, a3 jr ra SYM_FUNC_END(__memcpy_fast) _ASM_NOKPROBE(__memcpy_fast) diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S index b796c3d6da05..45b725ba7867 100644 --- a/arch/loongarch/lib/memmove.S +++ b/arch/loongarch/lib/memmove.S @@ -11,23 +11,9 @@ #include <asm/regdef.h> SYM_FUNC_START(memmove) - blt a0, a1, 1f /* dst < src, memcpy */ - blt a1, a0, 3f /* src < dst, rmemcpy */ + blt a0, a1, memcpy /* dst < src, memcpy */ + blt a1, a0, rmemcpy /* src < dst, rmemcpy */ jr ra /* dst == src, return */ - - /* if (src - dst) < 64, copy 1 byte at a time */ -1: ori a3, zero, 64 - sub.d t0, a1, a0 - blt t0, a3, 2f - b memcpy -2: b __memcpy_generic - - /* if (dst - src) < 64, copy 1 byte at a time */ -3: ori a3, zero, 64 - sub.d t0, a0, a1 - blt t0, a3, 4f - b rmemcpy -4: b __rmemcpy_generic SYM_FUNC_END(memmove) _ASM_NOKPROBE(memmove) @@ -76,50 +62,80 @@ _ASM_NOKPROBE(__rmemcpy_generic) * a2: n */ SYM_FUNC_START(__rmemcpy_fast) - move a3, a0 - beqz a2, 3f + sltui t0, a2, 9 + bnez t0, __memcpy_small - add.d a0, a0, a2 - add.d a1, a1, a2 + add.d a3, a1, a2 + add.d a2, a0, a2 + ld.d a6, a1, 0 + ld.d a7, a3, -8 + + /* align up destination address */ + andi t1, a2, 7 + sub.d a3, a3, t1 + sub.d a5, a2, t1 - ori a4, zero, 64 - blt a2, a4, 2f + addi.d a4, a1, 64 + bgeu a4, a3, .Llt64 /* copy 64 bytes at a time */ -1: ld.d t0, a1, -8 - ld.d t1, a1, -16 - ld.d t2, a1, -24 - ld.d t3, a1, -32 - ld.d t4, a1, -40 - ld.d t5, a1, -48 - ld.d t6, a1, -56 - ld.d t7, a1, -64 - st.d t0, a0, -8 - st.d t1, a0, -16 - st.d t2, a0, -24 - st.d t3, a0, -32 - st.d t4, a0, -40 - st.d t5, a0, -48 - st.d t6, a0, -56 - st.d t7, a0, -64 - - addi.d a0, a0, -64 - addi.d a1, a1, -64 - addi.d a2, a2, -64 - bge a2, a4, 1b - - beqz a2, 3f +.Lloop64: + ld.d t0, a3, -8 + ld.d t1, a3, -16 + ld.d t2, a3, -24 + ld.d t3, a3, -32 + ld.d t4, a3, -40 + ld.d t5, a3, -48 + ld.d t6, a3, -56 + ld.d t7, a3, -64 + addi.d a3, a3, -64 + st.d t0, a5, -8 + st.d t1, a5, -16 + st.d t2, a5, -24 + st.d t3, a5, -32 + st.d t4, a5, -40 + st.d t5, a5, -48 + st.d t6, a5, -56 + st.d t7, a5, -64 + addi.d a5, a5, -64 + bltu a4, a3, .Lloop64 /* copy the remaining bytes */ -2: ld.b t0, a1, -1 - st.b t0, a0, -1 - addi.d a0, a0, -1 - addi.d a1, a1, -1 - addi.d a2, a2, -1 - bgt a2, zero, 2b +.Llt64: + addi.d a4, a1, 32 + bgeu a4, a3, .Llt32 + ld.d t0, a3, -8 + ld.d t1, a3, -16 + ld.d t2, a3, -24 + ld.d t3, a3, -32 + addi.d a3, a3, -32 + st.d t0, a5, -8 + st.d t1, a5, -16 + st.d t2, a5, -24 + st.d t3, a5, -32 + addi.d a5, a5, -32 + +.Llt32: + addi.d a4, a1, 16 + bgeu a4, a3, .Llt16 + ld.d t0, a3, -8 + ld.d t1, a3, -16 + addi.d a3, a3, -16 + st.d t0, a5, -8 + st.d t1, a5, -16 + addi.d a5, a5, -16 + +.Llt16: + addi.d a4, a1, 8 + bgeu a4, a3, .Llt8 + ld.d t0, a3, -8 + st.d t0, a5, -8 + +.Llt8: + st.d a6, a0, 0 + st.d a7, a2, -8 /* return */ -3: move a0, a3 jr ra SYM_FUNC_END(__rmemcpy_fast) _ASM_NOKPROBE(__rmemcpy_fast) diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S index a9eb732ab2ad..b39c6194e3ae 100644 --- a/arch/loongarch/lib/memset.S +++ b/arch/loongarch/lib/memset.S @@ -56,39 +56,107 @@ _ASM_NOKPROBE(__memset_generic) * a2: n */ SYM_FUNC_START(__memset_fast) - move a3, a0 - beqz a2, 3f - - ori a4, zero, 64 - blt a2, a4, 2f - /* fill a1 to 64 bits */ fill_to_64 a1 - /* set 64 bytes at a time */ -1: st.d a1, a0, 0 - st.d a1, a0, 8 - st.d a1, a0, 16 - st.d a1, a0, 24 - st.d a1, a0, 32 - st.d a1, a0, 40 - st.d a1, a0, 48 - st.d a1, a0, 56 + sltui t0, a2, 9 + bnez t0, .Lsmall - addi.d a0, a0, 64 - addi.d a2, a2, -64 - bge a2, a4, 1b + add.d a2, a0, a2 + st.d a1, a0, 0 - beqz a2, 3f + /* align up address */ + addi.d a3, a0, 8 + bstrins.d a3, zero, 2, 0 + + addi.d a4, a2, -64 + bgeu a3, a4, .Llt64 + + /* set 64 bytes at a time */ +.Lloop64: + st.d a1, a3, 0 + st.d a1, a3, 8 + st.d a1, a3, 16 + st.d a1, a3, 24 + st.d a1, a3, 32 + st.d a1, a3, 40 + st.d a1, a3, 48 + st.d a1, a3, 56 + addi.d a3, a3, 64 + bltu a3, a4, .Lloop64 /* set the remaining bytes */ -2: st.b a1, a0, 0 - addi.d a0, a0, 1 - addi.d a2, a2, -1 - bgt a2, zero, 2b +.Llt64: + addi.d a4, a2, -32 + bgeu a3, a4, .Llt32 + st.d a1, a3, 0 + st.d a1, a3, 8 + st.d a1, a3, 16 + st.d a1, a3, 24 + addi.d a3, a3, 32 + +.Llt32: + addi.d a4, a2, -16 + bgeu a3, a4, .Llt16 + st.d a1, a3, 0 + st.d a1, a3, 8 + addi.d a3, a3, 16 + +.Llt16: + addi.d a4, a2, -8 + bgeu a3, a4, .Llt8 + st.d a1, a3, 0 + +.Llt8: + st.d a1, a2, -8 /* return */ -3: move a0, a3 + jr ra + + .align 4 +.Lsmall: + pcaddi t0, 4 + slli.d a2, a2, 4 + add.d t0, t0, a2 + jr t0 + + .align 4 +0: jr ra + + .align 4 +1: st.b a1, a0, 0 + jr ra + + .align 4 +2: st.h a1, a0, 0 + jr ra + + .align 4 +3: st.h a1, a0, 0 + st.b a1, a0, 2 + jr ra + + .align 4 +4: st.w a1, a0, 0 + jr ra + + .align 4 +5: st.w a1, a0, 0 + st.b a1, a0, 4 + jr ra + + .align 4 +6: st.w a1, a0, 0 + st.h a1, a0, 4 + jr ra + + .align 4 +7: st.w a1, a0, 0 + st.w a1, a0, 3 + jr ra + + .align 4 +8: st.d a1, a0, 0 jr ra SYM_FUNC_END(__memset_fast) _ASM_NOKPROBE(__memset_fast) diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index e018aed34586..3b7d8129570b 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -41,7 +41,7 @@ * don't have to care about aliases on other CPUs. */ unsigned long empty_zero_page, zero_page_mask; -EXPORT_SYMBOL_GPL(empty_zero_page); +EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(zero_page_mask); void setup_zero_pages(void) @@ -270,7 +270,7 @@ pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss; #endif #ifndef __PAGETABLE_PMD_FOLDED pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss; -EXPORT_SYMBOL_GPL(invalid_pmd_table); +EXPORT_SYMBOL(invalid_pmd_table); #endif pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss; EXPORT_SYMBOL(invalid_pte_table); diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 288003a9f0ca..db9342b2d0e6 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1022,6 +1022,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext emit_atomic(insn, ctx); break; + /* Speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + default: pr_err("bpf_jit: unknown opcode %02x\n", code); return -EINVAL; @@ -1248,3 +1252,9 @@ out: return prog; } + +/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ +bool bpf_jit_supports_subprog_tailcalls(void) +{ + return true; +} diff --git a/arch/loongarch/power/suspend_asm.S b/arch/loongarch/power/suspend_asm.S index 90da899c06a1..e2fc3b4e31f0 100644 --- a/arch/loongarch/power/suspend_asm.S +++ b/arch/loongarch/power/suspend_asm.S @@ -80,6 +80,10 @@ SYM_INNER_LABEL(loongarch_wakeup_start, SYM_L_GLOBAL) JUMP_VIRT_ADDR t0, t1 + /* Enable PG */ + li.w t0, 0xb0 # PLV=0, IE=0, PG=1 + csrwr t0, LOONGARCH_CSR_CRMD + la.pcrel t0, acpi_saved_sp ld.d sp, t0, 0 SETUP_WAKEUP diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index d89e2ac75f7b..461240ab4436 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -1,9 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Objects to go into the VDSO. -# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before -# the inclusion of generic Makefile. -ARCH_REL_TYPE_ABS := R_LARCH_32|R_LARCH_64|R_LARCH_MARK_LA|R_LARCH_JUMP_SLOT +# Include the generic Makefile to check the built vdso. include $(srctree)/lib/vdso/Makefile obj-vdso-y := elf.o vgetcpu.o vgettimeofday.o sigreturn.o |